Code:
void*
NOTHROW STDCALL
vmul(void* SRC, size_t srcSize, char srcEndian, void *NUM, size_t numSize, char numEndian)
{
if (!SRC || !srcSize || !NUM || !numSize) return NULL;
if (is0(SRC, srcSize, srcEndian) || is0(NUM, numSize, numEndian))
return memset(SRC, 0, srcSize);
uchar *tmp = calloc(srcSize, 1);
if (!tmp) return NULL;
schar s1, s2, n1, n2;
uintptr_t TEND, TBEG = vendian((uintptr_t)tmp, srcSize, &TEND, &s1, &s2, srcEndian);
uintptr_t SEND, SBEG = vendian((uintptr_t)SRC, srcSize, &SEND, &s1, &s2, srcEndian);
uintptr_t NEND, NBEG = vendian((uintptr_t)NUM, numSize, &NEND, &n1, &n2, numEndian);
uchar i = 1, bit, shift = CHAR_BIT - 1, *num = (uchar*)NBEG;
loop:
for (bit = 1; bit; bit <<= 1, vshl_(SBEG, SEND, shift, 1, s1, s2))
if (*num & bit) (void)vadd_(TBEG, TEND, s1, s2, SBEG, SEND, s1, s2);
if (NBEG == NEND)
{
(void)memcpy(SRC, tmp, srcSize);
free(tmp);
return SRC;
}
num = (uchar*)(NBEG += i ? n1 : n2);
i ^= 1;
goto loop;
}
uintptr_t
NOTHROW STDCALL
vmul_karatsuba(
uintptr_t SRC, size_t srcSize, char srcEndian,
uintptr_t NUM, size_t numSize, char numEndian)
{
//#error "Bug causing write to BEFORE buffer"
if (!srcSize || !numSize) return 0;
size_t srcSize2 = srcSize / 2;
size_t numSize2 = numSize / 2;
uchar si = 1, ni = 1;
schar s1, s2, s3,s4, n1, n2,n3,n4;
void *SCPY = malloc(srcSize);
if (!SCPY) return 0;
void *NCPY = malloc(numSize);
if (!NCPY)
{
free(SCPY);
return 0;
}
uintptr_t SHIGH, SLOW = vendian(SRC, srcSize2, &SHIGH, &s1, &s2, srcEndian), SEND = SHIGH;
uintptr_t NHIGH, NLOW = vendian(NUM, numSize2, &NHIGH, &n1, &n2, numEndian), NEND = NHIGH;
uintptr_t SCHIGH, SCLOW = vendian((uintptr_t)SCPY, srcSize2, &SCHIGH, &s1, &s2, srcEndian);
uintptr_t NCHIGH, NCLOW = vendian((uintptr_t)NCPY, numSize2, &NCHIGH, &n1, &n2, numEndian);
s3 = -s1;
s4 = -s2;
while (srcSize-- > srcSize2)
{
SHIGH += si ? s3 : s4;
SCHIGH += si ? s3 : s4;
si ^= 1;
}
n3 = -n1;
n4 = -n2;
while (numSize-- > numSize2)
{
NHIGH += ni ? n3 : n4;
NCHIGH += ni ? n3 : n4;
ni ^= 1;
}
uintptr_t z0 = vmul_karatsuba(SCLOW, srcSize2, srcEndian, NCLOW, numSize2, numEndian);
uintptr_t z2 = vmul_karatsuba(SCHIGH, srcSize2, srcEndian, NCHIGH, numSize2, numEndian);
vadd_(SLOW, SHIGH + si ? s3 : s4, s1, s2, SHIGH, SEND, s1, s2);
vadd_(NLOW, NHIGH + ni ? n3 : n4, n1, n2, NHIGH, NEND, n1, n2);
uintptr_t z1 = vmul_karatsuba(SLOW, srcSize2, srcEndian, NUM, numSize2, numEndian);
free(SCPY);
free(NCPY);
// return (z2 * 10 ^ (2 * srcSize2)) + ((z1 - z2 - z0) * 10 ^ (srcSize2)) + (z0);
return (uintptr_t)vadd(
vadd(
vmul(
vsub(
vsub((void*)z1, srcSize2, srcEndian, (void*)z2, srcSize2, srcEndian),
srcSize2, srcEndian, (void*)z0, srcSize2, srcEndian),
srcSize2, srcEndian, &srcSize2, sizeof(size_t), getIntEndian()),
srcSize2, srcEndian, (void*)z0, srcSize2, srcEndian),
srcSize2, srcEndian, vmul((void*)z2, srcSize2, srcEndian, &srcSize, sizeof(size_t), getIntEndian()), srcSize2, srcEndian);
}
You can see the output at the link I posted, I modified the normal math functions to use labels instead to stop Microsoft VC complaining about loops with constant expressions and somehow broke my multiplication function, I would take a look at it myself but I have some things to do now so figured best to just post it and see if anyone posts an explanation of what went wrong on either or both functions by the time I get back.