# Thread: RSA encryption with 1024 bit keys

1. Yeah, Ruby (it's a programming language) does. Equivalent C++ code will run faster than Ruby code, because Ruby is interpreted (depending on your definition of "equivalent").

Your multiplication function has been tested and works correctly, right? We can't see what's wrong until we see the actual code for your exponentiation algorithm.

2. Code:
```MassiveInt MassiveInt::PowMod(const MassiveInt& power, const MassiveInt& mod) const {
    // Modular exponentiation by repeated squaring.
    // Factors[k] holds (*this)^(2^k) mod `mod` for every bit position k of
    // `power`; the result is the product of the factors at set bit positions,
    // reduced mod `mod` after every multiplication.
    int nFactors=power.nSize*8;
    MassiveInt **Factors=new MassiveInt*[nFactors];
    newcount++;
    for (int i=0;i<nFactors;i++) {
        Factors[i]=0;
    }
    // Factors[0] = base reduced mod `mod` (operator/ returns quotient+remainder; .r is the remainder).
    Factors[0]=new MassiveInt(*this);
    newcount++;
    *(Factors[0])=(*(Factors[0])/mod).r;
    // Each factor is the square of the previous one, reduced mod `mod`.
    // PERF FIX: removed the std::cout << ... << std::flush debug traces that
    // ran (and flushed stdout) once per squaring and once per multiply --
    // per-iteration flushed I/O dominated the runtime of this hot loop.
    for (int i=0;i<power.nSize;i++) {
        for (int j=0;j<8;j++) {
            if ((i==0) && (j==0)) continue;
            Factors[(8*i)+j]=new MassiveInt(((*(Factors[(8*i)+j-1])).Pow(2)/mod).r);
            newcount++;
        }
    }
    // Multiply together the factors selected by the set bits of `power`,
    // reducing after each step so operands stay small.
    MassiveInt Product=1;
    for (int i=0;i<power.nSize;i++) {
        for (int j=0;j<8;j++) {
            if (GETBIT(power.pNum[i],j)) {
                Product*=*(Factors[(8*i)+j]);
                Product=(Product/mod).r;
            }
        }
    }
    // Release every allocated factor and the pointer table itself.
    for (int i=0;i<nFactors;i++) {
        if (Factors[i]!=0) {
            delete Factors[i];
            newcount--;
        }
    }

    delete [] Factors;
    newcount--;
    return Product;
}```
Code:
```MassiveIntDiv MassiveInt::operator/(const MassiveInt& rhs) {
// Long division: returns both quotient (mid.q) and remainder (mid.r) of
// *this / rhs packaged in a MassiveIntDiv.
// Strategy: repeatedly build a quotient estimate q by aligning rhs under
// every set bit of the shrinking Interval; q*rhs never exceeds Interval
// (assuming DigitCount() is the bit length of rhs -- TODO confirm), so the
// estimate only under-shoots and the do/while loop converges; a final
// linear loop mops up any remaining whole multiples of rhs.
MassiveIntDiv mid;
mid.q=0;
MassiveInt q=0;
MassiveInt Interval=*this;
MassiveInt Two=2;

do {
// Fold the previous pass's estimate into the accumulated quotient.
mid.q+=q;
q=0;
// For every set bit of Interval high enough for rhs to fit beneath it,
// add the corresponding power of two to this pass's estimate.
for (int i=0;i<Interval.nSize;i++) {
for (int j=0;j<8;j++) {
if (rhs.DigitCount()>(8*i)+j) continue;
if (GETBIT(Interval.pNum[i],j)) q+=Two.Pow((8*i)+j-rhs.DigitCount());
}
}
// Subtract the covered portion; loop again while progress was made.
Interval-=q*rhs;
} while (q>0);
// Final correction: Interval may still contain a few whole multiples of rhs.
while (Interval>=rhs) {
mid.q++;
Interval-=rhs;
}
mid.r=Interval;

return mid;
}```
Code:
```MassiveInt MassiveInt::operator* (const MassiveInt &param) const {
// Shift-and-add long multiplication.  For every set bit of the narrower
// operand b, a copy of the wider operand a shifted left by that bit's
// position is stored in Terms[]; the terms are then summed.
// a = wider operand, b = narrower (fewer term-generation iterations).
const MassiveInt &a=this->nSize>param.nSize?(*this):param;
const MassiveInt &b=this->nSize>param.nSize?param:(*this);
MassiveInt **Terms=new MassiveInt*[b.nSize*8];
newcount++;
for (int i=0;i<b.nSize;i++) {
for (int j=0;j<8;j++) {
if (GETBIT(b.pNum[i],j)) {
MassiveInt *&ThisTerm=Terms[(8*i)+j];
ThisTerm=new MassiveInt();
newcount++;
// Discard any buffer the default constructor allocated; a fresh one
// sized for a shifted left by (8i+j) bits replaces it below.
if (ThisTerm->pNum!=NULL) {
delete [] ThisTerm->pNum;
newcount--;
}
ThisTerm->nSize=static_cast<int>(((a.nSize*8)+(8*i)+j)/8)+1;
ThisTerm->pNum=new BYTE[ThisTerm->nSize];
newcount++;
//Shift a across by 8i+j places
int iSrc=(a.nSize*8)-1;
int iDest=(ThisTerm->nSize*8)-1;

// Zero-fill the bits above the shifted copy of a...
while (iDest>(8*i)+j+(a.nSize*8)-1) {
SETBIT(ThisTerm->pNum[static_cast<int>(iDest/8)],iDest%8,0);
iDest--;
}
// ...copy a's bits, highest first...
while (iSrc>=0) {
int BitSet=GETBIT(a.pNum[static_cast<int>(iSrc/8)],iSrc%8);
SETBIT(ThisTerm->pNum[static_cast<int>(iDest/8)],iDest%8,BitSet);
iSrc--;
iDest--;
}
// ...and zero-fill the low bits vacated by the shift.
while (iDest>=0) {
SETBIT(ThisTerm->pNum[static_cast<int>(iDest/8)],iDest%8,0);
iDest--;
}
}
else {
// Bit clear: no term contributed at this position.
Terms[(8*i)+j]=0;
}

}
}
//Sum all MassiveInts pointed to by non-zero elts of Terms[]
MassiveInt Sum;
for (int i=0;i<b.nSize*8;i++) {
if (Terms[i]!=0) {
Sum+=*(Terms[i]);
delete Terms[i];
newcount--;
}
}
delete [] Terms;
newcount--;
// Sign: negative iff exactly one operand is negative.
// NOTE(review): a zero product can end up flagged negative here -- confirm
// callers/comparisons normalise a negative zero if that matters.
Sum.bNeg=a.bNeg^b.bNeg;
return Sum;
}```
Code:
```MassiveInt MassiveInt::operator- (const MassiveInt& rhs) const{
    // Subtraction with sign handling.  The negative and mixed-sign cases are
    // rewritten in terms of addition or a non-negative magnitude subtraction,
    // so the bitwise loop below only ever runs with *this >= rhs >= 0.
    if (this->bNeg && !rhs.bNeg) {
        return -(-*this+rhs);      // (-a) - b == -(a + b)
    }
    if (!this->bNeg && rhs.bNeg) {
        return (-rhs+*this);       // a - (-b) == a + b
    }
    if (this->bNeg && rhs.bNeg) {
        if (rhs<=*this) return -rhs-(-*this);     // (-a) - (-b) == b - a
        if (rhs>*this) return -(-*this-(-rhs));
    }
    if (!this->bNeg && !rhs.bNeg) {
        if (*this<rhs) return -(rhs-*this);       // keep the core loop non-negative
        if (*this>=rhs) {

            // Bitwise schoolbook subtraction with borrow.  `int` (was
            // `unsigned int`) avoids the signed/unsigned comparison against
            // the int loop counters and matches operator+.
            int NewSize=this->nSize>rhs.nSize?this->nSize:rhs.nSize;
            BYTE *pSum=new BYTE[NewSize];
            newcount++;
            for (int i=0;i<NewSize;i++) {
                pSum[i]=0;
            }

            int Carry=0;   // borrow: 0 or -1

            for (int i=0;i<NewSize;i++){
                for (int j=0;j<8;j++) {
                    int ColSum=0;   // ranges over -2..1
                    if (this->nSize>i) ColSum+=GETBIT(this->pNum[i],j);
                    if (rhs.nSize>i) ColSum-=GETBIT(rhs.pNum[i],j);
                    ColSum+=Carry;

                    Carry=-(ColSum<0);
                    // BUGFIX: was ColSum%2, which is -1 when ColSum is -1
                    // (implementation-defined pre-C++11) and fed a negative
                    // value into SETBIT.  ColSum&1 is always exactly 0 or 1.
                    SETBIT(pSum[i],j,(ColSum&1));
                }

            }
            // Hand the freshly built buffer to the result, freeing whatever
            // the default constructor may have allocated.
            MassiveInt m;
            if (m.pNum!=NULL) {
                delete [] m.pNum;
                newcount--;
            }
            m.pNum=pSum;
            m.nSize=NewSize;
            m.bNeg=false;   // *this >= rhs guarantees a non-negative result (no final borrow)
            return m;
        }
    }
    return *this;   // unreachable: every sign combination is handled above
}```
Code:
```MassiveInt MassiveInt::operator+ (const MassiveInt &param) const {
    // Addition with sign handling.  Negative operands are routed through
    // operator-, so the bitwise loop below only ever sees two non-negatives.
    if (this->pNum==NULL) return param;
    if (param.pNum==NULL) return *this;
    if (*this==0) return param;
    if (param==0) return *this;

    if (this->bNeg && param.bNeg) return -(-*this+(-param));   // (-a)+(-b) == -(a+b)
    if (this->bNeg && !param.bNeg) return param-(-*this);      // (-a)+b == b-a
    if (!this->bNeg && param.bNeg) return *this-(-param);      // a+(-b) == a-b
    int NewSize=(this->nSize>param.nSize?this->nSize:param.nSize);
    // BUGFIX: grow the result by a byte when EITHER operand's top bit is set
    // (was &&, i.e. only when both were).  With &&, e.g. 0xFF + 0x01 dropped
    // the carry out of the top byte and produced 0.  When neither top bit is
    // set no carry can leave the top byte, so || is exactly sufficient (at
    // worst the extra byte stays zero).
    NewSize+=(GETBIT(this->pNum[this->nSize-1],7)||GETBIT(param.pNum[param.nSize-1],7));
    BYTE *pSum=new BYTE[NewSize];
    newcount++;
    for (int i=0;i<NewSize;i++) {
        pSum[i]=0;
    }

    // Bitwise ripple-carry addition across the full result width; bits past
    // an operand's own size contribute 0, so the final carry lands in pSum.
    unsigned int Carry=0;
    for (int i=0;i<NewSize;i++) {
        for (int j=0;j<8;j++) {
            int ColSum=Carry;
            if (this->nSize>i) ColSum+=GETBIT(this->pNum[i],j);
            if (param.nSize>i) ColSum+=GETBIT(param.pNum[i],j);
            Carry=(ColSum>1);
            SETBIT(pSum[i],j,(ColSum%2));
        }
    }
    MassiveInt newMassive;
    newMassive.pNum=pSum;
    newMassive.nSize=NewSize;
    newMassive.bNeg=false;   // both operands non-negative on this path
    return newMassive;
}```
That's the bulk of the code. Hopefully someone can see some gross inefficiencies.

3. `new` inside loops is a recipe for disaster when you want speed. Consider allocating a large chunk through `new` and splitting that chunk down to your needs yourself. `new` is expensive; in a loop the cost is usually prohibitive.

4. Okay, I'll give that a shot. Are you sure however that there are no absolutely horrible methods that I am using? It doesn't seem like I could improve my speed so much just by allocating memory more efficiently.

5. A call to `new` is roughly 9 times slower than calling an ordinary function. Do that inside a loop and the penalties soon mount up. You would be surprised how much you gain by using better allocation techniques. If you zip up massiveint.h, massiveint.cpp and massiveinttest.cpp, I'll look over the class when I get in from work if you want.

6. That'd be great if you could have a look. rsa_dll.cpp compiles to a dynamic link library, and rsa_prog.cpp imports it.

7. Better allocations will help. Use prefix, not postfix, increment — it might not matter for ints but it does for class types. Some of your algorithms could be improved, but you would have to dig down into Knuth (TAOCP) for details, or maybe check GMP's docs; I think they have a section annotating the algorithms used. You can really get a speed increase, especially on P4 machines, by going the asm route. There are plenty of details on how to do this in the link I posted earlier, but obviously you will need to know asm to do that.

8. Thanks for the advice. I'll fix those aspects up, do some reading, and get back to you on the improvement I get. Maybe I'll even do some efficiency charts or something crazy like that.

9. I cleaned up the code for long multiplication, but could only get it a little bit faster than it was before. I'm going to look into the Karatsuba algorithm, where each number is recursively split into smaller and smaller chunks, which can be added together with less effort. Hopefully that can help.

I'm thinking I could also do bit shifting more efficiently. As it is, I'm doing a lot of division and moduli to shift the multiplicand so that I can add it on to the total. Here's the code:

Code:
```                    signed iSrc=(s1*8)-1;
signed iDest=((*s3)*8)-1;
signed offset=(8*i)+j;

while (iDest>offset+iSrc) {
SETBIT(term[int(iDest/8)],iDest%8,0);
iDest--;
}
while (iDest>=offset) {
SETBIT( term[int(iDest/8)], iDest%8, GETBIT( multiplicand[int(iSrc/8)], iSrc%8));
iDest--;
iSrc--;
}
while (iDest>=0) {
SETBIT(term[int(iDest/8)],iDest%8,0);
iDest--;
}```
As you can see, a lot of work is done converting from the bit coordinate to the byte coordinate. Can I do this more efficiently?