I hope this is helpful and that you can follow what's happening.
This is much faster (O(n)) than a nested loop for long sequences (I don't know how long a virus's sequence typically is).
Code:
#include <iostream.h>
// Lookup table giving a fast way to get from a base letter to a small index:
// 'A' -> 0, 'C' -> 1, 'G' -> 2, 'T' -> 3. Every entry starts out as 0 and
// main() fills in the four letters before the table is used, so any
// character that is never assigned is treated the same as 'A'.
// Instead of reserving 256 bytes you could reserve 'T'+1 entries (whatever
// value that is), since the highest expected input is 'T'.
char toint_tbl[256] = {0};
// Inverse mapping: index 0..3 back to the letter that is printed.
// This assumes DNA is made up of only four bases (A, C, G, T).
const char toascii[4] = {'A', 'C', 'G', 'T'};
// Translates a base letter into its table index by looking it up in
// toint_tbl. Characters that were never assigned in the table yield 0.
inline int toint(char c)
{
    // Plain char may be signed; without the cast a byte >= 0x80 would become
    // a negative index and read memory in front of the 256-entry table.
    return toint_tbl[(unsigned char)c];
}
// Marks every three-letter window that occurs in BOTH seq1 and seq2.
//
//   seq1, seq2 - NUL-terminated sequences; each letter is mapped to an index
//                through toint().
//   matching   - 4x4x4 table indexed by the three mapped letters. An entry is
//                set to 0x01 when that triplet appears in both sequences.
//                Entries are only ever set, never cleared, so the caller must
//                zero the table beforehand.
//
// Each sequence is scanned once, so the work is linear in the total length.
// Sequences with fewer than three characters contribute no triplets.
// To detect four-letter sequences, add another dimension to the tables and a
// fourth letter to each window.
void matseq(char *seq1, char *seq2, char matching[][4][4])
{
    // Presence table of all three-letter windows found in seq1. It must start
    // out zeroed: it is read in the second pass, and an uninitialised entry
    // could report a triplet that never occurred in seq1.
    char seqtable[4][4][4] = {{{0}}};
    // 'register' was dropped from the cursor: deprecated in C++11, removed in C++17.
    const char *p;

    if (!seq1 || !seq2 || !matching)
    {
        return;
    }

    // A window needs three characters. Checking the first two before looking
    // at p[2] keeps the scan from reading past the terminator of a sequence
    // that is shorter than two characters.
    if (seq1[0] && seq1[1])
    {
        for (p = seq1; p[2]; ++p)
        {
            seqtable[toint(p[0])][toint(p[1])][toint(p[2])] = 0x01;
        }
    }

    // Second pass: any window of seq2 already seen in seq1 is a match.
    if (seq2[0] && seq2[1])
    {
        for (p = seq2; p[2]; ++p)
        {
            if (seqtable[toint(p[0])][toint(p[1])][toint(p[2])])
            {
                matching[toint(p[0])][toint(p[1])][toint(p[2])] = 0x01;
            }
        }
    }
}
// Demo driver: fills in the letter-to-index table, compares two hard-coded
// sequences with matseq(), and prints every three-letter sequence that the
// two have in common, one per line.
int main()
{
    // Stored as arrays rather than pointers to string literals: binding a
    // literal to a non-const char* is not valid in C++11 and later, while
    // arrays still convert to the char* parameters matseq() expects.
    char seq1[] = "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTACGTTTTTTTTTTT";
    char seq2[] = "TTTTTTTTTTTTTTTTTTTTCGTACGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT";
    // matseq() only ever sets entries, so the result table starts all zero.
    char matching[4][4][4] = {0x00};

    // Make sure an input of 'A' gets a 0, 'C' gets a 1, and so on.
    toint_tbl['A'] = 0;
    toint_tbl['C'] = 1;
    toint_tbl['G'] = 2;
    toint_tbl['T'] = 3;

    matseq(seq1, seq2, matching);

    // Output the results: walk the table and print each marked triplet.
    int a, b, c;
    for (a = 0; a < 4; ++a)
    {
        for (b = 0; b < 4; ++b)
        {
            for (c = 0; c < 4; ++c)
            {
                if (matching[a][b][c])
                {
                    cout << toascii[a] << toascii[b] << toascii[c] << endl;
                }
            }
        }
    }
    return 0;
}