I am not sure what I did wrong but, I am getting all kinds of errors and warnings. I've looked through the code and can't pick up on anything. Any help would be great.
Code:
// A Program to count the count of 4-mers in a nucleotide sequence.
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
FILE *input ;
FILE *outpur ;
char buffer[1000] ;
int i = 0 ;
int c ;
int w,x,y,z ;
char tr ;
char *seqData ;
seqData = (char *)malloc(10000000) ;
char t1, t2, t3, t4 ;
int index ;
int acgt_to_0123(char *tr)
{
if(*tr == 'a')
{
return 0 ;
}
else if(*tr == 'c')
{
return 1 ;
}
else if(*tr == 'g')
{
return 2 ;
}
else if(*tr == 't')
{
return 3 ;
}
else
{
printf("Non acgt character\n") ;
exit(0) ;
}
}
main( int argc, char **argv )
{
// Initialize 4-D array
int tupleCount[4][4][4][4] ;
for(w = 0 ; w < 4 ; w++ )
for(x = 0 ; x < 4 ; x++ )
for(y = 0 ; y < 4 ; y++ )
for(z = 0 ; z < 4 ; z++ )
{
tupleCount[w][x][y][z] ;
}
// Open input file to read from
if( ! ( input = fopen( argv[1], "r" ) ) )
{
printf( "COULD NOT OPEN FILE %s - Exit!\n", argv[1]) ;
exit(1) ;
}
// Collect sequence from GenBank file
while(fgets(buffer, 1000, input))
{
// start obtaining bases after ORIGIN
if(strstr(buffer, "ORIGIN"))
{
while((c=getc(input)) != '/' && c != EOF)
{
if(c >= 'a' && c <= 'z')
{
seqData[i++] = c ;
}
}
}
}
// Scan DNA sequence for each 4-mer
for( index = 0 ; index < strlen(seqData) - 3 ; ++index )
{
t1 = acgt_to_0123(seqData[index]) ;
t2 = acgt_to_0123(seqData[index + 1]) ;
t3 = acgt_to_0123(seqData[index + 2]) ;
t4 = acgt_to_0123(seqData[index + 3]) ;
// Accumulate a count to find distribution
tupleCount[t1][t2][t3][t4]++ ;
}
fclose(input) ;
printf("Here is the distribution of 4-mers:\n\n%s", tupleCount ) ;
free(seqData) ;
return(0) ;
}
thanks