Code:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Reads a GenBank-style flat file from stdin and writes a FASTA-like record
 * to stdout:
 *   1. The first line containing "LOCUS" is printed as ">" followed by the
 *      text starting at column 12 (leading blanks skipped).
 *   2. After a line containing "ORIGIN", every lowercase letter up to the
 *      first '/' (or end of input) is collected and printed as one
 *      contiguous line.
 *
 * Returns 0 in all cases.
 */
int main(void)
{
    const char test[] = "ORIGIN";
    char header[100];
    char sequence[100000];
    char buffer[10000];
    size_t seq_len = 0; /* must start at 0 and be wide enough to index sequence[] */
    int c;              /* int, not char, so EOF can be told apart from data */

    while (fgets(header, sizeof header, stdin)) {
        if (strstr(header, "LOCUS")) {
            size_t w = 12;
            size_t header_len = strlen(header);

            /* Never start scanning past the end of a short line. */
            if (w > header_len) {
                w = header_len;
            }
            while (isblank((unsigned char)header[w])) {
                ++w;
            }
            printf(">%s", &header[w]);
            break;
        }
    }

    while (fgets(buffer, sizeof buffer, stdin)) {
        if (strstr(buffer, test)) { /* start obtaining bases after ORIGIN */
            /* Stop at the "//" terminator, or at EOF if it is missing. */
            while ((c = getchar()) != EOF && c != '/') {
                /* Keep only base letters; drop digits, blanks and newlines.
                 * One byte is reserved for the terminating NUL. */
                if (c >= 'a' && c <= 'z' && seq_len < sizeof sequence - 1) {
                    sequence[seq_len++] = (char)c;
                }
            }
        }
    }

    sequence[seq_len] = '\0'; /* printf("%s") needs a terminated string */
    printf("%s\n", sequence);
    return 0;
}
INPUT (it's an entire file, but I am only concerned with the following section of it):
1 cctcagatca ctctttggca acgacccctc gycacmataa agataggggg gcaactaaag
61 gaagctctat tagahacagg agcagatgat accatattma aagaaataaa tttgccagga
121 agatggaarc caaaaatgat agggggaatt ggaggtttta tcaaagtaag acagtatgat
181 cagatactca tagaaatctg tggacataaa gttataggta cagtattagt aggacctaca
241 cctgtcaacg taattggaag aaatctgttr actcagattg gttgcacttt aaatttt
//
WRONG OUTPUT
gaagaaatctgttractcagattggttgcactttaaattttgataggggg gcaactaaaggaagctctattagahacaggagcagatgataccatattma aagaaataaatttgccaggaagatggaa
The output should be:
>AJ002507 297 bp DNA linear VRL 14-NOV-2006
cctcagatca ctctttggca acgacccctc gycacmataa agataggggg gcaactaaag
gaagctctat tagahacagg agcagatgat accatattma aagaaataaa tttgccagga
agatggaarc caaaaatgat agggggaatt ggaggtttta tcaaagtaag acagtatgat
cctgtcaacg taattggaag aaatctgttr actcagattg gttgcacttt aaatttt