Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include <errno.h>
//Model: Seed Template, Template: Other template hits
// One ATOM record, as filled in by parse() from the text fields that
// tokenise() extracts. The character arrays are used as NUL-terminated
// strings, so each holds at most one character fewer than its declared size.
typedef struct atom
{
char type[10]; // Record type, e.g. "ATOM"
int an; // Atom serial number, treated as the index (2nd column)
char mol[10]; // Molecule (3rd column)
char amino[10]; // Amino acid (4th column)
char chainid[2]; // Chain ID (5th column): room for one character plus the NUL
int pos; // Position (6th column)
float xc; // X coordinate, in Angstroms (7th column)
float yc; // Y coordinate, in Angstroms (8th column)
float zc; // Z coordinate, in Angstroms (9th column)
float occ; // Occupancy
float temp; // Temperature factor
char element[3]; // Element symbol: room for two characters plus the NUL
} atom;
/**
 * Copy the 1-based, inclusive column range [start, end] of `from` into `to`
 * as a NUL-terminated string.
 *
 * The copy is clipped twice: to the characters actually present in `from`
 * (so a short line is never read past its terminator) and to the capacity
 * of `to` (at most toLen - 1 characters are stored).
 *
 * @param to     Destination buffer; left untouched when NULL or toLen is 0.
 * @param toLen  Size of `to` in bytes, including room for the terminator.
 * @param from   NUL-terminated source text.
 * @param start  First column to copy, counted from 1.
 * @param end    Last column to copy, counted from 1.
 *
 * `to` receives an empty string when `from` is NULL, when start is 0, when
 * end < start, or when the range begins beyond the end of `from`.
 */
void copyField ( char *to, size_t toLen, const char *from, size_t start, size_t end ) {
    if ( to == NULL || toLen == 0 ) {
        return;
    }
    to[0] = '\0';
    if ( from == NULL || start == 0 || end < start ) {
        return;
    }

    // Count how many of the first `end` columns really exist, stopping at
    // the terminator so that nothing after it is ever examined.
    size_t avail = 0;
    while ( avail < end && from[avail] != '\0' ) {
        avail++;
    }
    if ( avail < start ) {
        return; // the field starts past the end of the text
    }

    size_t length = avail - start + 1;
    if ( length >= toLen ) {
        length = toLen - 1; // truncate to fit, keeping room for the NUL
    }
    memcpy( to, &from[start - 1], length );
    to[length] = '\0';
}
// You could generalise this by passing fields[] as a parameter as well.
/**
 * Split one fixed-column ATOM record into its individual text fields.
 *
 * Field i of the column table below is copied into temp[i] as a
 * NUL-terminated string (at most 14 characters each). A field that lies
 * wholly beyond the end of the line yields an empty string, and a field
 * that straddles the end of the line yields only the characters that are
 * present. The line terminator ('\n' or '\r') is never copied.
 *
 * @param line    NUL-terminated record text; may end in "\n" or "\r\n".
 * @param temp    Output array of at least `length` 15-byte buffers.
 * @param length  Number of buffers available in temp.
 * @return Number of fields written: the smaller of `length` and the number
 *         of table entries (16), or 0 when line or temp is NULL.
 */
size_t tokenise ( const char *line, char temp[][15], size_t length ) {
    // Column ranges are 1-based and inclusive.
    static const struct {
        size_t from, to;
    } fields[] = {
        {  1,  6 }, // Record name, e.g. "ATOM"
        {  7, 11 }, // Integer: atom serial number
        { 13, 16 }, // Atom: atom name
        { 17, 17 }, // Character: alternate location indicator
        { 18, 20 }, // Residue name
        { 22, 22 }, // Character: chain identifier
        { 23, 26 }, // Integer: residue sequence number
        { 27, 27 }, // AChar: code for insertion of residues
        { 31, 38 }, // Real(8.3): orthogonal X coordinate in Angstroms
        { 39, 46 }, // Real(8.3): orthogonal Y coordinate in Angstroms
        { 47, 54 }, // Real(8.3): orthogonal Z coordinate in Angstroms
        { 55, 60 }, // Real(6.2): occupancy
        { 61, 66 }, // Real(6.2): temperature factor (default = 0.0)
        { 73, 76 }, // LString(4): segment identifier, left-justified
        { 77, 78 }, // LString(2): element symbol, right-justified
        { 79, 80 }, // LString(2): charge on the atom
    };
    static const size_t nFields = sizeof(fields) / sizeof(fields[0]);

    if ( line == NULL || temp == NULL ) {
        return 0;
    }

    // Number of usable columns: everything before the line terminator.
    const size_t lineLen = strcspn( line, "\r\n" );

    size_t i;
    for ( i = 0 ; i < nFields && i < length ; i++ ) {
        if ( fields[i].from > lineLen ) {
            // Short line: this field is absent, so do not read past the end.
            temp[i][0] = '\0';
            continue;
        }
        // Clip the last column so a partially present field stops at the
        // end of the line instead of picking up the newline.
        const size_t last = fields[i].to < lineLen ? fields[i].to : lineLen;
        copyField( temp[i], sizeof temp[i], line, fields[i].from, last );
    }
    // return actual number of tokens extracted
    return i;
}
/**
 * Read every ATOM record of a fixed-column text file into a heap array.
 *
 * Lines whose first six columns are not "ATOM  " are skipped. Text fields
 * are stored exactly as they appear in their columns (padding included) and
 * numeric fields are converted with strtol/strtof.
 *
 * @param filename  Path of the file to read.
 * @param numAtoms  Receives the number of atoms stored in the result; 0 when
 *                  the file cannot be opened. Ignored when NULL.
 * @return Array holding the atoms read, which the caller must free(), or
 *         NULL when no atom was stored. If the array cannot be grown, the
 *         atoms read up to that point are returned.
 */
atom *parse ( const char *filename, size_t *numAtoms ) {
    atom *result = NULL;
    size_t arrSize = 0; // allocated capacity, in atoms
    size_t arrLen = 0;  // atoms actually stored
    FILE *fp = fopen(filename,"r");
    if ( fp ) {
        char buff[BUFSIZ];
        char temp[16][15];
        size_t tempLen = sizeof(temp)/sizeof(temp[0]);
        // read each line
        while ( fgets( buff, sizeof buff, fp ) != NULL ) {
            // The record name fills columns 1-6, so "ATOM" is followed by two
            // spaces. Comparing six characters against a five-character
            // literal would test column 6 against the literal's NUL and
            // reject every real record.
            if ( strncmp(buff,"ATOM  ",6) != 0 ) {
                continue;
            }
            if ( tokenise( buff, temp, tempLen ) != tempLen ) {
                continue;
            }
            // Extend the array if necessary
            if ( arrSize == arrLen ) {
                size_t newSize;
                // newSize starts at 16, then doubles up to 1M entries,
                // then grows by 1M entries at a time
                if ( arrSize == 0 ) newSize = 16;
                else if ( arrSize < 1024*1024 ) newSize = arrSize * 2;
                else newSize = arrSize + 1024*1024;
                void *t = realloc( result, newSize * sizeof(*result) );
                if ( t == NULL ) {
                    // no more room, but result still valid
                    break;
                }
                result = t;
                arrSize = newSize;
            }
            // Copy and convert the fields of interest. Token indices follow
            // the column table in tokenise(): 0 record name, 1 serial,
            // 2 atom name, 4 residue name, 5 chain ID, 6 residue number,
            // 8-10 x/y/z, 11 occupancy, 12 temperature factor, 14 element.
            // snprintf bounds every copy by the destination size.
            atom *a = &result[arrLen];
            snprintf( a->type, sizeof a->type, "%s", temp[0] );
            a->an = (int)strtol(temp[1],NULL,10);
            snprintf( a->mol, sizeof a->mol, "%s", temp[2] );
            snprintf( a->amino, sizeof a->amino, "%s", temp[4] );
            snprintf( a->chainid, sizeof a->chainid, "%s", temp[5] );
            a->pos = (int)strtol(temp[6],NULL,10);
            a->xc = strtof(temp[8],NULL);
            a->yc = strtof(temp[9],NULL);
            a->zc = strtof(temp[10],NULL);
            a->occ = strtof(temp[11],NULL);
            a->temp = strtof(temp[12],NULL);
            snprintf( a->element, sizeof a->element, "%s", temp[14] );
            arrLen++;
        }
        fclose(fp);
    } else {
        perror("Oops");
    }
    if ( numAtoms != NULL ) {
        *numAtoms = arrLen;
    }
    return result;
}
/**
 * Parse the ATOM records of "foo.txt" and print one line per atom.
 *
 * @return Always 0; an unreadable file simply produces an empty listing.
 */
int main ( void ) {
    size_t len = 0;
    atom *arr = parse( "foo.txt", &len );

    // %zu is the conversion for size_t; %zd expects the signed counterpart.
    printf("Result=%p, len=%zu\n", (void*)arr, len );
    for ( size_t i = 0 ; i < len ; i++ ) {
        const atom *a = &arr[i];
        printf("%s %d %s %s %s %d %f %f %f %f %f %s\n",
               a->type,
               a->an,
               a->mol,
               a->amino,
               a->chainid,
               a->pos,
               a->xc,
               a->yc,
               a->zc,
               a->occ,
               a->temp,
               a->element );
    }
    free(arr);
    return 0;
}