Hi,

I'm working on a lab project to parse data into its proper format and output to screen & file. However, I have encountered some bugs where I can't determine their origin. I've modularly tested & reviewed my parsing functions & they seem ok. I will modularly review the output functions but in the meantime, I think they are supposed to be ok.

The lab project info is at http://www.engr.sjsu.edu/~mrobins/ce130lab2.html
Source/Raw data file is at http://www.engr.sjsu.edu/~mrobins/lab2sampleu.txt
Source code listed below & also available at http://www.engr.sjsu.edu/daluu/showdata.txt

I need to find where my bugs are coming from to fix them. There are 2 major bugs: 1) output different from intended output and 2) output not sent to output file for some reason, though my code should be correct, output only sent to screen. However, the output file was created successfully, but why won't my program write to it.

Here's the output bug
--------------------
//Intended Output:
1: N87BC |Prel |2/28/03 |Deer Valley |AZ |Creitz, Robe|Nonfatal|Part 91 |General Avia
2: CP-188|Prel |2/28/03 |COLCHANI |Bolivia |Cessna 411 |Fatal(1)|NSCH Non-U.S|Commercial T
3: HC-BMD|Fact |1/17/03 |Quito |Ecuador |Fokker F28 |Nonfatal|SCHD Non-U.S|Commercial T

//Actual Output:
1: N87BC |Prel |2/28/03 |Deer Valley |AZ |Creitz, Robe|Nonfatal|Part 91 |General Avia

2: N87BC |Prel |2/28/03 |Deer Valley |AZ |Creitz, Robe|Nonfatal|Part 91 |General Avia
CP-188|Prel |2/28/03 |COLCHANI |Bolivia |Cessna 411 |Fatal(1)|NSCH Non-U.S|Commercial T

3: N87BC |Prel |2/28/03 |Deer Valley |AZ |Creitz, Robe|Nonfatal|Part 91 |General Avia
CP-188|Prel |2/28/03 |COLCHANI |Bolivia |Cessna 411 |Fatal(1)|NSCH Non-U.S|Commercial T
HC-BMD|Fact |1/17/03 |Quito |Ecuador |Fokker F28 |Nonfatal|SCHD Non-U.S|Commercial T
------------------------

It seems the previous output is kept in memory & new output is appended to it when that shouldn't be the case. But I can't find where that happens in my code as the previous output should have been flushed out for new output.

---------
Code:
#include <stdio.h>
#include <string.h>

//3 Function prototypes
void parse_input(unsigned char ch);
//Input: one byte/char extracted from file
//Output: fields stored in memory for output
//Passes input along to subfunctions to parse data into fields
void parse_output();
//Input: fields stored in memory
//Output: fields combined into one long char. string for output to screen & file
void outscr();
//Input: Record char string
//Output: Outputs 1 data record per line with RRN & "|" delimiter per field to screen
//Assumes user's screen width set to accept 120 characters wide
void outstor();
//Input: Record char string
//Output: Outputs 1 data record per line in output file, "data.txt"
void ini_rec();
//Initializes/resets the data extraction record structure for next record/set of fields
long FileSize (FILE *stream);
//Input: input file stream
//Output: returns input file size

//Subfunctions of parse_input() -> Individual field parsing functions
//Takes input from parent & parses it to appropriate field to store in memory
void parse_f1(unsigned char ch);
void parse_f2(unsigned char ch);
void parse_f3(unsigned char ch);
void parse_f4(unsigned char ch);
void parse_f5(unsigned char ch);
void parse_f6(unsigned char ch);
void parse_f7(unsigned char ch);
void parse_f8(unsigned char ch);
void parse_f9(unsigned char ch);


//Declare global variables
//Global variables used to avoid passing variables into functions
FILE *infile; //file handle for input file
FILE *outfile; //file handle for output file
long flength; //var used to calculate filesize of input file
//var "byte" indicates byte offset for comparison with filesize
int byte = 0, rrn = 0; //var "rrn" self explanatory
//positioning variables
//var "k" for array position to hold field char
// var "fnum" indicates field number to parse, default is 1
//var "atrec" indicates when one set of fields or a record has been reached for output
int k = 0, fnum = 1, atrec = 0; 

//Data extraction record structure
typedef struct record{ // 1 record contains these 9 fields
     char status[6];
     char date[9];
     char location[13];
     char state_country[13];
     char make_model[13];
     char arn[7];
     char severity[9];
     char op_type[13];
     char car_name[13];
}Record;

Record the_record; //initialize a record struct for use

//Data output record structure
//One long char string, each, to hold 1 record
char outdata[97]; //for output to file
char scrdata[97]; //for output to screen

int main(int argc,char *argv[])
{
     //initialize certain variables when 1st run program.
     unsigned char ch;

     if (argc<2)
     {
          puts("No input file specified. Please type command again with input filename.\n");
          return 0;//Self explanatory
     }
     
     //Open file handle to input file
     infile = fopen(argv[1],"rb");
     
     //Abort program & display error if file open fails
     if(infile==NULL)
     {
          printf("Error opening %s\n",argv[1]);
          return 1;
     }

     //Create file handle to output file
     outfile = fopen("datafile.txt", "wb");
     
     //Abort program & display error if file open fails
     if(outfile==NULL)
     {
          printf("Error creating datafile.txt\n");
          return 1;
     }
     
     //Calculate filesize
     flength = FileSize(infile);

     //Initialize record struct for input
     ini_rec();

     //Extract data until EOF reached
     while((ch=fgetc(infile))!= EOF){
          if(atrec){ //output when 1 record reached
               parse_output();
               outscr();
               outstor();
               ini_rec();
               atrec = 0;
          }
          byte++; //increment byte offset indicator
          parse_input(ch);
     }
     //output last record
     parse_output();
     outscr();
     outstor();

     //close file handles when done reading & writing file
     fclose(infile);
     fclose(outfile);
     
     return 0;
}

void parse_input(unsigned char ch){
     switch(fnum){ //Route to subfunction based on fnum
     case 1:
          parse_f1(ch);
          break;
     case 2:
          parse_f2(ch);
          break;
     case 3:
          parse_f3(ch);
          break;
     case 4:
          parse_f4(ch);
          break;
     case 5:
          parse_f5(ch);
          break;
     case 6:
          parse_f6(ch);
          break;
     case 7:
          parse_f7(ch);
          break;
     case 8:
          parse_f8(ch);
          break;
     case 9:
          parse_f9(ch);
          break;
     default: 
          parse_f1(ch); //default is route to parse field 1
          break;
     }
     return;
}

void parse_f1(unsigned char ch){
     if(byte == flength) return; //discard last byte due to use of unsigned char
     if(ch == 13) return;
     if(ch == 10){
          fnum = 2;
          k = 0;
          return;}
     if(k < 5) the_record.status[k++] = ch;
     return;
}

void parse_f2(unsigned char ch){
     if(ch == 13) return;
     if(ch == 10){
          fnum = 3;
          k = 0;
          return;}
     if(k < 8) the_record.date[k++] = ch;
     return;
}

void parse_f3(unsigned char ch){
     if(ch == 44){
          fnum = 4;
          k = 0;
          return;}
     if(k < 12) the_record.location[k++] = ch;
     return;
}

void parse_f4(unsigned char ch){
     if(!k){
          if(ch == ' ') return;}
     if(ch == 13) return;
     if(ch == 10){
          fnum = 5;
          k = 0;
          return;}
     if(k < 12) the_record.state_country[k++] = ch;
     return;
}

void parse_f5(unsigned char ch){
     if(ch == 13) return;
     if(ch == 10){
          fnum = 6;
          k = 0;
          return;}
     if(k < 12) the_record.make_model[k++] = ch;
     return;
}

void parse_f6(unsigned char ch){
     if(ch == 13) return;
     if(ch == 10){
          fnum = 7;
          k = 0;
          return;}
     if(k < 6) the_record.arn[k++] = ch;
     return;
}

void parse_f7(unsigned char ch){
     if(ch == 13) return;
     if(ch == 10){
          fnum = 8;
          k = 0;
          return;}
     if(k < 8) the_record.severity[k++] = ch;
     return;
}

void parse_f8(unsigned char ch){
     if(ch == 13) return;
     if((ch == 44) || (ch == 58) || (ch == 10)){
          fnum = 9;
          k = 0;
          return;}
     if(k < 12) the_record.op_type[k++] = ch;
     return;
}

void parse_f9(unsigned char ch){
     if(!k){
          if(ch == ' ') return;}
     if(ch == 13) return;
     if(ch == 10){
          fnum = 1;
          k = 0;
          atrec = 1;
          rrn++;
          return;}
     if(k < 12) the_record.car_name[k++] = ch;
     return;
}

void parse_output(){
     strcat(scrdata, the_record.arn);
     strcat(scrdata, "|");
     strcat(scrdata, the_record.status);
     strcat(scrdata, "|");
     strcat(scrdata, the_record.date);
     strcat(scrdata, "|");
     strcat(scrdata, the_record.location);
     strcat(scrdata, "|");
     strcat(scrdata, the_record.state_country);
     strcat(scrdata, "|");
     strcat(scrdata, the_record.make_model);
     strcat(scrdata, "|");
     strcat(scrdata, the_record.severity);
     strcat(scrdata, "|");
     strcat(scrdata, the_record.op_type);
     strcat(scrdata, "|");
     strcat(scrdata, the_record.car_name);
     strcat(scrdata, "\n");
     for(int i = 0; i < 96; i++){
          if(scrdata[i] == '|') outdata[i] = ' ';
          else outdata[i] = scrdata[i];
     }
     outdata[96] = '\0';
     return;
}

/* old output parsing method
void parse_output(){
     int i;
     for(i = 0; i < 6; i++) scrdata[i] = the_record.arn[i];
     scrdata[6] = '|';
     for(i = 7; i < 12; i++) scrdata[i] = the_record.status[i];
     scrdata[12] = '|';
     for(i = 13; i < 21; i++) scrdata[i] = the_record.date[i];
     scrdata[21] = '|';
     for(i = 22; i < 34; i++) scrdata[i] = the_record.location[i];
     scrdata[34] = '|';
     for(i = 35; i < 47; i++) scrdata[i] = the_record.state_country[i];
     scrdata[47] = '|';
     for(i = 48; i < 60; i++) scrdata[i] = the_record.make_model[i];
     scrdata[60] = '|';
     for(i = 61; i < 69; i++) scrdata[i] = the_record.severity[i];
     scrdata[69] = '|';
     for(i = 70; i < 82; i++) scrdata[i] = the_record.op_type[i];
     scrdata[82] = '|';
     for(i = 83; i < 95; i++) scrdata[i] = the_record.car_name[i];
     scrdata[95] = 10;
     scrdata[96] = '\0';
     for(i = 0; i < 96; i++){
          if(scrdata[i] == 124) outdata[i] = ' ';
          else outdata[i] = scrdata[i];
     }
     outdata[96] = '\0';
     return;
}
*/

void outscr(){
     printf("%d: ",rrn);
     puts(scrdata);
     return;
}

void outstor(){
     fputs(outdata,outfile);
     return;
}

void ini_rec(){
     int i;
     for(i = 0; i < 5; i++) the_record.status[i] = ' ';
     the_record.status[5] = '\0';
     for(i = 0; i < 8; i++) the_record.date[i] = ' ';
     the_record.date[8] = '\0';
     for(i = 0; i < 12; i++) the_record.location[i] = ' ';
     the_record.location[12] = '\0';
     for(i = 0; i < 12; i++) the_record.state_country[i] = ' ';
     the_record.state_country[12] = '\0';
     for(i = 0; i < 12; i++) the_record.make_model[i] = ' ';
     the_record.make_model[12] = '\0';
     for(i = 0; i < 6; i++) the_record.arn[i] = ' ';
     the_record.arn[6] = '\0';
     for(i = 0; i < 8; i++) the_record.severity[i] = ' ';
     the_record.severity[8] = '\0';
     for(i = 0; i < 12; i++) the_record.op_type[i] = ' ';
     the_record.op_type[12] = '\0';
     for(i = 0; i < 12; i++) the_record.car_name[i] = ' ';
     the_record.car_name[12] = '\0';
     return;
}

long FileSize (FILE *stream)
{
     long length;//temp variable
     fseek (stream, 0L, SEEK_END);//seek to EOF
     length = ftell(stream);//store EOF byte position which is filesize
     //reset file for future reading by seeking to origin
     fseek (stream, 0L, SEEK_SET);
     return length;//return filesize
}
---------
I know debugging is one of the worst parts of programming BUT I can't do it all myself, so I really need your help here to find my bugs. All help appreciated.

NOTE: The source code is some length but I've commented half the code. If you've looked over the project info, I've used the decimal equivalents to check for ASCII delimiters (",", ":") and HEX values for line feed/new line & carriage return in my input parsing subfunctions. And data is truncated while parsing input. If you have any questions about my code, let me know.