Working on a simple C program to process exported .html bookmark files. Specifically to eliminate everything but certain tags and their contents.
I'm actually pretty pleased with what I've come up with so far as it does what it's intended to do.
However, in the spirit of writing efficient code, I'm wondering if this is the best way to go about the business.
Specifically I'm focusing on 3 particular html tags and the code just looks kind of messy despite seeming to work with out any problems.
Code:
void processtags_n_call(void)
{
const char new_line=10;
/*various counters with different functions*/
int href_cntr1=0,/*char matching confirmation counter for <A HREF="HTTP...> tags*/
h3_cntr1=0,/*char matching confirmation counter for <H3...> tags*/
dd_cntr1=0;/*char matching confirmation counter for <DD...> tags*/
while ((cur_char=getc(input_html))!=EOF)/*main loop for filtering out relevant tags for processing*/
{
if ((cur_char=='<' && href_cntr1==0) ||
(cur_char=='<' && h3_cntr1==0) ||
(cur_char=='<' && dd_cntr1==0))/*looking for tags matching requirements for conversion, if found, convert tags and scrape*/
{
href_cntr1++;
h3_cntr1++;
dd_cntr1++;
continue;
}
else if ((cur_char=='A' && href_cntr1==1) ||
(cur_char=='H' && h3_cntr1==1) ||
(cur_char=='D' && dd_cntr1==1))
{
if (cur_char=='A')
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
}
else if (cur_char=='H')
{
h3_cntr1++;
href_cntr1=dd_cntr1=0;
}
else if (cur_char=='D')
{
dd_cntr1++;
href_cntr1=h3_cntr1=0;
}
continue;
}
else if ((cur_char==' ' && href_cntr1==2) ||
(cur_char=='3' && h3_cntr1==2) ||
(cur_char=='D' && dd_cntr1==2))
{
if (cur_char==' ')
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
}
else if (cur_char=='3')
{
h3_cntr1++;
href_cntr1=dd_cntr1=0;
}
else if (cur_char=='D')
{
dd_cntr1++;
href_cntr1=h3_cntr1=0;
}
continue;
}
else if ((cur_char=='H' && href_cntr1==3) ||
(h3_cntr1==3) ||
(dd_cntr1==3))
{
if (h3_cntr1==3)/*if true we have a <H3> tag to process and scrape*/
{
scrape_h3();/*make call to the specific scraper and let it do it's thing*/
h3_cntr1=href_cntr1=dd_cntr1=0;/*zero out our counters*/
putc(new_line, output_html);/*go to the next line*/
continue;
}
else if (dd_cntr1==3)/*If true we have a <DD> tag to process and scrape*/
{
scrape_dd();
href_cntr1=h3_cntr1=dd_cntr1=0;
putc(new_line, output_html);
continue;
}
else if (href_cntr1==3)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
}
else if (cur_char=='R' && href_cntr1==4)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='E' && href_cntr1==5)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='F' && href_cntr1==6)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='=' && href_cntr1==7)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='"' && href_cntr1==8)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='h' && href_cntr1==9)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='t' && href_cntr1==10)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='t' && href_cntr1==11)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='p' && href_cntr1==12)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char==':' && href_cntr1==13)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='/' && href_cntr1==14)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (cur_char=='/' && href_cntr1==15)
{
href_cntr1++;
h3_cntr1=dd_cntr1=0;
continue;
}
else if (href_cntr1==16) /*If true we have a <a href="http://...> instance to process and scrape*/
{
scrape_href();
href_cntr1=h3_cntr1=dd_cntr1=0;
putc(new_line, output_html);
continue;
}
else
{
href_cntr1=h3_cntr1=dd_cntr1=0;
}
}
}
Any input is valued and appreciated.