// Code:
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/* The Following is the grammar to be implemented:
<http-url> ::= "http://" <hostport> [ "/" <hpath> [ "?" <search> ]]
<hpath> ::= <hsegment> *[ "/" <hsegment> ]
<hsegment> ::= *[ <uchar> | ";" | ":" | "@" | "&" | "=" ]
<search> ::= *[ <uchar> | ";" | ":" | "@" | "&" | "=" ]
<hostport> ::= <host> [ ":" <port> ]
<host> ::= <hostname> | <hostnumber>
<hostname> ::= 1*[ <domainlabel> "." ] <toplabel>
<hostnumber> ::= <digits> "." <digits> "." <digits> "." <digits>
<port> ::= <digits>
<domainlabel> ::= <alphadigit> | <alphadigit> *[ <alphadigit> | "-" ] <alphadigit>
<toplabel> ::= <alpha> | <alpha> *[ <alphadigit> | "-" ] <alphadigit>
<alphadigit> ::= <alpha> | <digit>
<ftp-url> ::= "ftp://" <login> [ "/" <fpath> [ ";type=" <ftptype> ]]
<fpath> ::= <fsegment> *[ "/" <fsegment> ]
<fsegment> ::= *[ <uchar> | "?" | ":" | "@" | "&" | "=" ]
<ftptype> ::= "A" | "I" | "D" | "a" | "i" | "d"
<login> ::= [ <user> [ ":" <password> ] "@" ] <hostport>
<user> ::= *[ <uchar> | ";" | "?" | "&" | "=" ]
<password> ::= *[ <uchar> | ";" | "?" | "&" | "=" ]
<uchar> ::= <unreserved> | <escape>
<unreserved> ::= <alpha> | <digit> | <safe> | <extra>
<alpha> ::= <lowalpha> | <hialpha>
<lowalpha> ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |"i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
"q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |"y" | "z"
<hialpha> ::= "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |"J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
"S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
<digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |"8" | "9"
<digits> ::= 1*[<digit>]
<safe> ::= "$" | "-" | "_" | "." | "+"
<extra> ::= "!" | "*" | "'" | "(" | ")" | ","
<escape> ::= "%" <hex> <hex>
<reserved> ::= ";" | "/" | "?" | ":" | "@" | "&" | "="
<hex> ::= <digit> | "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" | "f"
*/
// Declaring the methods to be used
// Top-level parsers, one per supported scheme
void httpUrl(void);
void ftpUrl(void);

// http-specific grammar rules
void hpath(void);
void hsegment(void);
void search(void);

// Host and port rules shared by both schemes
void hostport(void);
void host(void);
void hostname(void);
void hostnumber(void);
void port(void);
void domainlabel(void);
void toplabel(void);

// ftp-specific grammar rules
void fpath(void);
void fsegment(void);
void ftptype(void);
void login(void);
void user(void);
void password(void);

// Character-class rules; each returns 0 for "matches" and 1 for "does not match"
int alphadigit(char c);
int uchar(void);
int unreserved(char c);
int alpha(char c);
int lowalpha(char c);
int hialpha(char c);
int digit(char c);
int digits(char *c);
int safe(char c);
int extra(char c);
int escape(void);
int reserved(char c);
int hex(void);

// Input and error reporting
void scan(void);
void error(char *n);

// "Primitive" parse tree helpers
void enter(char *name);
void leave(char *name);
void spaces(int local_level);

int level = 0;   // current nesting depth of the "primitive" parse tree
int read;        // character most recently fetched while echoing the tree file
char *url;       // the URL string being validated
char *next;      // scan position inside url
FILE *fw;        // stream the parse tree is written to
FILE *fw1;       // stream used to read the parse tree back
int main(void)
{
do
{
url = (char*) malloc(1000); // allocating memory for url string
printf("\n\n\nInput(0 to quit): "); // telling user to enter input
scan(); // scanning input and saving into url (more info in scan())
printf("\n");
next = url; // setting next to url[0]
if(*next != '0') // in case user decides to quit
{
fw = fopen("Parse_Tree.txt","w"); // open the file to WRITE in it the URL and the parse tree
fprintf(fw, "\n%s\n\n", next); // use fprintf to write the url in the text file
fprintf(fw, "\n"); // print new line in the text file
if(*next == 'h') // if url starts with an 'h'
httpUrl(); // then http potential
else
if(*next == 'f') // if url starts with an 'f'
ftpUrl(); // then ftp potential
else
error("Must start with \"http://\" or \"ftp://\"."); // obvious error
fclose(fw); //close the writing file when finish writing the parse tree
fw1 = fopen("Parse_Tree.txt","r"); //Now open the reading file in order to read the parse tree and print it out to the screen if the url is correct
while((read = getc(fw1)) != EOF)
printf("%c", read);
printf("\t\t\t\t SUCCESS!!\n"); // notify user with the validity of the URL
}
if(fw1 != NULL) // incase file was not opened
fclose(fw1); //Close the reading file after finishing
}while(*next != '0'); // looping for user friendly interface
fw1 = fopen("Parse_Tree.txt","w"); //Reopen the writing file inorder to print empty character inorder to emptify the file
fprintf(fw1, "");
fclose(fw); //Close
printf("\t\t\t\t Disconnecting...\n\n"); // likewise
}
//<http-url> ::= "http://" <hostport> [ "/" <hpath> [ "?" <search> ]]
void httpUrl(void)
{
enter("httpUrl"); // for parse tree
if(strncmp(url, "http://", 7) != 0) // checking for "http://"
error("Must start with \"http://\"."); // if not found then error
next = url+7; // skipping "http://" since already dealt with
hostport(); // hostport is now expected
if(*next == '/') // if '/' expected
{
next++; // then skip it
hpath(); // and expect an hpath
}
else // if no '/' was found, then must have encountered end of url
if(*next != '\0') // otherwise error
error(" Hostport can only be follwed by a \"/\".");
if(*next == '?') // after hpath might encounter '?'
{
next++; // if encountered then skip it
search(); // search grammar now expected
}
else // if no '?' was found, then must have enocountered end of url
if(*next != '\0') // if not end of url
error("hpath can only be followed by a ?."); // print an error
if(*next == '\0') // after all functions have been run safely
leave("httpUrl"); // close parse-tree
}
//<hostport> ::= <host> [ ":" <port> ]
// Consumes a host and, when a ':' follows it, a port.
void hostport(void)
{
    enter("hostport");
    host();
    if (*next != ':')
    {
        leave("hostport");          // no port part present
        return;
    }
    next++;                         // step over ':'
    port();
    leave("hostport");
}
//<host> ::= <hostname> | <hostnumber>
void host(void)
{
char *c = strrchr(url, '.'); // checking for rightmost '.'
enter("host"); // start parse-tree
if(c != NULL) // as long as we have a '.' then we're good
{
if(isdigit(*(++c))) // checking character after rightmost '.'
hostnumber(); // if is digit then we have a host number
else
hostname(); // else we have a host name
}
else
error("\tNo Domain Label Detected."); // if no '.' was found then Error
leave("host"); // exiting parse-tree
}
//<hostname> ::= 1*[ <domainlabel> "." ] <toplabel>
void hostname(void)
{
char *s, *d; // char pointers
enter("hostname"); // starting parse tree
s = strchr(next, '/'); // pointer to next '/' in the remaining string
d = strchr(next, '.'); // pointer to next '.' in the remaining string
if(s < d && (s != NULL)) // If no '.' available before the first '/'
error(" No Domain Label Present."); // then print error
if(s == NULL) // if no '/' available then keep loopin below till no more '.'s available
{
while(d != NULL) // loop while '.' is found
{
domainlabel(); // expecting domain label
next++; // skipping the next dot ('.')
d = strchr(next, '.'); // looking for next '.'
}
}
else // if a '/' has been found
{
while(s > d && d != NULL) // then make sure only '.'s before the '/' are accounted for
{
domainlabel(); // domain label expected
next++; // skipping the next dot ('.')
s = strchr(next, '/');
d = strchr(next, '.'); //updating the dot ('.')
}
}
toplabel(); // after last '.' has been read, then head to toplabel
leave("hostname"); // closing parse tree
}
//<hostnumber> ::= <digits> "." <digits> "." <digits> "." <digits>
void hostnumber(void)
{
char *temp, *p; // local char pointers
int ptNum = 0; // point numbers to keep track of how many points encountered so far
enter("hostnumber"); // start parse-tree
//while none of the following is encountered loop for next number
while((*next != ':') && (*next != '/') && (*next != '\0'))
{
temp = next; // saving next's current location
if(strchr(next, '.')) // as long as a dot is found
{
ptNum++; // incremebt dot counter
while(*next != '.')
next++; // skip all characters till next loop
}
else // otherwise, then end portion of the host number has been reached
{
while((*next != ':') && (*next != '/') && (*next != '\0'))
next++; // skip its characters
}
p = (char*) malloc(next-temp); // allocate enough space for p, to copy all skipped 'next' characters
strncpy(p, temp, next-temp); // copying all skipped host number character
p[next-temp] = '\0'; // end of string to avoid any errors
if(digits(p) != 0 && ((*next) != '/')) // checking p for valid host number grammar
error("Host Number may only contain Digits."); // if not then print error
if(ptNum > 3) // condition to limit hostnumber to three dots
error("Host Number cannot contain more than 3 dots (\".\").");
if((*next != '\0') && (*next != ':') && (*next != '/')) //skipping next dot
next++;
}
if(ptNum < 3) // handling cases where hostnumber contains less than 3 dots
error("Host Number cannot contain less than 3 dots (\".\").");
leave("hostnumber"); // closing parse tree
}
//<port> ::= <digits>
void port(void)
{
char *temp, *p; // local pointer variables
enter("port"); // starting tree parser
temp = next; // saving "next's" current position for later use
while((*next != '\0') && (*next != '/')) // loop while not end of string nor '/'
next++; // skip character
p = (char*) malloc(next-temp); // allocate enough space to save from temp -> next
strncpy(p, temp, next-temp); // copying the skipped characters
p[next-temp] = '\0'; // closing string
if(digits(p) != 0) // making sure port is digit only array
error("Port may only contain Digits."); // else throw error
leave("port"); // exiting parse tree
}
//<hpath> ::= [ <hsegment> ] * [ "/" <hsegment> ]
// Consumes the http path: a first segment (skipped when the path starts with
// '?' or is empty) followed by any number of "/" <hsegment> repetitions.
void hpath(void)
{
    enter("hpath");
    switch (*next)
    {
    case '?':
    case '\0':
        break;                      // nothing to read before the query / end
    default:
        hsegment();
        break;
    }
    for (; *next == '/'; hsegment())
        next++;                     // step over '/', then read the segment after it
    leave("hpath");
}
//<hsegment> ::= *[ <uchar> | ";" | ":" | "@" | "&" | "=" ]
void hsegment(void)
{
enter("hsegment"); // starting parse tree
while((*next != '?') && (*next != '/') && (*next != '\0')) // making sure only hsegment is scanned
{
if((*next != ';') && (*next != ':') && (*next != '@') && (*next != '&') && (*next != '=')) // Checking Validity
if(uchar() != 0) // likewise
error("hsegment contains Illegal characters."); // If Illegal then error
next++; // skipping the character just checked
}
leave("hsegment"); // closing parse tree
}
//<search> ::= *[ <uchar> | ";" | ":" | "@" | "&" | "=" ]
void search(void)
{
enter("search"); // starting parse tree
while((*next != '\0')) // check while not end of string
{
if((*next != ';') && (*next != ':') && (*next != '@') && (*next != '&') && (*next != '=')) // Check Validity
if(uchar() != 0) // likewise
error("search contains illegal Characters."); // else error
next++; // skippin the character just checked
}
leave("search"); // exiting parse tree
}
//<domainlabel> ::= <alphadigit> | <alphadigit> *[ <alphadigit > | "-" ] <alphadigit>
void domainlabel(void)
{
enter("domainlabel"); // starting parse tree
if(alphadigit(*next) != 0) // has to be ATLEAST one alpha or digit
error("Domain Labels can only contain Alphabets & Digits."); // else error
next++; // skipping the character just checked
while(*next != '.') // taking care of the maybe repetitive '-' or alphadigits to follow
{
if(*next != '-')
if(alphadigit(*next) != 0) // making sure only alpha or digit follows
error("Domain Labels can only contain Alphabets & Digits.");
next++; // skipping character just checked or '-' if not checked
}
next--; // goin back
if(alphadigit(*next) != 0) // to make sure last character was an alphadigit and not a '-'
error("Domain Labels can only contain Alphabets & Digits."); // else error
next++; // returning next to position before decrement
leave("domainlabel"); // closing parse tree
}
//<toplabel> ::= <alpha> | <alpha> *[ <alphadigit> | "-" ] <alphadigit>
void toplabel(void)
{
enter("toplabel"); // starting parse tree
if(*next != 0) // making sure ATLEAST one alpha is found
alpha(*next); // checkin for alpha validity
else
error(" Top Label cannot be empty. "); // else error
next++; // skipping character just checked
while((*next != ':') && (*next != '/') && (*next != '\0')) // checking remaining characters
{
if(*next != '-') // same as in domain label
if(alphadigit(*next) != 0) // checking for alphadigit validity
error("Top Labels can only contain Alphabets & Digits."); // else error
next++; // skipping character just checked
}
next--; // decrementing
if(alphadigit(*next) != 0) // to check last character in domain label to make sure it is only alphadigit and not '-'
error("Top Labels can only contain Alphabets & Digits."); // else error
next++; // returning "next" to its position before previous decrement
leave("toplabel"); // closing parse tree
}
//<ftp-url> ::= "ftp://" <login> [ "/" <fpath> [ ";type=" <ftptype> ]]
void ftpUrl(void)
{
enter("ftpUrl"); // starting parse tree
if(strncmp(url, "ftp://", 6) != 0) // checking for right "ftp://" syntax at beginning or "url"
error("Must start with \"ftp://\"."); // else error
next = url + 6; // skipping characters already checked
login(); // especting mandatory login function
if(*next == '/') // after login might expect '/'
{
next++; // then skip it
fpath(); // and enter expected fpath
}
else
if(*next != '\0') // else end of string expected
error("ftp Hostport can only be followed by a \'/\'."); // else error
if(*next == ';') // after login might expect ftptype
{
if(strncmp(next, ";type=", 6) != 0) // making sure of right syntax
error("ftp type must start with \";type=\"."); // else error
next += 6; // skip checked characters
ftptype(); // call expected ftptype
}
else
if(*next != '\0') // should encounter end of string now
error("fpath may only be followed by a semi-colon (\";\")."); // else error
else
if(*next == '\0') // incase of clean url
leave("ftpUrl"); // closing parse tree
}
//<fpath> ::= <fsegment> *[ "/" <fsegment> ]
// Consumes the ftp path: one segment, then another segment for every '/'
// that follows.
void fpath(void)
{
    enter("fpath");
    for (;;)
    {
        fsegment();
        if (*next != '/')
            break;                  // no further segment
        next++;                     // step over '/'
    }
    leave("fpath");
}
//<fsegment> ::= *[ <uchar> | "?" | ":" | "@" | "&" | "=" ]
void fsegment(void)
{
enter("fsegment"); // starting parse tree
while((*next != '/') && (*next != ';') && (*next != '\0')) // making sure only current fsegment is checked
{
if((*next != '?') && (*next != ':') && (*next != '@') && (*next != '&') && (*next != '=')) // checking Validity
if(uchar() != 0) // likewise
error("fsegment contains Illegal Characters."); // else error
next++; // skipping character that has just been checked
}
leave("fsegment"); // closing parse tree
}
//<ftptype> ::= "A" | "I" | "D" | "a" | "i" | "d"
void ftptype(void)
{
enter("ftptype"); // starting parse tree
// Checking Validity
if((*next != 'A') && (*next != 'I') && (*next != 'D') && (*next != 'a') && (*next != 'i') && (*next != 'd'))
error("Illegal ftp type (Allowed: a, A, i, I, d, D)."); // else error
leave("ftptype"); // closing parse tree
}
//<login> ::= [ <user> [ ":" <password> ] "@" ] <hostport>
// Consumes the optional "user[:password]@" prefix and the mandatory hostport.
// The prefix is assumed present only when an '@' occurs before the first '/'
// (or before the end of the string when there is no '/'), so an '@' inside
// the path is left for fsegment() to handle.
void login(void)
{
    enter("login");
    if (memchr(next, '@', strcspn(next, "/")) != NULL)
    {
        user();
        if (*next == ':')           // a password follows the user name
        {
            next++;
            password();
        }
        next++;                     // step over '@'
    }
    hostport();
    leave("login");
}
//<user> = *[ <uchar> | ";" | "?" | "&" | "=" ]
void user()
{
enter("user"); // starting user
while((*next != ':') && (*next != '@') && (*next != '\0')) // limiting checking to user only
{
if((*next != ';') || (*next != '?') || (*next != '&') || (*next != '=')) // checking Validity
if(uchar() != 0) // likewise
error("User contains Illegal Characters."); // else error
next++; // skipping character just checked
}
leave("user"); // closing parse tree
}
//<password> = *[ <uchar> | ";" | "?" | "&" | "=" ]
void password(void)
{
enter("password"); // starting parse tree
while((*next != '@') && (*next != '\0')) // limiting checking to password only
{
if((*next != ';') || (*next != '?') || (*next != '&') || (*next != '=')) // checking Validity
if(uchar() != 0) // likewise
error("Password contains Illegal Characters."); // else error
next++; // skipping character just checked
}
leave("password"); // closing parse tree
}
//<uchar> ::= <unreserved> | <escape>
// Checks the character at "next" against <uchar>: a '%' must start a valid
// escape (checked by escape(), which moves "next" onto the second hex digit);
// every other character must satisfy unreserved().
// Returns 0 when the character is acceptable and 1 when it is not.
int uchar(void)
{
    enter("uchar");
    if (*next == '%')
    {
        if (escape() != 0)
            return 1;
    }
    else if (unreserved(*next) != 0)    // braces above keep this else on the outer if
    {
        return 1;
    }
    leave("uchar");
    return 0;
}
//<unreserved> ::= <alpha> | <digit> | <safe> | <extra>
// Tests c against alpha(), digit(), safe() and extra(), in that order, and
// stops at the first one that matches.
// Returns 0 when one of them matches and 1 when none does.
int unreserved(char c)
{
    enter("unreserved");
    if (alpha(c) != 0 && digit(c) != 0 && safe(c) != 0 && extra(c) != 0)
        return 1;                   // no class matched
    leave("unreserved");
    return 0;
}
//<reserved> ::= ";" | "/" | "?" | ":" | "@" | "&" | "="
// Returns 0 when c is one of ; / ? : @ & = and 1 otherwise.
int reserved (char c)
{
    enter("reserved");
    switch (c)
    {
    case ';':
    case '/':
    case '?':
    case ':':
    case '@':
    case '&':
    case '=':
        leave("reserved");
        return 0;
    default:
        return 1;
    }
}
//<alphadigit> ::= <alpha> | <digit>
// Returns 0 when c satisfies alpha() or, failing that, digit(); 1 otherwise.
int alphadigit(char c)
{
    enter("alphadigit");
    if (alpha(c) == 0 || digit(c) == 0)
    {
        leave("alphadigit");
        return 0;
    }
    return 1;                       // neither a letter nor a digit
}
//<alpha> ::= <lowalpha> | <hialpha>
// Returns 0 when c satisfies lowalpha() or, failing that, hialpha(); 1 otherwise.
int alpha(char c)
{
    enter("alpha");
    if (lowalpha(c) != 0 && hialpha(c) != 0)
        return 1;                   // neither lower-case nor upper-case letter
    leave("alpha");
    return 0;
}
// Returns 0 when c lies in the range 'a'..'z' and 1 otherwise.
int lowalpha(char c)
{
    enter("lowalpha");
    if (c < 'a' || c > 'z')
        return 1;
    leave("lowalpha");
    return 0;
}
// Returns 0 when c lies in the range 'A'..'Z' and 1 otherwise.
int hialpha(char c)
{
    enter("hialpha");               // same spelling as the matching leave() below
    if ((c >= 'A') && (c <= 'Z'))
    {
        leave("hialpha");
        return 0;                   // 0 means "is an upper-case letter"
    }
    else
        return 1;                   // 1 means "is not"
}
// Returns 0 when c is a decimal digit and 1 otherwise.
int digit(char c)
{
    enter("digit");
    // isdigit() is only defined for values representable as unsigned char (or
    // EOF); the cast keeps bytes with the high bit set from being passed as
    // negative numbers.
    if (isdigit((unsigned char) c))
    {
        leave("digit");
        return 0;
    }
    else
        return 1;
}
// Checks that the string c consists of digits only. An empty string is
// reported through error().
// Returns 0 when every character satisfies digit() and 1 at the first one
// that does not.
int digits(char *c)
{
    char *cursor;

    enter("digits");
    if (c[0] == '\0')
        error("\tNo Digits Detected!!");
    for (cursor = c; *cursor != '\0'; cursor++)
    {
        if (digit(*cursor) != 0)
            return 1;
    }
    leave("digits");
    return 0;
}
// Returns 0 when c is one of $ - _ . + and 1 otherwise.
int safe(char c)
{
    enter("safe");
    switch (c)
    {
    case '$':
    case '-':
    case '_':
    case '.':
    case '+':
        leave("safe");
        return 0;
    default:
        return 1;
    }
}
//<extra> ::= "!" | "*" | "'" | "(" | ")" | ","
// Returns 0 when c is one of ! * ' ( ) , and 1 otherwise.
int extra(char c)
{
    enter("extra");
    switch (c)
    {
    case '!':
    case '*':
    case '\'':
    case '(':
    case ')':
    case ',':
        leave("extra");
        return 0;
    default:
        return 1;
    }
}
//<escape> ::= "%" <hex> <hex>
// Called with "next" on a '%'. Advances one character at a time and requires
// each of the following two characters to satisfy hex(). On success "next" is
// left on the second hex digit.
// Returns 0 for a valid escape and 1 as soon as a hex digit is missing.
int escape()
{
    int remaining;

    enter("escape");
    for (remaining = 2; remaining > 0; remaining--)
    {
        next++;                     // first pass steps over '%', second over the first digit
        if (hex() != 0)
            return 1;
    }
    leave("escape");
    return 0;
}
//<hex> ::= <digit> | "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" | "f"
// Returns 0 when the character at "next" is a digit or a letter in A-F / a-f,
// and 1 otherwise. "next" is not moved.
int hex(void)
{
    int is_hex;

    enter("hex");
    is_hex = digit(*next) == 0
          || (*next >= 'A' && *next <= 'F')
          || (*next >= 'a' && *next <= 'f');
    if (!is_hex)
        return 1;
    leave("hex");
    return 0;
}
// scans user input and saves it into url character array (string)
// Reads one line from standard input into the buffer that "url" points to,
// converting every character to lower case, and terminates it with '\0'.
// Reading stops at '\r', '\n' or end of input. Characters that do not fit in
// the buffer are read and dropped. If input ends before any character was
// read, the line is set to "0" (the quit command) so the caller's loop stops
// instead of spinning on an exhausted stream.
void scan(void)
{
    enum { URL_CAPACITY = 1000 };   // must match the size main() allocates for url
    size_t used = 0;                // characters stored so far
    int ch;                         // int, so that EOF is distinguishable from any character

    while ((ch = getchar()) != EOF && ch != '\r' && ch != '\n')
    {
        if (used < URL_CAPACITY - 1)            // keep one slot for the terminator
            url[used++] = (char) tolower(ch);
    }
    if (ch == EOF && used == 0)
        url[used++] = '0';
    url[used] = '\0';
}
// error function, prints error, and exits processing
// Reports an invalid URL and ends the current parse. The message n is printed
// inside a banner, the parse tree depth is reset and the tree file is closed.
// Control then goes back to main() for further input; once that call returns
// the program exits with status 1, so this function never returns to its caller.
void error(char *n)
{
    printf("\n\n\n-------------------------------------------------------------------------------\n"
           " INVALID URL: %s"
           "\n-------------------------------------------------------------------------------\n\n",
           n);

    level = 0;          // the half-built tree is abandoned
    fclose(fw);         // the tree file was opened by main() before parsing began
    main();             // keep serving the user until they quit
    level = 0;
    exit(1);
}
// parse tree function to emulate function start
// Writes the opening line of a parse tree node to fw: the indentation for the
// current depth, a '+', the rule name and a newline. The depth is then
// increased by one.
void enter(char *name)
{
    spaces(level++);                // indent with the old depth, then go one level deeper
    fprintf(fw, "+%s\n", name);     // %s stops at the terminator, even for an empty name
}
// parse tree function to emulate function end
// Writes the closing line of a parse tree node to fw: the depth is decreased
// by one first, then the indentation for that depth, a '-', the rule name and
// a newline are written.
void leave(char *name)
{
    spaces(--level);                // come back up one level before indenting
    fprintf(fw, "-%s\n", name);     // %s stops at the terminator, even for an empty name
}
// parse tree fucntion that prints the '|'s
// Writes one "| " marker to fw for every level of nesting; nothing is written
// when local_level is zero or negative.
void spaces(int local_level)
{
    int depth;

    for (depth = 0; depth < local_level; depth++)
        fputs("| ", fw);
}