An oversimplified tokenizer
Everyone,
Since I'm not a C expert, please criticize this program. I emailed it to a friend fluent in that language; he said "Nice!" and told me how to optimize the program slightly by replacing a call to the strlen function with the body of the "nonempty" function.
The program mimics the strtok function without looking for characters that separate tokens.
Thanks for your thoughts. I hope the program won't seem too amateurish. I'm fluent in ISO Standard Pascal, though I'd rather write in C.
Code:
#define MAX 80
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
/* True when c may appear inside a token: an alphanumeric character or
 * one of - ' + .
 *
 * Note: c == '\0' must be rejected explicitly, because strchr searches
 * the terminator too — strchr("-'+", '\0') returns a non-NULL pointer
 * to the string's own '\0', which would classify the end of a line as
 * a token character and let callers run off the end of the buffer.
 * (index() is the legacy BSD name for strchr and was removed from
 * POSIX.1-2008, so strchr is used here.) */
bool token_may_contain(const int c)
{
    return isalnum((unsigned char) c)
        || (c != '\0' && strchr("-'+", c) != NULL);
}
/* Report whether the given C string contains at least one character
 * before its terminator. */
bool nonempty(const char some_string[])
{
    return *some_string != '\0';
}
/* Return the next token from line, scanning from *start.  On return,
 * *start is advanced past the token so repeated calls walk the whole
 * line; it is set to -1 when the line is exhausted.  If no token
 * remains, the empty string is returned.
 *
 * The token lives in static storage, so each call overwrites the
 * previous result (the same contract as strtok). */
const char *a_token_from(const char line[], int *start)
{
    static char token[MAX + 1];
    int here = *start;
    int token_length = 0;
    int line_length = (int) strlen(line);

    /* Skip any separator characters before the token. */
    while (here < line_length && !token_may_contain((unsigned char) line[here]))
        here++;

    /* Copy token characters, bounded both by the line and by the
     * capacity of the static buffer (tokens longer than MAX are split). */
    while (here < line_length && token_length < MAX
           && token_may_contain((unsigned char) line[here]))
        token[token_length++] = line[here++];

    /* Terminate at the token's own length — the original code wrote the
     * terminator at token[line_length], which is the wrong index and
     * could leave stale characters (or write out of bounds). */
    token[token_length] = '\0';

    *start = (here < line_length) ? here : -1;
    return token;
}
/* Print every token in line, one per output line.  Empty results from
 * a_token_from (e.g. a line made entirely of separators) are skipped
 * rather than printed as blank lines. */
void tokenize(const char line[])
{
    int start = 0;

    while (start >= 0)
    {
        const char *tok = a_token_from(line, &start);
        if (nonempty(tok))
            puts(tok);
    }
}
/* Read lines from stdin and print their tokens.  The prompt is issued
 * BEFORE each fgets call — the original printed it only after a line
 * had already been read, so the user was never prompted for the first
 * line. */
int main(void)
{
    char line[MAX];

    puts("Please type the string to tokenize.");
    while (fgets(line, sizeof line, stdin) != NULL)
    {
        tokenize(line);
        puts("Please type the string to tokenize.");
    }
    return 0;
}