I wanted to write a function that would accept a string and a regular expression, then fill in a global struct with the beginning and end positions of a match, and return the actual match as another character string. Because regcomp() from regex.h only finds the first match in a line, I then wanted to use a static char copy of the original string which could be "blanked out" and matched against again to find multiple matches per line.
As far as I can tell, something about including the static char causes regfree() to fail, such that regcomp acts like it's regmatch_t structure has not been freed from the last call (so matches nothing). I say this because an alternate version of the following script works fine (in which there is no static char in the "regexp_match" function, and the string to match must be "blanked out" before it is fed back into the function again. But I would prefer it to work this way and can't see what the problem is!
Code:
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include <stdio.h>
char *chomp (char *line); // removes *nix newline from string
char *regexp_match (char *string, char *regexp);
struct matchspec {
int begin;
int end;
} MSpec;
int main () {
int i, count=1;
char input[164], *regexp, *string, *match;
puts("character string:");
fgets(input,164,stdin);
string=chomp(input);
while (1) {
regexp = NULL;
puts("regular expression:");
fgets(input,164,stdin);
if (input[0] == '\n') return;
regexp=chomp(input);
printf("input: \"%s\" regexp: \"%s\"\n", string, regexp);
if ((match=regexp_match(string,regexp)) != NULL) {
printf("%d -- B:%d E:%d is \"%s\"\n", count,MSpec.begin,MSpec.end,match);
// retain value of static char in regexp_match by using NULL
while ((match=regexp_match(NULL,regexp)) != NULL) {
count++;
printf("%d -- B:%d E:%d is \"%s\"\n", count,MSpec.begin,MSpec.end,match);
}
} else puts("No match.");
free(regexp);
}
}
char *chomp (char *line) { // a standard function
int len, elen;
char *chompd, *end;
len = strlen(line);
end = strrchr(line,10);
if (end == NULL) return line;
else { elen = strlen(end);
if (elen > 1) return line;
else { chompd = (char *)malloc(len-1);
strncpy(chompd,line,(len-1));
return chompd;
}
}
}
char *regexp_match (char *string, char *regexp) {
int i, w=0, len;
char *word = NULL;
static char *copy;
regex_t rgT;
regmatch_t match;
regcomp(&rgT,regexp,REG_EXTENDED);
if (string != NULL) { // write "string" to the static char ("copy")
len=strlen(string); // not needed in alternate version
copy=(char *)malloc(len+1);
strcpy(copy,string);
}
if ((regexec(&rgT,copy,1,&match,0)) == 0) {
MSpec.begin = (int)match.rm_so;
MSpec.end = (int)match.rm_eo;
len = MSpec.end-MSpec.begin;
word=(char *)malloc(len+1);
for (i=MSpec.begin; i<MSpec.end; i++) {
word[w] = copy[i];
w++; }
word[w]=0;
// blank out match in "copy" for next call
for (i=MSpec.begin; i<MSpec.end; i++) {
copy[i]=32;}
}
regfree(&rgT); // DOESN'T WORK !!!
return word;
}
The output goes like this:
Code:
character string:
the 6th one
regular expression:
th
input: "the 6th one" regexp: "th"
1 -- B:0 E:2 is "th"
2 -- B:5 E:7 is "th"
regular expression:
th
input: "the 6th one" regexp: "th"
No match.
Which actually means that regfree() worked the first time...it's not until "copy" is reinitialized with a new string that the problem begins.