It's because this line:
Code:
printf("String: %s\n", line + matches[2].rm_so);
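is wrong. `%s` prints everything from the start offset (rm_so) up to the terminating NUL of `line`, so you get the rest of the line rather than just the match. You also have to take the end offset of the match (rm_eo) into account. This works: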
Code:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <regex.h>
#define MAX_STRING_SIZE 1024
/*
 * Scan ./index.html line by line and print the value of the first href
 * attribute found on each line as "String: <value>".
 *
 * Lines are read with fgets() into a MAX_STRING_SIZE buffer, so a longer
 * input line is processed in several pieces. regexec() is called once per
 * line, so only the first href on a line is reported.
 *
 * Returns EXIT_SUCCESS on success, or EXIT_FAILURE (with a message on
 * stderr) if the pattern cannot be compiled or the file cannot be opened.
 */
int main(void) {
    /*
     * [[:space:]] is the POSIX whitespace class; a bare [:space:] is just a
     * bracket expression matching one of ':', 's', 'p', 'a', 'c', 'e'.
     * '|' is a literal inside brackets, so it is not listed: only a single
     * or double quote may open the value, and the capture stops at the
     * next quote character.
     */
    static const char pattern[] =
        "href[[:space:]]*=[[:space:]]*['\"]([^\"']*)";
    static const char path[] = "./index.html";
    int rc;
    regex_t myregex;
    regmatch_t matches[2];
    FILE *fp;
    char line[MAX_STRING_SIZE];

    /* Compile first so that regfree() is only ever called on a valid regex. */
    rc = regcomp(&myregex, pattern, REG_EXTENDED);
    if (rc != 0) {
        char errbuf[256];

        regerror(rc, &myregex, errbuf, sizeof errbuf);
        fprintf(stderr, "regcomp: %s\n", errbuf);
        return EXIT_FAILURE;
    }

    fp = fopen(path, "r");
    if (fp == NULL) {
        perror(path);
        regfree(&myregex);
        return EXIT_FAILURE;
    }

    while (fgets(line, sizeof line, fp) != NULL) {
        int match_len;

        if (regexec(&myregex, line, 2, matches, 0) != 0) {
            continue;
        }
        /* rm_so is -1 when the capture group did not participate. */
        if (matches[1].rm_so == -1) {
            continue;
        }

        /*
         * Print exactly the captured slice [rm_so, rm_eo). The length is
         * bounded by the line buffer size, so the cast to int is safe.
         */
        match_len = (int)(matches[1].rm_eo - matches[1].rm_so);
        printf("String: %.*s\n", match_len, line + matches[1].rm_so);
    }

    fclose(fp);
    regfree(&myregex);
    return EXIT_SUCCESS;
}