I'm not sure about using strstr directly as "strstr(source, *sorted_dict)", since I think it would only find an occurrence of the complete "source" string in the dictionary, whereas what I want is to find substrings of source.
strstr("europeaninvestmentbank", "investment") will return &"europeaninvestmentbank"[8] because that is where the substring was found. It will return NULL if the substring was not found. Assuming you have the complete source string in memory and only search for the longest substrings first, strstr() will do what you are asking for.
Code:
#include <stdio.h>
#include <string.h>
/* Candidate words, listed longest first; the trailing NULL marks the end. */
const char *dictionary[] = { "investment", "european", "bank", NULL };
/*
 * Print the first dictionary word that occurs anywhere inside the source
 * string. The first hit is the longest one only if the dictionary lists its
 * words longest first. Prints nothing when no word matches.
 */
int main()
{
    char const* source = "europeaninvestmentbank";

    // The dictionary ends with a NULL entry, so the loop must test the entry
    // the cursor points at (*sorted_dict). The cursor itself is never NULL;
    // testing it would hand the NULL sentinel to strstr() and then run off
    // the end of the array whenever no word matches.
    for (char const** sorted_dict = dictionary; *sorted_dict != NULL; ++sorted_dict)
    {
        if (strstr(source, *sorted_dict) != NULL)
        {
            printf("Longest meaningful substring: '%s'\n", *sorted_dict);
            break;
        }
    }

    return 0;
}
And I'm currently stuck at the very first step: importing the dictionary text into a 2D array of char. realloc() gives an "invalid next size" error every now and then! No clue what's happening.
Any kind of buffer overflow on a pointer to heap memory can mess up the heap and cause weird errors. Try this code on your dictionary file and see if the same thing happens:
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TEST
int length_order(void const* lhs, void const* rhs);
/*
 * Read whitespace-separated words from "test.txt" into a dynamically grown
 * array of heap-allocated strings, sort them with length_order, optionally
 * dump them (when TEST is defined), and release everything.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if the file cannot be opened
 * or an allocation fails.
 */
int main()
{
    FILE* fp = fopen("test.txt", "r");
    if (fp == NULL)
    {
        perror("test.txt");
        return EXIT_FAILURE;
    }

    char** dictionary = NULL;
    char word[1024];
    size_t size = 0;
    int status = EXIT_SUCCESS;

    // Populate the dictionary; %1023s leaves room for the terminator in word
    while (fscanf(fp, "%1023s", word) == 1)
    {
        // Grow by one slot. Keep the old pointer until realloc succeeds so the
        // words stored so far can still be freed on failure.
        char** grown = realloc(dictionary, (size + 1) * sizeof *dictionary);
        if (grown == NULL)
        {
            status = EXIT_FAILURE;
            break;
        }
        dictionary = grown;

        size_t bytes = strlen(word) + 1;
        char* copy = malloc(bytes);
        if (copy == NULL)
        {
            status = EXIT_FAILURE;
            break;
        }
        memcpy(copy, word, bytes);

        // Only count the slot once it holds a valid string
        dictionary[size++] = copy;
    }
    fclose(fp);

    if (status != EXIT_SUCCESS)
    {
        fputs("out of memory while reading dictionary\n", stderr);
    }
    else if (size > 0)
    {
        // Sort the dictionary by string length (skipped for an empty file,
        // where dictionary is still NULL and must not be passed to qsort)
        qsort(dictionary, size, sizeof *dictionary, length_order);
#if defined(TEST)
        // Dump the dictionary for testing
        for (size_t x = 0; x < size; ++x)
        {
            puts(dictionary[x]);
        }
#endif
    }

    // Clean up the dictionary
    for (size_t x = 0; x < size; ++x)
    {
        free(dictionary[x]);
    }
    free(dictionary);

    return status;
}
/*
 * qsort() comparator ordering C strings by length, longest first.
 *
 * lhs and rhs each point at an array element, i.e. at a pointer to a
 * NUL-terminated string. Returns a negative value when the left string is
 * longer, a positive value when the right string is longer, and 0 when both
 * have the same length.
 */
int length_order(void const* lhs, void const* rhs)
{
    size_t const left = strlen(*(char const* const*)lhs);
    size_t const right = strlen(*(char const* const*)rhs);

    // Compare instead of subtracting: a size_t difference wraps around and
    // narrowing it to int can yield the wrong sign for large length gaps.
    return (right > left) - (right < left);
}