In C, I would personally create a new string type, which would make implementing those operations much easier. There are many working approaches, but here's the one I'd most likely use:
Code:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* A dynamically allocated string that tracks its own length.
 * string_new() fills in all three fields; string_destroy() frees data
 * and resets the fields to the same values as STRING_INIT.
 */
typedef struct {
size_t size; /* Bytes allocated for data, including room for the '\0' at end */
size_t used; /* Bytes of string content in data, not counting the '\0' at end */
char *data; /* Dynamically allocated buffer; NULL when nothing is allocated */
} string_t;
/* Initializer for a string_t that owns no memory: size 0, used 0, data NULL.
 * The values are positional, so they must follow the field order above. */
#define STRING_INIT { 0, 0, NULL }
The structure describes a dynamically allocated string. The string itself is mutable (modifiable) and can be grown and shrunk as necessary. Although the actual data can even contain NUL bytes ('\0'), a '\0' is always appended at the end so you can use data as a C string, too.
Here's the function to create a new string (followed by a couple of helper functions it uses). It copies length characters from source to the new string, so you can use it both for string_t strings and for normal C strings (via str = string_new(c_string, strlen(c_string));).
Code:
/* Report an allocation failure and terminate the program.
 *
 * Standard output is flushed before the message is written to
 * standard error, then the process exits with status 1.
 * This function does not return.
 */
static void out_of_memory(void)
{
    static const char message[] = "Out of memory error.\n";

    fflush(stdout);
    fputs(message, stderr);
    fflush(stderr);
    exit(1);
}
/* Compute how many bytes to allocate for a string of `length` bytes.
 *
 * The result must be larger than `length`, so that a terminating
 * '\0' can be appended. If strings often grow, this is the place
 * to reserve a larger initial area for each string.
 */
static size_t size_for_length(const size_t length)
{
    const size_t terminator_bytes = 1;

    return terminator_bytes + length;
}
/* Create a new string holding a copy of `length` bytes from `source`.
 *
 * source: bytes to copy; need not be '\0'-terminated and may contain
 *         embedded '\0' bytes. May be NULL, in which case an empty
 *         string is created regardless of `length`.
 * length: number of bytes to copy from source.
 *
 * Returns a string_t that owns freshly allocated memory; release it
 * with string_destroy(). A '\0' is always stored at data[used].
 * If memory cannot be obtained, out_of_memory() is called.
 */
string_t string_new(const char *const source, const size_t length)
{
    /* memcpy() from a NULL pointer is undefined behavior, so a NULL
     * source is treated as having no content at all. */
    const size_t count = source ? length : 0;
    string_t str;

    str.used = count;
    str.size = size_for_length(count);

    /* The buffer must hold the content plus the '\0'. If the size
     * computation wrapped around, the request cannot be satisfied. */
    if (str.size <= count) {
        out_of_memory();
    }

    str.data = malloc(str.size);
    if (!str.data) {
        out_of_memory();
    }

    if (count > 0) {
        memcpy(str.data, source, count);
    }
    str.data[count] = '\0';

    return str;
}
When you replace an existing string, the old one has to be destroyed first, or you'll leak the dynamically allocated memory used by the old string. (When the program exits, all dynamically allocated memory will be released, so there's no need to worry about that; only about when assigning new values over existing ones.)
I like to do that thoroughly, "poisoning" the data, so I can easily detect if I try to reuse the string:
Code:
/* Release the memory owned by *str and reset every field
 * (data to NULL, size and used to 0), so that accidental reuse of a
 * destroyed string is easy to detect.
 * A NULL str is silently ignored.
 */
void string_destroy(string_t *const str)
{
    if (!str)
        return;

    free(str->data);
    str->used = 0;
    str->size = 0;
    str->data = NULL;
}
Here are some string manipulation functions. Each of them returns a new, independent structure containing a copy of the desired part of the source string.
Code:
/* Return a new string holding a copy of one end of the source string.
 *
 *   length > 0:  the first  length bytes of source.
 *   length < 0:  the last  -length bytes of source.
 *   length == 0: an empty string.
 *
 * If the requested part is longer than the source, the whole source
 * is copied instead. The result is an independent copy created with
 * string_new().
 */
string_t string_part(const string_t source, const long length)
{
    const int from_end = (length < 0L);
    size_t len;

    if (length == 0L) {
        return string_new(NULL, 0);
    }

    /* Take the magnitude in unsigned arithmetic: negating LONG_MIN as
     * a signed long overflows, which is undefined behavior. */
    if (from_end) {
        len = (size_t)(0UL - (unsigned long)length);
    } else {
        len = (size_t)length;
    }

    /* Asking for at least as much as there is: copy everything. */
    if (len >= source.used) {
        return string_new(source.data, source.used);
    }

    if (from_end) {
        return string_new(source.data + (source.used - len), len);
    }
    return string_new(source.data, len);
}
/* Return a new string holding a copy of a middle part of source.
 *
 * offset: where the part starts. Zero or positive counts from the
 *         start of the string; negative counts back from the end.
 *         Offsets outside the string are clamped to its start or end.
 * length: maximum number of bytes to copy. Values below 1 yield an
 *         empty string. The result is shorter than length if the
 *         source ends first.
 *
 * The result is an independent copy created with string_new().
 */
string_t string_sub(const string_t source, const long offset, const long length)
{
    size_t off, len;

    if (length < 1L) {
        return string_new(NULL, 0);
    }

    if (offset < 0L) {
        /* Magnitude computed in unsigned arithmetic: -offset would
         * overflow (undefined behavior) when offset is LONG_MIN. */
        const size_t back = (size_t)(0UL - (unsigned long)offset);
        off = (back > source.used) ? 0 : source.used - back;
    } else {
        const size_t forward = (size_t)offset;
        off = (forward > source.used) ? source.used : forward;
    }

    /* off never exceeds source.used, so the subtraction cannot wrap;
     * comparing this way also avoids overflow in off + len. */
    len = (size_t)length;
    if (len > source.used - off) {
        len = source.used - off;
    }

    /* Nothing left to copy; also avoids pointer arithmetic on a
     * source whose data is NULL. */
    if (len == 0) {
        return string_new(NULL, 0);
    }

    return string_new(source.data + off, len);
}
Finally, here is a short example program that illustrates the use of the above. (It uses stdio.h, so make sure #include <stdio.h> is present at the top of the program.)
Code:
/* Parse text as a base-10 long, allowing surrounding whitespace.
 *
 * Returns 1 and stores the result in *value on success. Returns 0,
 * leaving *value untouched, if text contains no number, has anything
 * other than whitespace after the number, or is out of range for long.
 */
static int parse_long(const char *const text, long *const value)
{
    char *end = NULL;
    long parsed;

    /* strtol() reports overflow only through errno, so clear it first. */
    errno = 0;
    parsed = strtol(text, &end, 10);
    if (end == text || errno == ERANGE) {
        return 0;
    }

    /* Permit trailing whitespace, but nothing else. */
    end += strspn(end, " \t\n\v\f\r");
    if (*end != '\0') {
        return 0;
    }

    *value = parsed;
    return 1;
}

/* Example program.
 *
 *   PROGRAM STRING LENGTH          prints string_part(STRING, LENGTH)
 *   PROGRAM STRING OFFSET LENGTH   prints string_sub(STRING, OFFSET, LENGTH)
 *
 * Returns 0 on success, 1 on a usage or argument error.
 */
int main(int argc, char *argv[])
{
    string_t src = STRING_INIT;
    string_t dst = STRING_INIT;
    long offset = 0L, length = 0L;
    int status = 1;

    /* argv[1] is only examined once argc is known to be 3 or 4. */
    if (argc < 3 || argc > 4 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help]\n", argv[0]);
        fprintf(stderr, " %s STRING LENGTH\n", argv[0]);
        fprintf(stderr, " %s STRING OFFSET LENGTH\n", argv[0]);
        fprintf(stderr, "\n");
        return 1;
    }

    /* Create a string_t out of first command-line parameter. */
    src = string_new(argv[1], strlen(argv[1]));

    if (argc < 4) {
        /* STRING LENGTH */
        if (!parse_long(argv[2], &length)) {
            fprintf(stderr, "%s: Invalid length.\n", argv[2]);
            goto done;
        }
        dst = string_part(src, length);
    } else {
        /* STRING OFFSET LENGTH */
        if (!parse_long(argv[2], &offset)) {
            fprintf(stderr, "%s: Invalid offset.\n", argv[2]);
            goto done;
        }
        if (!parse_long(argv[3], &length)) {
            fprintf(stderr, "%s: Invalid length.\n", argv[3]);
            goto done;
        }
        dst = string_sub(src, offset, length);
    }

    printf("Input: '%s' (%lu bytes)\n", src.data, (unsigned long)src.used);
    printf("Result: '%s' (%lu bytes)\n", dst.data, (unsigned long)dst.used);
    status = 0;

done:
    /* The strings are no longer needed, so destroy them. After this,
     * new strings could be assigned to src and dst without leaking
     * memory (as long as those are destroyed afterwards, too).
     * Destroying before exit is not strictly required, since all
     * dynamically allocated memory is released when the program
     * exits; it only matters before reusing a string. */
    string_destroy(&src);
    string_destroy(&dst);
    return status;
}
Examples:
Code:
./example "Some string" 50
Input: 'Some string' (11 bytes)
Result: 'Some string' (11 bytes)
./example "Some string" 6
Input: 'Some string' (11 bytes)
Result: 'Some s' (6 bytes)
./example "Some string" -6
Input: 'Some string' (11 bytes)
Result: 'string' (6 bytes)
./example "Some string" 6 3
Input: 'Some string' (11 bytes)
Result: 'tri' (3 bytes)
./example "Some string" -6 3
Input: 'Some string' (11 bytes)
Result: 'str' (3 bytes)
Feel free to use the above code in whatever ways you wish. I consider it to be public domain. (It might not contain enough creative input to be considered actually copyright-worthy, as a lot of C programmers with similar objectives as I had, would write functionally the same code as above.)