-
Hm... what are some other interesting ways...
1) Open in binary mode.
2) Make a "start" marker, initially set to zero.
3) Scan through until you find the newline, incrementing a counter.
4) Seek back to the start marker.
5) Malloc a block based on your counter's size (allow for null if you feel like it).
6) fread into your block.
7) Skip the newline as it appears in your file. (\n) (\n\r) (\r)
8) Set the start marker to here and reset your counter.
9) Repeat until bored.
Quzah.
-
On my NetBSD machine, there is a function called fgetln(). I'm not sure whether it does the indeterminate-length allocation for you, though.
I haven't written the algorithm of how to read in a line using strcat() and strcpy(), but I suppose you could use a loop and keep on reading chunks at a time, allocate a new string with the size of the chunk just read plus the original size of the string, use strcpy() to copy the old string to the new buffer, and then use strcat() to append the buffer to the string just read in. Would this be more efficient than using realloc() and malloc() considering that realloc() and malloc() are system calls?
-
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * One node of a singly linked list of line fragments.
 * A long input line is stored as a chain of these nodes, in reading order.
 */
typedef struct foo_tag {
/* fragment text: fgets() stores at most 9 characters plus the terminating NUL here */
char buff[10];
/* next fragment of the same line; newnode() initialises this to NULL */
struct foo_tag *next;
} foo;
/*
 * Allocate a single list node with its next pointer cleared.
 * The buff member is left uninitialised; the caller is expected to fill it.
 * Returns NULL when malloc() fails.
 */
foo *newnode ( void ) {
    foo *node = malloc ( sizeof *node );

    if ( node == NULL ) {
        return NULL;
    }
    node->next = NULL;
    return node;
}
/*
 * Read one line of any length from fp as a chain of fragments.
 *
 * Fragments are filled with fgets() until one of them contains '\n'
 * (the newline is kept in that fragment), input runs out, or a node
 * cannot be allocated.  Returns the first fragment, or NULL if nothing
 * was read.  The caller owns the returned list.
 */
foo *read_long_line ( FILE *fp ) {
    foo *first = NULL;
    foo **link = &first;    /* slot where the next fragment gets attached */
    foo *node = newnode();

    while ( node != NULL ) {
        if ( fgets( node->buff, sizeof node->buff, fp ) == NULL ) {
            break;          /* EOF or error: this node was never used */
        }
        *link = node;
        link = &node->next;
        if ( strchr( node->buff, '\n' ) != NULL ) {
            node = NULL;    /* line complete, nothing left to discard */
            break;
        }
        node = newnode();   /* no newline yet - go round again */
    }
    free( node );           /* drops the spare node, if there is one */
    return first;
}
/* Print every fragment of the list on its own line, wrapped in single quotes. */
void print_long_line ( foo *line ) {
    foo *frag;

    for ( frag = line; frag != NULL; frag = frag->next ) {
        printf( "'%s'\n", frag->buff );
    }
}
/* Demo: read one line of arbitrary length from stdin and show its fragments. */
int main ( ) {
    print_long_line( read_long_line( stdin ) );
    return 0;
}
$ ./a.exe
hey there, what do you think of this long line reading function?
'hey there'
', what do'
' you thin'
'k of this'
' long lin'
'e reading'
' function'
'?
'
Copying the linked list of line fragments to a single large buffer, and freeing the list are left as easy exercises for the reader :)
-
>Would this be more efficient than using realloc() and malloc() considering that realloc() and malloc() are system calls?
Would it matter at all since device I/O is about as slow as it gets? If you want you can use a linked list of buffers and the efficiency of your list building code, string building code, and list destruction code put together would probably pale in comparison to the time consumed by the I/O itself.
[edit]
Hmm, Salem and I had the same idea at roughly the same time...
[/edit]
-
Salem, that looks good. But is what Quzah suggested a better option?
-
>But is what Quzah suggested a better option?
Try them both and see.
-
I'm doing what Quzah suggested, and I'm noticing that '\n' is added to every file. This is evident when I run my program on a one line file and print out something whenever '\n' is encountered (I'm reading one character at a time). Does that happen with everyone else? That doesn't seem right to me.
-
You really can't ask whether Quzah's or Salem's idea is the better one, because "better" is subjective: it depends on what you think matters. Until you decide on the parameters, there is no way to determine the better algorithm.
-
I wasn't suggesting it, so much as simply providing yet one more way to do it. The only reason I posted is to show that there are many ways to do something. Not necessarily that it is a good way, but simply that the way itself exists.
"The Philosophies of C", Page 42, by Quzah.
-
>"The Philosophies of C", Page 42, by Quzah.
Can I get a copy? :D
-
But why is there a newline character at the end of a one-line file as per my last post?
-
Because that's part of the definition of a text file.
Variable length records delimited by newlines
-
Here's what I came up with. I'm not sure if my implementation of the seek algorithm is the best.
list.c:
Code:
#include <stdio.h>
#include <stdlib.h>
/*
 * One node of a singly linked list of line fragments.
 * A long input line is stored as a chain of these nodes, in reading order.
 */
typedef struct foo_tag {
/* fragment text: fgets() stores at most 9 characters plus the terminating NUL here */
char buff[10];
/* next fragment of the same line; newnode() initialises this to NULL */
struct foo_tag *next;
} foo;
/*
 * Allocate a single list node with its next pointer cleared.
 * The buff member is left uninitialised; the caller is expected to fill it.
 * Returns NULL when malloc() fails.
 */
foo *newnode ( void ) {
    foo *node = malloc ( sizeof *node );

    if ( node == NULL ) {
        return NULL;
    }
    node->next = NULL;
    return node;
}
/*
 * Read one line of any length from fp as a chain of fragments.
 *
 * Fragments are filled with fgets() until one of them contains '\n',
 * input runs out, or a node cannot be allocated.  The newline itself is
 * removed from the last fragment.
 *
 * fp        - stream to read from.
 * num_nodes - out: number of nodes in the returned list (0 when the
 *             function returns NULL).  A spare node that was allocated
 *             but never filled is not counted.
 *
 * Returns the first fragment, or NULL if nothing was read.  The caller
 * owns the returned list and must free every node.
 */
foo *read_long_line ( FILE *fp, int *num_nodes ) {
    foo *head = NULL, *tail = NULL;
    foo *this;
    char *eol = NULL;

    *num_nodes = 0;
    while ( eol == NULL && ( this = newnode() ) != NULL ) {
        if ( fgets( this->buff, sizeof this->buff, fp ) == NULL ) {
            free( this );   /* EOF or error: this node was never used */
            break;
        }
        if ( head == NULL ) {
            head = this;
        } else {
            tail->next = this;
        }
        tail = this;
        (*num_nodes)++;     /* count only nodes that are actually linked in */

        eol = strchr( this->buff, '\n' );
        if ( eol != NULL ) {
            *eol = '\0';    /* replace the newline with a NUL terminator */
        }
    }
    return head;
}
/* Print every fragment of the list on its own line, wrapped in single quotes. */
void print_long_line ( foo *line ) {
    foo *frag;

    for ( frag = line; frag != NULL; frag = frag->next ) {
        printf( "'%s'\n", frag->buff );
    }
}
/*
 * Concatenate the fragments of a line into one newly allocated string.
 *
 * line      - first fragment of the list (may be NULL, giving "").
 * num_nodes - number of nodes in the list; it sizes the result buffer,
 *             so it must not be smaller than the real node count.
 *
 * Returns a NUL-terminated string the caller must free(), or NULL if
 * num_nodes is negative or the allocation fails.
 */
char *pack_line(foo *line, int num_nodes)
{
    /* each fragment holds at most sizeof buff - 1 characters plus its NUL */
    const size_t per_node = sizeof(line->buff) - 1;
    size_t used = 0;
    char *ln;

    if (num_nodes < 0)
        return NULL;

    ln = malloc((size_t) num_nodes * per_node + 1);
    if (ln == NULL)
        return NULL;

    for (; line != NULL; line = line->next)
    {
        size_t length = strlen(line->buff);

        memcpy(ln + used, line->buff, length);
        used += length;
    }
    ln[used] = '\0';
    return ln;
}
/*
 * Benchmark driver: prints every line of the file named by argv[1],
 * ten times over.  Returns 0 on success, 1 on a usage error, when the
 * file cannot be opened, or when a line cannot be packed.
 *
 * NOTE(review): the fragment list returned by read_long_line() is not
 * freed here, so every line read leaks its nodes.
 */
int main (int argc, char **argv) {
    FILE *fp;
    int num_nodes;
    char *line;
    int i;
    int status = 0;
    foo *myline;

    if (argc < 2)
    {
        fprintf(stderr, "usage: %s file\n", argc > 0 ? argv[0] : "list");
        return 1;
    }
    for(i = 0; i < 10 && status == 0; i++)
    {
        fp = fopen(argv[1], "r");
        if (fp == NULL)
        {
            perror(argv[1]);
            return 1;
        }
        while(status == 0 && (myline = read_long_line( fp, &num_nodes )))
        {
            line = pack_line(myline, num_nodes);
            if (line == NULL)
            {
                /* do not hand a null pointer to printf("%s") */
                fprintf(stderr, "out of memory\n");
                status = 1;
            }
            else
            {
                printf("%s\n", line);
                free(line);
            }
        }
        fclose(fp);
    }
    return status;
}
seek.c:
Code:
#include <stdio.h>
/*
 * Read the next line from a seekable stream into a buffer of exactly the
 * right size.
 *
 * The line is scanned once to measure it, the stream is moved back to the
 * start of the line, and the characters are then read into a freshly
 * allocated buffer.  The newline is consumed but not stored.  A final line
 * that has no trailing newline is returned as well.
 *
 * Returns a NUL-terminated string the caller must free(), or NULL at end
 * of file, on a read/seek error (including a stream that cannot report
 * its position), or when the allocation fails.
 *
 * NOTE(review): POSIX <stdio.h> declares its own getline() with a different
 * signature; this name may clash when POSIX extensions are enabled.
 */
char *getline(FILE *fp)
{
    char *line;
    long start;
    size_t length = 0;
    int ch;

    start = ftell(fp);              /* remember where this line begins */
    if (start < 0)
        return NULL;

    /* first pass: measure the line */
    while ((ch = fgetc(fp)) != EOF && ch != '\n')
        length++;

    /* nothing left to read, or the scan was cut short by an error */
    if (ch == EOF && (length == 0 || ferror(fp)))
        return NULL;

    line = malloc(length + 1);
    if (line == NULL)
        return NULL;

    /* second pass: go back to the saved position and copy the characters */
    if (fseek(fp, start, SEEK_SET) != 0 ||
        fread(line, 1, length, fp) != length)
    {
        free(line);
        return NULL;
    }
    line[length] = '\0';

    if (ch == '\n')
        (void) fgetc(fp);           /* step over the newline */
    return line;
}
/*
 * Benchmark driver: prints every line of the file named by argv[1],
 * ten times over.  Returns 0 on success, 1 on a usage error or when the
 * file cannot be opened.
 */
int main(int argc, char *argv[])
{
    FILE *fp;
    char *line;
    int i;

    if (argc < 2)
    {
        fprintf(stderr, "usage: %s file\n", argc > 0 ? argv[0] : "seek");
        return 1;
    }
    for(i = 0; i < 10; i++)
    {
        fp = fopen(argv[1], "r");
        if (fp == NULL)
        {
            perror(argv[1]);
            return 1;
        }
        while((line = getline(fp)))
        {
            printf("%s\n", line);
            free(line);
        }
        fclose(fp);
    }
    return 0;   /* success */
}
Results:
ymalik-> gcc -o seek seek.c
ymalik-> gcc -o list list.c
ymalik-> /usr/bin/time ./seek b
...
9.96 real 0.88 user 0.25 sys
ymalik-> /usr/bin/time ./list b
...
9.74 real 0.64 user 0.25 sys
-
I realized I didn't free the linked list.
-
I fixed the memory leak. The linked list version is still a little faster.
list.c
Code:
#include <stdio.h>
#include <stdlib.h>
/*
 * One node of a singly linked list of line fragments.
 * A long input line is stored as a chain of these nodes, in reading order.
 */
typedef struct foo_tag {
/* fragment text: fgets() stores at most 9 characters plus the terminating NUL here */
char buff[10];
/* next fragment of the same line; newnode() initialises this to NULL */
struct foo_tag *next;
} foo;
/*
 * Allocate a single list node with its next pointer cleared.
 * The buff member is left uninitialised; the caller is expected to fill it.
 * Returns NULL when malloc() fails.
 */
foo *newnode ( void ) {
    foo *node = malloc ( sizeof *node );

    if ( node == NULL ) {
        return NULL;
    }
    node->next = NULL;
    return node;
}
/*
 * Read one line of any length from fp as a chain of fragments.
 *
 * Fragments are filled with fgets() until one of them contains '\n',
 * input runs out, or a node cannot be allocated.  The newline itself is
 * removed from the last fragment.
 *
 * fp        - stream to read from.
 * num_nodes - out: number of nodes in the returned list (0 when the
 *             function returns NULL).  A spare node that was allocated
 *             but never filled is not counted.
 *
 * Returns the first fragment, or NULL if nothing was read.  The caller
 * owns the returned list and must free every node.
 */
foo *read_long_line ( FILE *fp, int *num_nodes ) {
    foo *head = NULL, *tail = NULL;
    foo *this;
    char *eol = NULL;

    *num_nodes = 0;
    while ( eol == NULL && ( this = newnode() ) != NULL ) {
        if ( fgets( this->buff, sizeof this->buff, fp ) == NULL ) {
            free( this );   /* EOF or error: this node was never used */
            break;
        }
        if ( head == NULL ) {
            head = this;
        } else {
            tail->next = this;
        }
        tail = this;
        (*num_nodes)++;     /* count only nodes that are actually linked in */

        eol = strchr( this->buff, '\n' );
        if ( eol != NULL ) {
            *eol = '\0';    /* replace the newline with a NUL terminator */
        }
    }
    return head;
}
/* Print every fragment of the list on its own line, wrapped in single quotes. */
void print_long_line ( foo *line ) {
    foo *frag;

    for ( frag = line; frag != NULL; frag = frag->next ) {
        printf( "'%s'\n", frag->buff );
    }
}
/*
 * Concatenate the fragments of a line into one newly allocated string.
 *
 * line      - first fragment of the list (may be NULL, giving "").
 * num_nodes - number of nodes in the list; it sizes the result buffer,
 *             so it must not be smaller than the real node count.
 *
 * Returns a NUL-terminated string the caller must free(), or NULL if
 * num_nodes is negative or the allocation fails.
 */
char *pack_line(foo *line, int num_nodes)
{
    /* each fragment holds at most sizeof buff - 1 characters plus its NUL */
    const size_t per_node = sizeof(line->buff) - 1;
    size_t used = 0;
    char *ln;

    if (num_nodes < 0)
        return NULL;

    ln = malloc((size_t) num_nodes * per_node + 1);
    if (ln == NULL)
        return NULL;

    for (; line != NULL; line = line->next)
    {
        size_t length = strlen(line->buff);

        memcpy(ln + used, line->buff, length);
        used += length;
    }
    ln[used] = '\0';
    return ln;
}
/*
 * Benchmark driver: prints every line of the file named by argv[1],
 * ten times over, freeing each line's fragment list after use.
 * Returns 0 on success, 1 on a usage error, when the file cannot be
 * opened, or when a line cannot be packed.
 */
int main (int argc, char **argv) {
    FILE *fp;
    int num_nodes;
    char *line;
    int i;
    int status = 0;
    foo *myline, *head;

    if (argc < 2)
    {
        fprintf(stderr, "usage: %s file\n", argc > 0 ? argv[0] : "list");
        return 1;
    }
    for(i = 0; i < 10 && status == 0; i++)
    {
        fp = fopen(argv[1], "r");
        if (fp == NULL)
        {
            perror(argv[1]);
            return 1;
        }
        while(status == 0 && (myline = read_long_line( fp, &num_nodes )))
        {
            line = pack_line(myline, num_nodes);
            if (line == NULL)
            {
                /* do not hand a null pointer to printf("%s") */
                fprintf(stderr, "out of memory\n");
                status = 1;
            }
            else
            {
                printf("%s\n", line);
                free(line);
            }
            /* release the fragment list; fetch next before freeing a node */
            while(myline)
            {
                head = myline->next;
                free(myline);
                myline = head;
            }
        }
        fclose(fp);
    }
    return status;
}