Thread: MIPS file into char* array

  1. #1
    Registered User
    Join Date
    Oct 2012
    Posts
    2

    MIPS file into char* array

    Hey guys I am entirely new to these forums so i'm not sure what you want to know, but i'll try to explain the situation. In my layers file, which you see just under, I am trying to save a file of mips-code onto an array of label-structs. I know for a fact that my initiate_labels works fine, returning a line at a time of the file, where all leading spaces and tabs are removed. In my insertintolabel() I am trying to grab the first token of this line and save it as the opcode. Thereafter i use the reduceOpcode to remove the opcode and leading spaces up until the first register appears in the line. So, if the first line of the file is line="addi $a1, $a2, $a3", my insertintolabel() should so far be able to save "addi" into label[1].opcode, and "a1" into label[1].rs . thereafter reduceOpcode makes line="a1, $a2, $3", but in line 8 of insertintolabel, it just stops working.
    Code:
    #include <stdio.h>
    #include <string.h>
    
    struct label
    {
        char opcode[99];
        char rs[99];
        char rt[99];
        char rd[99];
    };
    
    struct label labels[200];
    
    char line[99];
    
    void reduceOpcode(int linenumber)
    {
        char temp[99];
        int i,j;
        i=j=0;
    
        while(line[i] == labels[linenumber].opcode[i])
        {
            i++;
        }
        while(line[i] == 9 || line[i] == 32 || line[i] == 36 || line[i] == 44)
        {
            i++;
        }
        while(line[i] != 0 && line[i] != 10)
        {
            temp[j] = line[i];
            j++;
            i++;
        }
        temp[j] = 0;
        strcpy(line, temp);
    }
    
    
    
    void insertintolabel(int linenumber)
    {
        char temp[99];
        strcpy(temp, line);
        strcpy(labels[linenumber].opcode, strtok(line, "     "));
        strcpy(line, temp);
        reduceOpcode(linenumber);
        if (line != 0)
        {
            strcpy(labels[linenumber].rs, strtok(line, " $,    "));// THIS IS WHERE IT GOES WRONG !!
        }
    labels[linenumber].opcode, labels[linenumber].rs, labels[linenumber].rt, labels[linenumber].rd);
    }
    
    void initiate_labels(FILE* f)
    {
        char temp[99];
        int linenumber, i, j;
        linenumber = i = j = 0;
        
        while(!feof(f)) //this while loop removes all lines before the .data line in the mips code
        {
            fgets(line,99,f);
            i = firstchar(line);
            if(line[i] == 46)
            {
                while(line[i] != '#' && line[i] != 0 && line[i] != 10)//saves the necessary part of the line into temp
                {
                    temp[j]=line[i];
                    i++;
                    j++;
                }
                temp[j] = 0;
                j=0;
                strcpy(line, temp);
                linenumber = 1;
                insertintolabel(linenumber);//sends the dataline to insertintolabel
                break;
            }
        }
        while(!feof(f))//sends the rest of the file-lines on to insertintolabel, with all leading spaces and tabs removed
        {
            fgets(line, 99, f);
            i = firstchar(line);
            if(i != -1 && line[i] != 35)//fjerner alle tomme linier
            {
            linenumber++;
            j = 0;
            while(line[i] != '#' && line[i] != 0 && line[i] != 10)
            {
                temp[j] = line[i];
                i++;
                j++;
            }
            temp[j] = 0;
            strcpy(line, temp);
            j=0;
            insertintolabel(linenumber);
            }
        }
    }
    
    int firstchar(char* string)//finds the first index of a string that is not a tab or a space
    {
        int i = 0;
        while(string[i] == 32 || string[i] == 9)
        {
            i++;
        }
        
        if (string[i] == 10)
        {
            return -1;
        }
        
        return i;
    }
    
    main()//trying to save a file of mips code into an array, labels, of label structures that contain the opcode, rs, rt, and rd as char*.
    {
        FILE* myfile;
        myfile = fopen("C:\\g24\\test1.asm", "r");
        initiate_labels(myfile);
        fclose(myfile);
    }
    myfile:
    Code:
    # This code does not use any arguments you may call it with. It reads 10 integers from console,
    # performs the calculations and outputs the result containing the same integers.
    
    # This code uses the instructions found in the MIPS-lite set, together with some pseudo-instructions
    # and 'syscall'.
        
        .data
        .align 2
    array:    .space 40
    space:    .asciiz " "
        .text
    main:
        add $a1, $zero, 10
        la $a0, array
        jal readInts
        la $a0, array
        add $a1, $a0, 36
        jal partition
        add $a1, $zero, 10
        la $a0, array
        jal printInts
        add $v0, $zero, 10
        syscall
        
    swap:
        lw $t0, 0($a0)
        lw $t1, 0($a1)
        sw $t1, 0($a0)
        sw $t0, 0($a1)
        jr $ra
    
    partition:
        sub $sp, $sp, 24
        sw   $ra, 20($sp)
        sw   $s5, 16($sp)
        sw   $s4, 12($sp)
        sw   $s3, 8($sp)
        sw   $s1, 4($sp)
        sw   $s0, 0($sp)
    
        move $s0, $a0
        move $s1, $a1
        lw   $s3, 0($s1)
        move $s4, $s0
        sub $s4, $s4, 4
        move $s5, $s0
        
    loop:
        beq  $s5, $s1, exit
        lw   $t0, 0($s5)
        slt  $t1, $t0, $s3
        beq  $t1, $zero, over
        add $s4, $s4, 4
        move $a0, $s4
        move $a1, $s5
        jal  swap
    
    over:
        add $s5, $s5, 4
        j    loop
    
    exit:
        add $a0, $s4, 4
        move $a1, $s1
        jal  swap
    
        add $v0, $s4, 4 
        lw   $s0, 0($sp)
        lw   $s1, 4($sp)
        lw   $s3, 8($sp)
        lw   $s4, 12($sp)
        lw   $s5, 16($sp)
        lw   $ra, 20($sp)
        add $sp, $sp, 24
        jr   $ra
        
    readInts:
        add $v0, $zero, 5
        beq $a1, $zero, endRead
        syscall
        sub $a1, $a1, 1
        sw $v0, ($a0)
        add $a0, $a0, 4
        j readInts
    endRead:
        jr $ra
        
    printInts:
        move $t0, $a0
    print:
        add $v0, $zero, 1
        beq $a1, $zero, endPrint
        lw $a0, ($t0)
        syscall
        sub $a1, $a1, 1
        add $t0, $t0, 4
        la $a0, space
        add $v0, $zero, 4
        syscall
        j print
    endPrint:
        jr $ra
    Any help is greatly appreciated!

  2. #2
    - - - - - - - - oogabooga's Avatar
    Join Date
    Jan 2008
    Posts
    2,808
    Please post code that actually compiles.
    Remember to use a high warning level. For gcc:
    gcc -W -Wall yourprog.c

    I know for a fact that my initiate_labels works fine
    I wouldn't be so sure!

    Run it in a debugger:
    gcc -g -W -Wall yourprog.c
    gdb ./a
    Then in gdb:
    run
    Then when it segfaults:
    bt #this gives a stack backtrace
    That should give you some info on what's going wrong.
    Last edited by oogabooga; 10-05-2012 at 10:59 AM.
    The cost of software maintenance increases with the square of the programmer's creativity. - Robert D. Bliss

  3. #3
    Registered User
    Join Date
    Nov 2010
    Location
    Long Beach, CA
    Posts
    5,909
    This link should give you one big tip as to why your initiate_labels function doesn't work fine: FAQ > Why it's bad to use feof() to control a loop - Cprogramming.com.

    You use a lot of magic numbers. You should consider #defining a constant, instead of scattering 99's and 200's all over your code. Also, use character literals instead of more magic numbers like 32, 9 and 10:
    Code:
    ch == 10 || ch == 9 || ch == 10
    // should be
    ch == ' ' || ch == '\t' || ch == '\n'
    // same goes for 35, 36 and 44, which should be '#' '$' and ',' and any other magic number that's supposed to represent a character
    You should then use the #defines for 99 and 200 to check and make sure you never copy past the end of your string. Perhaps use strncpy instead, just remember to manually null-terminate the destination string when you're done, since it doesn't guarantee that. Also, make sure you don't go off the end of your labels array.

    strtok may return NULL, so it's not wise to pass the result of that directly to strcpy without checking it first, you may get a seg fault.

    There's probably more, but I got tired of reading your code. The indentation is a little messy, which makes it hard to follow. And if it's hard to follow, it's easy to make mistakes, but hard to find and fix them.
    Last edited by anduril462; 10-05-2012 at 11:51 AM.

  4. #4
    - - - - - - - - oogabooga's Avatar
    Join Date
    Jan 2008
    Posts
    2,808
    Adding one more thing in the same vein as anduril's critique, you shouldn't type actual tabs into strings (which I assume you did in the strtok format strings). Use \t instead.
    The cost of software maintenance increases with the square of the programmer's creativity. - Robert D. Bliss

  5. #5
    Ticked and off
    Join Date
    Oct 2011
    Location
    La-la land
    Posts
    1,728
    I recommend against the OP's approach. It is too fragile, and has issues with for example quoted strings.

    Instead, I recommend writing an input function (preferably similar to POSIX getline()), which additionally skips comments, accepts any newline convention, skips leading and trailing whitespace, keeps a count of lines, and only returns non-comment lines. Note that in this one, I deliberately "hardcode" the LWS() macro to ASCII whitespace, since isspace() etc. are locale-specific.
    Code:
    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>
    #include <errno.h>
    
    #define   LWS(c) ((c) == '\t' || (c) == '\v' || (c) == '\f' || (c) == ' ')
    
    /* Read an input line from the specified file,
     * skipping empty and comment lines,
     * ignoring leading and trailing whitespace,
     * maintaining the newline count,
     * using any newline convention.
     *
     * The input buffer will be dynamically reallocated
     * when necessary.
     *
     * The return value is positive line length if success,
     * zero if end of file,
     * or -1 with errno set if an error occurs.
    */
    ssize_t fline(char **const lineptr, size_t *const sizeptr, FILE *const in, unsigned long *const newlines)
    {
    	char   *line, *temp;
    	size_t  size;
    	size_t  used = 0;
    	int     c;
    
    	/* Line pointer pointer, size pointer, and file handle are all required. */
    	if (!lineptr || !sizeptr || !in) {
    		errno = EINVAL;
    		return (ssize_t)-1;
    	}
    
    	/* Cache the line buffer and size. This is just an optimization. */
    	line = *lineptr;
    	size = *sizeptr;
    
    	/* Check if the file is in error state. */
    	if (ferror(in)) {
    		errno = EIO;
    		return (ssize_t)-1;
    	}
    
    	/* End-of-file already encountered? */
    	if (feof(in)) {
    		errno = 0;
    		return (ssize_t)0;
    	}
    
    	c = getc(in);
    
    	/* Retry loop. */
    	while (!used) {
    
    		/* Scan for the start of a non-comment line. */
    		while (c != EOF)
    			if (c == '\n') {		/* Newline: LF or LF CR */
    				if (newlines)
    					(*newlines)++;
    				c = getc(in);
    				if (c == '\r')
    					c = getc(in);
    
    			} else
    			if (c == '\r') {		/* Newline: CR or CR LF */
    				if (newlines)
    					(*newlines)++;
    				c = getc(in);
    				if (c == '\n')
    					c = getc(in);
    
    			} else
    			if (c == '#' || c == ';') {	/* Comment line */
    
    				/* Skip the rest of this line */
    				while (c != EOF && c != '\n' && c != '\r')
    					c = getc(in);
    
    			} else
    			if (LWS(c)) {			/* Whitespace */
    				c = getc(in);
    
    			} else
    				break;
    
    		/* No more input? */
    		if (c == EOF) {
    			errno = 0;
    			return (ssize_t)0;
    		}
    
    		/* Read loop. */
    		while (c != EOF && c != '\n' && c != '\r') {
    
    			/* Grow the line buffer if necessary,
    			 * so we can append at least c and '\0'. */
    			if (used + 1 >= size) {
    
    				/* Grow to next multiple of 1024. */
    				size = (used | 1023) + 1;
    				temp = realloc(line, size);
    				if (!temp) {
    					errno = ENOMEM;
    					return (ssize_t)-1;
    				}
    
    				line = temp;
    
    				/* Update the actual values given to the function. */
    				*lineptr = line;
    				*sizeptr = size;
    			}
    
    			line[used++] = c;
    			c = getc(in);
    		}
    
    		/* Remove trailing whitespace. */
    		while (used > 0 && LWS(line[used - 1]))
    			used--;
    
    		/* Add trailing EOS to the line. */
    		line[used] = '\0';
    	}
    
    	/* The last read character has not been handled.
    	 * It is a newline, or part of a newline.
    	 * Push it back into the stream, so that it is handled
    	 * at the start of the next fline() call.
    	 * This way we also keep track of the line number for
    	 * the current line correctly.
    	*/
    	if (c != EOF)
    		ungetc(c, in);
    
    	return (ssize_t)used;
    }
    The fline() above is designed to work with line-oriented data, like assembly, configuration data, or simple scripting languages. While normally I'd recommend keeping the two functionalities (reading complete lines from a file, and removing extra whitespace and comment lines) separate, in this particular case it makes the overall code simpler when combined like this. Consider this a special case, normally functions should be split into simpler subfunctions.

    Tokenizing the string can be done using strtok() , except for quoted data containing whitespace or separators: strtok() just does not support those. Instead, I've found it easier to use context-specific opportunistic tokenizers. Context-specific meaning that the caller can pick from more than one function depending on what they expect, and opportunistic meaning the tokenizer can "fail", state that the next token is not suitable, without modifying the underlying string. The following function takes a pointer to a pointer to the current position in the string (both of which the function modifies), and returns either NULL, or the pointer to a label:
    Code:
    /* Extract label from the string.
     * Note: the string is modified, and the pointer advanced.
    */
    char *str_label(char **const string)
    {
    	if (string && *string) {
    		char *const origin = *string;
    		char       *str = *string;
    
    		/* If the label starts with # or ; it ends the string. */
    		if (*str == '#' || *str == ';') {
    			*origin = '\0';
    			errno = 0;
    			return NULL;
    		}
    
    		/* A label must start with a letter or . or _. */
    		if ((*str >= 'A' && *str <= 'Z') ||
    		    (*str >= 'a' && *str <= 'z') ||
    		    (*str == '_' || *str == '.')) {
    			str++;
    
    			/* Labels contain letters, digits, @, or _. */
    			while ((*str >= '0' && *str <= '9') ||
    			       (*str >= 'A' && *str <= 'Z') ||
    			       (*str >= 'a' && *str <= 'z') ||
    			       (*str == '@' || *str == '_' || *str == '.'))
    				str++;
    
    		} else {
    			errno = 0;
    			return NULL;
    		}
    
    		/* A label must be immediately followed by a :. */
    		if (*str != ':') {
    			errno = 0;
    			return NULL;
    		}
    
    		/* Terminate label, replacing the :, */
    		*(str++) = '\0';
    
    		/* skip trailing whitespace, */
    		while (LWS(*str))
    			str++;
    
    		/* update string position, */
    		*string = str;
    
    		/* and return a pointer to the label. */
    		return origin;
    
    	}
    
    	/* Invalid parameters. */
    	errno = EINVAL;
    	return NULL;
    }
    For everything else I saw in the example file, a generic token extractor will suffice. Quoted strings are kept intact, except that for doublequoted strings a backslash escapes one character (allowing \" et cetera within the string). This one saves the trailing separator: either a comma, or if no comma the smallest-code whitespace character included in the separator. That way it will be space if there were only trailing spaces, but a tab if the trailing whitespace contained a tab.
    Code:
    /* Extract token from the string.
     * Saves the end separator (comma, tab, or space) to *ends,
     * in that order.
     * Note: the string is modified, and the pointer advanced.
    */
    char *str_token(char **const string, char *const separator)
    {
    	if (separator)
    		*separator = '\0';
    
    	if (string && *string) {
    		char *const origin = *string;
    		char       *str = *string;
    
    		/* If a token starts with # or ; it ends the string. */
    		if (*str == '#' || *str == ';') {
    			*origin = '\0';
    			errno = 0;
    			return NULL;
    		}
    
    		if (*str == '"') {
    			/* Doublequoted. Skips \-escapes. */
    
    			str++;
    			while (*str && *str != '"')
    				if (*str == '\\' && str[1])
    					str += 2;
    				else
    					str++;
    
    			if (*str != '"') {
    				errno = 0;
    				return NULL;
    			}
    			str++;
    
    		} else
    		if (*str == '\'') {
    			/* Single quoted */
    
    			str++;
    			while (*str && *str != '\'')
    				str++;
    
    			if (*str != '\'') {
    				errno = 0;
    				return NULL;
    			}
    			str++;
    
    		} else {
    			/* Space/comma-separated word */
    			while (*str && *str != ',' &&
    			       !(*str == '\t' || *str == '\v' ||
    			         *str == '\f' || *str == ' '))
    				str++;
    		}
    
    		/* End-of-string, comma or a whitespace is required.
    		 * Terminate the parameter by consuming them. */
    		if (*str) {
    			char *const end = str;
    			char        space = ' ';
    
    			if (*str != ',' && !LWS(*str)) {
    				errno = 0;
    				return NULL;
    			}
    
    			/* Skip whitespace, saving the smallest
    			 * character code in space. */
    			while (LWS(*str)) {
    				if (*str < space)
    					space = *str;
    				str++;
    			}
    
    			/* Do we have a comma? */
    			if (*str == ',') {
    				str++;
    
    				/* Skip whitespace. */
    				while (LWS(*str))
    					str++;
    
    				/* We had a comma. */
    				if (separator)
    					*separator = ',';
    			} else {
    
    				/* No comma, use the smallest-code
    				 * whitespace character we saw. */
    				if (separator)
    					*separator = space;
    			}
    
    			/* Terminate the token. */
    			*end = '\0';
    		}
    
    		/* Update the string, */
    		*string = str;
    
    		/* and return the parameter. */
    		return origin;
    	}
    
    	/* Invalid parameters. */
    	errno = EINVAL;
    	return NULL;
    }
    Here is an example main() that seems to tokenize the example input using the above code correctly. Just supply the input in standard input. I did not bother to check if the rules (especially characters allowed in labels) matches the MIPS assembly syntax, though; I'll leave that to you.
    Code:
    int main(void)
    {
    	char          *data = NULL;
    	size_t         size = 0;
    	unsigned long  lineno = 1UL;
    	ssize_t        len;
    	char          *curr, *part, separator;
    
    	while (1) {
    
    		/* Get a new (non-comment, non-empty) line of input */
    		len = fline(&data, &size, stdin, &lineno);
    
    		/* End of input? */
    		if (!len)
    			break;
    
    		/* Error? */
    		if (len < (ssize_t)0) {
    			fprintf(stderr, "Standard input: line %lu: %s.\n", lineno, strerror(errno));
    			return 1;
    		}
    
    		printf("Line %lu: \"%s\" (%lu bytes)\n", lineno, data, (unsigned long)len);
    
    		curr = data;
    
    		/* Each line may begin with a label. */
    		if ((part = str_label(&curr)))
    			printf("\tLabel: %s\n", part);
    
    		/* Parse tokens. If the token was followed by a comma, say so. */
    		while (*curr && (part = str_token(&curr, &separator))) {
    			if (separator == ',')
    				printf("\tToken: %s\n\tComma\n", part);
    			else
    				printf("\tToken: %s\n", part);
    		}
    
    		/* Was anything left unparsed? */
    		if (*curr)
    			printf("\tUnparsed: \"%s\"\n", curr);
    	}
    
    	/* Release the line buffer. */
    	free(data);
    	size = 0;	
    
    	return 0;
    }
    To interpret the assembly instructions, you could replace the above main() with a function that generates some kind of a tree from the input lines -- linked lists being much better suited for this than arrays.

    Unlike strtok(), all of the above functions are thread-safe. (You can call them from different threads at the same time, as long as you work on different files and string buffers.)

  6. #6
    - - - - - - - - oogabooga's Avatar
    Join Date
    Jan 2008
    Posts
    2,808
    Quote Originally Posted by Nominal Animal View Post
    I'll leave that to you
    Nice of you to leave something to the OP.
    The cost of software maintenance increases with the square of the programmer's creativity. - Robert D. Bliss

  7. #7
    Registered User
    Join Date
    Oct 2012
    Posts
    2
    wauw thanks you guys, I don't have time to check out your answers atm, but am so thankful for all the responses!

  8. #8
    Ticked and off
    Join Date
    Oct 2011
    Location
    La-la land
    Posts
    1,728
    Quote Originally Posted by oogabooga View Post
    Nice of you to leave something to the OP.
    My, aren't you especially snide this morning.

    I'd wager a favourite beer that mehid is working on a MIPS assembly compiler or simulator.

    Getting the MIPS assembly into an array is useless. First, you'll stall on tricky issues like getting quoted strings correctly tokenized. After you get all that done, you'll notice you have a lot more code you started with to work around the corner cases. Then, when you start to try interpreting it all, you find out the array was the absolutely worst choice, and makes everything overly complicated. In particular, code optimization and rearranging is practically impossible, whereas using a tree structure (similar to an abstract syntax tree) it is quite straightforward. For emulation, the AST is enough.

    I understand you think I gave the mehid a full solution. If you were to stop to think, you'd realize it is not. First, it does not answer what mehid asked: it does not generate any array. Second, I clearly stated the direction mehid is taking is fragile. In this post I expand on why. Pointing out another direction (which is what I think you were trying to snidely suggest would have been the correct behaviour here) would not only be frustrating to mehid, but also ultimately pointless: just saying to somebody else that they should take a totally different approach, without showing at least an example to get them as far on that other tack as they are right now on their current one, is not going to sway anyone.

    I also realize that if one is going to write a compiler, most people recommend using bison or flex or similar parser generators. For real applications, they are usually the best choice. For learning, as I think mehid is doing here, I do recommend writing the parser yourself, because that helps you understand how everything works, and when you run into issues, you have a better picture of where the correct place to start fixing it is.

    Finally, my solution does not conform to MIPS assembly. It does parse the example file, but if you want it to work for any MIPS assembly, you'll have to delve in into the code I showed, understand what it does, and adjust it in a couple of places to ensure it handles all MIPS assembly correctly. If you do that, and you're still learning how to do it, you'll probably have two or three enlightening moments while doing that, when you realize the simple tricks left in the code. (As an example, consider how fline() reallocates memory "early", so the function always knows it can add both the current character, and an extra '\0' at end to terminate the current string. This means there is only place where the data array is reallocated.)

    In my view, the code I posted is not a solution, but an enticement to learn the stuff that you might normally avoid, because it does not look like it would give as good rewards as the array-based "simple" approach.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Replies: 12
    Last Post: 08-06-2012, 06:13 AM
  2. Read (BIG) File into char array
    By doia in forum C Programming
    Replies: 2
    Last Post: 03-19-2010, 05:26 PM
  3. Write text file to char array
    By mjh in forum C Programming
    Replies: 2
    Last Post: 04-02-2007, 08:11 AM
  4. Read File To Char Array with Null char init
    By MicroFiend in forum Windows Programming
    Replies: 1
    Last Post: 10-28-2003, 06:18 PM
  5. Replies: 3
    Last Post: 04-25-2003, 01:30 PM

Tags for this Thread