Thread: Compressing problem

  1. #1
    Registered User
    Join Date
    Sep 2009
    Posts
    11

    Compressing problem

    Hi

    I'm currently writing a program that can take a file of ascii symbols and compress each symbol into 2 bits, and then decompress it again and print out the file. My decompress method works 100% but my compress method is writing out what seems to be spaces inbetween some characters, so that I get an obscured version of the original ascii image, but there is no doubt about it that the image is 75% correct IE you see that its semi correct. The problem probably lies in something I've written in the compress algorithm, so I'm gonna post both the decompress and compress ones and hopefully you can help me out. Thanks in advance

    Decompress:

    Code:
    void decompress(char a)
    {
    
    	char bits[4];
    	int k = 0;
    
    	bits[0] = (a >> 6) &3;
    	bits[1] = (a >> 4) &3;
    	bits[2] = (a >> 2) &3;
    	bits[3] = a & 3;
    	
    	for(; k < 4; k++){
    	  if(bits[k] == 0) printf(" ");
    	  else if(bits[k] == 1) printf(":");
    	  else if(bits[k] == 2) printf("@");
    	  else if(bits[k] == 3) printf("\n");
    	}// end for
    }// end decompress
    compress:

    Code:
    else if(strcmp("e", argc[1]) == 0){
          c = fgetc(f);
          f2 = fopen(argc[3], "w");
         
    	if(f2 == NULL) printf("Can't open file %s\n", output);
    	else{
    	  while(c != EOF){
    		    
    	    int l = 0;
    	    int com = 0;
    	    for(; l < 4; l++){	      
    	      if(c == ' ') com += ((0 << (l*2)));
    	      else if(c == ':') com += ((1 << (l*2)));
    	      else if(c == '@') com += ((2 << (l*2)));
    	      else if(c == '\n')  com += ((3 << (l*2)));
    	      c = fgetc(f);
    	    }// end for
    	   
    	    fprintf(f2, "%c",com);
    	  }// end while
    	  fclose(f);
    	  fclose(f2);
    	}// end else
          }// end else if

  2. #2
    Guest Sebastiani's Avatar
    Join Date
    Aug 2001
    Location
    Waterloo, Texas
    Posts
    5,708
    The decompressor does indeed look fine, except that if the input contained a number of symbols that isn't a multiple of 4, then it will generate too many symbols on output.

    As far as the compressor is concerned, you should be or'ing, not adding. On a side note, you should check for EOF each time fgetc is called. If it is an EOF then you should report an error if 'l' is less than 3 (or else factor in some way to communicate to the decoder the number of 'end' bytes to generate). Also check for unexpected symbols, just for good measure. And just to be sure, 'c' is declared as an 'int' (not 'char'), correct?

  3. #3
    and the Hat of Guessing tabstop's Avatar
    Join Date
    Nov 2007
    Posts
    14,336
    One small point: is this a direct copy-paste? It's strange to see the command-line arguments in an argc vector.

  4. #4
    Registered User
    Join Date
    Sep 2009
    Posts
    11
    Thats a direct copy paste yes. Thats how I've been told to do command line arguments for now at least.

    The c was a char, I then changed it to an int after seeing your post and it didnt change anything. As for or'ing, I tried doing that and it only obscured the picture more, but then again im very new to these operations.

  5. #5
    Guest Sebastiani's Avatar
    Join Date
    Aug 2001
    Location
    Waterloo, Texas
    Posts
    5,708
    Quote Originally Posted by Stenland View Post
    Thats a direct copy paste yes. Thats how I've been told to do command line arguments for now at least.

    The c was a char, I then changed it to an int after seeing your post and it didnt change anything. As for or'ing, I tried doing that and it only obscured the picture more, but then again im very new to these operations.
    Post the entire code, if possible.

  6. #6
    Registered User
    Join Date
    Sep 2009
    Posts
    11
    Code:
    #include <stdlib.h>
    #include <stdio.h>
    #include <string.h>
    
    //const char EOL = '\n';
    
    /* decompress(char a)
       Denne metoden leser inn en char(int) i form av en byte.
       Deretter puttes de 4 forskjellige verdiene i byten(siden vi jobber med 2 bit per tegn, og de skiller vi ut
       ved å flytte hele byten n tegn til høyre(6, 4, 2, 0) og så dytte de inn i en array.Hvis vi f.eks har en innlesing:
       11011000 så vil vi etter å ha flyttet bitene og sagt at vi skal ha en maks verdi(mask) på 3 dvs 2 bit står vi igjen da med:
       {3, 1, 2, 0} i arrayet vårt. Så kjøres en for løkke 4 ganger slik at vi kan gå gjennom alle parene i en byte. Derettes testes verdiene
       mot sitt tegn, og vi får en utskrift. I eksemplet over vil utskriften bli: \n : @ mellomrom.
    
       
     */
    
    void decompress(char a)
    {
    
    	char bits[4];
    	int k = 0;
    
    	bits[0] = (a >> 6) &3;
    	bits[1] = (a >> 4) &3;
    	bits[2] = (a >> 2) &3;
    	bits[3] = a & 3;
    	
    	for(; k < 4; k++){
    	  if(bits[k] == 0) printf(" ");
    	  else if(bits[k] == 1) printf(":");
    	  else if(bits[k] == 2) printf("@");
    	  else if(bits[k] == 3) printf("\n");
    	}// end for
    }// end decompress
    
    
    
    int o2(int argv, char *argc[])
    {
      FILE *f;
      FILE *f2;
      char c;
      int cc;
      unsigned char byte;
      
      char input[400];
      char output[400];
      
      strcpy(input, argc[2]);
      //strcpy(output, argc[3]);
    
      f = fopen(input, "r");
      
      if(f == NULL) printf("Can't open file %s\n", input);
      else{
        
        if(strcmp("p", argc[1]) == 0){
          c = fgetc(f);
          while(c != EOF){
    	  printf("%c", c);
    	  c = fgetc(f);
          }// end while
          fclose(f);
        }// end if
    
        else if(strcmp("e", argc[1]) == 0){
          
          cc = fgetc(f);
          f2 = fopen(argc[3], "w");
         
    	if(f2 == NULL) printf("Can't open file %s\n", output);
    	else{
    	  while(cc != EOF){
    		    
    	    int l = 0;
    	    unsigned char com = 0;
    	    for(; l < 4; l++){	      
    	      if(cc == ' ') com += ((0 << (l*2)));
    	      else if(cc == ':') com += ((1 << (l*2)));
    	      else if(cc == '@') com += ((2 << (l*2)));
    	      else if(cc == '\n')  com += ((3 << (l*2)));
    	      else com += ((0 << (l*2)));
    
    	      /*   if(cc == ' ') com += (com | cc);
    	      else if(cc == ':') com += (com | cc);
    	      else if(cc == '@') com += (com | cc);
    	      else if(cc == '\n')  com += (com | cc);
    	        else com
    	      */
    	    cc = fgetc(f);
    	    }// end for
    	   
    	    fprintf(f2, "%c",com);
    	  }// end while
    	  fclose(f);
    	  fclose(f2);
    	}// end else
          }// end else if
        
        else if(strcmp("d", argc[1]) == 0){
          int d;
          int k = 0;
          d = fgetc(f);
         
          while(d != EOF){
    	decompress((char) d);
    	d = fgetc(f);
    	
          }// end 
          printf("\n");
        }// end if
        
        else printf("Please enter a valid option(p,e or d)\n");
    	
    
      }// end else
    
    
    }// end main
    Comments in Norwegian, but shouldn't matter much. Also adding that in the above outcommented code with oring, was my first attempt at it ever and I tried other solutions that netted the same result

  7. #7
    Guest Sebastiani's Avatar
    Join Date
    Aug 2001
    Location
    Waterloo, Texas
    Posts
    5,708
    >> com += ((0 << (l*2)))

    No, as in:

    com |= ((0 << (l*2)))

    Another approach that works well in situations like this are lookup tables. They're usually faster, and generally more compact. Here's an example (albeit a bit involved one):

    Code:
    #include <stdio.h>
    #include <stdlib.h>
    
    /*
    	A case-insensitive strcmp
    */
    int stricmp( char const* s1, char const* s2 )
    {
    	int
    		c1, 
    		c2, 
    		diff;
    	for( ;; )
    	{
    		c1 = tolower( *s1++ );
    		c2 = tolower( *s2++ );
    		diff = c1 - c2;
    		if( diff )
    			return diff;
    	/*
    		Since c1 == c2 only one check is needed
    	*/
    		if( !c1 )
    			break;
    	}
    	return 0;
    }
    
    enum
    {
    	first_code, 
    	second_code,
    	third_code, 
    	fourth_code, 
    	undefined_code
    };
    
    char
    	first_symbol = ' ', 
    	second_symbol = ':', 
    	third_symbol = '@', 
    	fourth_symbol = '\n',		
    	lookup_code[ 256 ] = 
    	{  
    		undefined_code 
    	}, 
    	translate_symbols[ 4 ][ 4 ][ 4 ][ 4 ], 
    	translate_code[ 4 ];
    
    void initialize( void )
    {
    	int 
    		level_0, 
    		level_1, 
    		level_2, 
    		level_3;
    	lookup_code[ first_symbol ] = first_code;
    	lookup_code[ second_symbol ] = second_code;
    	lookup_code[ third_symbol ] = third_code;
    	lookup_code[ fourth_symbol ] = fourth_code;
    	for( level_0 = 0; level_0 < 4; ++level_0 )
    		for( level_1 = 0; level_1 < 4; ++level_1 )
    			for( level_2 = 0; level_2 < 4; ++level_2 )
    				for( level_3 = 0; level_3 < 4; ++level_3 )
    					translate_symbols[ level_0 ][ level_1 ][ level_2 ][ level_3 ] = 
    					level_0 | ( level_1 << 2 ) | ( level_2 << 4 ) | ( level_3 << 6 );
    	translate_code[ first_code ] = first_symbol;
    	translate_code[ second_code ] = second_symbol;
    	translate_code[ third_code ] = third_symbol;
    	translate_code[ fourth_code ] = fourth_symbol;
    }	
    	
    int compress( FILE* in, FILE* out )
    {
    	int
    		ch, 
    		index, 
    		levels[ 4 ];		
    	for( ;; )
    	{
    		for( index = 0; index < 4; ++index )
    		{
    			ch = fgetc( in );
    			if( ch == EOF )
    			{
    				if( index != 0 )
    					return 0;
    				return 1;
    			}	
    			ch = lookup_code[ ch ];
    			if( ch == undefined_code )
    				return 0;
    			levels[ index ] = ch;
    		}
    		fputc( translate_symbols[ levels[ 0 ] ][ levels[ 1 ] ][ levels[ 2 ] ][ levels[ 3 ] ], out );	
    	}
    /*
    	We never actually end up here
    */	
    	return 1;
    }
    
    int decompress( FILE* in, FILE* out )
    {
    	int
    		ch, 
    		index;
    	while( ( ch = fgetc( in ) ) != EOF )
    		for( index = 0; index < 4; ++index, ch >>= 2 )
    			fputc( translate_code[ ch & 0x3 ], out );
    /*
    	Never fails
    */			
    	return 1;
    }
    
    int usage( char const* program, char const* message )
    {
    	if( message )
    		fprintf( stderr, "\nError: '%s'\n", message );
    	fprintf( stderr, "Usage: %s <mode> <infile> <outfile>\n", program );
    	fprintf( stderr, "Mode: \n" );
    	fprintf( stderr, "\t'c': Compress\n" );
    	fprintf( stderr, "\t'd': Decompress\n" );
    	fprintf( stderr, "Infile: Input file ('stdin' to use standard input)\n" );
    	fprintf( stderr, "Outfile: Output file ('stdout' to use standard output)\n" );
    	fprintf( stderr, "Note: Allowed symbols to compress are <newline>, <space>, ':', and '@'\n" );
    	return EXIT_FAILURE;
    }
    
    int main( int argc, char** argv )
    {
    	fprintf( stderr, "Simple Compressor\n" );
    	char const
    		* program = argv[ 0 ];
    	if( argc != 4 )
    		return usage( program, 0 );
    	FILE
    		* in, 
    		* out;
    	int
    		result = EXIT_FAILURE, 
    		( * process )( FILE*, FILE* );
    	char const
    		* mode = argv[ 1 ], 
    		* infile = argv[ 2 ], 
    		* outfile = argv[ 3 ];		
    	initialize( );
    	if( stricmp( mode, "c" ) == 0 )
    		process = compress;
    	else if( stricmp( mode, "d" ) == 0 )
    		process = decompress;
    	else
    		return usage( program, "invalid arguments" );
    	if( stricmp( infile, "stdin" ) == 0 )
    		in = stdin;
    	else
    		in = fopen( infile, "rb" );
    	if( !in )
    		usage( program, "cannot open input file" );
    	else
    	{
    		if( stricmp( outfile, "stdout" ) == 0 )
    			out = stdout;
    		else
    			out = fopen( outfile, "wb" );
    		if( !out )
    			usage( program, "cannot open output file" );
    		else
    		{
    			if( !process( in, out ) )
    				usage( program, "invalid format" );
    			else
    				result = EXIT_SUCCESS;
    			fclose( out );
    		}
    		fclose( in );
    	}
    	return result;
    }
    Good luck.
    Last edited by Sebastiani; 10-22-2009 at 06:59 PM. Reason: Formatting

  8. #8
    Registered User
    Join Date
    Sep 2009
    Posts
    11
    Thanks for the help Sebastiani, always helpful

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Need help understanding a problem
    By dnguyen1022 in forum C++ Programming
    Replies: 2
    Last Post: 04-29-2009, 04:21 PM
  2. Memory problem with Borland C 3.1
    By AZ1699 in forum C Programming
    Replies: 16
    Last Post: 11-16-2007, 11:22 AM
  3. Someone having same problem with Code Block?
    By ofayto in forum C++ Programming
    Replies: 1
    Last Post: 07-12-2007, 08:38 AM
  4. A question related to strcmp
    By meili100 in forum C++ Programming
    Replies: 6
    Last Post: 07-07-2007, 02:51 PM
  5. WS_POPUP, continuation of old problem
    By blurrymadness in forum Windows Programming
    Replies: 1
    Last Post: 04-20-2007, 06:54 PM