Thread: gcc elf output

  1. #1
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    1,733

    gcc elf output

    I tried feeding it a file with just
    Code:
    int main(){ return 0; }
    and looked at the hex and found it had added a whole bunch of unused CRT stuff. I wanted to see a bare-bones example of an entry point so that I could try writing my own compiler (I never found an MIT-licensed one, so I'm going in with the idea of it being extensible, like Firefox with its add-ons, but from the preprocessor side). I'm using x86_64 machine code as my playground for this and attempted to make an ELF that just returns 0; while the header was correctly recognized, the attempt at executing it just resulted in a crazy file being created and that's it. By the way, the entry code I used was "90 C3 00 00 00 00".
    If you know any good resources for this kinda stuff I'll happily look at that instead since the GCC result just looks obfuscated with unneeded stuff to me.

  2. #2
    Registered User
    Join Date
    Feb 2019
    Posts
    1,078
    Try:
    Code:
    $ gcc -O2 -ffreestanding -c -o test.o <<< 'int main(void){return 0;}'
    $ objdump -dM intel test.o
    Disassembly of section .text.startup:
    
    0000000000000000 <main>:
       0:    31 c0                    xor    eax,eax
       2:    c3                       ret
    And, when linking, use the -nostdlib option... But then you have to provide your own initialization and finalization code and your own linker script... And you cannot use any header files except the freestanding ones, such as stddef.h and stdint.h (and, I think, float.h, limits.h, stdarg.h, stdbool.h and iso646.h)...
    Last edited by flp1969; 03-25-2019 at 06:06 PM.

  3. #3
    Registered User
    Join Date
    Feb 2019
    Posts
    1,078
    Example:

    Code:
    # init.S
    # Minimal x86-64 program entry point: calls main(argc, argv) and hands
    # its return value to the exit system call, so no C runtime is needed.
      .code64
    
      .section .text
      .globl _start       # exported so the linker can see the entry symbol
      .extern main        # defined in main.c
    
    _start:
      movl  (%rsp),%edi   # int argc;    -> first argument of main
      lea   8(%rsp),%rsi  # char **argv; -> second argument of main
    
      call  main
    
      # exit(main(argc, argv));
      movl  %eax,%edi     # main's return value becomes the exit status
      movl  $60,%eax      # system call number for exit
      syscall
    Code:
    /* main.c */
    #include <stddef.h>
    
    static void printstr( char * );
    
    /* Prints the single command-line argument followed by a newline.
       Returns 1 unless exactly one argument was given, otherwise 0. */
    int main( int argc, char *argv[] )
    {
      if (argc == 2)
      {
        char *arg = argv[1];
    
        printstr( arg );
        printstr( "\n" );
        return 0;
      }
    
      return 1;
    }
    
    /*
     * Writes the NUL-terminated string s to file descriptor 1 by issuing
     * system call number 1 (write on x86-64 Linux) directly, without libc.
     * The result of the system call is ignored.
     */
    void printstr( char *s )
    {
      size_t count = 0;
      char *p;
    
      /* Hand-rolled strlen(): no C library is linked in. */
      p = s;
      while (*p++) count++;
    
      __asm__ __volatile__ (
        "movl $1,%%eax\n\t"       /* system call number 1 */
        "movl %%eax,%%edi\n\t"    /* first argument: fd 1 */
        "syscall" : :
        "S" (s),  // string ptr.
        "d" (count) // string size.
        /* Besides rax/rdi set above, the syscall instruction itself
           overwrites rcx and r11, and the kernel reads the buffer behind s;
           without these clobbers the compiler may keep live values in
           rcx/r11 or delay stores to the buffer. */
        : "rax", "rdi", "rcx", "r11", "memory"
      );
    }
    Code:
    # Makefile
    # Builds the 'main' executable from main.c and init.S without a C library.

    # Link the two objects directly with ld; only main.o and init.o are listed.
    main: main.o init.o
        ld -nostdlib -o $@ $^
    
    # Compile main.c as freestanding code (-ffreestanding), object file only (-c).
    main.o: main.c
        cc -O2 -ffreestanding -c -o $@ $<
    
    # Assemble the _start entry point.
    init.o: init.S
        as -o $@ $<
    Compiling, linking and viewing the final file:
    Code:
    $ make
    cc -O2 -ffreestanding -c -o main.o main.c
    as -o init.o init.S
    ld -nostdlib -o main main.o init.o
    $ ./main fred
    fred
    $ objdump -dM intel main
    Disassembly of section .text:
    
    00000000004000b0 <main>:
      4000b0:    83 ff 02                 cmp    edi,0x2
      4000b3:    b8 01 00 00 00           mov    eax,0x1
      4000b8:    74 01                    je     4000bb <main+0xb>
      4000ba:    c3                       ret    
      4000bb:    48 8b 76 08              mov    rsi,QWORD PTR [rsi+0x8]
      4000bf:    31 d2                    xor    edx,edx
      4000c1:    80 3e 00                 cmp    BYTE PTR [rsi],0x0
      4000c4:    74 14                    je     4000da <main+0x2a>
      4000c6:    66 2e 0f 1f 84 00 00     nop    WORD PTR cs:[rax+rax*1+0x0]
      4000cd:    00 00 00 
      4000d0:    48 83 c2 01              add    rdx,0x1
      4000d4:    80 3c 16 00              cmp    BYTE PTR [rsi+rdx*1],0x0
      4000d8:    75 f6                    jne    4000d0 <main+0x20>
      4000da:    b8 01 00 00 00           mov    eax,0x1
      4000df:    89 c7                    mov    edi,eax
      4000e1:    0f 05                    syscall 
      4000e3:    48 8d 35 27 00 00 00     lea    rsi,[rip+0x27]        # 400111 <_start+0x16>
      4000ea:    ba 01 00 00 00           mov    edx,0x1
      4000ef:    b8 01 00 00 00           mov    eax,0x1
      4000f4:    89 c7                    mov    edi,eax
      4000f6:    0f 05                    syscall 
      4000f8:    31 c0                    xor    eax,eax
      4000fa:    c3                       ret    
    
    00000000004000fb <_start>:
      4000fb:    8b 3c 24                 mov    edi,DWORD PTR [rsp]
      4000fe:    48 8d 74 24 08           lea    rsi,[rsp+0x8]
      400103:    e8 a8 ff ff ff           call   4000b0 <main>
      400108:    89 c7                    mov    edi,eax
      40010a:    b8 3c 00 00 00           mov    eax,0x3c
      40010f:    0f 05                    syscall

  4. #4
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    1,733
    Thanks, this was the most helpful answer:
    Quote Originally Posted by flp1969 View Post
    Example:

    Code:
    # init.S
    # Minimal x86-64 program entry point: calls main(argc, argv) and hands
    # its return value to the exit system call, so no C runtime is needed.
      .code64
    
      .section .text
      .globl _start       # exported so the linker can see the entry symbol
      .extern main        # defined in main.c
    
    _start:
      movl  (%rsp),%edi   # int argc;    -> first argument of main
      lea   8(%rsp),%rsi  # char **argv; -> second argument of main
    
      call  main
    
      # exit(main(argc, argv));
      movl  %eax,%edi     # main's return value becomes the exit status
      movl  $60,%eax      # system call number for exit
      syscall
    Code:
    /* main.c */
    #include <stddef.h>
    
    static void printstr( char * );
    
    /* Prints the single command-line argument followed by a newline.
       Returns 1 unless exactly one argument was given, otherwise 0. */
    int main( int argc, char *argv[] )
    {
      if (argc == 2)
      {
        char *arg = argv[1];
    
        printstr( arg );
        printstr( "\n" );
        return 0;
      }
    
      return 1;
    }
    
    /*
     * Writes the NUL-terminated string s to file descriptor 1 by issuing
     * system call number 1 (write on x86-64 Linux) directly, without libc.
     * The result of the system call is ignored.
     */
    void printstr( char *s )
    {
      size_t count = 0;
      char *p;
    
      /* Hand-rolled strlen(): no C library is linked in. */
      p = s;
      while (*p++) count++;
    
      __asm__ __volatile__ (
        "movl $1,%%eax\n\t"       /* system call number 1 */
        "movl %%eax,%%edi\n\t"    /* first argument: fd 1 */
        "syscall" : :
        "S" (s),  // string ptr.
        "d" (count) // string size.
        /* Besides rax/rdi set above, the syscall instruction itself
           overwrites rcx and r11, and the kernel reads the buffer behind s;
           without these clobbers the compiler may keep live values in
           rcx/r11 or delay stores to the buffer. */
        : "rax", "rdi", "rcx", "r11", "memory"
      );
    }
    Code:
    # Makefile
    # Builds the 'main' executable from main.c and init.S without a C library.

    # Link the two objects directly with ld; only main.o and init.o are listed.
    main: main.o init.o
        ld -nostdlib -o $@ $^
    
    # Compile main.c as freestanding code (-ffreestanding), object file only (-c).
    main.o: main.c
        cc -O2 -ffreestanding -c -o $@ $<
    
    # Assemble the _start entry point.
    init.o: init.S
        as -o $@ $<
    Compiling, linking and viewing the final file:
    Code:
    $ make
    cc -O2 -ffreestanding -c -o main.o main.c
    as -o init.o init.S
    ld -nostdlib -o main main.o init.o
    $ ./main fred
    fred
    $ objdump -dM intel main
    Disassembly of section .text:
    
    00000000004000b0 <main>:
      4000b0:    83 ff 02                 cmp    edi,0x2
      4000b3:    b8 01 00 00 00           mov    eax,0x1
      4000b8:    74 01                    je     4000bb <main+0xb>
      4000ba:    c3                       ret    
      4000bb:    48 8b 76 08              mov    rsi,QWORD PTR [rsi+0x8]
      4000bf:    31 d2                    xor    edx,edx
      4000c1:    80 3e 00                 cmp    BYTE PTR [rsi],0x0
      4000c4:    74 14                    je     4000da <main+0x2a>
      4000c6:    66 2e 0f 1f 84 00 00     nop    WORD PTR cs:[rax+rax*1+0x0]
      4000cd:    00 00 00 
      4000d0:    48 83 c2 01              add    rdx,0x1
      4000d4:    80 3c 16 00              cmp    BYTE PTR [rsi+rdx*1],0x0
      4000d8:    75 f6                    jne    4000d0 <main+0x20>
      4000da:    b8 01 00 00 00           mov    eax,0x1
      4000df:    89 c7                    mov    edi,eax
      4000e1:    0f 05                    syscall 
      4000e3:    48 8d 35 27 00 00 00     lea    rsi,[rip+0x27]        # 400111 <_start+0x16>
      4000ea:    ba 01 00 00 00           mov    edx,0x1
      4000ef:    b8 01 00 00 00           mov    eax,0x1
      4000f4:    89 c7                    mov    edi,eax
      4000f6:    0f 05                    syscall 
      4000f8:    31 c0                    xor    eax,eax
      4000fa:    c3                       ret    
    
    00000000004000fb <_start>:
      4000fb:    8b 3c 24                 mov    edi,DWORD PTR [rsp]
      4000fe:    48 8d 74 24 08           lea    rsi,[rsp+0x8]
      400103:    e8 a8 ff ff ff           call   4000b0 <main>
      400108:    89 c7                    mov    edi,eax
      40010a:    b8 3c 00 00 00           mov    eax,0x3c
      40010f:    0f 05                    syscall
    By the way when I looked at the result the _start() function actually ended with 0x0A like below:
    Code:
    /* Copied bytes while referencing post to understand what each instruction is */
    uchar func[] = {
    	0x8B, 0x3C, 0x24, /* mov edi,DWORD PTR [rsp] #argc */
    	/* Opcode byte is 0x8D (LEA), matching "48 8d 74 24 08" in the objdump listing */
    	0x48, 0x8D, 0x74, 0x24, 0x08, /* lea rsi,[rsp+0x8] #argv */
    	/* The four bytes after 0xE8 are a little-endian signed offset (0xFFFFFFA8 = -0x58)
    	   added to the address of the next instruction: 0x400108 - 0x58 = 0x4000B0 */
    	0xE8, 0xA8, 0xFF, 0xFF, 0xFF, /* call 0x4000B0<main> #How does this translate to 0x4000B0? */
    	0x89, 0xC7, /* mov edi,eax */
    	0xB8, 0x3C, 0x00, 0x00, 0x00, /* mov eax,0x3C*/
    	0x0F, 0x05, /* syscall */
    	/* NOTE(review): the objdump listing of _start ends at syscall, so this byte
    	   presumably belongs to whatever follows .text in the file - confirm */
    	0x0A /* ??? */
    };
    Gonna go look at the instruction reference I did find before starting this thread but just in case that doesn't give me enough information I'd like to hear (or rather read) your thoughts on it

  5. #5
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    1,733
    Well, according to coder64 edition | X86 Opcode and Instruction Reference 1.12, it's an OR instruction, presumably for byte-sized integers. Having double-checked the example code, there is no instance of an OR operation being used, so I do not understand the reason for it without a resource of some sort to reference. For now I'm gonna go do some shopping for essentials (food for instance, basically have only rice left) and hopefully someone who knows something about it, or about some possible resource for it, will post something; otherwise I'll just try making my test ELF without the OR instruction and see what happens.

  6. #6
    Registered User
    Join Date
    Feb 2019
    Posts
    1,078
    Quote Originally Posted by awsdert View Post
    By the way when I looked at the result the _start() function actually ended with 0x0A like below:
    Code:
    /* Copied bytes while referencing post to understand what each instruction is */
    uchar func[] = {
        0x8B, 0x3C, 0x24, /* mov edi,DWORD PTR [rsp] #argc */
        /* Opcode byte is 0x8D (LEA), matching "48 8d 74 24 08" in the objdump listing */
        0x48, 0x8D, 0x74, 0x24, 0x08, /* lea rsi,[rsp+0x8] #argv */
        /* The four bytes after 0xE8 are a little-endian signed offset (0xFFFFFFA8 = -0x58)
           added to the address of the next instruction: 0x400108 - 0x58 = 0x4000B0 */
        0xE8, 0xA8, 0xFF, 0xFF, 0xFF, /* call 0x4000B0<main> #How does this translate to 0x4000B0? */
        0x89, 0xC7, /* mov edi,eax */
        0xB8, 0x3C, 0x00, 0x00, 0x00, /* mov eax,0x3C*/
        0x0F, 0x05, /* syscall */
        /* NOTE(review): the objdump listing of _start ends at syscall, so this byte
           presumably belongs to whatever follows .text in the file - confirm */
        0x0A /* ??? */
    };
    Gonna go look at the instruction reference I did find before starting this thread but just in case that doesn't give me enough information I'd like to hear (or rather read) your thoughts on it
    How did you get this extra byte?
    Code:
    $ as -o init.o init.S
    $ objdump -dM intel init.o
    Disassembly of section .text:
    
    0000000000000000 <_start>:
       0:    8b 3c 24                 mov    edi,DWORD PTR [rsp]
       3:    48 8d 74 24 08           lea    rsi,[rsp+0x8]
       8:    e8 00 00 00 00           call   d <_start+0xd>
       d:    89 c7                    mov    edi,eax
       f:    b8 3c 00 00 00           mov    eax,0x3c
      14:    0f 05                    syscall
    $ objdump -s init.o
    Contents of section .text:
     0000 8b3c2448 8d742408 e8000000 0089c7b8  .<$H.t$.........
     0010 3c000000 0f05                        <.....
    See? No extra byte!
    And not in the entire program as well:
    Code:
    $ objdump -s main
    Contents of section .text:
     4000b0 83ff02b8 01000000 7401c348 8b760831  ........t..H.v.1
     4000c0 d2803e00 7414662e 0f1f8400 00000000  ..>.t.f.........
     4000d0 4883c201 803c1600 75f6b801 00000089  H....<..u.......
     4000e0 c70f0548 8d352700 0000ba01 000000b8  ...H.5'.........
     4000f0 01000000 89c70f05 31c0c38b 3c24488d  ........1...<$H.
     400100 742408e8 a8ffffff 89c7b83c 0000000f  t$.........<....
     400110 05                                   .               
    Contents of section .rodata:
     400111 0a00                                 ..              
    Contents of section .eh_frame:
     400118 14000000 00000000 017a5200 01781001  .........zR..x..
     400128 1b0c0708 90010000 10000000 1c000000  ................
     400138 78ffffff 4b000000 00000000           x...K.......    
    Contents of section .comment:
     0000 4743433a 20285562 756e7475 20372e33  GCC: (Ubuntu 7.3
     0010 2e302d32 37756275 6e747531 7e31382e  .0-27ubuntu1~18.
     0020 30342920 372e332e 3000               04) 7.3.0.
    Maybe you are reading the "\n" at .rodata section as well?
    Yep... you can get rid of these extra sections:
    Code:
    $ objcopy -j .text -j .rodata main main_clean
    Now 'main_clean' has only the important sections...

  7. #7
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    1,733
    I'm looking via wxHexEditor (only suitable one in official software list for my distro), thanks for the tip on removing extra stuff anyway, might use it later if I can't get my test elf working (been using "example.elf" as name for what you gave).
    There is one other thing I need help with though: on the coder64 site I mentioned there's a lot of r/m8, r/m16/32/64, r8/r16 etc. stuff. How should I visualize what that actually represents? Is it multiple formats for the same instruction? Is it just pointers? Is it something else?

    Edit: For now I got this in my converter:
    Code:
    /* Break dst and src down into single bytes, least significant byte first
       (little endian): index 0 holds bits 0-7, index 7 holds bits 56-63. */
    d[7] = (uchar)(dst >> 56);
    d[6] = (uchar)(dst >> 48);
    d[5] = (uchar)(dst >> 40);
    d[4] = (uchar)(dst >> 32);
    d[3] = (uchar)(dst >> 24);
    d[2] = (uchar)(dst >> 16);
    d[1] = (uchar)(dst >> 8);
    d[0] = (uchar)dst;
    s[7] = (uchar)(src >> 56);
    s[6] = (uchar)(src >> 48);
    s[5] = (uchar)(src >> 40);
    s[4] = (uchar)(src >> 32);
    s[3] = (uchar)(src >> 24);
    s[2] = (uchar)(src >> 16);
    s[1] = (uchar)(src >> 8);
    s[0] = (uchar)src;
    /* As displayed at http://ref.x86asm.net/coder64.html */
    /* Emit the encoding for the selected opcode into buff and record its length in size. */
    switch(x86_64) {
    /* Opcode 0x00: written as the opcode byte followed by the low byte of dst, then of src.
       NOTE(review): the reference lists 0x00 as ADD r/m8, r8, whose operands are normally
       packed into a single ModR/M byte rather than two separate bytes - confirm. */
    case 0x00:
    	buff[0] = 0x00;
    	buff[1] = d[0];
    	buff[2] = s[0];
    	size = 3; break;
    Last edited by awsdert; 03-26-2019 at 04:38 AM.

  8. #8
    Registered User
    Join Date
    Feb 2019
    Posts
    1,078
    Quote Originally Posted by awsdert View Post
    I'm looking via wxHexEditor (only suitable one in official software list for my distro), thanks for the tip on removing extra stuff anyway, might use it later if I can't get my test elf working (been using "example.elf" as name for what you gave).
    There is an old viewer for Windows called "Hacker's View" that I like very much, and a free, open-source one packaged as 'ht':

    gcc elf output-untitled-png

    Quote Originally Posted by awsdert View Post
    There is one other thing I need help with though: on the coder64 site I mentioned there's a lot of r/m8, r/m16/32/64, r8/r16 etc. stuff. How should I visualize what that actually represents? Is it multiple formats for the same instruction? Is it just pointers? Is it something else?
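    This is encoded in the ModR/M byte of the instruction: "r/m8" means the operand can be either an 8-bit register or an 8-bit memory location, while "r8" means it must be an 8-bit register (likewise for 16/32/64). Take a look at Intel's Software Developer's Manuals, volume 2.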

  9. #9
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    1,733
    I think I get it now, the "r/m" part represents read/modify respectively and would expect an address at that location, the next question then is does it expect a full 64bit address or does it expect smaller?

    Edit: By the way thanks for the link, was quite surprised to find a 4000 odd page report there but that should be really helpful later

  10. #10
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    1,733
    Related thread I just started
    Need problem points of custom elf highlighted

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Replies: 2
    Last Post: 09-22-2018, 08:55 AM
  2. Replies: 5
    Last Post: 04-14-2016, 08:27 AM
  3. C++ overlapping output and adding extensions to output files
    By lordmorgul in forum Linux Programming
    Replies: 9
    Last Post: 05-11-2010, 08:26 AM
  4. terminal output not showing output properly
    By stanlvw in forum C Programming
    Replies: 13
    Last Post: 11-19-2007, 10:46 PM
  5. Replies: 3
    Last Post: 02-19-2003, 08:34 PM

Tags for this Thread