> However, I tried to generate assembly using "GCC" and "Clang" and they seem to make two comparisons and treat both of them as labels and jump.
Well, if you turn on optimisation, you end up with assembly that looks a lot more like what you crafted by hand.
The primary purpose of unoptimised code is to
a) make compilation quick
b) make debugging sane
Trying to debug optimised code can be a weird experience.
Here is the output of gcc -S foo.c (no optimisation):
Code:
# main as emitted by gcc without optimisation (GAS, AT&T syntax, x86-64).
# Visible flow: scanf into a stack local, store the constant 3 in a second
# local, compare the two, puts one of two strings, return 0.
# Both values live in memory at fixed offsets from %rbp for the whole function.
# NOTE(review): .LC0/.LC1/.LC2 are defined outside this excerpt, and the
# listing stops at ret without the .cfi_endproc that pairs with .cfi_startproc.
main:
.LFB0:
.cfi_startproc
# CET/IBT landing pad at the function entry.
endbr64
# Frame-pointer prologue; the .cfi_* directives record the frame layout for
# unwinders and debuggers.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Reserve 16 bytes below %rbp: -16 and -12 are accessed as 32-bit slots,
# -8 holds the stack guard.
subq $16, %rsp
# Copy the stack-protector guard from %fs:40 into the frame (it is re-checked
# before returning), then zero %rax so the guard value does not stay in it.
movq %fs:40, %rax
movq %rax, -8(%rbp)
xorl %eax, %eax
# scanf(.LC0, &local at -16(%rbp)): %rdi = first argument, %rsi = second.
leaq -16(%rbp), %rax
movq %rax, %rsi
leaq .LC0(%rip), %rdi
# %al = 0: no vector registers are used by this variadic call.
movl $0, %eax
call __isoc99_scanf@PLT
# Second local = 3; first local (written by scanf) is loaded into %eax.
movl $3, -12(%rbp)
movl -16(%rbp), %eax
# AT&T operand order: flags are set from -12(%rbp) - %eax, i.e. 3 - input.
# jge (signed) is taken when 3 >= input.
cmpl %eax, -12(%rbp)
jge .L2
# Fall-through: input > 3 -> puts(.LC1), then skip the other branch.
leaq .LC1(%rip), %rdi
call puts@PLT
jmp .L3
.L2:
# input <= 3 -> puts(.LC2).
leaq .LC2(%rip), %rdi
call puts@PLT
.L3:
# Return value 0.
movl $0, %eax
# Stack-guard check: XOR the saved copy with %fs:40; a zero result (je) means
# the slot is unchanged, otherwise __stack_chk_fail is called.
movq -8(%rbp), %rdx
xorq %fs:40, %rdx
je .L5
call __stack_chk_fail@PLT
.L5:
# leave = movq %rbp, %rsp ; popq %rbp. The CFI line updates the CFA rule to
# match the torn-down frame.
leave
.cfi_def_cfa 7, 8
ret
Compare that with the output of gcc -S -O2 foo.c:
Code:
# main as emitted by gcc -O2 (GAS, AT&T syntax, x86-64).
# Visible flow: scanf into a stack slot, compare that slot against the
# immediate 3, puts one of two strings, return 0. There is no frame pointer;
# everything is addressed relative to %rsp.
# NOTE(review): .LC0/.LC1/.LC2 are defined outside this excerpt.
main:
.LFB23:
.cfi_startproc
# CET/IBT landing pad at the function entry.
endbr64
# Frameless prologue: 24 bytes of locals plus the 8-byte return address give a
# 32-byte CFA offset, leaving %rsp 16-byte aligned at each call below.
subq $24, %rsp
.cfi_def_cfa_offset 32
# %rdi = first scanf argument (address of .LC0).
leaq .LC0(%rip), %rdi
# Copy the stack-protector guard from %fs:40 into 8(%rsp); it is re-checked
# before returning.
movq %fs:40, %rax
movq %rax, 8(%rsp)
# Zeroing %eax removes the guard value from the register and leaves %al = 0
# (no vector registers used) for the variadic call.
xorl %eax, %eax
# %rsi = second scanf argument: address of the slot at 4(%rsp).
leaq 4(%rsp), %rsi
call __isoc99_scanf@PLT
# Flags are set from 4(%rsp) - 3; jle (signed) is taken when input <= 3.
# The constant 3 is an immediate here; it is never stored to memory.
cmpl $3, 4(%rsp)
jle .L2
# Fall-through: input > 3 -> puts(.LC1).
leaq .LC1(%rip), %rdi
call puts@PLT
.L3:
# Common exit, reached by fall-through or by the jmp from .L2.
# Stack-guard check: XOR the saved copy with %fs:40; non-zero (jne) means the
# slot changed, so branch to the __stack_chk_fail call at .L7.
movq 8(%rsp), %rax
xorq %fs:40, %rax
jne .L7
# Return value 0, then release the 24 bytes reserved in the prologue.
xorl %eax, %eax
addq $24, %rsp
# Save the CFI state before the CFA offset changes for the ret; it is restored
# at .L2, where the 32-byte frame is still live.
.cfi_remember_state
.cfi_def_cfa_offset 8
ret
.L2:
.cfi_restore_state
# input <= 3 -> puts(.LC2), then rejoin the common exit.
leaq .LC2(%rip), %rdi
call puts@PLT
jmp .L3
.L7:
# Guard mismatch path; nothing follows the call in this function.
call __stack_chk_fail@PLT
.cfi_endproc