前言

动手实践并写成文章,哪怕要花5倍的时间,一次性把事情做到90分,也好过只读别人的文章做到60分、之后还得再花时间继续深入学习。本文希望通过分析一个简单的函数调用,加深对x86-64寄存器及栈帧结构的认识,以便在需要定位问题时能够熟练运用。

环境

1.操作系统和内核

1
2
[root@localhost ~]# cat /proc/version 
Linux version 3.10.0-229.4.2.el7.x86_64 (builder@kbuilder.dev.centos.org) (gcc version 4.8.2 20140120 (Red Hat 4.8.2-16) (GCC) ) #1 SMP Wed May 13 10:06:09 UTC 2015

2.GCC版本

1
2
3
4
5
6
7
8
[root@localhost ~]# gcc  -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/4.8.3/lto-wrapper
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu --enable-languages=c,c++,objc,obj-c++,java,fortran,ada,go,lto --enable-plugin --enable-initfini-array --disable-libgcj --with-isl=/builddir/build/BUILD/gcc-4.8.3-20140911/obj-x86_64-redhat-linux/isl-install --with-cloog=/builddir/build/BUILD/gcc-4.8.3-20140911/obj-x86_64-redhat-linux/cloog-install --enable-gnu-indirect-function --with-tune=generic --with-arch_32=x86-64 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.8.3 20140911 (Red Hat 4.8.3-9) (GCC) 

3.GDB版本

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
[root@localhost ~]# gdb --version
GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-64.el7
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.

GDB 分析调用栈

1. 准备代码

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#include <stdio.h>


/*
 * Return the sum of the nine integer arguments a1..a9.
 *
 * Nine parameters are used on purpose so that the debugging session in this
 * article can show both argument-passing paths: in the disassembly below,
 * a1..a6 arrive in edi, esi, edx, ecx, r8d and r9d, while a7..a9 are read
 * from the caller's stack at 0x10(%rbp), 0x18(%rbp) and 0x20(%rbp).
 *
 * The local `s` is initialised to 0x0 so that this listing matches the
 * source lines and machine code shown by gdb (`movl $0x0,-0x4(%rbp)`).
 */
int sum(int a1, int a2, int a3, int a4, int a5, 
        int a6, int a7, int a8, int a9)
{
    int s = 0x0;
    s = a1+a2+a3+a4+a5+a6+a7+a8+a9;

    return s;
}

/*
 * Call sum() with nine easily recognisable constants (0x11 .. 0x99) and
 * print the result as upper-case hexadecimal.
 *
 * The format specifier is %X (upper case): this is what produces the
 * "sum: 2FD" output shown in this article and what gdb displays as the
 * source of this line in the debugging session.
 */
void  caller(void)
{
    int iRet = 0x0;

    iRet = sum(0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99);
    printf("sum: %X\r\n", iRet);
}

/* Program entry point: invokes caller() once and returns 0. */
int main(void)
{
    caller();

    return 0;
}

2. 生成可执行文件

1
2
3
[root@localhost cpp]# gcc  -g  stackexample.c  -o stackexample
[root@localhost cpp]# ./stackexample
sum: 2FD

3. 分析调用栈

主要通过gdb分析调用栈变化情况

3.1 初始化

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
Breakpoint 1, main () at stackexample.c:22
warning: Source file is more recent than executable.
22              caller();
Missing separate debuginfos, use: debuginfo-install glibc-2.17-78.el7.x86_64
(gdb) info  registers 
rax            0x4005e6 4195814
rbx            0x0      0
rcx            0x400600 4195840
rdx            0x7fffffffe518   140737488348440
rsi            0x7fffffffe508   140737488348424
rdi            0x1      1
rbp            0x7fffffffe420   0x7fffffffe420
rsp            0x7fffffffe420   0x7fffffffe420
r8             0x7ffff7dd5e80   140737351868032
r9             0x0      0
r10            0x7fffffffe270   140737488347760
r11            0x7ffff7a3ba00   140737348090368
r12            0x400440 4195392
r13            0x7fffffffe500   140737488348416
r14            0x0      0
r15            0x0      0
rip            0x4005ea 0x4005ea <main+4>
eflags         0x246    [ PF ZF IF ]
cs             0x33     51
ss             0x2b     43
ds             0x0      0
es             0x0      0
fs             0x0      0
---Type <return> to continue, or q <return> to quit--- 
gs             0x0      0
(gdb) 

栈内存(堆栈向下生长):

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
rbp            0x7fffffffe420   0x7fffffffe420
rsp            0x7fffffffe420   0x7fffffffe420

         0x7fffffffe428 +------------------+
                        |                  |
    rbp->0x7fffffffe420 +------------------+  <-rsp
                        |                  |
         0x7fffffffe418 +------------------+
                        |                  |
         0x7fffffffe410 +------------------+
                        |                  |
         0x7fffffffe408 +------------------+

3.2 调用caller

寄存器信息:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
(gdb) s
caller () at stackexample.c:14
14              int iRet = 0x0;
(gdb) info registers 
rax            0x4005e6 4195814
rbx            0x0      0
rcx            0x400600 4195840
rdx            0x7fffffffe518   140737488348440
rsi            0x7fffffffe508   140737488348424
rdi            0x1      1
rbp            0x7fffffffe410   0x7fffffffe410
rsp            0x7fffffffe3e0   0x7fffffffe3e0
r8             0x7ffff7dd5e80   140737351868032
r9             0x0      0
r10            0x7fffffffe270   140737488347760
r11            0x7ffff7a3ba00   140737348090368
r12            0x400440 4195392
r13            0x7fffffffe500   140737488348416
r14            0x0      0
r15            0x0      0
rip            0x40058a 0x40058a <caller+8>
eflags         0x202    [ IF ]
cs             0x33     51
ss             0x2b     43
ds             0x0      0
es             0x0      0
fs             0x0      0
---Type <return> to continue, or q <return> to quit---
gs             0x0      0
(gdb) x /6gx  0x7fffffffe3e0
0x7fffffffe3e0: 0x0000000000000001      0x000000000040064d
0x7fffffffe3f0: 0x0000000000000000      0x0000000000000000
0x7fffffffe400: 0x0000000000400600      0x0000000000400440
(gdb)
(gdb) x /12gx  0x7fffffffe3e0
0x7fffffffe3e0: 0x0000000000000001      0x000000000040064d
0x7fffffffe3f0: 0x0000000000000000      0x0000000000000000
0x7fffffffe400: 0x0000000000400600      0x0000000000400440 #小端系统,第二个8字节的高4字节(即前8个十六进制数字,地址0x7fffffffe40c)存储iRet的值
0x7fffffffe410: 0x00007fffffffe420      0x00000000004005ef
0x7fffffffe420: 0x0000000000000000      0x00007ffff7a3baf5
0x7fffffffe430: 0x0000002000000000      0x00007fffffffe508
(gdb) 
(gdb) p iRet
$3 = 0
(gdb) p &iRet
$4 = (int *) 0x7fffffffe40c #局部变量在堆栈的位置

栈内存(堆栈向下生长):

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
rbp            0x7fffffffe410   0x7fffffffe410
rsp            0x7fffffffe3e0   0x7fffffffe3e0

        0x7fffffffe428 +------------------+
                       |                  |
        0x7fffffffe420 +------------------+
                       |0x00000000004005ef| #保存返回main的指令地址,参考汇编代码
        0x7fffffffe418 +------------------+
                       |0x00007fffffffe420| #保存main函数对应rbp
 rbp -->0x7fffffffe410 +------------------+
                       |                  |
        0x7fffffffe408 +------------------+
                       |                  |
        0x7fffffffe400 +------------------+
                       |                  |
        0x7fffffffe3f8 +------------------+
                       |                  |
        0x7fffffffe3f0 +------------------+
                       |                  |
        0x7fffffffe3e8 +------------------+
                       |                  |
 rsp -->0x7fffffffe3e0 +------------------+        

main函数的汇编代码如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
(gdb) disassemble  /m main
Dump of assembler code for function main:
21      {
   0x00000000004005e6 <+0>:     push   %rbp
   0x00000000004005e7 <+1>:     mov    %rsp,%rbp

22              caller();
   0x00000000004005ea <+4>:     callq  0x400582 <caller>

23
24              return 0;
   0x00000000004005ef <+9>:     mov    $0x0,%eax  #调用caller返回后执行的指令地址

25      }
   0x00000000004005f4 <+14>:    pop    %rbp
   0x00000000004005f5 <+15>:    retq   

End of assembler dump.
(gdb) 

3.3调用sum

寄存器信息:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
Breakpoint 2, sum (a1=17, a2=34, a3=51, a4=68, a5=85, a6=102, a7=119, a8=136, 
    a9=153) at stackexample.c:6
6               int s = 0x0;
(gdb) info  registers 
rax            0x4005e6 4195814
rbx            0x0      0
rcx            0x44     68                             #第四个参数
rdx            0x33     51                             #第三个参数
rsi            0x22     34                             #第二个参数
rdi            0x11     17                             #第一个参数 
rbp            0x7fffffffe3d0   0x7fffffffe3d0
rsp            0x7fffffffe3d0   0x7fffffffe3d0
r8             0x55     85                             #第五个参数 
r9             0x66     102                            #第六个参数 
r10            0x7fffffffe270   140737488347760
r11            0x7ffff7a3ba00   140737348090368
r12            0x400440 4195392
r13            0x7fffffffe500   140737488348416
r14            0x0      0
r15            0x0      0
rip            0x400548 0x400548 <sum+24>
eflags         0x202    [ IF ]
cs             0x33     51
ss             0x2b     43
ds             0x0      0
es             0x0      0
fs             0x0      0
---Type <return> to continue, or q <return> to quit--- 
gs             0x0      0
(gdb) 

caller对应汇编代码:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
(gdb) disassemble   /m caller
Dump of assembler code for function caller:
13      {
   0x0000000000400582 <+0>:     push   %rbp  #保存调用函数main的rbp
   0x0000000000400583 <+1>:     mov    %rsp,%rbp #将上一个栈顶指针作为本函数的栈底(设置当前函数的栈基址)
   0x0000000000400586 <+4>:     sub    $0x30,%rsp #分配栈空间

14              int iRet = 0x0;
   0x000000000040058a <+8>:     movl   $0x0,-0x4(%rbp)

15
16              iRet = sum(0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99);
   0x0000000000400591 <+15>:    movl   $0x99,0x10(%rsp)   #第9个参数
   0x0000000000400599 <+23>:    movl   $0x88,0x8(%rsp)    #第8个参数 
   0x00000000004005a1 <+31>:    movl   $0x77,(%rsp)       #第7个参数
   0x00000000004005a8 <+38>:    mov    $0x66,%r9d         #第6个参数 
   0x00000000004005ae <+44>:    mov    $0x55,%r8d         #第5个参数 
   0x00000000004005b4 <+50>:    mov    $0x44,%ecx         #第4个参数
   0x00000000004005b9 <+55>:    mov    $0x33,%edx         #第3个参数 
   0x00000000004005be <+60>:    mov    $0x22,%esi         #第2个参数
   0x00000000004005c3 <+65>:    mov    $0x11,%edi         #第1个参数 
   0x00000000004005c8 <+70>:    callq  0x400530 <sum>     #调用函数sum
   0x00000000004005cd <+75>:    mov    %eax,-0x4(%rbp)    #取返回值
---Type <return> to continue, or q <return> to quit---

17              printf("sum: %X\r\n", iRet);
   0x00000000004005d0 <+78>:    mov    -0x4(%rbp),%eax
   0x00000000004005d3 <+81>:    mov    %eax,%esi
   0x00000000004005d5 <+83>:    mov    $0x400690,%edi
   0x00000000004005da <+88>:    mov    $0x0,%eax
   0x00000000004005df <+93>:    callq  0x400410 <printf@plt>

18      }
   0x00000000004005e4 <+98>:    leaveq                   #出栈处理
   0x00000000004005e5 <+99>:    retq   

End of assembler dump.
(gdb) 

通过上面的汇编与寄存器信息,可以了解到函数调用的参数优先通过寄存器传递(前6个整型参数依次使用rdi、rsi、rdx、rcx、r8、r9)。但是可用于传递参数的寄存器数量有限,超出的参数怎么传递呢?答案是由调用者(这里是caller)写入自己的栈空间,具体如下:

1
2
3
4
5
6
7
8
9
(gdb) x  /12gx  $rsp
0x7fffffffe3d0: 0x00007fffffffe410      0x00000000004005cd   #保存的caller函数rbp(caller栈底),返回caller的指令地址
0x7fffffffe3e0: 0x0000000000000077      0x0000000000000088   #第七个参数  第八个参数
0x7fffffffe3f0: 0x0000000000000099      0x0000000000000000   #第九个参数
0x7fffffffe400: 0x0000000000400600      0x0000000000400440   #caller分配但未写入的栈空间(残留的旧值);其中0x7fffffffe40c处的4字节为iRet
0x7fffffffe410: 0x00007fffffffe420      0x00000000004005ef
0x7fffffffe420: 0x0000000000000000      0x00007ffff7a3baf5
(gdb) 

栈内存(栈向下生长):

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
rbp            0x7fffffffe3d0   0x7fffffffe3d0
rsp            0x7fffffffe3d0   0x7fffffffe3d0

        0x7fffffffe428 +------------------+
                       |                  |
        0x7fffffffe420 +------------------+
                       |0x00000000004005ef| #保存返回main的地址
        0x7fffffffe418 +------------------+
                       |0x00007fffffffe420| #保存main函数对应rbp
        0x7fffffffe410 +------------------+
                       |                  |
        0x7fffffffe408 +------------------+
                       |                  |
        0x7fffffffe400 +------------------+
                       |                  |
        0x7fffffffe3f8 +------------------+
                       |  0x99            |
        0x7fffffffe3f0 +------------------+
                       |  0x88            |
        0x7fffffffe3e8 +------------------+
                       |  0x77            |
        0x7fffffffe3e0 +------------------+
                       |0x00000000004005cd| #保存返回caller的指令地址
        0x7fffffffe3d8 +------------------+
                       |0x00007fffffffe410| #保存caller函数对应的rbp
 rsp -->0x7fffffffe3d0 +------------------+  <-- rbp
    

3.4 sum获取参数

1
2
3
4
5
6
7
8
9
(gdb) x  /16gx $rsp-0x28
0x7fffffffe3a8: 0x0000005500000066      0x0000003300000044
0x7fffffffe3b8: 0x0000001100000022      0x0000000000000006
0x7fffffffe3c8: 0x00000000f7ab9646      0x00007fffffffe410
0x7fffffffe3d8: 0x00000000004005cd      0x0000000000000077
0x7fffffffe3e8: 0x0000000000000088      0x0000000000000099
0x7fffffffe3f8: 0x0000000000000000      0x0000000000400600
0x7fffffffe408: 0x0000000000400440      0x00007fffffffe420
0x7fffffffe418: 0x00000000004005ef      0x0000000000000000

栈内存(栈向下生长):

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
rbp            0x7fffffffe3d0   0x7fffffffe3d0
rsp            0x7fffffffe3d0   0x7fffffffe3d0

        0x7fffffffe428 +------------------+
                       |                  |
        0x7fffffffe420 +------------------+
                       |0x00000000004005ef| #保存返回main的地址
        0x7fffffffe418 +------------------+
                       |0x00007fffffffe420| #保存main函数对应rbp
        0x7fffffffe410 +------------------+
                       |                  |
        0x7fffffffe408 +------------------+
                       |                  |
        0x7fffffffe400 +------------------+
                       |                  |
        0x7fffffffe3f8 +------------------+
                       |  0x99            |
        0x7fffffffe3f0 +------------------+
                       |  0x88            |
        0x7fffffffe3e8 +------------------+
                       |  0x77            |
        0x7fffffffe3e0 +------------------+
                       |0x00000000004005cd| #保存返回caller的指令地址
        0x7fffffffe3d8 +------------------+
                       |0x00007fffffffe410| #保存caller函数对应的rbp
 rsp -->0x7fffffffe3d0 +------------------+  <-- rbp
                       |                  | #高4字节(rbp-0x4)为局部变量s
        0x7fffffffe3c8 +------------------+
                       |                  |
        0x7fffffffe3c0 +------------------+
                       |0x0000001100000022| #a1(rbp-0x14)、a2(rbp-0x18)
        0x7fffffffe3b8 +------------------+
                       |0x0000003300000044| #a3(rbp-0x1c)、a4(rbp-0x20)
        0x7fffffffe3b0 +------------------+
                       |0x0000005500000066| #a5(rbp-0x24)、a6(rbp-0x28)
        0x7fffffffe3a8 +------------------+
                       |                  |
        0x7fffffffe3a0 +------------------+

sum函数对应汇编如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
(gdb) disassemble  /m sum 
Dump of assembler code for function sum:
5       {
   0x0000000000400530 <+0>:     push   %rbp
   0x0000000000400531 <+1>:     mov    %rsp,%rbp
   0x0000000000400534 <+4>:     mov    %edi,-0x14(%rbp)  #获取函数参数,顺序为从左到右 
   0x0000000000400537 <+7>:     mov    %esi,-0x18(%rbp)
   0x000000000040053a <+10>:    mov    %edx,-0x1c(%rbp)
   0x000000000040053d <+13>:    mov    %ecx,-0x20(%rbp)
   0x0000000000400540 <+16>:    mov    %r8d,-0x24(%rbp)
   0x0000000000400544 <+20>:    mov    %r9d,-0x28(%rbp)

6               int s = 0x0;
   0x0000000000400548 <+24>:    movl   $0x0,-0x4(%rbp)

7               s = a1+a2+a3+a4+a5+a6+a7+a8+a9;
=> 0x000000000040054f <+31>:    mov    -0x18(%rbp),%eax
   0x0000000000400552 <+34>:    mov    -0x14(%rbp),%edx
   0x0000000000400555 <+37>:    add    %eax,%edx
   0x0000000000400557 <+39>:    mov    -0x1c(%rbp),%eax
   0x000000000040055a <+42>:    add    %eax,%edx
   0x000000000040055c <+44>:    mov    -0x20(%rbp),%eax
   0x000000000040055f <+47>:    add    %eax,%edx
   0x0000000000400561 <+49>:    mov    -0x24(%rbp),%eax
---Type <return> to continue, or q <return> to quit---
   0x0000000000400564 <+52>:    add    %eax,%edx
   0x0000000000400566 <+54>:    mov    -0x28(%rbp),%eax
   0x0000000000400569 <+57>:    add    %eax,%edx
   0x000000000040056b <+59>:    mov    0x10(%rbp),%eax #取参数a7 = 0x77
   0x000000000040056e <+62>:    add    %eax,%edx
   0x0000000000400570 <+64>:    mov    0x18(%rbp),%eax #取参数a8 = 0x88 
   0x0000000000400573 <+67>:    add    %eax,%edx
   0x0000000000400575 <+69>:    mov    0x20(%rbp),%eax #取参数a9 = 0x99
   0x0000000000400578 <+72>:    add    %edx,%eax
   0x000000000040057a <+74>:    mov    %eax,-0x4(%rbp)

8
9               return s;
   0x000000000040057d <+77>:    mov    -0x4(%rbp),%eax #计算结果通过eax寄存器返回

10      }
   0x0000000000400580 <+80>:    pop    %rbp
   0x0000000000400581 <+81>:    retq   

End of assembler dump.
(gdb) 

需要注意的是,sum的函数序言中并没有像caller那样通过sub指令移动rsp来分配栈空间。主要原因是sum是叶子函数(函数内没有再调用其他函数):System V AMD64 ABI规定rsp下方的128字节为红区(red zone),叶子函数可以直接使用这段空间存放局部变量和参数副本,而不必调整rsp;调用结束后,直接恢复调用者的栈即可。这样的好处有两个:

  1. 少一个指令,提高执行效率
  2. 编译的时候并不需要计算需要开辟多少栈空间

为什么需要将参数从寄存器保存到栈内存中?直接使用寄存器中的值不可以吗?原因是传递参数的寄存器在函数执行后续指令时可能另有他用,因此在未开启优化的情况下,被调用函数会先把参数从寄存器保存到自己的栈内存中。(在一定的情况下,即被调用函数的指令不需要占用传递参数的寄存器时,如果提高编译的优化级别,是否就不会把参数从寄存器保存到栈内存,而是直接使用寄存器?)另外可以从这里看出,从代码层面优化性能时,可以考虑减少函数调用以及去掉一些不必要的参数传递,尽管只能尽一丝绵薄之力。

3.5 sum返回值

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
(gdb) n
caller () at stackexample.c:17
17              printf("sum: %X\r\n", iRet);
(gdb) info registers 
rax            0x2fd    765  #返回值 
rbx            0x0      0
rcx            0x44     68
rdx            0x264    612
rsi            0x22     34
rdi            0x11     17
rbp            0x7fffffffe410   0x7fffffffe410
rsp            0x7fffffffe3e0   0x7fffffffe3e0
r8             0x55     85
r9             0x66     102
r10            0x7fffffffe270   140737488347760
r11            0x7ffff7a3ba00   140737348090368
r12            0x400440 4195392
r13            0x7fffffffe500   140737488348416
r14            0x0      0
r15            0x0      0
rip            0x4005d0 0x4005d0 <caller+78>
eflags         0x202    [ IF ]
cs             0x33     51
ss             0x2b     43
ds             0x0      0
es             0x0      0
fs             0x0      0
---Type <return> to continue, or q <return> to quit---
gs             0x0      0
(gdb) 
(gdb) x /16gx 0x7fffffffe3c0 
0x7fffffffe3c0: 0x0000000000000006      0x000002fdf7ab9646  #返回值0x2fd
0x7fffffffe3d0: 0x00007fffffffe410      0x00000000004005cd
0x7fffffffe3e0: 0x0000000000000077      0x0000000000000088
0x7fffffffe3f0: 0x0000000000000099      0x0000000000000000
0x7fffffffe400: 0x0000000000400600      0x000002fd00400440
0x7fffffffe410: 0x00007fffffffe420      0x00000000004005ef
0x7fffffffe420: 0x0000000000000000      0x00007ffff7a3baf5
0x7fffffffe430: 0x0000002000000000      0x00007fffffffe508
(gdb) 

x86-64寄存器说明

  1. rsp:对应32位esp寄存器,保存当前堆栈栈顶指针的寄存器
  2. rbp:对应32位ebp寄存器,保存了当前堆栈基地址指针的寄存器
  3. rax: 临时寄存器,函数的返回值通过rax(eax)返回;当我们调用系统调用的时候,rax保存系统调用号
  4. rdx:传递第3个参数到函数
  5. rdi:传递第1个参数到函数
  6. rsi:传递第2个参数到函数
  7. rip: 下一个执行指令地址

参考

  1. Calling_convention
  2. GDB and Reverse Debugging
  3. Assembly x86_64 programming for Linux