實做 c backtrace

c 的 backtrace 是在 call function 時, 如何得知誰 call 了這個 function。

c 的 backtrace 如何做到, 問 chatgpt 馬上就給出 list 1. 的範例程式, 真的好用, 以 list 2 來說:

_start -> __libc_start_main -> main -> f1 -> f2 -> f3 -> print_backtrace

會得到

xx [0x400b7b]
xx [0x400d60]
xx [0x401087]
xx [0x4010a4]
xx [0x4010c0]
xx [0x401689]
xx [0x400a5a]

使用 addr2line 可以查到對應的 function

addr2line -f -e t2 0x400b7b
print_backtrace
/home/descent/git/progs/backtrace/t2.c:15

addr2line -f -e t2 0x400a5a
_start
??:?

會得到: print_backtrace -> f3 -> f2 -> f1 -> main -> __libc_start_main -> _start

但如果再問 chatgpt 要怎麼實做 backtrace(), backtrace_symbols(), 它就鬼打牆了。

所以如果不借助 backtrace(), backtrace_symbols() 要怎麼辦到呢?

這和平台有關, 本篇是在 x86_64 環境的實做。

list 1. print_backtrace()

  1 #include <stdio.h>
  2 #include <stdlib.h>
  3 #include <stdint.h>
  4 
  5 #include <execinfo.h>
  6 
  7 #define BT_BUF_SIZE 100
  8 
  9 void print_backtrace() 
 10 {
 11     void *buffer[BT_BUF_SIZE];
 12     char **strings;
 13     int nptrs;
 14 
 15     nptrs = backtrace(buffer, BT_BUF_SIZE);
 16     printf("backtrace() returned %d addresses\n", nptrs);
 17 
 18     strings = backtrace_symbols(buffer, nptrs);
 19     if (strings == NULL) {
 20         perror("backtrace_symbols");
 21         exit(EXIT_FAILURE);
 22     }
 23 
 24     for (int i = 0; i < nptrs; i++) {
 25         printf("xx %s\n", strings[i]);
 26     }
 27 
 28     free(strings);
 29 }

list 2. t2.c

  1 #include <stdio.h>
  2 #include <stdlib.h>
  3 #include <stdint.h>
  4 
  5 #include <execinfo.h>
  6 
  7 #define BT_BUF_SIZE 100
  8 
  9 void print_backtrace() 
 10 {
 11     void *buffer[BT_BUF_SIZE];
 12     char **strings;
 13     int nptrs;
 14 
 15     nptrs = backtrace(buffer, BT_BUF_SIZE);
 16     printf("backtrace() returned %d addresses\n", nptrs);
 17 
 18     strings = backtrace_symbols(buffer, nptrs);
 19     if (strings == NULL) {
 20         perror("backtrace_symbols");
 21         exit(EXIT_FAILURE);
 22     }
 23 
 24     for (int i = 0; i < nptrs; i++) {
 25         printf("xx %s\n", strings[i]);
 26     }
 27 
 28     free(strings);
 29 }
 30 
 31 void f5()
 32 {
 33   printf("in f5\n");
 34   print_backtrace();
 35 }
 36 
 37 
 38 #include <stdio.h>
 39 
 40 uintptr_t get_rip_value() 
 41 {
 42   //uintptr_t rip_value;
 43   //asm volatile("movq $0, %%rax; movq (%%rsp), %%rax" : "=a" (rip_value));
 44   //asm("mov %%rip, %0" : "=r" (rip_value));
 45   uintptr_t current_address;
 46   asm("lea (%%rip), %0" : "=r" (current_address));
 47   return current_address;
 48 }
 49 
 50 unsigned long get_rbp()
 51 {
 52   unsigned long rbp_value;
 53   asm("movq %%rbp, %0" : "=r" (rbp_value));
 54   printf("The value of RBP register is: %#lx\n", rbp_value);
 55   return rbp_value;
 56 }
 57 
 58 int addr2func(uintptr_t addr)
 59 {
 60   char cmd[128] = {0};
 61   sprintf(cmd, "addr2line -f -e t2 %#lx\n", addr);
 62   printf("cmd: %s\n", cmd);
 63   system(cmd);
 64   return 0;
 65 }
 66 
 67 void f3()
 68 {
 69   print_backtrace();
 70   #if 1
 71   int level = 0;
 72   printf("in f3\n");
 73   //while(1)
 74   {
 75     {
 76       uintptr_t current_address;
 77       asm("lea (%%rip), %0" : "=r" (current_address));
 78       printf("current_address : %#lx\n", current_address);
 79       addr2func(current_address);
 80 
 81       printf("======\n");
 82 
 83       unsigned long rbp_value, previous_rbp;
 84       asm("movq %%rbp, %0" : "=r" (rbp_value));
 85       printf("%d ## The value of RBP register is: %#lx\n", level, rbp_value);
 86       ++level;
 87 
 88       uintptr_t ret_addr = *(uintptr_t*)(rbp_value + sizeof(uintptr_t));
 89       printf("ret_addr : %#lx\n", ret_addr);
 90 
 91       addr2func(ret_addr);
 92 
 93       printf("======\n");
 94 
 95       rbp_value = *(uintptr_t*)(rbp_value);
 96       printf("%d ## The value of RBP register is: %#lx\n", level, rbp_value);
 97       ++level;
 98 
 99       ret_addr = *(uintptr_t*)(rbp_value + sizeof(uintptr_t));
100       addr2func(ret_addr);
101 
102 
103       printf("======\n");
104 
105       rbp_value = *(uintptr_t*)(rbp_value);
106       printf("%d ## The value of RBP register is: %#lx\n", level, rbp_value);
107       ++level;
108 
109       ret_addr = *(uintptr_t*)(rbp_value + sizeof(uintptr_t));
110       addr2func(ret_addr);
111 
112       printf("======\n");
113 
114       rbp_value = *(uintptr_t*)(rbp_value);
115       printf("%d ## The value of RBP register is: %#lx\n", level, rbp_value);
116       ++level;
117 
118       ret_addr = *(uintptr_t*)(rbp_value + sizeof(uintptr_t));
119       addr2func(ret_addr);
120 
121       printf("======\n");
122 
123       previous_rbp = rbp_value;
124       
125       rbp_value = *(uintptr_t*)(rbp_value);
126       printf("%d ## The value of RBP register is: %#lx, previous_rbp: %#lx\n", level, rbp_value, previous_rbp);
127       ++level;
128 
129       if (rbp_value > previous_rbp)
130       {
131         ret_addr = *(uintptr_t*)(rbp_value + sizeof(uintptr_t));
132         addr2func(ret_addr);
133       }
134       else
135       {
136         printf("top stack frame\n");
137       }
138 
139       printf("======\n");
140 
141 
142     void* frame_address = __builtin_frame_address(0);
143     if (frame_address)
144       printf("Frame 0 address of f3: %p\n", frame_address);
145 
146     void* return_address = __builtin_return_address(0);
147     if (return_address)
148       printf("Return 0 address of f3: %p\n", return_address);
149     }
150 
151 #if 1
152     {
153     void* frame_address = __builtin_frame_address(1);
154     if (frame_address)
155       printf("Frame 1 address of f3: %p\n", frame_address);
156 
157     void* return_address = __builtin_return_address(1);
158     if (return_address)
159       printf("Return 1 address of f3: %p\n", return_address);
160     }
161 
162     {
163     void* frame_address = __builtin_frame_address(2);
164     if (frame_address)
165       printf("Frame 2 address of f3: %p\n", frame_address);
166 
167     void* return_address = __builtin_return_address(2);
168     if (return_address)
169       printf("Return 2 address of f3: %p\n", return_address);
170     }
171 #endif
172 #if 0
173     {
174     void* frame_address = __builtin_frame_address(3);
175     if (frame_address)
176       printf("Frame 3 address of f2: %p\n", frame_address);
177 
178     void* return_address = __builtin_return_address(3);
179     if (return_address)
180       printf("Return 3 address of f2: %p\n", return_address);
181     }
182 #endif
183 
184   }
185 #endif
186 
187 #if 0
188   printf("in f2\n");
189   f3();
190 #endif
191 }
192 
193 void f2()
194 {
195   printf("in f2\n");
196   f3();
197 }
198 
199 void f1()
200 {
201   printf("in f1\n");
202   f2();
203 }
204 
205 int main(int argc, char *argv[])
206 {
207   f1(); 
208   printf("main: %p\n", main);
209   printf("f1: %p\n", f1);
210   printf("f2: %p\n", f2);
211   #if 0
212   printf("f3: %p\n", f3);
213   printf("f5: %p\n", f5);
214   #endif
215   return 0;
216 }

先來理解 c 語言呼叫 function 時做的動作, 參考 list 3 的反組譯。

list 3. objdump -d t2

     1 
     2 t2:     file format elf64-x86-64
     3 

   611 0000000000400c50 <f5>:
   612   400c50:	55                   	push   %rbp
   613   400c51:	48 89 e5             	mov    %rsp,%rbp
   614   400c54:	48 8d 3d 49 2c 09 00 	lea    0x92c49(%rip),%rdi        # 4938a4 <_IO_stdin_used+0x44>
   615   400c5b:	e8 f0 06 01 00       	callq  411350 <_IO_puts>
   616   400c60:	b8 00 00 00 00       	mov    $0x0,%eax
   617   400c65:	e8 e3 fe ff ff       	callq  400b4d <print_backtrace>
   618   400c6a:	90                   	nop
   619   400c6b:	5d                   	pop    %rbp
   620   400c6c:	c3                   	retq   
   621 
   682 
   683 0000000000400d4e <f3>:
   684   400d4e:	55                   	push   %rbp
   685   400d4f:	48 89 e5             	mov    %rsp,%rbp
   686   400d52:	48 83 ec 60          	sub    $0x60,%rsp
   687   400d56:	b8 00 00 00 00       	mov    $0x0,%eax
   688   400d5b:	e8 ed fd ff ff       	callq  400b4d <print_backtrace>
   689   400d60:	c7 45 ac 00 00 00 00 	movl   $0x0,-0x54(%rbp)
   690   400d67:	48 8d 3d 88 2b 09 00 	lea    0x92b88(%rip),%rdi        # 4938f6 <_IO_stdin_used+0x96>
   691   400d6e:	e8 dd 05 01 00       	callq  411350 <_IO_puts>
   692   400d73:	48 8d 05 00 00 00 00 	lea    0x0(%rip),%rax        # 400d7a <f3+0x2c>
   693   400d7a:	48 89 45 b0          	mov    %rax,-0x50(%rbp)
   694   400d7e:	48 8b 45 b0          	mov    -0x50(%rbp),%rax
   695   400d82:	48 89 c6             	mov    %rax,%rsi
   696   400d85:	48 8d 3d 70 2b 09 00 	lea    0x92b70(%rip),%rdi        # 4938fc <_IO_stdin_used+0x9c>
   697   400d8c:	b8 00 00 00 00       	mov    $0x0,%eax
   698   400d91:	e8 fa f3 00 00       	callq  410190 <_IO_printf>
   699   400d96:	48 8b 45 b0          	mov    -0x50(%rbp),%rax
   700   400d9a:	48 89 c7             	mov    %rax,%rdi
   701   400d9d:	e8 0d ff ff ff       	callq  400caf <addr2func>
   702   400da2:	48 8d 3d 6b 2b 09 00 	lea    0x92b6b(%rip),%rdi        # 493914 <_IO_stdin_used+0xb4>
   703   400da9:	e8 a2 05 01 00       	callq  411350 <_IO_puts>
   704   400dae:	48 89 e8             	mov    %rbp,%rax
   705   400db1:	48 89 45 b8          	mov    %rax,-0x48(%rbp)
   706   400db5:	48 8b 55 b8          	mov    -0x48(%rbp),%rdx
   707   400db9:	8b 45 ac             	mov    -0x54(%rbp),%eax
   708   400dbc:	89 c6                	mov    %eax,%esi
   709   400dbe:	48 8d 3d 5b 2b 09 00 	lea    0x92b5b(%rip),%rdi        # 493920 <_IO_stdin_used+0xc0>
   710   400dc5:	b8 00 00 00 00       	mov    $0x0,%eax
   711   400dca:	e8 c1 f3 00 00       	callq  410190 <_IO_printf>
   712   400dcf:	83 45 ac 01          	addl   $0x1,-0x54(%rbp)
   713   400dd3:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   714   400dd7:	48 83 c0 08          	add    $0x8,%rax
   715   400ddb:	48 8b 00             	mov    (%rax),%rax
   716   400dde:	48 89 45 c0          	mov    %rax,-0x40(%rbp)
   717   400de2:	48 8b 45 c0          	mov    -0x40(%rbp),%rax
   718   400de6:	48 89 c6             	mov    %rax,%rsi
   719   400de9:	48 8d 3d 5a 2b 09 00 	lea    0x92b5a(%rip),%rdi        # 49394a <_IO_stdin_used+0xea>
   720   400df0:	b8 00 00 00 00       	mov    $0x0,%eax
   721   400df5:	e8 96 f3 00 00       	callq  410190 <_IO_printf>
   722   400dfa:	48 8b 45 c0          	mov    -0x40(%rbp),%rax
   723   400dfe:	48 89 c7             	mov    %rax,%rdi
   724   400e01:	e8 a9 fe ff ff       	callq  400caf <addr2func>
   725   400e06:	48 8d 3d 07 2b 09 00 	lea    0x92b07(%rip),%rdi        # 493914 <_IO_stdin_used+0xb4>
   726   400e0d:	e8 3e 05 01 00       	callq  411350 <_IO_puts>
   727   400e12:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   728   400e16:	48 8b 00             	mov    (%rax),%rax
   729   400e19:	48 89 45 b8          	mov    %rax,-0x48(%rbp)
   730   400e1d:	48 8b 55 b8          	mov    -0x48(%rbp),%rdx
   731   400e21:	8b 45 ac             	mov    -0x54(%rbp),%eax
   732   400e24:	89 c6                	mov    %eax,%esi
   733   400e26:	48 8d 3d f3 2a 09 00 	lea    0x92af3(%rip),%rdi        # 493920 <_IO_stdin_used+0xc0>
   734   400e2d:	b8 00 00 00 00       	mov    $0x0,%eax
   735   400e32:	e8 59 f3 00 00       	callq  410190 <_IO_printf>
   736   400e37:	83 45 ac 01          	addl   $0x1,-0x54(%rbp)
   737   400e3b:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   738   400e3f:	48 83 c0 08          	add    $0x8,%rax
   739   400e43:	48 8b 00             	mov    (%rax),%rax
   740   400e46:	48 89 45 c0          	mov    %rax,-0x40(%rbp)
   741   400e4a:	48 8b 45 c0          	mov    -0x40(%rbp),%rax
   742   400e4e:	48 89 c7             	mov    %rax,%rdi
   743   400e51:	e8 59 fe ff ff       	callq  400caf <addr2func>
   744   400e56:	48 8d 3d b7 2a 09 00 	lea    0x92ab7(%rip),%rdi        # 493914 <_IO_stdin_used+0xb4>
   745   400e5d:	e8 ee 04 01 00       	callq  411350 <_IO_puts>
   746   400e62:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   747   400e66:	48 8b 00             	mov    (%rax),%rax
   748   400e69:	48 89 45 b8          	mov    %rax,-0x48(%rbp)
   749   400e6d:	48 8b 55 b8          	mov    -0x48(%rbp),%rdx
   750   400e71:	8b 45 ac             	mov    -0x54(%rbp),%eax
   751   400e74:	89 c6                	mov    %eax,%esi
   752   400e76:	48 8d 3d a3 2a 09 00 	lea    0x92aa3(%rip),%rdi        # 493920 <_IO_stdin_used+0xc0>
   753   400e7d:	b8 00 00 00 00       	mov    $0x0,%eax
   754   400e82:	e8 09 f3 00 00       	callq  410190 <_IO_printf>
   755   400e87:	83 45 ac 01          	addl   $0x1,-0x54(%rbp)
   756   400e8b:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   757   400e8f:	48 83 c0 08          	add    $0x8,%rax
   758   400e93:	48 8b 00             	mov    (%rax),%rax
   759   400e96:	48 89 45 c0          	mov    %rax,-0x40(%rbp)
   760   400e9a:	48 8b 45 c0          	mov    -0x40(%rbp),%rax
   761   400e9e:	48 89 c7             	mov    %rax,%rdi
   762   400ea1:	e8 09 fe ff ff       	callq  400caf <addr2func>
   763   400ea6:	48 8d 3d 67 2a 09 00 	lea    0x92a67(%rip),%rdi        # 493914 <_IO_stdin_used+0xb4>
   764   400ead:	e8 9e 04 01 00       	callq  411350 <_IO_puts>
   765   400eb2:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   766   400eb6:	48 8b 00             	mov    (%rax),%rax
   767   400eb9:	48 89 45 b8          	mov    %rax,-0x48(%rbp)
   768   400ebd:	48 8b 55 b8          	mov    -0x48(%rbp),%rdx
   769   400ec1:	8b 45 ac             	mov    -0x54(%rbp),%eax
   770   400ec4:	89 c6                	mov    %eax,%esi
   771   400ec6:	48 8d 3d 53 2a 09 00 	lea    0x92a53(%rip),%rdi        # 493920 <_IO_stdin_used+0xc0>
   772   400ecd:	b8 00 00 00 00       	mov    $0x0,%eax
   773   400ed2:	e8 b9 f2 00 00       	callq  410190 <_IO_printf>
   774   400ed7:	83 45 ac 01          	addl   $0x1,-0x54(%rbp)
   775   400edb:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   776   400edf:	48 83 c0 08          	add    $0x8,%rax
   777   400ee3:	48 8b 00             	mov    (%rax),%rax
   778   400ee6:	48 89 45 c0          	mov    %rax,-0x40(%rbp)
   779   400eea:	48 8b 45 c0          	mov    -0x40(%rbp),%rax
   780   400eee:	48 89 c7             	mov    %rax,%rdi
   781   400ef1:	e8 b9 fd ff ff       	callq  400caf <addr2func>
   782   400ef6:	48 8d 3d 17 2a 09 00 	lea    0x92a17(%rip),%rdi        # 493914 <_IO_stdin_used+0xb4>
   783   400efd:	e8 4e 04 01 00       	callq  411350 <_IO_puts>
   784   400f02:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   785   400f06:	48 89 45 c8          	mov    %rax,-0x38(%rbp)
   786   400f0a:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   787   400f0e:	48 8b 00             	mov    (%rax),%rax
   788   400f11:	48 89 45 b8          	mov    %rax,-0x48(%rbp)
   789   400f15:	48 8b 4d c8          	mov    -0x38(%rbp),%rcx
   790   400f19:	48 8b 55 b8          	mov    -0x48(%rbp),%rdx
   791   400f1d:	8b 45 ac             	mov    -0x54(%rbp),%eax
   792   400f20:	89 c6                	mov    %eax,%esi
   793   400f22:	48 8d 3d 37 2a 09 00 	lea    0x92a37(%rip),%rdi        # 493960 <_IO_stdin_used+0x100>
   794   400f29:	b8 00 00 00 00       	mov    $0x0,%eax
   795   400f2e:	e8 5d f2 00 00       	callq  410190 <_IO_printf>
   796   400f33:	83 45 ac 01          	addl   $0x1,-0x54(%rbp)
   797   400f37:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   798   400f3b:	48 3b 45 c8          	cmp    -0x38(%rbp),%rax
   799   400f3f:	76 1d                	jbe    400f5e <f3+0x210>
   800   400f41:	48 8b 45 b8          	mov    -0x48(%rbp),%rax
   801   400f45:	48 83 c0 08          	add    $0x8,%rax
   802   400f49:	48 8b 00             	mov    (%rax),%rax
   803   400f4c:	48 89 45 c0          	mov    %rax,-0x40(%rbp)
   804   400f50:	48 8b 45 c0          	mov    -0x40(%rbp),%rax
   805   400f54:	48 89 c7             	mov    %rax,%rdi
   806   400f57:	e8 53 fd ff ff       	callq  400caf <addr2func>
   807   400f5c:	eb 0c                	jmp    400f6a <f3+0x21c>
   808   400f5e:	48 8d 3d 39 2a 09 00 	lea    0x92a39(%rip),%rdi        # 49399e <_IO_stdin_used+0x13e>
   809   400f65:	e8 e6 03 01 00       	callq  411350 <_IO_puts>
   810   400f6a:	48 8d 3d a3 29 09 00 	lea    0x929a3(%rip),%rdi        # 493914 <_IO_stdin_used+0xb4>
   811   400f71:	e8 da 03 01 00       	callq  411350 <_IO_puts>
   812   400f76:	48 89 6d d0          	mov    %rbp,-0x30(%rbp)
   813   400f7a:	48 83 7d d0 00       	cmpq   $0x0,-0x30(%rbp)
   814   400f7f:	74 18                	je     400f99 <f3+0x24b>
   815   400f81:	48 8b 45 d0          	mov    -0x30(%rbp),%rax
   816   400f85:	48 89 c6             	mov    %rax,%rsi
   817   400f88:	48 8d 3d 1f 2a 09 00 	lea    0x92a1f(%rip),%rdi        # 4939ae <_IO_stdin_used+0x14e>
   818   400f8f:	b8 00 00 00 00       	mov    $0x0,%eax
   819   400f94:	e8 f7 f1 00 00       	callq  410190 <_IO_printf>
   820   400f99:	48 8b 45 08          	mov    0x8(%rbp),%rax
   821   400f9d:	48 89 45 d8          	mov    %rax,-0x28(%rbp)
   822   400fa1:	48 83 7d d8 00       	cmpq   $0x0,-0x28(%rbp)
   823   400fa6:	74 18                	je     400fc0 <f3+0x272>
   824   400fa8:	48 8b 45 d8          	mov    -0x28(%rbp),%rax
   825   400fac:	48 89 c6             	mov    %rax,%rsi
   826   400faf:	48 8d 3d 13 2a 09 00 	lea    0x92a13(%rip),%rdi        # 4939c9 <_IO_stdin_used+0x169>
   827   400fb6:	b8 00 00 00 00       	mov    $0x0,%eax
   828   400fbb:	e8 d0 f1 00 00       	callq  410190 <_IO_printf>
   829   400fc0:	48 8b 45 00          	mov    0x0(%rbp),%rax
   830   400fc4:	48 89 45 e0          	mov    %rax,-0x20(%rbp)
   831   400fc8:	48 83 7d e0 00       	cmpq   $0x0,-0x20(%rbp)
   832   400fcd:	74 18                	je     400fe7 <f3+0x299>
   833   400fcf:	48 8b 45 e0          	mov    -0x20(%rbp),%rax
   834   400fd3:	48 89 c6             	mov    %rax,%rsi
   835   400fd6:	48 8d 3d 08 2a 09 00 	lea    0x92a08(%rip),%rdi        # 4939e5 <_IO_stdin_used+0x185>
   836   400fdd:	b8 00 00 00 00       	mov    $0x0,%eax
   837   400fe2:	e8 a9 f1 00 00       	callq  410190 <_IO_printf>
   838   400fe7:	48 8b 45 00          	mov    0x0(%rbp),%rax
   839   400feb:	48 8b 40 08          	mov    0x8(%rax),%rax
   840   400fef:	48 89 45 e8          	mov    %rax,-0x18(%rbp)
   841   400ff3:	48 83 7d e8 00       	cmpq   $0x0,-0x18(%rbp)
   842   400ff8:	74 18                	je     401012 <f3+0x2c4>
   843   400ffa:	48 8b 45 e8          	mov    -0x18(%rbp),%rax
   844   400ffe:	48 89 c6             	mov    %rax,%rsi
   845   401001:	48 8d 3d f8 29 09 00 	lea    0x929f8(%rip),%rdi        # 493a00 <_IO_stdin_used+0x1a0>
   846   401008:	b8 00 00 00 00       	mov    $0x0,%eax
   847   40100d:	e8 7e f1 00 00       	callq  410190 <_IO_printf>
   848   401012:	48 8b 45 00          	mov    0x0(%rbp),%rax
   849   401016:	48 8b 00             	mov    (%rax),%rax
   850   401019:	48 89 45 f0          	mov    %rax,-0x10(%rbp)
   851   40101d:	48 83 7d f0 00       	cmpq   $0x0,-0x10(%rbp)
   852   401022:	74 18                	je     40103c <f3+0x2ee>
   853   401024:	48 8b 45 f0          	mov    -0x10(%rbp),%rax
   854   401028:	48 89 c6             	mov    %rax,%rsi
   855   40102b:	48 8d 3d ea 29 09 00 	lea    0x929ea(%rip),%rdi        # 493a1c <_IO_stdin_used+0x1bc>
   856   401032:	b8 00 00 00 00       	mov    $0x0,%eax
   857   401037:	e8 54 f1 00 00       	callq  410190 <_IO_printf>
   858   40103c:	48 8b 45 00          	mov    0x0(%rbp),%rax
   859   401040:	48 8b 00             	mov    (%rax),%rax
   860   401043:	48 8b 40 08          	mov    0x8(%rax),%rax
   861   401047:	48 89 45 f8          	mov    %rax,-0x8(%rbp)
   862   40104b:	48 83 7d f8 00       	cmpq   $0x0,-0x8(%rbp)
   863   401050:	74 18                	je     40106a <f3+0x31c>
   864   401052:	48 8b 45 f8          	mov    -0x8(%rbp),%rax
   865   401056:	48 89 c6             	mov    %rax,%rsi
   866   401059:	48 8d 3d d7 29 09 00 	lea    0x929d7(%rip),%rdi        # 493a37 <_IO_stdin_used+0x1d7>
   867   401060:	b8 00 00 00 00       	mov    $0x0,%eax
   868   401065:	e8 26 f1 00 00       	callq  410190 <_IO_printf>
   869   40106a:	90                   	nop
   870   40106b:	c9                   	leaveq 
   871   40106c:	c3                   	retq   
   872 
   873 000000000040106d <f2>:
   874   40106d:	55                   	push   %rbp
   875   40106e:	48 89 e5             	mov    %rsp,%rbp
   876   401071:	48 8d 3d db 29 09 00 	lea    0x929db(%rip),%rdi        # 493a53 <_IO_stdin_used+0x1f3>
   877   401078:	e8 d3 02 01 00       	callq  411350 <_IO_puts>
   878   40107d:	b8 00 00 00 00       	mov    $0x0,%eax
   879   401082:	e8 c7 fc ff ff       	callq  400d4e <f3>
   880   401087:	90                   	nop
   881   401088:	5d                   	pop    %rbp
   882   401089:	c3                   	retq   
   883 
   884 000000000040108a <f1>:
   885   40108a:	55                   	push   %rbp
   886   40108b:	48 89 e5             	mov    %rsp,%rbp
   887   40108e:	48 8d 3d c4 29 09 00 	lea    0x929c4(%rip),%rdi        # 493a59 <_IO_stdin_used+0x1f9>
   888   401095:	e8 b6 02 01 00       	callq  411350 <_IO_puts>
   889   40109a:	b8 00 00 00 00       	mov    $0x0,%eax
   890   40109f:	e8 c9 ff ff ff       	callq  40106d <f2>
   891   4010a4:	90                   	nop
   892   4010a5:	5d                   	pop    %rbp
   893   4010a6:	c3                   	retq   
   894 
   895 00000000004010a7 <main>:
   896   4010a7:	55                   	push   %rbp
   897   4010a8:	48 89 e5             	mov    %rsp,%rbp
   898   4010ab:	48 83 ec 10          	sub    $0x10,%rsp
   899   4010af:	89 7d fc             	mov    %edi,-0x4(%rbp)
   900   4010b2:	48 89 75 f0          	mov    %rsi,-0x10(%rbp)
   901   4010b6:	b8 00 00 00 00       	mov    $0x0,%eax
   902   4010bb:	e8 ca ff ff ff       	callq  40108a <f1>
   903   4010c0:	48 8d 35 e0 ff ff ff 	lea    -0x20(%rip),%rsi        # 4010a7 <main>
   904   4010c7:	48 8d 3d 91 29 09 00 	lea    0x92991(%rip),%rdi        # 493a5f <_IO_stdin_used+0x1ff>
   905   4010ce:	b8 00 00 00 00       	mov    $0x0,%eax
   906   4010d3:	e8 b8 f0 00 00       	callq  410190 <_IO_printf>
   907   4010d8:	48 8d 35 ab ff ff ff 	lea    -0x55(%rip),%rsi        # 40108a <f1>
   908   4010df:	48 8d 3d 83 29 09 00 	lea    0x92983(%rip),%rdi        # 493a69 <_IO_stdin_used+0x209>
   909   4010e6:	b8 00 00 00 00       	mov    $0x0,%eax
   910   4010eb:	e8 a0 f0 00 00       	callq  410190 <_IO_printf>
   911   4010f0:	48 8d 35 76 ff ff ff 	lea    -0x8a(%rip),%rsi        # 40106d <f2>
   912   4010f7:	48 8d 3d 73 29 09 00 	lea    0x92973(%rip),%rdi        # 493a71 <_IO_stdin_used+0x211>
   913   4010fe:	b8 00 00 00 00       	mov    $0x0,%eax
   914   401103:	e8 88 f0 00 00       	callq  410190 <_IO_printf>
   915   401108:	b8 00 00 00 00       	mov    $0x0,%eax
   916   40110d:	c9                   	leaveq 
   917   40110e:	c3                   	retq

L902 main call f1, L884 ~ L886 是進入 f1 時做的事情。

   902   4010bb:	e8 ca ff ff ff       	callq  40108a <f1>

main call f1 時, f1 會做
   884 000000000040108a <f1>:
   885   40108a:	55                   	push   %rbp
   886   40108b:	48 89 e5             	mov    %rsp,%rbp


   890   40109f:	e8 c9 ff ff ff       	callq  40106d <f2>

f1 call f2 時, f2 會做
   873 000000000040106d <f2>:
   874   40106d:	55                   	push   %rbp
   875   40106e:	48 89 e5             	mov    %rsp,%rbp

push   %rbp
mov    %rsp,%rbp

都是在 function 的最開始時會做的事情 (table 1. L2, L4)。table 1 把 main call f1, f1 call f2 時 stack (rsp 所指向的位址) 的內容整理起來。

table 1. main call f1; f1 call f2 stack 內容
0			sp
1	call f1		0x7fffffffe058	ret addr 0x4010c0
2		push %rbp (main_rbp) ; f1_rbp=rsp = 0x7fffffffe050	0x7fffffffe050	main_rbp (0x7fffffffe070)
3	call f2		0x7fffffffe048	ret addr 0x4010a4
4		push %rbp (f1_rbp) ; f2_rbp=rsp = 0x7fffffffe040	0x7fffffffe040	f1_rbp (0x7fffffffe050)

指令 call 會發生的 sp 操作: 先把 rsp 減 8, 再把 ret address 存入 rsp 所指向的記憶體位址; 我用 gdb 把從 main 到 f2 時的 stack 內容記錄在 table 1。

list 5. t2 的編譯指令

gcc -static -Wall -save-temps -g -no-pie t2.c -o t2

用了 -no-pie 是希望不要編譯成 PIE (position-independent executable, 載入位址不固定) 的執行檔, 用 objdump 在對照位址時比較方便。其他編譯選項沒太大影響。

原理是這樣 (前提是每個 function 開頭都有 push %rbp; mov %rsp,%rbp, 也就是編譯時保留了 frame pointer): 先抓到目前 rbp 的值, 假如目前在 f2, 抓到 f2_rbp 就可以知道 f1 function 的 rbp (f1_rbp), 知道了 f1_rbp 就可以知道 main function 的 rbp。那麼知道每個 function 的 rbp 要幹嘛呢? 為了取得 return address。怎麼取得? 讀取每層 function 的 rbp + 8 這個位址的內容就可以得到 (對照 table 1. L1, L3)。

寫成 c code 就是: 上一層 function 的 return address = *(uintptr_t*)(rbp + 8)

那麼又怎麼從目前 function 的 rbp 值得到上一層的 rbp 值呢? 讀取 rbp 所指向位址的內容就可以取得 (參考 table 1. L2, L4), 寫成 c code 就是: 上一層 function 的 rbp = *(uintptr_t*)(rbp)

這樣一層一層追, 就可以追到 __libc_start_main, 那 _start 追得到嗎? 抱歉, 目前我還不知道怎麼從 __libc_start_main 追到 _start (backtrace(), backtrace_symbols() 可以追到 _start; 依 list 6 的註解, backtrace() 會先用 _Unwind_Backtrace 搭配 .eh_frame 資訊來追, 不是只靠 rbp, 這可能就是它能追得更遠的原因)。另外還有一個問題, 從這層的 rbp 一直追到上一層的 rbp, 要怎麼判定追到 __libc_start_main 這層了? 這邊是觀察出來的, 上一層的 rbp 應該會比目前這層的 rbp 大, 我就這樣判定, 有可能會出錯嗎? 當然有可能, 但我想不到別的辦法了。list 6 L105, L106 有類似的檢查條件。

list 2. L81 ~ L139 就是在做這樣的事情。另外還需要知道目前在哪個 function, 所以用了 L77 得到目前的位址。這些抓暫存器、抓目前位址都是透過 chatgpt 問到, 相當方便。

再來有了位址要怎麼找出對應的 function, 這邊我偷懶了, 直接使用 addr2line 這個指令幫忙。光是 addr2line 怎麼辦到的, 可能又是一個主題了。

list 2 L142, L146 就是在取得 rbp 和 return address, __builtin_frame_address(), __builtin_return_address() 是 gcc 內建 function, 比較有可攜性。我不滿足這樣的作法, 想「知道」怎麼辦到的, 才有了本篇文章。

在 glibc 2.39 sysdeps/i386/backtrace.c __backtrace (void **array, int size) 可以看到類似的作法 (這是 i386 的版本, 用的暫存器是 ebp/esp, 但原理和本篇 x86_64 的 rbp/rsp 相同)。

list 6. glibc-2.39/sysdeps/i386/backtrace.c

  1 /* Return backtrace of current program state.
  2    Copyright (C) 1998-2024 Free Software Foundation, Inc.
  3    This file is part of the GNU C Library.
  4
  5    The GNU C Library is free software; you can redistribute it and/or
  6    modify it under the terms of the GNU Lesser General Public
  7    License as published by the Free Software Foundation; either
  8    version 2.1 of the License, or (at your option) any later version.
  9
 10    The GNU C Library is distributed in the hope that it will be useful,
 11    but WITHOUT ANY WARRANTY; without even the implied warranty of
 12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13    Lesser General Public License for more details.
 14
 15    You should have received a copy of the GNU Lesser General Public
 16    License along with the GNU C Library; if not, see
 17    <https://www.gnu.org/licenses/>.  */
 18
 19 #include <execinfo.h>
 20 #include <stdlib.h>
 21 #include <unwind-link.h>
 22
 23 struct trace_arg
 24 {
 25   void **array;
 26   struct unwind_link *unwind_link;
 27   int cnt, size;
 28   void *lastebp, *lastesp;
 29 };
 30
 31 static _Unwind_Reason_Code
 32 backtrace_helper (struct _Unwind_Context *ctx, void *a)
 33 {
 34   struct trace_arg *arg = a;
 35
 36   /* We are first called with address in the __backtrace function.
 37      Skip it.  */
 38   if (arg->cnt != -1)
 39     arg->array[arg->cnt]
 40       = (void *) UNWIND_LINK_PTR (arg->unwind_link, _Unwind_GetIP) (ctx);
 41   if (++arg->cnt == arg->size)
 42     return _URC_END_OF_STACK;
 43
 44   /* %ebp is DWARF2 register 5 on IA-32.  */
 45   arg->lastebp
 46     = (void *) UNWIND_LINK_PTR (arg->unwind_link, _Unwind_GetGR) (ctx, 5);
 47   arg->lastesp
 48     = (void *) UNWIND_LINK_PTR (arg->unwind_link, _Unwind_GetCFA) (ctx);
 49   return _URC_NO_REASON;
 50 }
 51
 52
 53 /* This is a global variable set at program start time.  It marks the
 54    highest used stack address.  */
 55 extern void *__libc_stack_end;
 56
 57
 58 /* This is the stack layout we see with every stack frame
 59    if not compiled without frame pointer.
 60
 61             +-----------------+        +-----------------+
 62     %ebp -> | %ebp last frame--------> | %ebp last frame--->...
 63             |                 |        |                 |
 64             | return address  |        | return address  |
 65             +-----------------+        +-----------------+
 66
 67    First try as far to get as far as possible using
 68    _Unwind_Backtrace which handles -fomit-frame-pointer
 69    as well, but requires .eh_frame info.  Then fall back to
 70    walking the stack manually.  */
 71
 72 struct layout
 73 {
 74   struct layout *ebp;
 75   void *ret;
 76 };
 77
 78
 79 int
 80 __backtrace (void **array, int size)
 81 {
 82   struct trace_arg arg =
 83     {
 84      .array = array,
 85      .unwind_link = __libc_unwind_link_get (),
 86      .size = size,
 87      .cnt = -1,
 88     };
 89
 90   if (size <= 0 || arg.unwind_link == NULL)
 91     return 0;
 92
 93   UNWIND_LINK_PTR (arg.unwind_link, _Unwind_Backtrace)
 94     (backtrace_helper, &arg);
 95
 96   if (arg.cnt > 1 && arg.array[arg.cnt - 1] == NULL)
 97     --arg.cnt;
 98   else if (arg.cnt < size)
 99     {
100       struct layout *ebp = (struct layout *) arg.lastebp;
101
102       while (arg.cnt < size)
103 	{
104 	  /* Check for out of range.  */
105 	  if ((void *) ebp < arg.lastesp || (void *) ebp > __libc_stack_end
106 	      || ((long) ebp & 3))
107 	    break;
108
109 	  array[arg.cnt++] = ebp->ret;
110 	  ebp = ebp->ebp;
111 	}
112     }
113   return arg.cnt != -1 ? arg.cnt : 0;
114 }
115 weak_alias (__backtrace, backtrace)
116 libc_hidden_def (__backtrace)

ref:
誰在呼叫我？不同的backtrace實作說明好文章

人體汽肛 - 風馳電掣, 人車一體

blog 文章

2024年3月21日星期四