filename | src/sh4/ia32abi.h |
changeset | 926:68f3e0fe02f1 |
prev | 907:5ecafd8d7923 |
next | 927:17b6b9e245d8 |
author | nkeynes |
date | Sun Dec 14 07:50:48 2008 +0000 (15 years ago) |
permissions | -rw-r--r-- |
last change | Set up a 'proper' stack frame in translated blocks. This doesn't affect performance noticeably, but does ensure that a) the stack is aligned correctly on OS X with no extra effort, and b) we can't mess up the stack and crash that way anymore. Replace all PUSH/POP instructions (outside of the prologue/epilogue) with ESP-relative moves to stack local variables. Finally, merge ia32mac and ia32abi together, since they're pretty much the same now anyway (thereby simplifying maintenance a good deal). |
file | annotate | diff | log | raw |
1.1 --- a/src/sh4/ia32abi.h Thu Oct 30 05:42:24 2008 +00001.2 +++ b/src/sh4/ia32abi.h Sun Dec 14 07:50:48 2008 +00001.3 @@ -1,8 +1,10 @@1.4 /**1.5 * $Id$1.6 *1.7 - * Provides the implementation for the ia32 ABI (eg prologue, epilogue, and1.8 - * calling conventions)1.9 + * Provides the implementation for the ia32 ABI variant1.10 + * (eg prologue, epilogue, and calling conventions). Stack frame is1.11 + * aligned on 16-byte boundaries for the benefit of OS X (which1.12 + * requires it).1.13 *1.14 * Copyright (c) 2007 Nathan Keynes.1.15 *1.16 @@ -17,8 +19,8 @@1.17 * GNU General Public License for more details.1.18 */1.20 -#ifndef lxdream_ia32abi_H1.21 -#define lxdream_ia32abi_H 11.22 +#ifndef lxdream_ia32mac_H1.23 +#define lxdream_ia32mac_H 11.25 #define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );1.27 @@ -28,8 +30,7 @@1.28 */1.29 static inline void call_func0( void *ptr )1.30 {1.31 - load_imm32(R_ECX, (uint32_t)ptr);1.32 - CALL_r32(R_ECX);1.33 + CALL_ptr(ptr);1.34 }1.36 #ifdef HAVE_FASTCALL1.37 @@ -38,8 +39,7 @@1.38 if( arg1 != R_EAX ) {1.39 MOV_r32_r32( arg1, R_EAX );1.40 }1.41 - load_imm32(R_ECX, (uint32_t)ptr);1.42 - CALL_r32(R_ECX);1.43 + CALL_ptr(ptr);1.44 }1.46 static inline void call_func2( void *ptr, int arg1, int arg2 )1.47 @@ -50,8 +50,7 @@1.48 if( arg1 != R_EAX ) {1.49 MOV_r32_r32( arg1, R_EAX );1.50 }1.51 - load_imm32(R_ECX, (uint32_t)ptr);1.52 - CALL_r32(R_ECX);1.53 + CALL_ptr(ptr);1.54 }1.56 /**1.57 @@ -60,11 +59,11 @@1.58 */1.59 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )1.60 {1.61 - PUSH_r32(arg2b);1.62 - PUSH_r32(addr);1.63 + MOV_r32_esp8(addr, 0);1.64 + MOV_r32_esp8(arg2b, 4);1.65 call_func2(sh4_write_long, addr, arg2a);1.66 - POP_r32(R_EAX);1.67 - POP_r32(R_EDX);1.68 + MOV_esp8_r32(0, R_EAX);1.69 + MOV_esp8_r32(4, R_EDX);1.70 ADD_imm8s_r32(4, R_EAX);1.71 call_func0(sh4_write_long);1.72 }1.73 @@ -75,32 +74,33 @@1.74 */1.75 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )1.76 {1.77 - 
PUSH_r32(addr);1.78 + MOV_r32_esp8(addr, 0);1.79 call_func1(sh4_read_long, addr);1.80 - POP_r32(R_ECX);1.81 - PUSH_r32(R_EAX);1.82 - MOV_r32_r32(R_ECX, R_EAX);1.83 + MOV_r32_esp8(R_EAX, 4);1.84 + MOV_esp8_r32(0, R_EAX);1.85 ADD_imm8s_r32(4, R_EAX);1.86 call_func0(sh4_read_long);1.87 if( arg2b != R_EAX ) {1.88 MOV_r32_r32(R_EAX, arg2b);1.89 }1.90 - POP_r32(arg2a);1.91 + MOV_esp8_r32(4, arg2a);1.92 }1.93 #else1.94 static inline void call_func1( void *ptr, int arg1 )1.95 {1.96 + SUB_imm8s_r32( 12, R_ESP );1.97 PUSH_r32(arg1);1.98 - call_func0(ptr);1.99 - ADD_imm8s_r32( 4, R_ESP );1.100 + CALL_ptr(ptr);1.101 + ADD_imm8s_r32( 16, R_ESP );1.102 }1.104 static inline void call_func2( void *ptr, int arg1, int arg2 )1.105 {1.106 + SUB_imm8s_r32( 8, R_ESP );1.107 PUSH_r32(arg2);1.108 PUSH_r32(arg1);1.109 - call_func0(ptr);1.110 - ADD_imm8s_r32( 8, R_ESP );1.111 + CALL_ptr(ptr);1.112 + ADD_imm8s_r32( 16, R_ESP );1.113 }1.115 /**1.116 @@ -109,16 +109,17 @@1.117 */1.118 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )1.119 {1.120 - ADD_imm8s_r32( 4, addr );1.121 + SUB_imm8s_r32( 8, R_ESP );1.122 PUSH_r32(arg2b);1.123 - PUSH_r32(addr);1.124 - ADD_imm8s_r32( -4, addr );1.125 + LEA_r32disp8_r32( addr, 4, arg2b );1.126 + PUSH_r32(arg2b);1.127 + SUB_imm8s_r32( 8, R_ESP );1.128 PUSH_r32(arg2a);1.129 PUSH_r32(addr);1.130 - call_func0(sh4_write_long);1.131 - ADD_imm8s_r32( 8, R_ESP );1.132 - call_func0(sh4_write_long);1.133 - ADD_imm8s_r32( 8, R_ESP );1.134 + CALL_ptr(sh4_write_long);1.135 + ADD_imm8s_r32( 16, R_ESP );1.136 + CALL_ptr(sh4_write_long);1.137 + ADD_imm8s_r32( 16, R_ESP );1.138 }1.140 /**1.141 @@ -127,36 +128,43 @@1.142 */1.143 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )1.144 {1.145 + SUB_imm8s_r32( 12, R_ESP );1.146 PUSH_r32(addr);1.147 - call_func0(sh4_read_long);1.148 - POP_r32(R_ECX);1.149 - PUSH_r32(R_EAX);1.150 - ADD_imm8s_r32( 4, R_ECX );1.151 - PUSH_r32(R_ECX);1.152 - call_func0(sh4_read_long);1.153 - ADD_imm8s_r32( 4, 
R_ESP );1.154 - MOV_r32_r32( R_EAX, arg2b );1.155 - POP_r32(arg2a);1.156 + CALL_ptr(sh4_read_long);1.157 + MOV_r32_esp8(R_EAX, 4);1.158 + ADD_imm8s_esp8(4, 0);1.159 + CALL_ptr(sh4_read_long);1.160 + if( arg2b != R_EAX ) {1.161 + MOV_r32_r32( R_EAX, arg2b );1.162 + }1.163 + MOV_esp8_r32( 4, arg2a );1.164 + ADD_imm8s_r32( 16, R_ESP );1.165 }1.166 +1.167 #endif1.169 /**1.170 * Emit the 'start of block' assembly. Sets up the stack frame and save1.171 * SI/DI as required1.172 + * Allocates 8 bytes for local variables, which also has the convenient1.173 + * side-effect of aligning the stack.1.174 */1.175 void enter_block( )1.176 {1.177 PUSH_r32(R_EBP);1.178 - /* mov &sh4r, ebp */1.179 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );1.180 + SUB_imm8s_r32( 8, R_ESP );1.181 +}1.183 -#ifdef STACK_ALIGN1.184 - sh4_x86.stack_posn = 8;1.185 -#endif1.186 +static inline void exit_block( )1.187 +{1.188 + ADD_imm8s_r32( 8, R_ESP );1.189 + POP_r32(R_EBP);1.190 + RET();1.191 }1.193 /**1.194 - * Exit the block with sh4r.pc already written1.195 + * Exit the block with sh4r.new_pc written with the target pc1.196 */1.197 void exit_block_pcset( sh4addr_t pc )1.198 {1.199 @@ -167,9 +175,8 @@1.200 call_func1(xlat_get_code_by_vma,R_EAX);1.201 } else {1.202 call_func1(xlat_get_code,R_EAX);1.203 - }1.204 - POP_r32(R_EBP);1.205 - RET();1.206 + }1.207 + exit_block();1.208 }1.210 /**1.211 @@ -185,37 +192,31 @@1.212 call_func1(xlat_get_code_by_vma,R_EAX);1.213 } else {1.214 call_func1(xlat_get_code,R_EAX);1.215 - }1.216 - POP_r32(R_EBP);1.217 - RET();1.218 + }1.219 + exit_block();1.220 }1.222 -#define EXIT_BLOCK_SIZE(pc) (24 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))1.223 -1.225 /**1.226 * Exit the block to an absolute PC1.227 */1.228 -void exit_block( sh4addr_t pc, sh4addr_t endpc )1.229 +void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )1.230 {1.231 load_imm32( R_ECX, pc ); // 51.232 store_spreg( R_ECX, REG_OFFSET(pc) ); // 31.233 if( IS_IN_ICACHE(pc) ) {1.234 MOV_moff32_EAX( 
xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 51.235 + AND_imm8s_r32( 0xFC, R_EAX ); // 31.236 } else if( sh4_x86.tlb_on ) {1.237 call_func1(xlat_get_code_by_vma,R_ECX);1.238 } else {1.239 call_func1(xlat_get_code,R_ECX);1.240 }1.241 - AND_imm8s_r32( 0xFC, R_EAX ); // 31.242 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 51.243 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 61.244 - POP_r32(R_EBP);1.245 - RET();1.246 + exit_block();1.247 }1.249 -#define EXIT_BLOCK_REL_SIZE(pc) (27 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))1.250 -1.251 /**1.252 * Exit the block to a relative PC1.253 */1.254 @@ -226,16 +227,15 @@1.255 store_spreg( R_ECX, REG_OFFSET(pc) ); // 31.256 if( IS_IN_ICACHE(pc) ) {1.257 MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 51.258 + AND_imm8s_r32( 0xFC, R_EAX ); // 31.259 } else if( sh4_x86.tlb_on ) {1.260 call_func1(xlat_get_code_by_vma,R_ECX);1.261 } else {1.262 call_func1(xlat_get_code,R_ECX);1.263 }1.264 - AND_imm8s_r32( 0xFC, R_EAX ); // 31.265 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 51.266 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 61.267 - POP_r32(R_EBP);1.268 - RET();1.269 + exit_block();1.270 }1.272 /**1.273 @@ -257,7 +257,7 @@1.274 MUL_r32( R_EDX );1.275 ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );1.277 - POP_r32( R_EAX );1.278 + POP_r32(R_EAX);1.279 call_func1( sh4_raise_exception, R_EAX );1.280 load_spreg( R_EAX, R_PC );1.281 if( sh4_x86.tlb_on ) {1.282 @@ -265,8 +265,7 @@1.283 } else {1.284 call_func1(xlat_get_code,R_EAX);1.285 }1.286 - POP_r32(R_EBP);1.287 - RET();1.288 + exit_block();1.290 // Exception already raised - just cleanup1.291 uint8_t *preexc_ptr = xlat_output;1.292 @@ -282,8 +281,7 @@1.293 } else {1.294 call_func1(xlat_get_code,R_EAX);1.295 }1.296 - POP_r32(R_EBP);1.297 - RET();1.298 + exit_block();1.300 for( i=0; i< sh4_x86.backpatch_posn; i++ ) {1.301 uint32_t *fixup_addr = (uint32_t 
*)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];1.302 @@ -306,6 +304,7 @@1.303 }1.304 }1.306 +1.307 /**1.308 * The unwind methods only work if we compiled with DWARF2 frame information1.309 * (ie -fexceptions), otherwise we have to use the direct frame scan.1.310 @@ -314,19 +313,17 @@1.311 #include <unwind.h>1.313 struct UnwindInfo {1.314 - int have_result;1.315 - void *pc;1.316 + uintptr_t block_start;1.317 + uintptr_t block_end;1.318 + void *pc;1.319 };1.321 _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )1.322 {1.323 - void *ebp = (void *)_Unwind_GetGR(context, 5);1.324 - void *expect = (((uint8_t *)&sh4r) + 128 );1.325 - struct UnwindInfo *info = arg;1.326 - if( ebp == expect ) {1.327 - info->have_result = 1;1.328 - info->pc = (void *)_Unwind_GetIP(context);1.329 - } else if( info->have_result ) {1.330 + struct UnwindInfo *info = arg;1.331 + void *pc = (void *)_Unwind_GetIP(context);1.332 + if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {1.333 + info->pc = pc;1.334 return _URC_NORMAL_STOP;1.335 }1.337 @@ -338,12 +335,12 @@1.338 struct _Unwind_Exception exc;1.339 struct UnwindInfo info;1.341 - info.have_result = 0;1.342 + info.pc = NULL;1.343 + info.block_start = (uintptr_t)code;1.344 + info.block_end = info.block_start + code_size;1.345 void *result = NULL;1.346 _Unwind_Backtrace( xlat_check_frame, &info );1.347 - if( info.have_result )1.348 - return info.pc;1.349 - return NULL;1.350 + return info.pc;1.351 }1.352 #else1.353 void *xlat_get_native_pc( void *code, uint32_t code_size )1.354 @@ -370,6 +367,6 @@1.355 }1.356 #endif1.358 -#endif /* !lxdream_ia32abi_H */1.359 +#endif /* !lxdream_ia32mac.h */
.