Search
lxdream.org :: lxdream/src/sh4/ia32abi.h :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/ia32abi.h
changeset 926:68f3e0fe02f1
prev 907:5ecafd8d7923
next 927:17b6b9e245d8
author nkeynes
date Sun Dec 14 07:50:48 2008 +0000 (15 years ago)
permissions -rw-r--r--
last change Setup a 'proper' stackframe in translated blocks. This doesn't affect performance noticeably,
but does ensure that
a) The stack is aligned correctly on OS X with no extra effort, and
b) We can't mess up the stack and crash that way anymore.
Replace all PUSH/POP instructions (outside of prologue/epilogue) with ESP-rel moves to stack
local variables.
Finally merge ia32mac and ia32abi together, since they're pretty much the same now anyway (and
thereby simplifying maintenance a good deal)
file annotate diff log raw
1.1 --- a/src/sh4/ia32abi.h Thu Oct 30 05:42:24 2008 +0000
1.2 +++ b/src/sh4/ia32abi.h Sun Dec 14 07:50:48 2008 +0000
1.3 @@ -1,8 +1,10 @@
1.4 /**
1.5 * $Id$
1.6 *
1.7 - * Provides the implementation for the ia32 ABI (eg prologue, epilogue, and
1.8 - * calling conventions)
1.9 + * Provides the implementation for the ia32 ABI variant
1.10 + * (eg prologue, epilogue, and calling conventions). Stack frame is
1.11 + * aligned on 16-byte boundaries for the benefit of OS X (which
1.12 + * requires it).
1.13 *
1.14 * Copyright (c) 2007 Nathan Keynes.
1.15 *
1.16 @@ -17,8 +19,8 @@
1.17 * GNU General Public License for more details.
1.18 */
1.19
1.20 -#ifndef lxdream_ia32abi_H
1.21 -#define lxdream_ia32abi_H 1
1.22 +#ifndef lxdream_ia32mac_H
1.23 +#define lxdream_ia32mac_H 1
1.24
1.25 #define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );
1.26
1.27 @@ -28,8 +30,7 @@
1.28 */
1.29 static inline void call_func0( void *ptr )
1.30 {
1.31 - load_imm32(R_ECX, (uint32_t)ptr);
1.32 - CALL_r32(R_ECX);
1.33 + CALL_ptr(ptr);
1.34 }
1.35
1.36 #ifdef HAVE_FASTCALL
1.37 @@ -38,8 +39,7 @@
1.38 if( arg1 != R_EAX ) {
1.39 MOV_r32_r32( arg1, R_EAX );
1.40 }
1.41 - load_imm32(R_ECX, (uint32_t)ptr);
1.42 - CALL_r32(R_ECX);
1.43 + CALL_ptr(ptr);
1.44 }
1.45
1.46 static inline void call_func2( void *ptr, int arg1, int arg2 )
1.47 @@ -50,8 +50,7 @@
1.48 if( arg1 != R_EAX ) {
1.49 MOV_r32_r32( arg1, R_EAX );
1.50 }
1.51 - load_imm32(R_ECX, (uint32_t)ptr);
1.52 - CALL_r32(R_ECX);
1.53 + CALL_ptr(ptr);
1.54 }
1.55
1.56 /**
1.57 @@ -60,11 +59,11 @@
1.58 */
1.59 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.60 {
1.61 - PUSH_r32(arg2b);
1.62 - PUSH_r32(addr);
1.63 + MOV_r32_esp8(addr, 0);
1.64 + MOV_r32_esp8(arg2b, 4);
1.65 call_func2(sh4_write_long, addr, arg2a);
1.66 - POP_r32(R_EAX);
1.67 - POP_r32(R_EDX);
1.68 + MOV_esp8_r32(0, R_EAX);
1.69 + MOV_esp8_r32(4, R_EDX);
1.70 ADD_imm8s_r32(4, R_EAX);
1.71 call_func0(sh4_write_long);
1.72 }
1.73 @@ -75,32 +74,33 @@
1.74 */
1.75 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.76 {
1.77 - PUSH_r32(addr);
1.78 + MOV_r32_esp8(addr, 0);
1.79 call_func1(sh4_read_long, addr);
1.80 - POP_r32(R_ECX);
1.81 - PUSH_r32(R_EAX);
1.82 - MOV_r32_r32(R_ECX, R_EAX);
1.83 + MOV_r32_esp8(R_EAX, 4);
1.84 + MOV_esp8_r32(0, R_EAX);
1.85 ADD_imm8s_r32(4, R_EAX);
1.86 call_func0(sh4_read_long);
1.87 if( arg2b != R_EAX ) {
1.88 MOV_r32_r32(R_EAX, arg2b);
1.89 }
1.90 - POP_r32(arg2a);
1.91 + MOV_esp8_r32(4, arg2a);
1.92 }
1.93 #else
1.94 static inline void call_func1( void *ptr, int arg1 )
1.95 {
1.96 + SUB_imm8s_r32( 12, R_ESP );
1.97 PUSH_r32(arg1);
1.98 - call_func0(ptr);
1.99 - ADD_imm8s_r32( 4, R_ESP );
1.100 + CALL_ptr(ptr);
1.101 + ADD_imm8s_r32( 16, R_ESP );
1.102 }
1.103
1.104 static inline void call_func2( void *ptr, int arg1, int arg2 )
1.105 {
1.106 + SUB_imm8s_r32( 8, R_ESP );
1.107 PUSH_r32(arg2);
1.108 PUSH_r32(arg1);
1.109 - call_func0(ptr);
1.110 - ADD_imm8s_r32( 8, R_ESP );
1.111 + CALL_ptr(ptr);
1.112 + ADD_imm8s_r32( 16, R_ESP );
1.113 }
1.114
1.115 /**
1.116 @@ -109,16 +109,17 @@
1.117 */
1.118 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.119 {
1.120 - ADD_imm8s_r32( 4, addr );
1.121 + SUB_imm8s_r32( 8, R_ESP );
1.122 PUSH_r32(arg2b);
1.123 - PUSH_r32(addr);
1.124 - ADD_imm8s_r32( -4, addr );
1.125 + LEA_r32disp8_r32( addr, 4, arg2b );
1.126 + PUSH_r32(arg2b);
1.127 + SUB_imm8s_r32( 8, R_ESP );
1.128 PUSH_r32(arg2a);
1.129 PUSH_r32(addr);
1.130 - call_func0(sh4_write_long);
1.131 - ADD_imm8s_r32( 8, R_ESP );
1.132 - call_func0(sh4_write_long);
1.133 - ADD_imm8s_r32( 8, R_ESP );
1.134 + CALL_ptr(sh4_write_long);
1.135 + ADD_imm8s_r32( 16, R_ESP );
1.136 + CALL_ptr(sh4_write_long);
1.137 + ADD_imm8s_r32( 16, R_ESP );
1.138 }
1.139
1.140 /**
1.141 @@ -127,36 +128,43 @@
1.142 */
1.143 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.144 {
1.145 + SUB_imm8s_r32( 12, R_ESP );
1.146 PUSH_r32(addr);
1.147 - call_func0(sh4_read_long);
1.148 - POP_r32(R_ECX);
1.149 - PUSH_r32(R_EAX);
1.150 - ADD_imm8s_r32( 4, R_ECX );
1.151 - PUSH_r32(R_ECX);
1.152 - call_func0(sh4_read_long);
1.153 - ADD_imm8s_r32( 4, R_ESP );
1.154 - MOV_r32_r32( R_EAX, arg2b );
1.155 - POP_r32(arg2a);
1.156 + CALL_ptr(sh4_read_long);
1.157 + MOV_r32_esp8(R_EAX, 4);
1.158 + ADD_imm8s_esp8(4, 0);
1.159 + CALL_ptr(sh4_read_long);
1.160 + if( arg2b != R_EAX ) {
1.161 + MOV_r32_r32( R_EAX, arg2b );
1.162 + }
1.163 + MOV_esp8_r32( 4, arg2a );
1.164 + ADD_imm8s_r32( 16, R_ESP );
1.165 }
1.166 +
1.167 #endif
1.168
1.169 /**
1.170 * Emit the 'start of block' assembly. Sets up the stack frame and save
1.171 * SI/DI as required
1.172 + * Allocates 8 bytes for local variables, which also has the convenient
1.173 + * side-effect of aligning the stack.
1.174 */
1.175 void enter_block( )
1.176 {
1.177 PUSH_r32(R_EBP);
1.178 - /* mov &sh4r, ebp */
1.179 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
1.180 + SUB_imm8s_r32( 8, R_ESP );
1.181 +}
1.182
1.183 -#ifdef STACK_ALIGN
1.184 - sh4_x86.stack_posn = 8;
1.185 -#endif
1.186 +static inline void exit_block( )
1.187 +{
1.188 + ADD_imm8s_r32( 8, R_ESP );
1.189 + POP_r32(R_EBP);
1.190 + RET();
1.191 }
1.192
1.193 /**
1.194 - * Exit the block with sh4r.pc already written
1.195 + * Exit the block with sh4r.new_pc written with the target pc
1.196 */
1.197 void exit_block_pcset( sh4addr_t pc )
1.198 {
1.199 @@ -167,9 +175,8 @@
1.200 call_func1(xlat_get_code_by_vma,R_EAX);
1.201 } else {
1.202 call_func1(xlat_get_code,R_EAX);
1.203 - }
1.204 - POP_r32(R_EBP);
1.205 - RET();
1.206 + }
1.207 + exit_block();
1.208 }
1.209
1.210 /**
1.211 @@ -185,37 +192,31 @@
1.212 call_func1(xlat_get_code_by_vma,R_EAX);
1.213 } else {
1.214 call_func1(xlat_get_code,R_EAX);
1.215 - }
1.216 - POP_r32(R_EBP);
1.217 - RET();
1.218 + }
1.219 + exit_block();
1.220 }
1.221
1.222 -#define EXIT_BLOCK_SIZE(pc) (24 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
1.223 -
1.224
1.225 /**
1.226 * Exit the block to an absolute PC
1.227 */
1.228 -void exit_block( sh4addr_t pc, sh4addr_t endpc )
1.229 +void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
1.230 {
1.231 load_imm32( R_ECX, pc ); // 5
1.232 store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
1.233 if( IS_IN_ICACHE(pc) ) {
1.234 MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
1.235 + AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.236 } else if( sh4_x86.tlb_on ) {
1.237 call_func1(xlat_get_code_by_vma,R_ECX);
1.238 } else {
1.239 call_func1(xlat_get_code,R_ECX);
1.240 }
1.241 - AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.242 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.243 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.244 - POP_r32(R_EBP);
1.245 - RET();
1.246 + exit_block();
1.247 }
1.248
1.249 -#define EXIT_BLOCK_REL_SIZE(pc) (27 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
1.250 -
1.251 /**
1.252 * Exit the block to a relative PC
1.253 */
1.254 @@ -226,16 +227,15 @@
1.255 store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
1.256 if( IS_IN_ICACHE(pc) ) {
1.257 MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
1.258 + AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.259 } else if( sh4_x86.tlb_on ) {
1.260 call_func1(xlat_get_code_by_vma,R_ECX);
1.261 } else {
1.262 call_func1(xlat_get_code,R_ECX);
1.263 }
1.264 - AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.265 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.266 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.267 - POP_r32(R_EBP);
1.268 - RET();
1.269 + exit_block();
1.270 }
1.271
1.272 /**
1.273 @@ -257,7 +257,7 @@
1.274 MUL_r32( R_EDX );
1.275 ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
1.276
1.277 - POP_r32( R_EAX );
1.278 + POP_r32(R_EAX);
1.279 call_func1( sh4_raise_exception, R_EAX );
1.280 load_spreg( R_EAX, R_PC );
1.281 if( sh4_x86.tlb_on ) {
1.282 @@ -265,8 +265,7 @@
1.283 } else {
1.284 call_func1(xlat_get_code,R_EAX);
1.285 }
1.286 - POP_r32(R_EBP);
1.287 - RET();
1.288 + exit_block();
1.289
1.290 // Exception already raised - just cleanup
1.291 uint8_t *preexc_ptr = xlat_output;
1.292 @@ -282,8 +281,7 @@
1.293 } else {
1.294 call_func1(xlat_get_code,R_EAX);
1.295 }
1.296 - POP_r32(R_EBP);
1.297 - RET();
1.298 + exit_block();
1.299
1.300 for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
1.301 uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
1.302 @@ -306,6 +304,7 @@
1.303 }
1.304 }
1.305
1.306 +
1.307 /**
1.308 * The unwind methods only work if we compiled with DWARF2 frame information
1.309 * (ie -fexceptions), otherwise we have to use the direct frame scan.
1.310 @@ -314,19 +313,17 @@
1.311 #include <unwind.h>
1.312
1.313 struct UnwindInfo {
1.314 - int have_result;
1.315 - void *pc;
1.316 + uintptr_t block_start;
1.317 + uintptr_t block_end;
1.318 + void *pc;
1.319 };
1.320
1.321 _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
1.322 {
1.323 - void *ebp = (void *)_Unwind_GetGR(context, 5);
1.324 - void *expect = (((uint8_t *)&sh4r) + 128 );
1.325 - struct UnwindInfo *info = arg;
1.326 - if( ebp == expect ) {
1.327 - info->have_result = 1;
1.328 - info->pc = (void *)_Unwind_GetIP(context);
1.329 - } else if( info->have_result ) {
1.330 + struct UnwindInfo *info = arg;
1.331 + void *pc = (void *)_Unwind_GetIP(context);
1.332 + if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
1.333 + info->pc = pc;
1.334 return _URC_NORMAL_STOP;
1.335 }
1.336
1.337 @@ -338,12 +335,12 @@
1.338 struct _Unwind_Exception exc;
1.339 struct UnwindInfo info;
1.340
1.341 - info.have_result = 0;
1.342 + info.pc = NULL;
1.343 + info.block_start = (uintptr_t)code;
1.344 + info.block_end = info.block_start + code_size;
1.345 void *result = NULL;
1.346 _Unwind_Backtrace( xlat_check_frame, &info );
1.347 - if( info.have_result )
1.348 - return info.pc;
1.349 - return NULL;
1.350 + return info.pc;
1.351 }
1.352 #else
1.353 void *xlat_get_native_pc( void *code, uint32_t code_size )
1.354 @@ -370,6 +367,6 @@
1.355 }
1.356 #endif
1.357
1.358 -#endif /* !lxdream_ia32abi_H */
1.359 +#endif /* !lxdream_ia32mac.h */
1.360
1.361
.