revision 926:68f3e0fe02f1
summary |
tree |
shortlog |
changelog |
graph |
changeset |
raw | bz2 | zip | gz changeset | 926:68f3e0fe02f1 |
parent | 925:7cbcc51db63d |
child | 927:17b6b9e245d8 |
author | nkeynes |
date | Sun Dec 14 07:50:48 2008 +0000 (15 years ago) |
Setup a 'proper' stackframe in translated blocks. This doesn't affect performance noticeably,
but does ensure that
a) The stack is aligned correctly on OS X with no extra effort, and
b) We can't mess up the stack and crash that way anymore.
Replace all PUSH/POP instructions (outside of prologue/epilogue) with ESP-rel moves to stack
local variables.
Finally merge ia32mac and ia32abi together, since they're pretty much the same now anyway (and
thereby simplifying maintenance a good deal)
src/sh4/ia32abi.h | view | annotate | diff | log | ||
src/sh4/ia32mac.h | view | annotate | diff | log | ||
src/sh4/ia64abi.h | view | annotate | diff | log | ||
src/sh4/sh4trans.h | view | annotate | diff | log | ||
src/sh4/sh4x86.in | view | annotate | diff | log | ||
src/sh4/x86op.h | view | annotate | diff | log |
1.1 --- a/src/sh4/ia32abi.h Sun Dec 14 06:36:49 2008 +00001.2 +++ b/src/sh4/ia32abi.h Sun Dec 14 07:50:48 2008 +00001.3 @@ -1,8 +1,10 @@1.4 /**1.5 * $Id$1.6 *1.7 - * Provides the implementation for the ia32 ABI (eg prologue, epilogue, and1.8 - * calling conventions)1.9 + * Provides the implementation for the ia32 ABI variant1.10 + * (eg prologue, epilogue, and calling conventions). Stack frame is1.11 + * aligned on 16-byte boundaries for the benefit of OS X (which1.12 + * requires it).1.13 *1.14 * Copyright (c) 2007 Nathan Keynes.1.15 *1.16 @@ -17,8 +19,8 @@1.17 * GNU General Public License for more details.1.18 */1.20 -#ifndef lxdream_ia32abi_H1.21 -#define lxdream_ia32abi_H 11.22 +#ifndef lxdream_ia32mac_H1.23 +#define lxdream_ia32mac_H 11.25 #define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );1.27 @@ -28,8 +30,7 @@1.28 */1.29 static inline void call_func0( void *ptr )1.30 {1.31 - load_imm32(R_ECX, (uint32_t)ptr);1.32 - CALL_r32(R_ECX);1.33 + CALL_ptr(ptr);1.34 }1.36 #ifdef HAVE_FASTCALL1.37 @@ -38,8 +39,7 @@1.38 if( arg1 != R_EAX ) {1.39 MOV_r32_r32( arg1, R_EAX );1.40 }1.41 - load_imm32(R_ECX, (uint32_t)ptr);1.42 - CALL_r32(R_ECX);1.43 + CALL_ptr(ptr);1.44 }1.46 static inline void call_func2( void *ptr, int arg1, int arg2 )1.47 @@ -50,8 +50,7 @@1.48 if( arg1 != R_EAX ) {1.49 MOV_r32_r32( arg1, R_EAX );1.50 }1.51 - load_imm32(R_ECX, (uint32_t)ptr);1.52 - CALL_r32(R_ECX);1.53 + CALL_ptr(ptr);1.54 }1.56 /**1.57 @@ -60,11 +59,11 @@1.58 */1.59 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )1.60 {1.61 - PUSH_r32(arg2b);1.62 - PUSH_r32(addr);1.63 + MOV_r32_esp8(addr, 0);1.64 + MOV_r32_esp8(arg2b, 4);1.65 call_func2(sh4_write_long, addr, arg2a);1.66 - POP_r32(R_EAX);1.67 - POP_r32(R_EDX);1.68 + MOV_esp8_r32(0, R_EAX);1.69 + MOV_esp8_r32(4, R_EDX);1.70 ADD_imm8s_r32(4, R_EAX);1.71 call_func0(sh4_write_long);1.72 }1.73 @@ -75,32 +74,33 @@1.74 */1.75 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )1.76 {1.77 - 
PUSH_r32(addr);1.78 + MOV_r32_esp8(addr, 0);1.79 call_func1(sh4_read_long, addr);1.80 - POP_r32(R_ECX);1.81 - PUSH_r32(R_EAX);1.82 - MOV_r32_r32(R_ECX, R_EAX);1.83 + MOV_r32_esp8(R_EAX, 4);1.84 + MOV_esp8_r32(0, R_EAX);1.85 ADD_imm8s_r32(4, R_EAX);1.86 call_func0(sh4_read_long);1.87 if( arg2b != R_EAX ) {1.88 MOV_r32_r32(R_EAX, arg2b);1.89 }1.90 - POP_r32(arg2a);1.91 + MOV_esp8_r32(4, arg2a);1.92 }1.93 #else1.94 static inline void call_func1( void *ptr, int arg1 )1.95 {1.96 + SUB_imm8s_r32( 12, R_ESP );1.97 PUSH_r32(arg1);1.98 - call_func0(ptr);1.99 - ADD_imm8s_r32( 4, R_ESP );1.100 + CALL_ptr(ptr);1.101 + ADD_imm8s_r32( 16, R_ESP );1.102 }1.104 static inline void call_func2( void *ptr, int arg1, int arg2 )1.105 {1.106 + SUB_imm8s_r32( 8, R_ESP );1.107 PUSH_r32(arg2);1.108 PUSH_r32(arg1);1.109 - call_func0(ptr);1.110 - ADD_imm8s_r32( 8, R_ESP );1.111 + CALL_ptr(ptr);1.112 + ADD_imm8s_r32( 16, R_ESP );1.113 }1.115 /**1.116 @@ -109,16 +109,17 @@1.117 */1.118 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )1.119 {1.120 - ADD_imm8s_r32( 4, addr );1.121 + SUB_imm8s_r32( 8, R_ESP );1.122 PUSH_r32(arg2b);1.123 - PUSH_r32(addr);1.124 - ADD_imm8s_r32( -4, addr );1.125 + LEA_r32disp8_r32( addr, 4, arg2b );1.126 + PUSH_r32(arg2b);1.127 + SUB_imm8s_r32( 8, R_ESP );1.128 PUSH_r32(arg2a);1.129 PUSH_r32(addr);1.130 - call_func0(sh4_write_long);1.131 - ADD_imm8s_r32( 8, R_ESP );1.132 - call_func0(sh4_write_long);1.133 - ADD_imm8s_r32( 8, R_ESP );1.134 + CALL_ptr(sh4_write_long);1.135 + ADD_imm8s_r32( 16, R_ESP );1.136 + CALL_ptr(sh4_write_long);1.137 + ADD_imm8s_r32( 16, R_ESP );1.138 }1.140 /**1.141 @@ -127,36 +128,43 @@1.142 */1.143 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )1.144 {1.145 + SUB_imm8s_r32( 12, R_ESP );1.146 PUSH_r32(addr);1.147 - call_func0(sh4_read_long);1.148 - POP_r32(R_ECX);1.149 - PUSH_r32(R_EAX);1.150 - ADD_imm8s_r32( 4, R_ECX );1.151 - PUSH_r32(R_ECX);1.152 - call_func0(sh4_read_long);1.153 - ADD_imm8s_r32( 4, 
R_ESP );1.154 - MOV_r32_r32( R_EAX, arg2b );1.155 - POP_r32(arg2a);1.156 + CALL_ptr(sh4_read_long);1.157 + MOV_r32_esp8(R_EAX, 4);1.158 + ADD_imm8s_esp8(4, 0);1.159 + CALL_ptr(sh4_read_long);1.160 + if( arg2b != R_EAX ) {1.161 + MOV_r32_r32( R_EAX, arg2b );1.162 + }1.163 + MOV_esp8_r32( 4, arg2a );1.164 + ADD_imm8s_r32( 16, R_ESP );1.165 }1.166 +1.167 #endif1.169 /**1.170 * Emit the 'start of block' assembly. Sets up the stack frame and save1.171 * SI/DI as required1.172 + * Allocates 8 bytes for local variables, which also has the convenient1.173 + * side-effect of aligning the stack.1.174 */1.175 void enter_block( )1.176 {1.177 PUSH_r32(R_EBP);1.178 - /* mov &sh4r, ebp */1.179 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );1.180 + SUB_imm8s_r32( 8, R_ESP );1.181 +}1.183 -#ifdef STACK_ALIGN1.184 - sh4_x86.stack_posn = 8;1.185 -#endif1.186 +static inline void exit_block( )1.187 +{1.188 + ADD_imm8s_r32( 8, R_ESP );1.189 + POP_r32(R_EBP);1.190 + RET();1.191 }1.193 /**1.194 - * Exit the block with sh4r.pc already written1.195 + * Exit the block with sh4r.new_pc written with the target pc1.196 */1.197 void exit_block_pcset( sh4addr_t pc )1.198 {1.199 @@ -167,9 +175,8 @@1.200 call_func1(xlat_get_code_by_vma,R_EAX);1.201 } else {1.202 call_func1(xlat_get_code,R_EAX);1.203 - }1.204 - POP_r32(R_EBP);1.205 - RET();1.206 + }1.207 + exit_block();1.208 }1.210 /**1.211 @@ -185,37 +192,31 @@1.212 call_func1(xlat_get_code_by_vma,R_EAX);1.213 } else {1.214 call_func1(xlat_get_code,R_EAX);1.215 - }1.216 - POP_r32(R_EBP);1.217 - RET();1.218 + }1.219 + exit_block();1.220 }1.222 -#define EXIT_BLOCK_SIZE(pc) (24 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))1.223 -1.225 /**1.226 * Exit the block to an absolute PC1.227 */1.228 -void exit_block( sh4addr_t pc, sh4addr_t endpc )1.229 +void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )1.230 {1.231 load_imm32( R_ECX, pc ); // 51.232 store_spreg( R_ECX, REG_OFFSET(pc) ); // 31.233 if( IS_IN_ICACHE(pc) ) {1.234 MOV_moff32_EAX( 
xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 51.235 + AND_imm8s_r32( 0xFC, R_EAX ); // 31.236 } else if( sh4_x86.tlb_on ) {1.237 call_func1(xlat_get_code_by_vma,R_ECX);1.238 } else {1.239 call_func1(xlat_get_code,R_ECX);1.240 }1.241 - AND_imm8s_r32( 0xFC, R_EAX ); // 31.242 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 51.243 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 61.244 - POP_r32(R_EBP);1.245 - RET();1.246 + exit_block();1.247 }1.249 -#define EXIT_BLOCK_REL_SIZE(pc) (27 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))1.250 -1.251 /**1.252 * Exit the block to a relative PC1.253 */1.254 @@ -226,16 +227,15 @@1.255 store_spreg( R_ECX, REG_OFFSET(pc) ); // 31.256 if( IS_IN_ICACHE(pc) ) {1.257 MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 51.258 + AND_imm8s_r32( 0xFC, R_EAX ); // 31.259 } else if( sh4_x86.tlb_on ) {1.260 call_func1(xlat_get_code_by_vma,R_ECX);1.261 } else {1.262 call_func1(xlat_get_code,R_ECX);1.263 }1.264 - AND_imm8s_r32( 0xFC, R_EAX ); // 31.265 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 51.266 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 61.267 - POP_r32(R_EBP);1.268 - RET();1.269 + exit_block();1.270 }1.272 /**1.273 @@ -257,7 +257,7 @@1.274 MUL_r32( R_EDX );1.275 ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );1.277 - POP_r32( R_EAX );1.278 + POP_r32(R_EAX);1.279 call_func1( sh4_raise_exception, R_EAX );1.280 load_spreg( R_EAX, R_PC );1.281 if( sh4_x86.tlb_on ) {1.282 @@ -265,8 +265,7 @@1.283 } else {1.284 call_func1(xlat_get_code,R_EAX);1.285 }1.286 - POP_r32(R_EBP);1.287 - RET();1.288 + exit_block();1.290 // Exception already raised - just cleanup1.291 uint8_t *preexc_ptr = xlat_output;1.292 @@ -282,8 +281,7 @@1.293 } else {1.294 call_func1(xlat_get_code,R_EAX);1.295 }1.296 - POP_r32(R_EBP);1.297 - RET();1.298 + exit_block();1.300 for( i=0; i< sh4_x86.backpatch_posn; i++ ) {1.301 uint32_t *fixup_addr = (uint32_t 
*)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];1.302 @@ -306,6 +304,7 @@1.303 }1.304 }1.306 +1.307 /**1.308 * The unwind methods only work if we compiled with DWARF2 frame information1.309 * (ie -fexceptions), otherwise we have to use the direct frame scan.1.310 @@ -314,19 +313,17 @@1.311 #include <unwind.h>1.313 struct UnwindInfo {1.314 - int have_result;1.315 - void *pc;1.316 + uintptr_t block_start;1.317 + uintptr_t block_end;1.318 + void *pc;1.319 };1.321 _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )1.322 {1.323 - void *ebp = (void *)_Unwind_GetGR(context, 5);1.324 - void *expect = (((uint8_t *)&sh4r) + 128 );1.325 - struct UnwindInfo *info = arg;1.326 - if( ebp == expect ) {1.327 - info->have_result = 1;1.328 - info->pc = (void *)_Unwind_GetIP(context);1.329 - } else if( info->have_result ) {1.330 + struct UnwindInfo *info = arg;1.331 + void *pc = (void *)_Unwind_GetIP(context);1.332 + if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {1.333 + info->pc = pc;1.334 return _URC_NORMAL_STOP;1.335 }1.337 @@ -338,12 +335,12 @@1.338 struct _Unwind_Exception exc;1.339 struct UnwindInfo info;1.341 - info.have_result = 0;1.342 + info.pc = NULL;1.343 + info.block_start = (uintptr_t)code;1.344 + info.block_end = info.block_start + code_size;1.345 void *result = NULL;1.346 _Unwind_Backtrace( xlat_check_frame, &info );1.347 - if( info.have_result )1.348 - return info.pc;1.349 - return NULL;1.350 + return info.pc;1.351 }1.352 #else1.353 void *xlat_get_native_pc( void *code, uint32_t code_size )1.354 @@ -370,6 +367,6 @@1.355 }1.356 #endif1.358 -#endif /* !lxdream_ia32abi_H */1.359 +#endif /* !lxdream_ia32mac.h */
2.1 --- a/src/sh4/ia32mac.h Sun Dec 14 06:36:49 2008 +00002.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +00002.3 @@ -1,402 +0,0 @@2.4 -/**2.5 - * $Id$2.6 - *2.7 - * Provides the implementation for the ia32 Mac OS X ABI variant2.8 - * (eg prologue, epilogue, and calling conventions). Main difference2.9 - * from ia32abi is that stack frames are aligned on 16-byte boundaries.2.10 - *2.11 - * Copyright (c) 2007 Nathan Keynes.2.12 - *2.13 - * This program is free software; you can redistribute it and/or modify2.14 - * it under the terms of the GNU General Public License as published by2.15 - * the Free Software Foundation; either version 2 of the License, or2.16 - * (at your option) any later version.2.17 - *2.18 - * This program is distributed in the hope that it will be useful,2.19 - * but WITHOUT ANY WARRANTY; without even the implied warranty of2.20 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the2.21 - * GNU General Public License for more details.2.22 - */2.23 -2.24 -#ifndef lxdream_ia32mac_H2.25 -#define lxdream_ia32mac_H 12.26 -2.27 -#define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );2.28 -2.29 -/**2.30 - * Note: clobbers EAX to make the indirect call - this isn't usually2.31 - * a problem since the callee will usually clobber it anyway.2.32 - */2.33 -#define CALL_FUNC0_SIZE 132.34 -static inline void call_func0( void *ptr )2.35 -{2.36 - int adj = (-sh4_x86.stack_posn)&0x0F;2.37 - SUB_imm8s_r32( adj, R_ESP );2.38 - load_imm32(R_ECX, (uint32_t)ptr);2.39 - CALL_r32(R_ECX);2.40 - ADD_imm8s_r32( adj, R_ESP );2.41 -}2.42 -2.43 -#ifdef HAVE_FASTCALL2.44 -static inline void call_func1( void *ptr, int arg1 )2.45 -{2.46 - int adj = (-sh4_x86.stack_posn)&0x0F;2.47 - SUB_imm8s_r32( adj, R_ESP );2.48 - if( arg1 != R_EAX ) {2.49 - MOV_r32_r32( arg1, R_EAX );2.50 - }2.51 - load_imm32(R_ECX, (uint32_t)ptr);2.52 - CALL_r32(R_ECX);2.53 - ADD_imm8s_r32( adj, R_ESP );2.54 -}2.55 -2.56 -static inline void call_func2( void *ptr, int arg1, int arg2 )2.57 
-{2.58 - int adj = (-sh4_x86.stack_posn)&0x0F;2.59 - SUB_imm8s_r32( adj, R_ESP );2.60 - if( arg2 != R_EDX ) {2.61 - MOV_r32_r32( arg2, R_EDX );2.62 - }2.63 - if( arg1 != R_EAX ) {2.64 - MOV_r32_r32( arg1, R_EAX );2.65 - }2.66 - load_imm32(R_ECX, (uint32_t)ptr);2.67 - CALL_r32(R_ECX);2.68 - ADD_imm8s_r32( adj, R_ESP );2.69 -}2.70 -2.71 -/**2.72 - * Write a double (64-bit) value into memory, with the first word in arg2a, and2.73 - * the second in arg2b2.74 - */2.75 -static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )2.76 -{2.77 - PUSH_r32(arg2b);2.78 - PUSH_r32(addr);2.79 - call_func2(sh4_write_long, addr, arg2a);2.80 - POP_r32(R_EAX);2.81 - POP_r32(R_EDX);2.82 - ADD_imm8s_r32(4, R_EAX);2.83 - call_func0(sh4_write_long);2.84 -}2.85 -2.86 -/**2.87 - * Read a double (64-bit) value from memory, writing the first word into arg2a2.88 - * and the second into arg2b. The addr must not be in EAX2.89 - */2.90 -static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )2.91 -{2.92 - PUSH_r32(addr);2.93 - call_func1(sh4_read_long, addr);2.94 - POP_r32(R_ECX);2.95 - PUSH_r32(R_EAX);2.96 - MOV_r32_r32(R_ECX, R_EAX);2.97 - ADD_imm8s_r32(4, R_EAX);2.98 - call_func0(sh4_read_long);2.99 - if( arg2b != R_EAX ) {2.100 - MOV_r32_r32(R_EAX, arg2b);2.101 - }2.102 - POP_r32(arg2a);2.103 -}2.104 -#else2.105 -static inline void call_func1( void *ptr, int arg1 )2.106 -{2.107 - int adj = (-4-sh4_x86.stack_posn)&0x0F;2.108 - SUB_imm8s_r32( adj, R_ESP );2.109 - PUSH_r32(arg1);2.110 - load_imm32(R_EAX, (uint32_t)ptr);2.111 - CALL_r32(R_EAX);2.112 - ADD_imm8s_r32( adj+4, R_ESP );2.113 - sh4_x86.stack_posn -= 4;2.114 -}2.115 -2.116 -#define CALL_FUNC2_SIZE 152.117 -static inline void call_func2( void *ptr, int arg1, int arg2 )2.118 -{2.119 - int adj = (-8-sh4_x86.stack_posn)&0x0F;2.120 - SUB_imm8s_r32( adj, R_ESP );2.121 - PUSH_r32(arg2);2.122 - PUSH_r32(arg1);2.123 - load_imm32(R_EAX, (uint32_t)ptr);2.124 - CALL_r32(R_EAX);2.125 - ADD_imm8s_r32( adj+8, R_ESP );2.126 - 
sh4_x86.stack_posn -= 8;2.127 -}2.128 -2.129 -/**2.130 - * Write a double (64-bit) value into memory, with the first word in arg2a, and2.131 - * the second in arg2b2.132 - */2.133 -static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )2.134 -{2.135 - int adj = (-8-sh4_x86.stack_posn)&0x0F;2.136 - SUB_imm8s_r32( adj, R_ESP );2.137 - ADD_imm8s_r32( 4, addr );2.138 - PUSH_r32(arg2b);2.139 - PUSH_r32(addr);2.140 - ADD_imm8s_r32( -4, addr );2.141 - SUB_imm8s_r32( 8, R_ESP );2.142 - PUSH_r32(arg2a);2.143 - PUSH_r32(addr);2.144 - load_imm32(R_EAX, (uint32_t)sh4_write_long);2.145 - CALL_r32(R_EAX);2.146 - ADD_imm8s_r32( 16, R_ESP );2.147 - load_imm32(R_EAX, (uint32_t)sh4_write_long);2.148 - CALL_r32(R_EAX);2.149 - ADD_imm8s_r32( adj+8, R_ESP );2.150 - sh4_x86.stack_posn -= 16;2.151 -}2.152 -2.153 -/**2.154 - * Read a double (64-bit) value from memory, writing the first word into arg2a2.155 - * and the second into arg2b. The addr must not be in EAX2.156 - */2.157 -static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )2.158 -{2.159 - int adj = (-4-sh4_x86.stack_posn)&0x0F;2.160 - int adj2 = (-8-sh4_x86.stack_posn)&0x0F;2.161 - SUB_imm8s_r32( adj, R_ESP );2.162 - PUSH_r32(addr);2.163 - load_imm32(R_EAX, (uint32_t)sh4_read_long);2.164 - CALL_r32(R_EAX);2.165 - POP_r32(R_ECX);2.166 - SUB_imm8s_r32( adj2-adj, R_ESP );2.167 - PUSH_r32(R_EAX);2.168 - ADD_imm8s_r32( 4, R_ECX );2.169 - PUSH_r32(R_ECX);2.170 - load_imm32(R_EAX, (uint32_t)sh4_read_long);2.171 - CALL_r32(R_EAX);2.172 - ADD_imm8s_r32( 4, R_ESP );2.173 - MOV_r32_r32( R_EAX, arg2b );2.174 - POP_r32(arg2a);2.175 - ADD_imm8s_r32( adj2, R_ESP );2.176 - sh4_x86.stack_posn -= 4;2.177 -}2.178 -2.179 -#endif2.180 -2.181 -/**2.182 - * Emit the 'start of block' assembly. 
Sets up the stack frame and save2.183 - * SI/DI as required2.184 - */2.185 -void enter_block( )2.186 -{2.187 - PUSH_r32(R_EBP);2.188 - /* mov &sh4r, ebp */2.189 - load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );2.190 - sh4_x86.stack_posn = 8;2.191 -}2.192 -2.193 -/**2.194 - * Exit the block with sh4r.new_pc written with the target pc2.195 - */2.196 -void exit_block_pcset( sh4addr_t pc )2.197 -{2.198 - load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 52.199 - ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 62.200 - load_spreg( R_EAX, R_PC );2.201 - if( sh4_x86.tlb_on ) {2.202 - call_func1(xlat_get_code_by_vma,R_EAX);2.203 - } else {2.204 - call_func1(xlat_get_code,R_EAX);2.205 - }2.206 - POP_r32(R_EBP);2.207 - RET();2.208 -}2.209 -2.210 -/**2.211 - * Exit the block with sh4r.new_pc written with the target pc2.212 - */2.213 -void exit_block_newpcset( sh4addr_t pc )2.214 -{2.215 - load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 52.216 - ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 62.217 - load_spreg( R_EAX, R_NEW_PC );2.218 - store_spreg( R_EAX, R_PC );2.219 - if( sh4_x86.tlb_on ) {2.220 - call_func1(xlat_get_code_by_vma,R_EAX);2.221 - } else {2.222 - call_func1(xlat_get_code,R_EAX);2.223 - }2.224 - POP_r32(R_EBP);2.225 - RET();2.226 -}2.227 -2.228 -2.229 -/**2.230 - * Exit the block to an absolute PC2.231 - */2.232 -void exit_block( sh4addr_t pc, sh4addr_t endpc )2.233 -{2.234 - load_imm32( R_ECX, pc ); // 52.235 - store_spreg( R_ECX, REG_OFFSET(pc) ); // 32.236 - if( IS_IN_ICACHE(pc) ) {2.237 - MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 52.238 - } else if( sh4_x86.tlb_on ) {2.239 - call_func1(xlat_get_code_by_vma,R_ECX);2.240 - } else {2.241 - call_func1(xlat_get_code,R_ECX);2.242 - }2.243 - AND_imm8s_r32( 0xFC, R_EAX ); // 32.244 - load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 52.245 - ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 62.246 - 
POP_r32(R_EBP);2.247 - RET();2.248 -}2.249 -2.250 -/**2.251 - * Exit the block to a relative PC2.252 - */2.253 -void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )2.254 -{2.255 - load_imm32( R_ECX, pc - sh4_x86.block_start_pc ); // 52.256 - ADD_sh4r_r32( R_PC, R_ECX );2.257 - store_spreg( R_ECX, REG_OFFSET(pc) ); // 32.258 - if( IS_IN_ICACHE(pc) ) {2.259 - MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 52.260 - } else if( sh4_x86.tlb_on ) {2.261 - call_func1(xlat_get_code_by_vma,R_ECX);2.262 - } else {2.263 - call_func1(xlat_get_code,R_ECX);2.264 - }2.265 - AND_imm8s_r32( 0xFC, R_EAX ); // 32.266 - load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 52.267 - ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 62.268 - POP_r32(R_EBP);2.269 - RET();2.270 -}2.271 -2.272 -/**2.273 - * Write the block trailer (exception handling block)2.274 - */2.275 -void sh4_translate_end_block( sh4addr_t pc ) {2.276 - if( sh4_x86.branch_taken == FALSE ) {2.277 - // Didn't exit unconditionally already, so write the termination here2.278 - exit_block_rel( pc, pc );2.279 - }2.280 - if( sh4_x86.backpatch_posn != 0 ) {2.281 - unsigned int i;2.282 - // Raise exception2.283 - uint8_t *end_ptr = xlat_output;2.284 - MOV_r32_r32( R_EDX, R_ECX );2.285 - ADD_r32_r32( R_EDX, R_ECX );2.286 - ADD_r32_sh4r( R_ECX, R_PC );2.287 - MOV_moff32_EAX( &sh4_cpu_period );2.288 - MUL_r32( R_EDX );2.289 - ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );2.290 -2.291 - POP_r32(R_EDX);2.292 - call_func1( sh4_raise_exception, R_EDX );2.293 - load_spreg( R_EAX, R_PC );2.294 - if( sh4_x86.tlb_on ) {2.295 - call_func1(xlat_get_code_by_vma,R_EAX);2.296 - } else {2.297 - call_func1(xlat_get_code,R_EAX);2.298 - }2.299 - POP_r32(R_EBP);2.300 - RET();2.301 -2.302 - // Exception already raised - just cleanup2.303 - uint8_t *preexc_ptr = xlat_output;2.304 - MOV_r32_r32( R_EDX, R_ECX );2.305 - ADD_r32_r32( R_EDX, R_ECX );2.306 - ADD_r32_sh4r( R_ECX, R_SPC );2.307 - MOV_moff32_EAX( 
&sh4_cpu_period );2.308 - MUL_r32( R_EDX );2.309 - ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );2.310 - load_spreg( R_EAX, R_PC );2.311 - if( sh4_x86.tlb_on ) {2.312 - call_func1(xlat_get_code_by_vma,R_EAX);2.313 - } else {2.314 - call_func1(xlat_get_code,R_EAX);2.315 - }2.316 - POP_r32(R_EBP);2.317 - RET();2.318 -2.319 - for( i=0; i< sh4_x86.backpatch_posn; i++ ) {2.320 - uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];2.321 - *fixup_addr = xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;2.322 - if( sh4_x86.backpatch_list[i].exc_code < 0 ) {2.323 - load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );2.324 - int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;2.325 - if( stack_adj > 0 ) {2.326 - ADD_imm8s_r32( stack_adj, R_ESP );2.327 - }2.328 - int rel = preexc_ptr - xlat_output;2.329 - JMP_rel(rel);2.330 - } else {2.331 - PUSH_imm32( sh4_x86.backpatch_list[i].exc_code );2.332 - load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );2.333 - int rel = end_ptr - xlat_output;2.334 - JMP_rel(rel);2.335 - }2.336 - }2.337 - }2.338 -}2.339 -2.340 -2.341 -/**2.342 - * The unwind methods only work if we compiled with DWARF2 frame information2.343 - * (ie -fexceptions), otherwise we have to use the direct frame scan.2.344 - */2.345 -#ifdef HAVE_EXCEPTIONS2.346 -#include <unwind.h>2.347 -2.348 -struct UnwindInfo {2.349 - uintptr_t block_start;2.350 - uintptr_t block_end;2.351 - void *pc;2.352 -};2.353 -2.354 -_Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )2.355 -{2.356 - struct UnwindInfo *info = arg;2.357 - void *pc = (void *)_Unwind_GetIP(context);2.358 - if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {2.359 - info->pc = pc;2.360 - return _URC_NORMAL_STOP;2.361 - }2.362 -2.363 - return _URC_NO_REASON;2.364 -}2.365 -2.366 -void *xlat_get_native_pc( void *code, uint32_t code_size )2.367 
-{2.368 - struct _Unwind_Exception exc;2.369 - struct UnwindInfo info;2.370 -2.371 - info.pc = NULL;2.372 - info.block_start = (uintptr_t)code;2.373 - info.block_end = info.block_start + code_size;2.374 - void *result = NULL;2.375 - _Unwind_Backtrace( xlat_check_frame, &info );2.376 - return info.pc;2.377 -}2.378 -#else2.379 -void *xlat_get_native_pc( void *code, uint32_t code_size )2.380 -{2.381 - void *result = NULL;2.382 - asm(2.383 - "mov %%ebp, %%eax\n\t"2.384 - "mov $0x8, %%ecx\n\t"2.385 - "mov %1, %%edx\n"2.386 - "frame_loop: test %%eax, %%eax\n\t"2.387 - "je frame_not_found\n\t"2.388 - "cmp (%%eax), %%edx\n\t"2.389 - "je frame_found\n\t"2.390 - "sub $0x1, %%ecx\n\t"2.391 - "je frame_not_found\n\t"2.392 - "movl (%%eax), %%eax\n\t"2.393 - "jmp frame_loop\n"2.394 - "frame_found: movl 0x4(%%eax), %0\n"2.395 - "frame_not_found:"2.396 - : "=r" (result)2.397 - : "r" (((uint8_t *)&sh4r) + 128 )2.398 - : "eax", "ecx", "edx" );2.399 - return result;2.400 -}2.401 -#endif2.402 -2.403 -#endif /* !lxdream_ia32mac.h */2.404 -2.405 -
3.1 --- a/src/sh4/ia64abi.h Sun Dec 14 06:36:49 2008 +00003.2 +++ b/src/sh4/ia64abi.h Sun Dec 14 07:50:48 2008 +00003.3 @@ -94,8 +94,16 @@3.4 void enter_block( )3.5 {3.6 PUSH_r32(R_EBP);3.7 - /* mov &sh4r, ebp */3.8 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );3.9 + // Minimum aligned allocation is 16 bytes3.10 + REXW(); SUB_imm8s_r32( 16, R_ESP );3.11 +}3.12 +3.13 +static inline void exit_block( )3.14 +{3.15 + REXW(); ADD_imm8s_r32( 16, R_ESP );3.16 + POP_r32(R_EBP);3.17 + RET();3.18 }3.20 /**3.21 @@ -111,8 +119,7 @@3.22 } else {3.23 call_func1(xlat_get_code,R_EAX);3.24 }3.25 - POP_r32(R_EBP);3.26 - RET();3.27 + exit_block();3.28 }3.30 /**3.31 @@ -129,30 +136,28 @@3.32 } else {3.33 call_func1(xlat_get_code,R_EAX);3.34 }3.35 - POP_r32(R_EBP);3.36 - RET();3.37 + exit_block();3.38 }3.40 #define EXIT_BLOCK_SIZE(pc) (25 + (IS_IN_ICACHE(pc)?10:CALL_FUNC1_SIZE))3.41 /**3.42 * Exit the block to an absolute PC3.43 */3.44 -void exit_block( sh4addr_t pc, sh4addr_t endpc )3.45 +void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )3.46 {3.47 load_imm32( R_ECX, pc ); // 53.48 store_spreg( R_ECX, REG_OFFSET(pc) ); // 33.49 if( IS_IN_ICACHE(pc) ) {3.50 REXW(); MOV_moff32_EAX( xlat_get_lut_entry(pc) );3.51 + REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 43.52 } else if( sh4_x86.tlb_on ) {3.53 call_func1(xlat_get_code_by_vma, R_ECX);3.54 } else {3.55 call_func1(xlat_get_code,R_ECX);3.56 }3.57 - REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 43.58 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 53.59 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 63.60 - POP_r32(R_EBP);3.61 - RET();3.62 + exit_block();3.63 }3.66 @@ -168,16 +173,15 @@3.67 store_spreg( R_ECX, REG_OFFSET(pc) ); // 33.68 if( IS_IN_ICACHE(pc) ) {3.69 REXW(); MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 53.70 + REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 43.71 } else if( sh4_x86.tlb_on ) {3.72 call_func1(xlat_get_code_by_vma,R_ECX);3.73 } else {3.74 call_func1(xlat_get_code,R_ECX);3.75 
}3.76 - REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 43.77 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 53.78 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 63.79 - POP_r32(R_EBP);3.80 - RET();3.81 + exit_block();3.82 }3.84 /**3.85 @@ -206,9 +210,8 @@3.86 } else {3.87 call_func1(xlat_get_code,R_EAX);3.88 }3.89 - POP_r32(R_EBP);3.90 - RET();3.91 -3.92 + exit_block();3.93 +3.94 // Exception already raised - just cleanup3.95 uint8_t *preexc_ptr = xlat_output;3.96 MOV_r32_r32( R_EDX, R_ECX );3.97 @@ -223,8 +226,7 @@3.98 } else {3.99 call_func0(xlat_get_code);3.100 }3.101 - POP_r32(R_EBP);3.102 - RET();3.103 + exit_block();3.105 for( i=0; i< sh4_x86.backpatch_posn; i++ ) {3.106 uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];3.107 @@ -233,7 +235,7 @@3.108 load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );3.109 int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;3.110 if( stack_adj > 0 ) {3.111 - ADD_imm8s_r32( stack_adj*4, R_ESP );3.112 + REXW(); ADD_imm8s_r32( stack_adj*4, R_ESP );3.113 }3.114 int rel = preexc_ptr - xlat_output;3.115 JMP_rel(rel);3.116 @@ -247,26 +249,35 @@3.117 }3.118 }3.120 +struct UnwindInfo {3.121 + uintptr_t block_start;3.122 + uintptr_t block_end;3.123 + void *pc;3.124 +};3.125 +3.126 _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )3.127 {3.128 - void *rbp = (void *)_Unwind_GetGR(context, 6);3.129 - void *expect = (((uint8_t *)&sh4r) + 128 );3.130 - if( rbp == expect ) {3.131 - void **result = (void **)arg;3.132 - *result = (void *)_Unwind_GetIP(context);3.133 + struct UnwindInfo *info = arg;3.134 + void *pc = (void *)_Unwind_GetIP(context);3.135 + if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {3.136 + info->pc = pc;3.137 return _URC_NORMAL_STOP;3.138 }3.139 -3.140 +3.141 return _URC_NO_REASON;3.142 }3.144 -void *xlat_get_native_pc( void *code, uint32_t size )3.145 +void 
*xlat_get_native_pc( void *code, uint32_t code_size )3.146 {3.147 struct _Unwind_Exception exc;3.148 + struct UnwindInfo info;3.150 + info.pc = NULL;3.151 + info.block_start = (uintptr_t)code;3.152 + info.block_end = info.block_start + code_size;3.153 void *result = NULL;3.154 - _Unwind_Backtrace( xlat_check_frame, &result );3.155 - return result;3.156 + _Unwind_Backtrace( xlat_check_frame, &info );3.157 + return info.pc;3.158 }3.160 #endif /* !lxdream_ia64abi_H */
4.1 --- a/src/sh4/sh4trans.h Sun Dec 14 06:36:49 2008 +00004.2 +++ b/src/sh4/sh4trans.h Sun Dec 14 07:50:48 2008 +00004.3 @@ -34,7 +34,7 @@4.4 /** Maximum size of the translation epilogue (current real size is 116 bytes, so4.5 * allows a little room4.6 */4.7 -#define EPILOGUE_SIZE 1284.8 +#define EPILOGUE_SIZE 1364.10 /** Maximum number of recovery records for a translated block (2048 based on4.11 * 1 record per SH4 instruction in a 4K page).
5.1 --- a/src/sh4/sh4x86.in Sun Dec 14 06:36:49 2008 +00005.2 +++ b/src/sh4/sh4x86.in Sun Dec 14 07:50:48 2008 +00005.3 @@ -315,12 +315,8 @@5.4 #if SIZEOF_VOID_P == 85.5 #include "sh4/ia64abi.h"5.6 #else /* 32-bit system */5.7 -#ifdef APPLE_BUILD5.8 -#include "sh4/ia32mac.h"5.9 -#else5.10 #include "sh4/ia32abi.h"5.11 #endif5.12 -#endif5.14 void sh4_translate_begin_block( sh4addr_t pc )5.15 {5.16 @@ -390,9 +386,7 @@5.17 } else {5.18 call_func1(xlat_get_code,R_EAX);5.19 }5.20 - AND_imm8s_rptr( 0xFC, R_EAX );5.21 - POP_r32(R_EBP);5.22 - RET();5.23 + exit_block();5.24 }5.26 /**5.27 @@ -473,9 +467,9 @@5.28 load_spreg( R_ECX, R_GBR );5.29 ADD_r32_r32( R_ECX, R_EAX );5.30 MMU_TRANSLATE_WRITE( R_EAX );5.31 - PUSH_realigned_r32(R_EAX);5.32 + MOV_r32_esp8(R_EAX, 0);5.33 MEM_READ_BYTE( R_EAX, R_EDX );5.34 - POP_realigned_r32(R_EAX);5.35 + MOV_esp8_r32(0, R_EAX);5.36 AND_imm32_r32(imm, R_EDX );5.37 MEM_WRITE_BYTE( R_EAX, R_EDX );5.38 sh4_x86.tstate = TSTATE_NONE;5.39 @@ -662,10 +656,10 @@5.40 load_reg( R_EAX, Rm );5.41 check_ralign32( R_EAX );5.42 MMU_TRANSLATE_READ( R_EAX );5.43 - PUSH_realigned_r32( R_EAX );5.44 + MOV_r32_esp8(R_EAX, 0);5.45 load_reg( R_EAX, Rn );5.46 ADD_imm8s_r32( 4, R_EAX );5.47 - MMU_TRANSLATE_READ_EXC( R_EAX, -5 );5.48 + MMU_TRANSLATE_READ( R_EAX );5.49 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );5.50 // Note translate twice in case of page boundaries. 
Maybe worth5.51 // adding a page-boundary check to skip the second translation5.52 @@ -673,19 +667,18 @@5.53 load_reg( R_EAX, Rm );5.54 check_ralign32( R_EAX );5.55 MMU_TRANSLATE_READ( R_EAX );5.56 - load_reg( R_ECX, Rn );5.57 - check_ralign32( R_ECX );5.58 - PUSH_realigned_r32( R_EAX );5.59 - MMU_TRANSLATE_READ_EXC( R_ECX, -5 );5.60 - MOV_r32_r32( R_ECX, R_EAX );5.61 + MOV_r32_esp8( R_EAX, 0 );5.62 + load_reg( R_EAX, Rn );5.63 + check_ralign32( R_EAX );5.64 + MMU_TRANSLATE_READ( R_EAX );5.65 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );5.66 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );5.67 }5.68 MEM_READ_LONG( R_EAX, R_EAX );5.69 - POP_r32( R_ECX );5.70 - PUSH_r32( R_EAX );5.71 - MEM_READ_LONG( R_ECX, R_EAX );5.72 - POP_realigned_r32( R_ECX );5.73 + MOV_r32_esp8( R_EAX, 4 );5.74 + MOV_esp8_r32( 0, R_EAX );5.75 + MEM_READ_LONG( R_EAX, R_EAX );5.76 + MOV_esp8_r32( 4, R_ECX );5.78 IMUL_r32( R_ECX );5.79 ADD_r32_sh4r( R_EAX, R_MACL );5.80 @@ -704,10 +697,10 @@5.81 load_reg( R_EAX, Rm );5.82 check_ralign16( R_EAX );5.83 MMU_TRANSLATE_READ( R_EAX );5.84 - PUSH_realigned_r32( R_EAX );5.85 + MOV_r32_esp8( R_EAX, 0 );5.86 load_reg( R_EAX, Rn );5.87 ADD_imm8s_r32( 2, R_EAX );5.88 - MMU_TRANSLATE_READ_EXC( R_EAX, -5 );5.89 + MMU_TRANSLATE_READ( R_EAX );5.90 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );5.91 // Note translate twice in case of page boundaries. 
Maybe worth5.92 // adding a page-boundary check to skip the second translation5.93 @@ -715,21 +708,20 @@5.94 load_reg( R_EAX, Rm );5.95 check_ralign16( R_EAX );5.96 MMU_TRANSLATE_READ( R_EAX );5.97 - load_reg( R_ECX, Rn );5.98 - check_ralign16( R_ECX );5.99 - PUSH_realigned_r32( R_EAX );5.100 - MMU_TRANSLATE_READ_EXC( R_ECX, -5 );5.101 - MOV_r32_r32( R_ECX, R_EAX );5.102 + MOV_r32_esp8( R_EAX, 0 );5.103 + load_reg( R_EAX, Rn );5.104 + check_ralign16( R_EAX );5.105 + MMU_TRANSLATE_READ( R_EAX );5.106 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );5.107 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );5.108 }5.109 MEM_READ_WORD( R_EAX, R_EAX );5.110 - POP_r32( R_ECX );5.111 - PUSH_r32( R_EAX );5.112 - MEM_READ_WORD( R_ECX, R_EAX );5.113 - POP_realigned_r32( R_ECX );5.114 + MOV_r32_esp8( R_EAX, 4 );5.115 + MOV_esp8_r32( 0, R_EAX );5.116 + MEM_READ_WORD( R_EAX, R_EAX );5.117 + MOV_esp8_r32( 4, R_ECX );5.118 +5.119 IMUL_r32( R_ECX );5.120 -5.121 load_spreg( R_ECX, R_S );5.122 TEST_r32_r32( R_ECX, R_ECX );5.123 JE_rel8( nosat );5.124 @@ -830,9 +822,9 @@5.125 load_spreg( R_ECX, R_GBR );5.126 ADD_r32_r32( R_ECX, R_EAX );5.127 MMU_TRANSLATE_WRITE( R_EAX );5.128 - PUSH_realigned_r32(R_EAX);5.129 + MOV_r32_esp8( R_EAX, 0 );5.130 MEM_READ_BYTE( R_EAX, R_EDX );5.131 - POP_realigned_r32(R_EAX);5.132 + MOV_esp8_r32( 0, R_EAX );5.133 OR_imm32_r32(imm, R_EDX );5.134 MEM_WRITE_BYTE( R_EAX, R_EDX );5.135 sh4_x86.tstate = TSTATE_NONE;5.136 @@ -1049,12 +1041,12 @@5.137 COUNT_INST(I_TASB);5.138 load_reg( R_EAX, Rn );5.139 MMU_TRANSLATE_WRITE( R_EAX );5.140 - PUSH_realigned_r32( R_EAX );5.141 + MOV_r32_esp8( R_EAX, 0 );5.142 MEM_READ_BYTE( R_EAX, R_EDX );5.143 TEST_r8_r8( R_DL, R_DL );5.144 SETE_t();5.145 OR_imm8_r8( 0x80, R_DL );5.146 - POP_realigned_r32( R_EAX );5.147 + MOV_esp8_r32( 0, R_EAX );5.148 MEM_WRITE_BYTE( R_EAX, R_EDX );5.149 sh4_x86.tstate = TSTATE_NONE;5.150 :}5.151 @@ -1105,9 +1097,9 @@5.152 load_spreg( R_ECX, R_GBR );5.153 ADD_r32_r32( R_ECX, R_EAX );5.154 MMU_TRANSLATE_WRITE( R_EAX 
);5.155 - PUSH_realigned_r32(R_EAX);5.156 + MOV_r32_esp8( R_EAX, 0 );5.157 MEM_READ_BYTE(R_EAX, R_EDX);5.158 - POP_realigned_r32(R_EAX);5.159 + MOV_esp8_r32( 0, R_EAX );5.160 XOR_imm32_r32( imm, R_EDX );5.161 MEM_WRITE_BYTE( R_EAX, R_EDX );5.162 sh4_x86.tstate = TSTATE_NONE;5.163 @@ -2636,11 +2628,12 @@5.164 check_walign32( R_EAX );5.165 ADD_imm8s_r32( -4, R_EAX );5.166 MMU_TRANSLATE_WRITE( R_EAX );5.167 - PUSH_realigned_r32( R_EAX );5.168 + MOV_r32_esp8( R_EAX, 0 );5.169 call_func0( sh4_read_sr );5.170 - POP_realigned_r32( R_ECX );5.171 + MOV_r32_r32( R_EAX, R_EDX );5.172 + MOV_esp8_r32( 0, R_EAX );5.173 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );5.174 - MEM_WRITE_LONG( R_ECX, R_EAX );5.175 + MEM_WRITE_LONG( R_EAX, R_EDX );5.176 sh4_x86.tstate = TSTATE_NONE;5.177 :}5.178 STC.L VBR, @-Rn {:
6.1 --- a/src/sh4/x86op.h Sun Dec 14 06:36:49 2008 +00006.2 +++ b/src/sh4/x86op.h Sun Dec 14 07:50:48 2008 +00006.3 @@ -54,31 +54,17 @@6.4 #define AND_imm8s_rptr(imm, r1) REXW(); AND_imm8s_r32( imm, r1 )6.5 #define LEA_sh4r_rptr(disp, r1) REXW(); LEA_sh4r_r32(disp,r1)6.6 #define MOV_moffptr_EAX(offptr) REXW(); MOV_moff32_EAX( offptr )6.7 -#define STACK_ALIGN 166.8 -#define POP_r32(r1) OP(0x58 + r1);6.9 -#define POP_realigned_r32(r1) OP(0x58 + r1); REXW(); ADD_imm8s_r32(8,R_ESP)6.10 -#define PUSH_r32(r1) OP(0x50 + r1);6.11 -#define PUSH_realigned_r32(r1) REXW(); SUB_imm8s_r32(8, R_ESP); OP(0x50 + r1)6.12 -#define PUSH_imm32(imm) OP(0x68); OP32(imm);6.13 -#define PUSH_imm64(imm) REXW(); OP(0x68); OP64(imm);6.14 #else /* 32-bit system */6.15 #define OPPTR(x) OP32((uint32_t)(x))6.16 #define AND_imm8s_rptr(imm, r1) AND_imm8s_r32( imm, r1 )6.17 #define LEA_sh4r_rptr(disp, r1) LEA_sh4r_r32(disp,r1)6.18 #define MOV_moffptr_EAX(offptr) MOV_moff32_EAX( offptr )6.19 -#define POP_realigned_r32(r1) POP_r32(r1)6.20 -#define PUSH_realigned_r32(r1) PUSH_r32(r1)6.21 -#ifdef APPLE_BUILD6.22 +#endif6.23 #define STACK_ALIGN 166.24 -#define POP_r32(r1) OP(0x58 + r1); sh4_x86.stack_posn -= 4;6.25 -#define PUSH_r32(r1) OP(0x50 + r1); sh4_x86.stack_posn += 4;6.26 -#define PUSH_imm32(imm) OP(0x68); OP32(imm); sh4_x86.stack_posn += 4;6.27 -#else6.28 #define POP_r32(r1) OP(0x58 + r1)6.29 #define PUSH_r32(r1) OP(0x50 + r1)6.30 #define PUSH_imm32(imm) OP(0x68); OP32(imm)6.31 -#endif6.32 -#endif6.33 +#define PUSH_imm64(imm) REXW(); OP(0x68); OP64(imm);6.35 #ifdef STACK_ALIGN6.36 #else6.37 @@ -124,6 +110,9 @@6.38 /* ebp+disp32 modrm form */6.39 #define MODRM_r32_ebp32(r1,disp) OP(0x85 | (r1<<3)); OP32(disp)6.41 +/* esp+disp32 modrm+sib form */6.42 +#define MODRM_r32_esp8(r1,disp) OP(0x44 | (r1<<3)); OP(0x24); OP(disp)6.43 +6.44 #define MODRM_r32_sh4r(r1,disp) if(disp>127){ MODRM_r32_ebp32(r1,disp);}else{ MODRM_r32_ebp8(r1,(unsigned char)disp); }6.46 #define REXW() OP(0x48)6.47 @@ -134,6 +123,7 
@@6.48 #define ADD_r32_r32(r1,r2) OP(0x03); MODRM_rm32_r32(r1,r2)6.49 #define ADD_imm8s_r32(imm,r1) OP(0x83); MODRM_rm32_r32(r1, 0); OP(imm)6.50 #define ADD_imm8s_sh4r(imm,disp) OP(0x83); MODRM_r32_sh4r(0,disp); OP(imm)6.51 +#define ADD_imm8s_esp8(imm,disp) OP(0x83); MODRM_r32_esp8(0,disp); OP(imm)6.52 #define ADD_imm32_r32(imm32,r1) OP(0x81); MODRM_rm32_r32(r1,0); OP32(imm32)6.53 #define ADC_r32_r32(r1,r2) OP(0x13); MODRM_rm32_r32(r1,r2)6.54 #define ADC_sh4r_r32(disp,r1) OP(0x13); MODRM_r32_sh4r(r1,disp)6.55 @@ -143,6 +133,7 @@6.56 #define AND_imm8s_r32(imm8,r1) OP(0x83); MODRM_rm32_r32(r1,4); OP(imm8)6.57 #define AND_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,4); OP32(imm)6.58 #define CALL_r32(r1) OP(0xFF); MODRM_rm32_r32(r1,2)6.59 +#define CALL_ptr(ptr) OP(0xE8); OP32( (((char *)ptr) - (char *)xlat_output) - 4)6.60 #define CLC() OP(0xF8)6.61 #define CMC() OP(0xF5)6.62 #define CMP_sh4r_r32(disp,r1) OP(0x3B); MODRM_r32_sh4r(r1,disp)6.63 @@ -155,12 +146,15 @@6.64 #define INC_r32(r1) OP(0x40+r1)6.65 #define JMP_rel8(label) OP(0xEB); MARK_JMP8(label); OP(-1);6.66 #define LEA_sh4r_r32(disp,r1) OP(0x8D); MODRM_r32_sh4r(r1,disp)6.67 +#define LEA_r32disp8_r32(r1, disp, r2) OP(0x8D); OP( 0x40 + (r2<<3) + r1); OP(disp)6.68 #define MOV_r32_r32(r1,r2) OP(0x89); MODRM_r32_rm32(r1,r2)6.69 #define MOV_r32_sh4r(r1,disp) OP(0x89); MODRM_r32_sh4r(r1,disp)6.70 #define MOV_moff32_EAX(off) OP(0xA1); OPPTR(off)6.71 #define MOV_sh4r_r32(disp, r1) OP(0x8B); MODRM_r32_sh4r(r1,disp)6.72 #define MOV_r32_r32ind(r2,r1) OP(0x89); OP(0 + (r2<<3) + r1 )6.73 #define MOV_r32ind_r32(r1,r2) OP(0x8B); OP(0 + (r2<<3) + r1 )6.74 +#define MOV_r32_esp8(r1,disp) OP(0x89); MODRM_r32_esp8(r1,disp)6.75 +#define MOV_esp8_r32(disp,r1) OP(0x8B); MODRM_r32_esp8(r1,disp)6.76 #define MOVSX_r8_r32(r1,r2) OP(0x0F); OP(0xBE); MODRM_rm32_r32(r1,r2)6.77 #define MOVSX_r16_r32(r1,r2) OP(0x0F); OP(0xBF); MODRM_rm32_r32(r1,r2)6.78 #define MOVZX_r8_r32(r1,r2) OP(0x0F); OP(0xB6); MODRM_rm32_r32(r1,r2)
.