lxdream.org :: lxdream :: r926:68f3e0fe02f1
changeset 926:68f3e0fe02f1
parent 925:7cbcc51db63d
child 927:17b6b9e245d8
author nkeynes
date Sun Dec 14 07:50:48 2008 +0000 (15 years ago)
Set up a 'proper' stack frame in translated blocks. This doesn't affect performance noticeably,
but does ensure that
a) The stack is aligned correctly on OS X with no extra effort, and
b) We can't mess up the stack and crash that way anymore.
Replace all PUSH/POP instructions (outside of the prologue/epilogue) with ESP-relative moves to
stack-local variables.
Finally, merge ia32mac and ia32abi together, since they're pretty much the same now anyway
(thereby simplifying maintenance a good deal).
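
The alignment payoff in (a) comes from the fixed 8-byte local area: once the prologue has run, the stack is 16-byte aligned inside every block, so calls emitted from block code need no per-call padding. A minimal sketch of that bookkeeping (not lxdream code; it assumes the block's C caller keeps ESP 16-byte aligned at its CALL site, as the OS X ia32 ABI requires):

#include <stdio.h>

int main(void)
{
    int esp = 0;    /* ESP mod 16 at the caller's CALL site */
    esp -= 4;       /* CALL pushes the return address        */
    esp -= 4;       /* enter_block: PUSH_r32(R_EBP)          */
    esp -= 8;       /* enter_block: SUB_imm8s_r32( 8, R_ESP ) -- the locals */
    printf("inside block: ESP mod 16 = %d\n", (esp % 16 + 16) % 16); /* 0  */
    esp -= 4;       /* any CALL_ptr() made from block code   */
    printf("callee entry: ESP mod 16 = %d\n", (esp % 16 + 16) % 16); /* 12 */
    return 0;
}

ESP mod 16 is 0 throughout the block body and 12 at each callee's entry, which is exactly the state a 16-byte-aligned call site produces; these are the invariants the new enter_block()/exit_block() pair in src/sh4/ia32abi.h maintains.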
src/sh4/ia32abi.h
src/sh4/ia32mac.h
src/sh4/ia64abi.h
src/sh4/sh4trans.h
src/sh4/sh4x86.in
src/sh4/x86op.h
1.1 --- a/src/sh4/ia32abi.h Sun Dec 14 06:36:49 2008 +0000
1.2 +++ b/src/sh4/ia32abi.h Sun Dec 14 07:50:48 2008 +0000
1.3 @@ -1,8 +1,10 @@
1.4 /**
1.5 * $Id$
1.6 *
1.7 - * Provides the implementation for the ia32 ABI (eg prologue, epilogue, and
1.8 - * calling conventions)
1.9 + * Provides the implementation for the ia32 ABI variant
1.10 + * (eg prologue, epilogue, and calling conventions). Stack frame is
1.11 + * aligned on 16-byte boundaries for the benefit of OS X (which
1.12 + * requires it).
1.13 *
1.14 * Copyright (c) 2007 Nathan Keynes.
1.15 *
1.16 @@ -17,8 +19,8 @@
1.17 * GNU General Public License for more details.
1.18 */
1.19
1.20 -#ifndef lxdream_ia32abi_H
1.21 -#define lxdream_ia32abi_H 1
1.22 +#ifndef lxdream_ia32mac_H
1.23 +#define lxdream_ia32mac_H 1
1.24
1.25 #define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );
1.26
1.27 @@ -28,8 +30,7 @@
1.28 */
1.29 static inline void call_func0( void *ptr )
1.30 {
1.31 - load_imm32(R_ECX, (uint32_t)ptr);
1.32 - CALL_r32(R_ECX);
1.33 + CALL_ptr(ptr);
1.34 }
1.35
1.36 #ifdef HAVE_FASTCALL
1.37 @@ -38,8 +39,7 @@
1.38 if( arg1 != R_EAX ) {
1.39 MOV_r32_r32( arg1, R_EAX );
1.40 }
1.41 - load_imm32(R_ECX, (uint32_t)ptr);
1.42 - CALL_r32(R_ECX);
1.43 + CALL_ptr(ptr);
1.44 }
1.45
1.46 static inline void call_func2( void *ptr, int arg1, int arg2 )
1.47 @@ -50,8 +50,7 @@
1.48 if( arg1 != R_EAX ) {
1.49 MOV_r32_r32( arg1, R_EAX );
1.50 }
1.51 - load_imm32(R_ECX, (uint32_t)ptr);
1.52 - CALL_r32(R_ECX);
1.53 + CALL_ptr(ptr);
1.54 }
1.55
1.56 /**
1.57 @@ -60,11 +59,11 @@
1.58 */
1.59 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.60 {
1.61 - PUSH_r32(arg2b);
1.62 - PUSH_r32(addr);
1.63 + MOV_r32_esp8(addr, 0);
1.64 + MOV_r32_esp8(arg2b, 4);
1.65 call_func2(sh4_write_long, addr, arg2a);
1.66 - POP_r32(R_EAX);
1.67 - POP_r32(R_EDX);
1.68 + MOV_esp8_r32(0, R_EAX);
1.69 + MOV_esp8_r32(4, R_EDX);
1.70 ADD_imm8s_r32(4, R_EAX);
1.71 call_func0(sh4_write_long);
1.72 }
1.73 @@ -75,32 +74,33 @@
1.74 */
1.75 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.76 {
1.77 - PUSH_r32(addr);
1.78 + MOV_r32_esp8(addr, 0);
1.79 call_func1(sh4_read_long, addr);
1.80 - POP_r32(R_ECX);
1.81 - PUSH_r32(R_EAX);
1.82 - MOV_r32_r32(R_ECX, R_EAX);
1.83 + MOV_r32_esp8(R_EAX, 4);
1.84 + MOV_esp8_r32(0, R_EAX);
1.85 ADD_imm8s_r32(4, R_EAX);
1.86 call_func0(sh4_read_long);
1.87 if( arg2b != R_EAX ) {
1.88 MOV_r32_r32(R_EAX, arg2b);
1.89 }
1.90 - POP_r32(arg2a);
1.91 + MOV_esp8_r32(4, arg2a);
1.92 }
1.93 #else
1.94 static inline void call_func1( void *ptr, int arg1 )
1.95 {
1.96 + SUB_imm8s_r32( 12, R_ESP );
1.97 PUSH_r32(arg1);
1.98 - call_func0(ptr);
1.99 - ADD_imm8s_r32( 4, R_ESP );
1.100 + CALL_ptr(ptr);
1.101 + ADD_imm8s_r32( 16, R_ESP );
1.102 }
1.103
1.104 static inline void call_func2( void *ptr, int arg1, int arg2 )
1.105 {
1.106 + SUB_imm8s_r32( 8, R_ESP );
1.107 PUSH_r32(arg2);
1.108 PUSH_r32(arg1);
1.109 - call_func0(ptr);
1.110 - ADD_imm8s_r32( 8, R_ESP );
1.111 + CALL_ptr(ptr);
1.112 + ADD_imm8s_r32( 16, R_ESP );
1.113 }
1.114
1.115 /**
1.116 @@ -109,16 +109,17 @@
1.117 */
1.118 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.119 {
1.120 - ADD_imm8s_r32( 4, addr );
1.121 + SUB_imm8s_r32( 8, R_ESP );
1.122 PUSH_r32(arg2b);
1.123 - PUSH_r32(addr);
1.124 - ADD_imm8s_r32( -4, addr );
1.125 + LEA_r32disp8_r32( addr, 4, arg2b );
1.126 + PUSH_r32(arg2b);
1.127 + SUB_imm8s_r32( 8, R_ESP );
1.128 PUSH_r32(arg2a);
1.129 PUSH_r32(addr);
1.130 - call_func0(sh4_write_long);
1.131 - ADD_imm8s_r32( 8, R_ESP );
1.132 - call_func0(sh4_write_long);
1.133 - ADD_imm8s_r32( 8, R_ESP );
1.134 + CALL_ptr(sh4_write_long);
1.135 + ADD_imm8s_r32( 16, R_ESP );
1.136 + CALL_ptr(sh4_write_long);
1.137 + ADD_imm8s_r32( 16, R_ESP );
1.138 }
1.139
1.140 /**
1.141 @@ -127,36 +128,43 @@
1.142 */
1.143 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.144 {
1.145 + SUB_imm8s_r32( 12, R_ESP );
1.146 PUSH_r32(addr);
1.147 - call_func0(sh4_read_long);
1.148 - POP_r32(R_ECX);
1.149 - PUSH_r32(R_EAX);
1.150 - ADD_imm8s_r32( 4, R_ECX );
1.151 - PUSH_r32(R_ECX);
1.152 - call_func0(sh4_read_long);
1.153 - ADD_imm8s_r32( 4, R_ESP );
1.154 - MOV_r32_r32( R_EAX, arg2b );
1.155 - POP_r32(arg2a);
1.156 + CALL_ptr(sh4_read_long);
1.157 + MOV_r32_esp8(R_EAX, 4);
1.158 + ADD_imm8s_esp8(4, 0);
1.159 + CALL_ptr(sh4_read_long);
1.160 + if( arg2b != R_EAX ) {
1.161 + MOV_r32_r32( R_EAX, arg2b );
1.162 + }
1.163 + MOV_esp8_r32( 4, arg2a );
1.164 + ADD_imm8s_r32( 16, R_ESP );
1.165 }
1.166 +
1.167 #endif
1.168
1.169 /**
1.170 * Emit the 'start of block' assembly. Sets up the stack frame and save
1.171 * SI/DI as required
1.172 + * Allocates 8 bytes for local variables, which also has the convenient
1.173 + * side-effect of aligning the stack.
1.174 */
1.175 void enter_block( )
1.176 {
1.177 PUSH_r32(R_EBP);
1.178 - /* mov &sh4r, ebp */
1.179 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
1.180 + SUB_imm8s_r32( 8, R_ESP );
1.181 +}
1.182
1.183 -#ifdef STACK_ALIGN
1.184 - sh4_x86.stack_posn = 8;
1.185 -#endif
1.186 +static inline void exit_block( )
1.187 +{
1.188 + ADD_imm8s_r32( 8, R_ESP );
1.189 + POP_r32(R_EBP);
1.190 + RET();
1.191 }
1.192
1.193 /**
1.194 - * Exit the block with sh4r.pc already written
1.195 + * Exit the block with sh4r.new_pc written with the target pc
1.196 */
1.197 void exit_block_pcset( sh4addr_t pc )
1.198 {
1.199 @@ -167,9 +175,8 @@
1.200 call_func1(xlat_get_code_by_vma,R_EAX);
1.201 } else {
1.202 call_func1(xlat_get_code,R_EAX);
1.203 - }
1.204 - POP_r32(R_EBP);
1.205 - RET();
1.206 + }
1.207 + exit_block();
1.208 }
1.209
1.210 /**
1.211 @@ -185,37 +192,31 @@
1.212 call_func1(xlat_get_code_by_vma,R_EAX);
1.213 } else {
1.214 call_func1(xlat_get_code,R_EAX);
1.215 - }
1.216 - POP_r32(R_EBP);
1.217 - RET();
1.218 + }
1.219 + exit_block();
1.220 }
1.221
1.222 -#define EXIT_BLOCK_SIZE(pc) (24 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
1.223 -
1.224
1.225 /**
1.226 * Exit the block to an absolute PC
1.227 */
1.228 -void exit_block( sh4addr_t pc, sh4addr_t endpc )
1.229 +void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
1.230 {
1.231 load_imm32( R_ECX, pc ); // 5
1.232 store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
1.233 if( IS_IN_ICACHE(pc) ) {
1.234 MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
1.235 + AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.236 } else if( sh4_x86.tlb_on ) {
1.237 call_func1(xlat_get_code_by_vma,R_ECX);
1.238 } else {
1.239 call_func1(xlat_get_code,R_ECX);
1.240 }
1.241 - AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.242 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.243 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.244 - POP_r32(R_EBP);
1.245 - RET();
1.246 + exit_block();
1.247 }
1.248
1.249 -#define EXIT_BLOCK_REL_SIZE(pc) (27 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
1.250 -
1.251 /**
1.252 * Exit the block to a relative PC
1.253 */
1.254 @@ -226,16 +227,15 @@
1.255 store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
1.256 if( IS_IN_ICACHE(pc) ) {
1.257 MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
1.258 + AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.259 } else if( sh4_x86.tlb_on ) {
1.260 call_func1(xlat_get_code_by_vma,R_ECX);
1.261 } else {
1.262 call_func1(xlat_get_code,R_ECX);
1.263 }
1.264 - AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.265 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.266 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.267 - POP_r32(R_EBP);
1.268 - RET();
1.269 + exit_block();
1.270 }
1.271
1.272 /**
1.273 @@ -257,7 +257,7 @@
1.274 MUL_r32( R_EDX );
1.275 ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
1.276
1.277 - POP_r32( R_EAX );
1.278 + POP_r32(R_EAX);
1.279 call_func1( sh4_raise_exception, R_EAX );
1.280 load_spreg( R_EAX, R_PC );
1.281 if( sh4_x86.tlb_on ) {
1.282 @@ -265,8 +265,7 @@
1.283 } else {
1.284 call_func1(xlat_get_code,R_EAX);
1.285 }
1.286 - POP_r32(R_EBP);
1.287 - RET();
1.288 + exit_block();
1.289
1.290 // Exception already raised - just cleanup
1.291 uint8_t *preexc_ptr = xlat_output;
1.292 @@ -282,8 +281,7 @@
1.293 } else {
1.294 call_func1(xlat_get_code,R_EAX);
1.295 }
1.296 - POP_r32(R_EBP);
1.297 - RET();
1.298 + exit_block();
1.299
1.300 for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
1.301 uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
1.302 @@ -306,6 +304,7 @@
1.303 }
1.304 }
1.305
1.306 +
1.307 /**
1.308 * The unwind methods only work if we compiled with DWARF2 frame information
1.309 * (ie -fexceptions), otherwise we have to use the direct frame scan.
1.310 @@ -314,19 +313,17 @@
1.311 #include <unwind.h>
1.312
1.313 struct UnwindInfo {
1.314 - int have_result;
1.315 - void *pc;
1.316 + uintptr_t block_start;
1.317 + uintptr_t block_end;
1.318 + void *pc;
1.319 };
1.320
1.321 _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
1.322 {
1.323 - void *ebp = (void *)_Unwind_GetGR(context, 5);
1.324 - void *expect = (((uint8_t *)&sh4r) + 128 );
1.325 - struct UnwindInfo *info = arg;
1.326 - if( ebp == expect ) {
1.327 - info->have_result = 1;
1.328 - info->pc = (void *)_Unwind_GetIP(context);
1.329 - } else if( info->have_result ) {
1.330 + struct UnwindInfo *info = arg;
1.331 + void *pc = (void *)_Unwind_GetIP(context);
1.332 + if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
1.333 + info->pc = pc;
1.334 return _URC_NORMAL_STOP;
1.335 }
1.336
1.337 @@ -338,12 +335,12 @@
1.338 struct _Unwind_Exception exc;
1.339 struct UnwindInfo info;
1.340
1.341 - info.have_result = 0;
1.342 + info.pc = NULL;
1.343 + info.block_start = (uintptr_t)code;
1.344 + info.block_end = info.block_start + code_size;
1.345 void *result = NULL;
1.346 _Unwind_Backtrace( xlat_check_frame, &info );
1.347 - if( info.have_result )
1.348 - return info.pc;
1.349 - return NULL;
1.350 + return info.pc;
1.351 }
1.352 #else
1.353 void *xlat_get_native_pc( void *code, uint32_t code_size )
1.354 @@ -370,6 +367,6 @@
1.355 }
1.356 #endif
1.357
1.358 -#endif /* !lxdream_ia32abi_H */
1.359 +#endif /* !lxdream_ia32mac.h */
1.360
1.361
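A note on the unwind change above: xlat_check_frame no longer identifies the translated frame by comparing its saved EBP against &sh4r + 128; it walks the backtrace until a return address lands inside the block's code range. A self-contained sketch of that pattern, with hypothetical names (compile with -fexceptions so the DWARF2 unwind tables exist):

#include <stdint.h>
#include <stddef.h>
#include <unwind.h>

struct range_info {
    uintptr_t block_start;
    uintptr_t block_end;
    void *pc;
};

static _Unwind_Reason_Code check_frame( struct _Unwind_Context *context, void *arg )
{
    struct range_info *info = arg;
    uintptr_t pc = (uintptr_t)_Unwind_GetIP(context);
    if( pc >= info->block_start && pc < info->block_end ) {
        info->pc = (void *)pc;   /* found a frame inside the block */
        return _URC_NORMAL_STOP;
    }
    return _URC_NO_REASON;       /* keep walking up the stack */
}

void *native_pc_in_block( void *code, uint32_t code_size )
{
    struct range_info info = { (uintptr_t)code, (uintptr_t)code + code_size, NULL };
    _Unwind_Backtrace( check_frame, &info );
    return info.pc;              /* NULL if no frame lies in the range */
}

Matching on the PC range rather than on a register value also removes the old dependence on EBP holding &sh4r + 128 in exactly one frame.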
2.1 --- a/src/sh4/ia32mac.h Sun Dec 14 06:36:49 2008 +0000
2.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
2.3 @@ -1,402 +0,0 @@
2.4 -/**
2.5 - * $Id$
2.6 - *
2.7 - * Provides the implementation for the ia32 Mac OS X ABI variant
2.8 - * (eg prologue, epilogue, and calling conventions). Main difference
2.9 - * from ia32abi is that stack frames are aligned on 16-byte boundaries.
2.10 - *
2.11 - * Copyright (c) 2007 Nathan Keynes.
2.12 - *
2.13 - * This program is free software; you can redistribute it and/or modify
2.14 - * it under the terms of the GNU General Public License as published by
2.15 - * the Free Software Foundation; either version 2 of the License, or
2.16 - * (at your option) any later version.
2.17 - *
2.18 - * This program is distributed in the hope that it will be useful,
2.19 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
2.20 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2.21 - * GNU General Public License for more details.
2.22 - */
2.23 -
2.24 -#ifndef lxdream_ia32mac_H
2.25 -#define lxdream_ia32mac_H 1
2.26 -
2.27 -#define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );
2.28 -
2.29 -/**
2.30 - * Note: clobbers EAX to make the indirect call - this isn't usually
2.31 - * a problem since the callee will usually clobber it anyway.
2.32 - */
2.33 -#define CALL_FUNC0_SIZE 13
2.34 -static inline void call_func0( void *ptr )
2.35 -{
2.36 - int adj = (-sh4_x86.stack_posn)&0x0F;
2.37 - SUB_imm8s_r32( adj, R_ESP );
2.38 - load_imm32(R_ECX, (uint32_t)ptr);
2.39 - CALL_r32(R_ECX);
2.40 - ADD_imm8s_r32( adj, R_ESP );
2.41 -}
2.42 -
2.43 -#ifdef HAVE_FASTCALL
2.44 -static inline void call_func1( void *ptr, int arg1 )
2.45 -{
2.46 - int adj = (-sh4_x86.stack_posn)&0x0F;
2.47 - SUB_imm8s_r32( adj, R_ESP );
2.48 - if( arg1 != R_EAX ) {
2.49 - MOV_r32_r32( arg1, R_EAX );
2.50 - }
2.51 - load_imm32(R_ECX, (uint32_t)ptr);
2.52 - CALL_r32(R_ECX);
2.53 - ADD_imm8s_r32( adj, R_ESP );
2.54 -}
2.55 -
2.56 -static inline void call_func2( void *ptr, int arg1, int arg2 )
2.57 -{
2.58 - int adj = (-sh4_x86.stack_posn)&0x0F;
2.59 - SUB_imm8s_r32( adj, R_ESP );
2.60 - if( arg2 != R_EDX ) {
2.61 - MOV_r32_r32( arg2, R_EDX );
2.62 - }
2.63 - if( arg1 != R_EAX ) {
2.64 - MOV_r32_r32( arg1, R_EAX );
2.65 - }
2.66 - load_imm32(R_ECX, (uint32_t)ptr);
2.67 - CALL_r32(R_ECX);
2.68 - ADD_imm8s_r32( adj, R_ESP );
2.69 -}
2.70 -
2.71 -/**
2.72 - * Write a double (64-bit) value into memory, with the first word in arg2a, and
2.73 - * the second in arg2b
2.74 - */
2.75 -static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
2.76 -{
2.77 - PUSH_r32(arg2b);
2.78 - PUSH_r32(addr);
2.79 - call_func2(sh4_write_long, addr, arg2a);
2.80 - POP_r32(R_EAX);
2.81 - POP_r32(R_EDX);
2.82 - ADD_imm8s_r32(4, R_EAX);
2.83 - call_func0(sh4_write_long);
2.84 -}
2.85 -
2.86 -/**
2.87 - * Read a double (64-bit) value from memory, writing the first word into arg2a
2.88 - * and the second into arg2b. The addr must not be in EAX
2.89 - */
2.90 -static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
2.91 -{
2.92 - PUSH_r32(addr);
2.93 - call_func1(sh4_read_long, addr);
2.94 - POP_r32(R_ECX);
2.95 - PUSH_r32(R_EAX);
2.96 - MOV_r32_r32(R_ECX, R_EAX);
2.97 - ADD_imm8s_r32(4, R_EAX);
2.98 - call_func0(sh4_read_long);
2.99 - if( arg2b != R_EAX ) {
2.100 - MOV_r32_r32(R_EAX, arg2b);
2.101 - }
2.102 - POP_r32(arg2a);
2.103 -}
2.104 -#else
2.105 -static inline void call_func1( void *ptr, int arg1 )
2.106 -{
2.107 - int adj = (-4-sh4_x86.stack_posn)&0x0F;
2.108 - SUB_imm8s_r32( adj, R_ESP );
2.109 - PUSH_r32(arg1);
2.110 - load_imm32(R_EAX, (uint32_t)ptr);
2.111 - CALL_r32(R_EAX);
2.112 - ADD_imm8s_r32( adj+4, R_ESP );
2.113 - sh4_x86.stack_posn -= 4;
2.114 -}
2.115 -
2.116 -#define CALL_FUNC2_SIZE 15
2.117 -static inline void call_func2( void *ptr, int arg1, int arg2 )
2.118 -{
2.119 - int adj = (-8-sh4_x86.stack_posn)&0x0F;
2.120 - SUB_imm8s_r32( adj, R_ESP );
2.121 - PUSH_r32(arg2);
2.122 - PUSH_r32(arg1);
2.123 - load_imm32(R_EAX, (uint32_t)ptr);
2.124 - CALL_r32(R_EAX);
2.125 - ADD_imm8s_r32( adj+8, R_ESP );
2.126 - sh4_x86.stack_posn -= 8;
2.127 -}
2.128 -
2.129 -/**
2.130 - * Write a double (64-bit) value into memory, with the first word in arg2a, and
2.131 - * the second in arg2b
2.132 - */
2.133 -static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
2.134 -{
2.135 - int adj = (-8-sh4_x86.stack_posn)&0x0F;
2.136 - SUB_imm8s_r32( adj, R_ESP );
2.137 - ADD_imm8s_r32( 4, addr );
2.138 - PUSH_r32(arg2b);
2.139 - PUSH_r32(addr);
2.140 - ADD_imm8s_r32( -4, addr );
2.141 - SUB_imm8s_r32( 8, R_ESP );
2.142 - PUSH_r32(arg2a);
2.143 - PUSH_r32(addr);
2.144 - load_imm32(R_EAX, (uint32_t)sh4_write_long);
2.145 - CALL_r32(R_EAX);
2.146 - ADD_imm8s_r32( 16, R_ESP );
2.147 - load_imm32(R_EAX, (uint32_t)sh4_write_long);
2.148 - CALL_r32(R_EAX);
2.149 - ADD_imm8s_r32( adj+8, R_ESP );
2.150 - sh4_x86.stack_posn -= 16;
2.151 -}
2.152 -
2.153 -/**
2.154 - * Read a double (64-bit) value from memory, writing the first word into arg2a
2.155 - * and the second into arg2b. The addr must not be in EAX
2.156 - */
2.157 -static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
2.158 -{
2.159 - int adj = (-4-sh4_x86.stack_posn)&0x0F;
2.160 - int adj2 = (-8-sh4_x86.stack_posn)&0x0F;
2.161 - SUB_imm8s_r32( adj, R_ESP );
2.162 - PUSH_r32(addr);
2.163 - load_imm32(R_EAX, (uint32_t)sh4_read_long);
2.164 - CALL_r32(R_EAX);
2.165 - POP_r32(R_ECX);
2.166 - SUB_imm8s_r32( adj2-adj, R_ESP );
2.167 - PUSH_r32(R_EAX);
2.168 - ADD_imm8s_r32( 4, R_ECX );
2.169 - PUSH_r32(R_ECX);
2.170 - load_imm32(R_EAX, (uint32_t)sh4_read_long);
2.171 - CALL_r32(R_EAX);
2.172 - ADD_imm8s_r32( 4, R_ESP );
2.173 - MOV_r32_r32( R_EAX, arg2b );
2.174 - POP_r32(arg2a);
2.175 - ADD_imm8s_r32( adj2, R_ESP );
2.176 - sh4_x86.stack_posn -= 4;
2.177 -}
2.178 -
2.179 -#endif
2.180 -
2.181 -/**
2.182 - * Emit the 'start of block' assembly. Sets up the stack frame and save
2.183 - * SI/DI as required
2.184 - */
2.185 -void enter_block( )
2.186 -{
2.187 - PUSH_r32(R_EBP);
2.188 - /* mov &sh4r, ebp */
2.189 - load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
2.190 - sh4_x86.stack_posn = 8;
2.191 -}
2.192 -
2.193 -/**
2.194 - * Exit the block with sh4r.new_pc written with the target pc
2.195 - */
2.196 -void exit_block_pcset( sh4addr_t pc )
2.197 -{
2.198 - load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
2.199 - ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
2.200 - load_spreg( R_EAX, R_PC );
2.201 - if( sh4_x86.tlb_on ) {
2.202 - call_func1(xlat_get_code_by_vma,R_EAX);
2.203 - } else {
2.204 - call_func1(xlat_get_code,R_EAX);
2.205 - }
2.206 - POP_r32(R_EBP);
2.207 - RET();
2.208 -}
2.209 -
2.210 -/**
2.211 - * Exit the block with sh4r.new_pc written with the target pc
2.212 - */
2.213 -void exit_block_newpcset( sh4addr_t pc )
2.214 -{
2.215 - load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
2.216 - ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
2.217 - load_spreg( R_EAX, R_NEW_PC );
2.218 - store_spreg( R_EAX, R_PC );
2.219 - if( sh4_x86.tlb_on ) {
2.220 - call_func1(xlat_get_code_by_vma,R_EAX);
2.221 - } else {
2.222 - call_func1(xlat_get_code,R_EAX);
2.223 - }
2.224 - POP_r32(R_EBP);
2.225 - RET();
2.226 -}
2.227 -
2.228 -
2.229 -/**
2.230 - * Exit the block to an absolute PC
2.231 - */
2.232 -void exit_block( sh4addr_t pc, sh4addr_t endpc )
2.233 -{
2.234 - load_imm32( R_ECX, pc ); // 5
2.235 - store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
2.236 - if( IS_IN_ICACHE(pc) ) {
2.237 - MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
2.238 - } else if( sh4_x86.tlb_on ) {
2.239 - call_func1(xlat_get_code_by_vma,R_ECX);
2.240 - } else {
2.241 - call_func1(xlat_get_code,R_ECX);
2.242 - }
2.243 - AND_imm8s_r32( 0xFC, R_EAX ); // 3
2.244 - load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
2.245 - ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
2.246 - POP_r32(R_EBP);
2.247 - RET();
2.248 -}
2.249 -
2.250 -/**
2.251 - * Exit the block to a relative PC
2.252 - */
2.253 -void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
2.254 -{
2.255 - load_imm32( R_ECX, pc - sh4_x86.block_start_pc ); // 5
2.256 - ADD_sh4r_r32( R_PC, R_ECX );
2.257 - store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
2.258 - if( IS_IN_ICACHE(pc) ) {
2.259 - MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
2.260 - } else if( sh4_x86.tlb_on ) {
2.261 - call_func1(xlat_get_code_by_vma,R_ECX);
2.262 - } else {
2.263 - call_func1(xlat_get_code,R_ECX);
2.264 - }
2.265 - AND_imm8s_r32( 0xFC, R_EAX ); // 3
2.266 - load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
2.267 - ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
2.268 - POP_r32(R_EBP);
2.269 - RET();
2.270 -}
2.271 -
2.272 -/**
2.273 - * Write the block trailer (exception handling block)
2.274 - */
2.275 -void sh4_translate_end_block( sh4addr_t pc ) {
2.276 - if( sh4_x86.branch_taken == FALSE ) {
2.277 - // Didn't exit unconditionally already, so write the termination here
2.278 - exit_block_rel( pc, pc );
2.279 - }
2.280 - if( sh4_x86.backpatch_posn != 0 ) {
2.281 - unsigned int i;
2.282 - // Raise exception
2.283 - uint8_t *end_ptr = xlat_output;
2.284 - MOV_r32_r32( R_EDX, R_ECX );
2.285 - ADD_r32_r32( R_EDX, R_ECX );
2.286 - ADD_r32_sh4r( R_ECX, R_PC );
2.287 - MOV_moff32_EAX( &sh4_cpu_period );
2.288 - MUL_r32( R_EDX );
2.289 - ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
2.290 -
2.291 - POP_r32(R_EDX);
2.292 - call_func1( sh4_raise_exception, R_EDX );
2.293 - load_spreg( R_EAX, R_PC );
2.294 - if( sh4_x86.tlb_on ) {
2.295 - call_func1(xlat_get_code_by_vma,R_EAX);
2.296 - } else {
2.297 - call_func1(xlat_get_code,R_EAX);
2.298 - }
2.299 - POP_r32(R_EBP);
2.300 - RET();
2.301 -
2.302 - // Exception already raised - just cleanup
2.303 - uint8_t *preexc_ptr = xlat_output;
2.304 - MOV_r32_r32( R_EDX, R_ECX );
2.305 - ADD_r32_r32( R_EDX, R_ECX );
2.306 - ADD_r32_sh4r( R_ECX, R_SPC );
2.307 - MOV_moff32_EAX( &sh4_cpu_period );
2.308 - MUL_r32( R_EDX );
2.309 - ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
2.310 - load_spreg( R_EAX, R_PC );
2.311 - if( sh4_x86.tlb_on ) {
2.312 - call_func1(xlat_get_code_by_vma,R_EAX);
2.313 - } else {
2.314 - call_func1(xlat_get_code,R_EAX);
2.315 - }
2.316 - POP_r32(R_EBP);
2.317 - RET();
2.318 -
2.319 - for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
2.320 - uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
2.321 - *fixup_addr = xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
2.322 - if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
2.323 - load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
2.324 - int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;
2.325 - if( stack_adj > 0 ) {
2.326 - ADD_imm8s_r32( stack_adj, R_ESP );
2.327 - }
2.328 - int rel = preexc_ptr - xlat_output;
2.329 - JMP_rel(rel);
2.330 - } else {
2.331 - PUSH_imm32( sh4_x86.backpatch_list[i].exc_code );
2.332 - load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
2.333 - int rel = end_ptr - xlat_output;
2.334 - JMP_rel(rel);
2.335 - }
2.336 - }
2.337 - }
2.338 -}
2.339 -
2.340 -
2.341 -/**
2.342 - * The unwind methods only work if we compiled with DWARF2 frame information
2.343 - * (ie -fexceptions), otherwise we have to use the direct frame scan.
2.344 - */
2.345 -#ifdef HAVE_EXCEPTIONS
2.346 -#include <unwind.h>
2.347 -
2.348 -struct UnwindInfo {
2.349 - uintptr_t block_start;
2.350 - uintptr_t block_end;
2.351 - void *pc;
2.352 -};
2.353 -
2.354 -_Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
2.355 -{
2.356 - struct UnwindInfo *info = arg;
2.357 - void *pc = (void *)_Unwind_GetIP(context);
2.358 - if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
2.359 - info->pc = pc;
2.360 - return _URC_NORMAL_STOP;
2.361 - }
2.362 -
2.363 - return _URC_NO_REASON;
2.364 -}
2.365 -
2.366 -void *xlat_get_native_pc( void *code, uint32_t code_size )
2.367 -{
2.368 - struct _Unwind_Exception exc;
2.369 - struct UnwindInfo info;
2.370 -
2.371 - info.pc = NULL;
2.372 - info.block_start = (uintptr_t)code;
2.373 - info.block_end = info.block_start + code_size;
2.374 - void *result = NULL;
2.375 - _Unwind_Backtrace( xlat_check_frame, &info );
2.376 - return info.pc;
2.377 -}
2.378 -#else
2.379 -void *xlat_get_native_pc( void *code, uint32_t code_size )
2.380 -{
2.381 - void *result = NULL;
2.382 - asm(
2.383 - "mov %%ebp, %%eax\n\t"
2.384 - "mov $0x8, %%ecx\n\t"
2.385 - "mov %1, %%edx\n"
2.386 - "frame_loop: test %%eax, %%eax\n\t"
2.387 - "je frame_not_found\n\t"
2.388 - "cmp (%%eax), %%edx\n\t"
2.389 - "je frame_found\n\t"
2.390 - "sub $0x1, %%ecx\n\t"
2.391 - "je frame_not_found\n\t"
2.392 - "movl (%%eax), %%eax\n\t"
2.393 - "jmp frame_loop\n"
2.394 - "frame_found: movl 0x4(%%eax), %0\n"
2.395 - "frame_not_found:"
2.396 - : "=r" (result)
2.397 - : "r" (((uint8_t *)&sh4r) + 128 )
2.398 - : "eax", "ecx", "edx" );
2.399 - return result;
2.400 -}
2.401 -#endif
2.402 -
2.403 -#endif /* !lxdream_ia32mac.h */
2.404 -
2.405 -
3.1 --- a/src/sh4/ia64abi.h Sun Dec 14 06:36:49 2008 +0000
3.2 +++ b/src/sh4/ia64abi.h Sun Dec 14 07:50:48 2008 +0000
3.3 @@ -94,8 +94,16 @@
3.4 void enter_block( )
3.5 {
3.6 PUSH_r32(R_EBP);
3.7 - /* mov &sh4r, ebp */
3.8 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
3.9 + // Minimum aligned allocation is 16 bytes
3.10 + REXW(); SUB_imm8s_r32( 16, R_ESP );
3.11 +}
3.12 +
3.13 +static inline void exit_block( )
3.14 +{
3.15 + REXW(); ADD_imm8s_r32( 16, R_ESP );
3.16 + POP_r32(R_EBP);
3.17 + RET();
3.18 }
3.19
3.20 /**
3.21 @@ -111,8 +119,7 @@
3.22 } else {
3.23 call_func1(xlat_get_code,R_EAX);
3.24 }
3.25 - POP_r32(R_EBP);
3.26 - RET();
3.27 + exit_block();
3.28 }
3.29
3.30 /**
3.31 @@ -129,30 +136,28 @@
3.32 } else {
3.33 call_func1(xlat_get_code,R_EAX);
3.34 }
3.35 - POP_r32(R_EBP);
3.36 - RET();
3.37 + exit_block();
3.38 }
3.39
3.40 #define EXIT_BLOCK_SIZE(pc) (25 + (IS_IN_ICACHE(pc)?10:CALL_FUNC1_SIZE))
3.41 /**
3.42 * Exit the block to an absolute PC
3.43 */
3.44 -void exit_block( sh4addr_t pc, sh4addr_t endpc )
3.45 +void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
3.46 {
3.47 load_imm32( R_ECX, pc ); // 5
3.48 store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
3.49 if( IS_IN_ICACHE(pc) ) {
3.50 REXW(); MOV_moff32_EAX( xlat_get_lut_entry(pc) );
3.51 + REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 4
3.52 } else if( sh4_x86.tlb_on ) {
3.53 call_func1(xlat_get_code_by_vma, R_ECX);
3.54 } else {
3.55 call_func1(xlat_get_code,R_ECX);
3.56 }
3.57 - REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 4
3.58 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
3.59 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
3.60 - POP_r32(R_EBP);
3.61 - RET();
3.62 + exit_block();
3.63 }
3.64
3.65
3.66 @@ -168,16 +173,15 @@
3.67 store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
3.68 if( IS_IN_ICACHE(pc) ) {
3.69 REXW(); MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
3.70 + REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 4
3.71 } else if( sh4_x86.tlb_on ) {
3.72 call_func1(xlat_get_code_by_vma,R_ECX);
3.73 } else {
3.74 call_func1(xlat_get_code,R_ECX);
3.75 }
3.76 - REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 4
3.77 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
3.78 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
3.79 - POP_r32(R_EBP);
3.80 - RET();
3.81 + exit_block();
3.82 }
3.83
3.84 /**
3.85 @@ -206,9 +210,8 @@
3.86 } else {
3.87 call_func1(xlat_get_code,R_EAX);
3.88 }
3.89 - POP_r32(R_EBP);
3.90 - RET();
3.91 -
3.92 + exit_block();
3.93 +
3.94 // Exception already raised - just cleanup
3.95 uint8_t *preexc_ptr = xlat_output;
3.96 MOV_r32_r32( R_EDX, R_ECX );
3.97 @@ -223,8 +226,7 @@
3.98 } else {
3.99 call_func0(xlat_get_code);
3.100 }
3.101 - POP_r32(R_EBP);
3.102 - RET();
3.103 + exit_block();
3.104
3.105 for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
3.106 uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
3.107 @@ -233,7 +235,7 @@
3.108 load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
3.109 int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;
3.110 if( stack_adj > 0 ) {
3.111 - ADD_imm8s_r32( stack_adj*4, R_ESP );
3.112 + REXW(); ADD_imm8s_r32( stack_adj*4, R_ESP );
3.113 }
3.114 int rel = preexc_ptr - xlat_output;
3.115 JMP_rel(rel);
3.116 @@ -247,26 +249,35 @@
3.117 }
3.118 }
3.119
3.120 +struct UnwindInfo {
3.121 + uintptr_t block_start;
3.122 + uintptr_t block_end;
3.123 + void *pc;
3.124 +};
3.125 +
3.126 _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
3.127 {
3.128 - void *rbp = (void *)_Unwind_GetGR(context, 6);
3.129 - void *expect = (((uint8_t *)&sh4r) + 128 );
3.130 - if( rbp == expect ) {
3.131 - void **result = (void **)arg;
3.132 - *result = (void *)_Unwind_GetIP(context);
3.133 + struct UnwindInfo *info = arg;
3.134 + void *pc = (void *)_Unwind_GetIP(context);
3.135 + if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
3.136 + info->pc = pc;
3.137 return _URC_NORMAL_STOP;
3.138 }
3.139 -
3.140 +
3.141 return _URC_NO_REASON;
3.142 }
3.143
3.144 -void *xlat_get_native_pc( void *code, uint32_t size )
3.145 +void *xlat_get_native_pc( void *code, uint32_t code_size )
3.146 {
3.147 struct _Unwind_Exception exc;
3.148 + struct UnwindInfo info;
3.149
3.150 + info.pc = NULL;
3.151 + info.block_start = (uintptr_t)code;
3.152 + info.block_end = info.block_start + code_size;
3.153 void *result = NULL;
3.154 - _Unwind_Backtrace( xlat_check_frame, &result );
3.155 - return result;
3.156 + _Unwind_Backtrace( xlat_check_frame, &info );
3.157 + return info.pc;
3.158 }
3.159
3.160 #endif /* !lxdream_ia64abi_H */
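On amd64 the same frame is built with a REX.W-prefixed adjustment: REXW(); SUB_imm8s_r32( 16, R_ESP ) assembles to sub rsp, 16 (the return address plus saved RBP already total 16 bytes, so the extra 16 keeps RSP 16-byte aligned inside the block). A standalone sketch of the emitted bytes; SUB_imm8s_r32 and MODRM_rm32_r32 are not shown in this changeset, so their standard 0x83 /5 and mod=11 encodings are assumed here:

#include <stdint.h>
#include <stdio.h>

static uint8_t buf[8];
static uint8_t *out = buf;
#define OP(x) (*out++ = (uint8_t)(x))

#define REXW() OP(0x48)
#define MODRM_rm32_r32(r1,r2) OP(0xC0 | ((r2)<<3) | (r1))          /* assumed */
#define SUB_imm8s_r32(imm,r1) OP(0x83); MODRM_rm32_r32(r1,5); OP(imm) /* assumed */

#define R_ESP 4

int main(void)
{
    REXW(); SUB_imm8s_r32( 16, R_ESP );  /* sub rsp, 16 -> 48 83 ec 10 */
    for( uint8_t *p = buf; p != out; p++ ) printf("%02x ", *p);
    printf("\n");
    return 0;
}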
4.1 --- a/src/sh4/sh4trans.h Sun Dec 14 06:36:49 2008 +0000
4.2 +++ b/src/sh4/sh4trans.h Sun Dec 14 07:50:48 2008 +0000
4.3 @@ -34,7 +34,7 @@
4.4 /** Maximum size of the translation epilogue (current real size is 116 bytes, so
4.5 * allows a little room
4.6 */
4.7 -#define EPILOGUE_SIZE 128
4.8 +#define EPILOGUE_SIZE 136
4.9
4.10 /** Maximum number of recovery records for a translated block (2048 based on
4.11 * 1 record per SH4 instruction in a 4K page).
5.1 --- a/src/sh4/sh4x86.in Sun Dec 14 06:36:49 2008 +0000
5.2 +++ b/src/sh4/sh4x86.in Sun Dec 14 07:50:48 2008 +0000
5.3 @@ -315,12 +315,8 @@
5.4 #if SIZEOF_VOID_P == 8
5.5 #include "sh4/ia64abi.h"
5.6 #else /* 32-bit system */
5.7 -#ifdef APPLE_BUILD
5.8 -#include "sh4/ia32mac.h"
5.9 -#else
5.10 #include "sh4/ia32abi.h"
5.11 #endif
5.12 -#endif
5.13
5.14 void sh4_translate_begin_block( sh4addr_t pc )
5.15 {
5.16 @@ -390,9 +386,7 @@
5.17 } else {
5.18 call_func1(xlat_get_code,R_EAX);
5.19 }
5.20 - AND_imm8s_rptr( 0xFC, R_EAX );
5.21 - POP_r32(R_EBP);
5.22 - RET();
5.23 + exit_block();
5.24 }
5.25
5.26 /**
5.27 @@ -473,9 +467,9 @@
5.28 load_spreg( R_ECX, R_GBR );
5.29 ADD_r32_r32( R_ECX, R_EAX );
5.30 MMU_TRANSLATE_WRITE( R_EAX );
5.31 - PUSH_realigned_r32(R_EAX);
5.32 + MOV_r32_esp8(R_EAX, 0);
5.33 MEM_READ_BYTE( R_EAX, R_EDX );
5.34 - POP_realigned_r32(R_EAX);
5.35 + MOV_esp8_r32(0, R_EAX);
5.36 AND_imm32_r32(imm, R_EDX );
5.37 MEM_WRITE_BYTE( R_EAX, R_EDX );
5.38 sh4_x86.tstate = TSTATE_NONE;
5.39 @@ -662,10 +656,10 @@
5.40 load_reg( R_EAX, Rm );
5.41 check_ralign32( R_EAX );
5.42 MMU_TRANSLATE_READ( R_EAX );
5.43 - PUSH_realigned_r32( R_EAX );
5.44 + MOV_r32_esp8(R_EAX, 0);
5.45 load_reg( R_EAX, Rn );
5.46 ADD_imm8s_r32( 4, R_EAX );
5.47 - MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
5.48 + MMU_TRANSLATE_READ( R_EAX );
5.49 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
5.50 // Note translate twice in case of page boundaries. Maybe worth
5.51 // adding a page-boundary check to skip the second translation
5.52 @@ -673,19 +667,18 @@
5.53 load_reg( R_EAX, Rm );
5.54 check_ralign32( R_EAX );
5.55 MMU_TRANSLATE_READ( R_EAX );
5.56 - load_reg( R_ECX, Rn );
5.57 - check_ralign32( R_ECX );
5.58 - PUSH_realigned_r32( R_EAX );
5.59 - MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
5.60 - MOV_r32_r32( R_ECX, R_EAX );
5.61 + MOV_r32_esp8( R_EAX, 0 );
5.62 + load_reg( R_EAX, Rn );
5.63 + check_ralign32( R_EAX );
5.64 + MMU_TRANSLATE_READ( R_EAX );
5.65 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
5.66 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
5.67 }
5.68 MEM_READ_LONG( R_EAX, R_EAX );
5.69 - POP_r32( R_ECX );
5.70 - PUSH_r32( R_EAX );
5.71 - MEM_READ_LONG( R_ECX, R_EAX );
5.72 - POP_realigned_r32( R_ECX );
5.73 + MOV_r32_esp8( R_EAX, 4 );
5.74 + MOV_esp8_r32( 0, R_EAX );
5.75 + MEM_READ_LONG( R_EAX, R_EAX );
5.76 + MOV_esp8_r32( 4, R_ECX );
5.77
5.78 IMUL_r32( R_ECX );
5.79 ADD_r32_sh4r( R_EAX, R_MACL );
5.80 @@ -704,10 +697,10 @@
5.81 load_reg( R_EAX, Rm );
5.82 check_ralign16( R_EAX );
5.83 MMU_TRANSLATE_READ( R_EAX );
5.84 - PUSH_realigned_r32( R_EAX );
5.85 + MOV_r32_esp8( R_EAX, 0 );
5.86 load_reg( R_EAX, Rn );
5.87 ADD_imm8s_r32( 2, R_EAX );
5.88 - MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
5.89 + MMU_TRANSLATE_READ( R_EAX );
5.90 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
5.91 // Note translate twice in case of page boundaries. Maybe worth
5.92 // adding a page-boundary check to skip the second translation
5.93 @@ -715,21 +708,20 @@
5.94 load_reg( R_EAX, Rm );
5.95 check_ralign16( R_EAX );
5.96 MMU_TRANSLATE_READ( R_EAX );
5.97 - load_reg( R_ECX, Rn );
5.98 - check_ralign16( R_ECX );
5.99 - PUSH_realigned_r32( R_EAX );
5.100 - MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
5.101 - MOV_r32_r32( R_ECX, R_EAX );
5.102 + MOV_r32_esp8( R_EAX, 0 );
5.103 + load_reg( R_EAX, Rn );
5.104 + check_ralign16( R_EAX );
5.105 + MMU_TRANSLATE_READ( R_EAX );
5.106 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
5.107 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
5.108 }
5.109 MEM_READ_WORD( R_EAX, R_EAX );
5.110 - POP_r32( R_ECX );
5.111 - PUSH_r32( R_EAX );
5.112 - MEM_READ_WORD( R_ECX, R_EAX );
5.113 - POP_realigned_r32( R_ECX );
5.114 + MOV_r32_esp8( R_EAX, 4 );
5.115 + MOV_esp8_r32( 0, R_EAX );
5.116 + MEM_READ_WORD( R_EAX, R_EAX );
5.117 + MOV_esp8_r32( 4, R_ECX );
5.118 +
5.119 IMUL_r32( R_ECX );
5.120 -
5.121 load_spreg( R_ECX, R_S );
5.122 TEST_r32_r32( R_ECX, R_ECX );
5.123 JE_rel8( nosat );
5.124 @@ -830,9 +822,9 @@
5.125 load_spreg( R_ECX, R_GBR );
5.126 ADD_r32_r32( R_ECX, R_EAX );
5.127 MMU_TRANSLATE_WRITE( R_EAX );
5.128 - PUSH_realigned_r32(R_EAX);
5.129 + MOV_r32_esp8( R_EAX, 0 );
5.130 MEM_READ_BYTE( R_EAX, R_EDX );
5.131 - POP_realigned_r32(R_EAX);
5.132 + MOV_esp8_r32( 0, R_EAX );
5.133 OR_imm32_r32(imm, R_EDX );
5.134 MEM_WRITE_BYTE( R_EAX, R_EDX );
5.135 sh4_x86.tstate = TSTATE_NONE;
5.136 @@ -1049,12 +1041,12 @@
5.137 COUNT_INST(I_TASB);
5.138 load_reg( R_EAX, Rn );
5.139 MMU_TRANSLATE_WRITE( R_EAX );
5.140 - PUSH_realigned_r32( R_EAX );
5.141 + MOV_r32_esp8( R_EAX, 0 );
5.142 MEM_READ_BYTE( R_EAX, R_EDX );
5.143 TEST_r8_r8( R_DL, R_DL );
5.144 SETE_t();
5.145 OR_imm8_r8( 0x80, R_DL );
5.146 - POP_realigned_r32( R_EAX );
5.147 + MOV_esp8_r32( 0, R_EAX );
5.148 MEM_WRITE_BYTE( R_EAX, R_EDX );
5.149 sh4_x86.tstate = TSTATE_NONE;
5.150 :}
5.151 @@ -1105,9 +1097,9 @@
5.152 load_spreg( R_ECX, R_GBR );
5.153 ADD_r32_r32( R_ECX, R_EAX );
5.154 MMU_TRANSLATE_WRITE( R_EAX );
5.155 - PUSH_realigned_r32(R_EAX);
5.156 + MOV_r32_esp8( R_EAX, 0 );
5.157 MEM_READ_BYTE(R_EAX, R_EDX);
5.158 - POP_realigned_r32(R_EAX);
5.159 + MOV_esp8_r32( 0, R_EAX );
5.160 XOR_imm32_r32( imm, R_EDX );
5.161 MEM_WRITE_BYTE( R_EAX, R_EDX );
5.162 sh4_x86.tstate = TSTATE_NONE;
5.163 @@ -2636,11 +2628,12 @@
5.164 check_walign32( R_EAX );
5.165 ADD_imm8s_r32( -4, R_EAX );
5.166 MMU_TRANSLATE_WRITE( R_EAX );
5.167 - PUSH_realigned_r32( R_EAX );
5.168 + MOV_r32_esp8( R_EAX, 0 );
5.169 call_func0( sh4_read_sr );
5.170 - POP_realigned_r32( R_ECX );
5.171 + MOV_r32_r32( R_EAX, R_EDX );
5.172 + MOV_esp8_r32( 0, R_EAX );
5.173 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
5.174 - MEM_WRITE_LONG( R_ECX, R_EAX );
5.175 + MEM_WRITE_LONG( R_EAX, R_EDX );
5.176 sh4_x86.tstate = TSTATE_NONE;
5.177 :}
5.178 STC.L VBR, @-Rn {:
6.1 --- a/src/sh4/x86op.h Sun Dec 14 06:36:49 2008 +0000
6.2 +++ b/src/sh4/x86op.h Sun Dec 14 07:50:48 2008 +0000
6.3 @@ -54,31 +54,17 @@
6.4 #define AND_imm8s_rptr(imm, r1) REXW(); AND_imm8s_r32( imm, r1 )
6.5 #define LEA_sh4r_rptr(disp, r1) REXW(); LEA_sh4r_r32(disp,r1)
6.6 #define MOV_moffptr_EAX(offptr) REXW(); MOV_moff32_EAX( offptr )
6.7 -#define STACK_ALIGN 16
6.8 -#define POP_r32(r1) OP(0x58 + r1);
6.9 -#define POP_realigned_r32(r1) OP(0x58 + r1); REXW(); ADD_imm8s_r32(8,R_ESP)
6.10 -#define PUSH_r32(r1) OP(0x50 + r1);
6.11 -#define PUSH_realigned_r32(r1) REXW(); SUB_imm8s_r32(8, R_ESP); OP(0x50 + r1)
6.12 -#define PUSH_imm32(imm) OP(0x68); OP32(imm);
6.13 -#define PUSH_imm64(imm) REXW(); OP(0x68); OP64(imm);
6.14 #else /* 32-bit system */
6.15 #define OPPTR(x) OP32((uint32_t)(x))
6.16 #define AND_imm8s_rptr(imm, r1) AND_imm8s_r32( imm, r1 )
6.17 #define LEA_sh4r_rptr(disp, r1) LEA_sh4r_r32(disp,r1)
6.18 #define MOV_moffptr_EAX(offptr) MOV_moff32_EAX( offptr )
6.19 -#define POP_realigned_r32(r1) POP_r32(r1)
6.20 -#define PUSH_realigned_r32(r1) PUSH_r32(r1)
6.21 -#ifdef APPLE_BUILD
6.22 +#endif
6.23 #define STACK_ALIGN 16
6.24 -#define POP_r32(r1) OP(0x58 + r1); sh4_x86.stack_posn -= 4;
6.25 -#define PUSH_r32(r1) OP(0x50 + r1); sh4_x86.stack_posn += 4;
6.26 -#define PUSH_imm32(imm) OP(0x68); OP32(imm); sh4_x86.stack_posn += 4;
6.27 -#else
6.28 #define POP_r32(r1) OP(0x58 + r1)
6.29 #define PUSH_r32(r1) OP(0x50 + r1)
6.30 #define PUSH_imm32(imm) OP(0x68); OP32(imm)
6.31 -#endif
6.32 -#endif
6.33 +#define PUSH_imm64(imm) REXW(); OP(0x68); OP64(imm);
6.34
6.35 #ifdef STACK_ALIGN
6.36 #else
6.37 @@ -124,6 +110,9 @@
6.38 /* ebp+disp32 modrm form */
6.39 #define MODRM_r32_ebp32(r1,disp) OP(0x85 | (r1<<3)); OP32(disp)
6.40
6.41 +/* esp+disp32 modrm+sib form */
6.42 +#define MODRM_r32_esp8(r1,disp) OP(0x44 | (r1<<3)); OP(0x24); OP(disp)
6.43 +
6.44 #define MODRM_r32_sh4r(r1,disp) if(disp>127){ MODRM_r32_ebp32(r1,disp);}else{ MODRM_r32_ebp8(r1,(unsigned char)disp); }
6.45
6.46 #define REXW() OP(0x48)
6.47 @@ -134,6 +123,7 @@
6.48 #define ADD_r32_r32(r1,r2) OP(0x03); MODRM_rm32_r32(r1,r2)
6.49 #define ADD_imm8s_r32(imm,r1) OP(0x83); MODRM_rm32_r32(r1, 0); OP(imm)
6.50 #define ADD_imm8s_sh4r(imm,disp) OP(0x83); MODRM_r32_sh4r(0,disp); OP(imm)
6.51 +#define ADD_imm8s_esp8(imm,disp) OP(0x83); MODRM_r32_esp8(0,disp); OP(imm)
6.52 #define ADD_imm32_r32(imm32,r1) OP(0x81); MODRM_rm32_r32(r1,0); OP32(imm32)
6.53 #define ADC_r32_r32(r1,r2) OP(0x13); MODRM_rm32_r32(r1,r2)
6.54 #define ADC_sh4r_r32(disp,r1) OP(0x13); MODRM_r32_sh4r(r1,disp)
6.55 @@ -143,6 +133,7 @@
6.56 #define AND_imm8s_r32(imm8,r1) OP(0x83); MODRM_rm32_r32(r1,4); OP(imm8)
6.57 #define AND_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,4); OP32(imm)
6.58 #define CALL_r32(r1) OP(0xFF); MODRM_rm32_r32(r1,2)
6.59 +#define CALL_ptr(ptr) OP(0xE8); OP32( (((char *)ptr) - (char *)xlat_output) - 4)
6.60 #define CLC() OP(0xF8)
6.61 #define CMC() OP(0xF5)
6.62 #define CMP_sh4r_r32(disp,r1) OP(0x3B); MODRM_r32_sh4r(r1,disp)
6.63 @@ -155,12 +146,15 @@
6.64 #define INC_r32(r1) OP(0x40+r1)
6.65 #define JMP_rel8(label) OP(0xEB); MARK_JMP8(label); OP(-1);
6.66 #define LEA_sh4r_r32(disp,r1) OP(0x8D); MODRM_r32_sh4r(r1,disp)
6.67 +#define LEA_r32disp8_r32(r1, disp, r2) OP(0x8D); OP( 0x40 + (r2<<3) + r1); OP(disp)
6.68 #define MOV_r32_r32(r1,r2) OP(0x89); MODRM_r32_rm32(r1,r2)
6.69 #define MOV_r32_sh4r(r1,disp) OP(0x89); MODRM_r32_sh4r(r1,disp)
6.70 #define MOV_moff32_EAX(off) OP(0xA1); OPPTR(off)
6.71 #define MOV_sh4r_r32(disp, r1) OP(0x8B); MODRM_r32_sh4r(r1,disp)
6.72 #define MOV_r32_r32ind(r2,r1) OP(0x89); OP(0 + (r2<<3) + r1 )
6.73 #define MOV_r32ind_r32(r1,r2) OP(0x8B); OP(0 + (r2<<3) + r1 )
6.74 +#define MOV_r32_esp8(r1,disp) OP(0x89); MODRM_r32_esp8(r1,disp)
6.75 +#define MOV_esp8_r32(disp,r1) OP(0x8B); MODRM_r32_esp8(r1,disp)
6.76 #define MOVSX_r8_r32(r1,r2) OP(0x0F); OP(0xBE); MODRM_rm32_r32(r1,r2)
6.77 #define MOVSX_r16_r32(r1,r2) OP(0x0F); OP(0xBF); MODRM_rm32_r32(r1,r2)
6.78 #define MOVZX_r8_r32(r1,r2) OP(0x0F); OP(0xB6); MODRM_rm32_r32(r1,r2)
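To make the new ESP-relative addressing concrete, here is a small harness that expands two of the macros above into a local buffer (standing in for xlat_output) and prints the machine code they emit; R_EAX = 0 is assumed, consistent with the 0x50 + r1 PUSH encoding:

#include <stdint.h>
#include <stdio.h>

static uint8_t buf[32];
static uint8_t *out = buf;
#define OP(x) (*out++ = (uint8_t)(x))

/* Copied from x86op.h above */
#define MODRM_r32_esp8(r1,disp) OP(0x44 | (r1<<3)); OP(0x24); OP(disp)
#define MOV_r32_esp8(r1,disp) OP(0x89); MODRM_r32_esp8(r1,disp)
#define ADD_imm8s_esp8(imm,disp) OP(0x83); MODRM_r32_esp8(0,disp); OP(imm)

#define R_EAX 0   /* assumed register number */

int main(void)
{
    MOV_r32_esp8(R_EAX, 4);   /* mov [esp+4], eax   -> 89 44 24 04    */
    ADD_imm8s_esp8(4, 0);     /* add dword [esp], 4 -> 83 44 24 00 04 */
    for( uint8_t *p = buf; p != out; p++ ) printf("%02x ", *p);
    printf("\n");
    return 0;
}

Running it prints 89 44 24 04 83 44 24 00 04: a store to a stack-local slot and an in-place add on one, the forms that now replace PUSH/POP inside block bodies.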