lxdream.org :: lxdream :: r368:36fac4c42322
changeset  368:36fac4c42322
parent     367:9c52dcbad3fb
child      369:4b4223e7d720
author     nkeynes
date       Tue Sep 04 08:40:23 2007 +0000
More translator WIP - blocks are approaching something sane
src/sh4/sh4trans.c
src/sh4/sh4x86.c
src/sh4/sh4x86.in
src/sh4/x86op.h
1.1 --- a/src/sh4/sh4trans.c Tue Sep 04 08:38:33 2007 +0000
1.2 +++ b/src/sh4/sh4trans.c Tue Sep 04 08:40:23 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4trans.c,v 1.1 2007-08-23 12:33:27 nkeynes Exp $
1.6 + * $Id: sh4trans.c,v 1.2 2007-09-04 08:40:23 nkeynes Exp $
1.7 *
1.8 * SH4 translation core module. This part handles the non-target-specific
1.9 * section of the translation.
1.10 @@ -27,7 +27,7 @@
1.11 */
1.12 uint32_t sh4_xlat_run_slice( uint32_t nanosecs )
1.13 {
1.14 - int i, result = 1;
1.15 + int i;
1.16 sh4r.slice_cycle = 0;
1.17
1.18 if( sh4r.sh4_state != SH4_STATE_RUNNING ) {
1.19 @@ -37,7 +37,7 @@
1.20 }
1.21 }
1.22
1.23 - for( ; sh4r.slice_cycle < nanosecs && result != 0; sh4r.slice_cycle ) {
1.24 + while( sh4r.slice_cycle < nanosecs ) {
1.25 if( SH4_EVENT_PENDING() ) {
1.26 if( sh4r.event_types & PENDING_EVENT ) {
1.27 event_execute();
1.28 @@ -48,12 +48,12 @@
1.29 }
1.30 }
1.31
1.32 - int (*code)() = xlat_get_code(sh4r.pc);
1.33 + gboolean (*code)() = xlat_get_code(sh4r.pc);
1.34 if( code == NULL ) {
1.35 code = sh4_translate_basic_block( sh4r.pc );
1.36 }
1.37 - result = code();
1.38 - sh4r.slice_cycle += result;
1.39 + if( !code() )
1.40 + break;
1.41 }
1.42
1.43 /* If we aborted early, but the cpu is still technically running,
1.44 @@ -97,8 +97,8 @@
1.45 }
1.46 pc += 2;
1.47 }
1.48 - sh4_translate_end_block(done);
1.49 - xlat_commit_block( xlat_output - block->size, pc-start );
1.50 + sh4_translate_end_block(pc);
1.51 + xlat_commit_block( xlat_output - block->code, pc-start );
1.52 return block->code;
1.53 }
1.54
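Note: the rewritten loop above delegates all PC and cycle bookkeeping to the translated block itself. Every block emitted by sh4x86.c (below) counts retired instructions in ESI and finishes via exit_block(); roughly, in C, using only the sh4r fields and the sh4_cpu_period global visible in this changeset (the function name is a hypothetical stand-in):

    /* Sketch of the emitted exit_block() epilogue: store the resume PC,
     * then charge the block's instruction count (kept in ESI, one INC
     * per translated instruction) against the current time slice. */
    static void exit_block_sketch( uint32_t pc, uint32_t icount )
    {
        sh4r.pc = pc;
        sh4r.slice_cycle += icount * sh4_cpu_period;
        /* EAX is then cleared and control returns to sh4_xlat_run_slice() */
    }
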
2.1 --- a/src/sh4/sh4x86.c Tue Sep 04 08:38:33 2007 +0000
2.2 +++ b/src/sh4/sh4x86.c Tue Sep 04 08:40:23 2007 +0000
2.3 @@ -1,5 +1,5 @@
2.4 /**
2.5 - * $Id: sh4x86.c,v 1.2 2007-08-28 08:46:14 nkeynes Exp $
2.6 + * $Id: sh4x86.c,v 1.3 2007-09-04 08:40:23 nkeynes Exp $
2.7 *
2.8 * SH4 => x86 translation. This version does no real optimization, it just
2.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
2.10 @@ -18,9 +18,73 @@
2.11 * GNU General Public License for more details.
2.12 */
2.13
2.14 -#include "sh4core.h"
2.15 -#include "sh4trans.h"
2.16 -#include "x86op.h"
2.17 +#include <assert.h>
2.18 +
2.19 +#include "sh4/sh4core.h"
2.20 +#include "sh4/sh4trans.h"
2.21 +#include "sh4/x86op.h"
2.22 +#include "clock.h"
2.23 +
2.24 +#define DEFAULT_BACKPATCH_SIZE 4096
2.25 +
2.26 +/**
2.27 + * Struct to manage internal translation state. This state is not saved -
2.28 + * it is only valid between calls to sh4_translate_begin_block() and
2.29 + * sh4_translate_end_block()
2.30 + */
2.31 +struct sh4_x86_state {
2.32 + gboolean in_delay_slot;
2.33 + gboolean priv_checked; /* true if we've already checked the cpu mode. */
2.34 + gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
2.35 +
2.36 + /* Allocated memory for the (block-wide) back-patch list */
2.37 + uint32_t **backpatch_list;
2.38 + uint32_t backpatch_posn;
2.39 + uint32_t backpatch_size;
2.40 +};
2.41 +
2.42 +#define EXIT_DATA_ADDR_READ 0
2.43 +#define EXIT_DATA_ADDR_WRITE 7
2.44 +#define EXIT_ILLEGAL 14
2.45 +#define EXIT_SLOT_ILLEGAL 21
2.46 +#define EXIT_FPU_DISABLED 28
2.47 +#define EXIT_SLOT_FPU_DISABLED 35
2.48 +
2.49 +static struct sh4_x86_state sh4_x86;
2.50 +
2.51 +void sh4_x86_init()
2.52 +{
2.53 + sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
2.54 + sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
2.55 +}
2.56 +
2.57 +
2.58 +static void sh4_x86_add_backpatch( uint8_t *ptr )
2.59 +{
2.60 + if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
2.61 + sh4_x86.backpatch_size <<= 1;
2.62 + sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
2.63 + assert( sh4_x86.backpatch_list != NULL );
2.64 + }
2.65 + sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
2.66 +}
2.67 +
2.68 +static void sh4_x86_do_backpatch( uint8_t *reloc_base )
2.69 +{
2.70 + unsigned int i;
2.71 + for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
2.72 + *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]));
2.73 + }
2.74 +}
2.75 +
2.76 +#ifndef NDEBUG
2.77 +#define MARK_JMP(x,n) uint8_t *_mark_jmp_##x = xlat_output + n
2.78 +#define CHECK_JMP(x) assert( _mark_jmp_##x == xlat_output )
2.79 +#else
2.80 +#define MARK_JMP(x,n)
2.81 +#define CHECK_JMP(x)
2.82 +#endif
2.83 +
2.84
2.85 /**
2.86 * Emit an instruction to load an SH4 reg into a real register
2.87 @@ -33,6 +97,36 @@
2.88 OP(REG_OFFSET(r[sh4reg]));
2.89 }
2.90
2.91 +/**
2.92 + * Load the SR register into an x86 register
2.93 + */
2.94 +static inline void read_sr( int x86reg )
2.95 +{
2.96 + MOV_ebp_r32( R_M, x86reg );
2.97 + SHL1_r32( x86reg );
2.98 + OR_ebp_r32( R_Q, x86reg );
2.99 + SHL_imm8_r32( 7, x86reg );
2.100 + OR_ebp_r32( R_S, x86reg );
2.101 + SHL1_r32( x86reg );
2.102 + OR_ebp_r32( R_T, x86reg );
2.103 + OR_ebp_r32( R_SR, x86reg );
2.104 +}
2.105 +
2.106 +static inline void write_sr( int x86reg )
2.107 +{
2.108 + TEST_imm32_r32( SR_M, x86reg );
2.109 + SETNE_ebp(R_M);
2.110 + TEST_imm32_r32( SR_Q, x86reg );
2.111 + SETNE_ebp(R_Q);
2.112 + TEST_imm32_r32( SR_S, x86reg );
2.113 + SETNE_ebp(R_S);
2.114 + TEST_imm32_r32( SR_T, x86reg );
2.115 + SETNE_ebp(R_T);
2.116 + AND_imm32_r32( SR_MQSTMASK, x86reg );
2.117 + MOV_r32_ebp( x86reg, R_SR );
2.118 +}
2.119 +
2.120 +
2.121 static inline void load_spreg( int x86reg, int regoffset )
2.122 {
2.123 /* mov [bp+n], reg */
2.124 @@ -73,8 +167,7 @@
2.125 static inline void call_func0( void *ptr )
2.126 {
2.127 load_imm32(R_EAX, (uint32_t)ptr);
2.128 - OP(0xFF);
2.129 - MODRM_rm32_r32(R_EAX, 2);
2.130 + CALL_r32(R_EAX);
2.131 }
2.132
2.133 static inline void call_func1( void *ptr, int arg1 )
2.134 @@ -92,6 +185,59 @@
2.135 ADD_imm8s_r32( -4, R_ESP );
2.136 }
2.137
2.138 +/* Exception checks - Note that all exception checks will clobber EAX */
2.139 +static void check_priv( )
2.140 +{
2.141 + if( !sh4_x86.priv_checked ) {
2.142 + sh4_x86.priv_checked = TRUE;
2.143 + load_spreg( R_EAX, R_SR );
2.144 + AND_imm32_r32( SR_MD, R_EAX );
2.145 + if( sh4_x86.in_delay_slot ) {
2.146 + JE_exit( EXIT_SLOT_ILLEGAL );
2.147 + } else {
2.148 + JE_exit( EXIT_ILLEGAL );
2.149 + }
2.150 + }
2.151 +}
2.152 +
2.153 +static void check_fpuen( )
2.154 +{
2.155 + if( !sh4_x86.fpuen_checked ) {
2.156 + sh4_x86.fpuen_checked = TRUE;
2.157 + load_spreg( R_EAX, R_SR );
2.158 + AND_imm32_r32( SR_FD, R_EAX );
2.159 + if( sh4_x86.in_delay_slot ) {
2.160 + JNE_exit(EXIT_SLOT_FPU_DISABLED);
2.161 + } else {
2.162 + JNE_exit(EXIT_FPU_DISABLED);
2.163 + }
2.164 + }
2.165 +}
2.166 +
2.167 +static void check_ralign16( int x86reg )
2.168 +{
2.169 + TEST_imm32_r32( 0x00000001, x86reg );
2.170 + JNE_exit(EXIT_DATA_ADDR_READ);
2.171 +}
2.172 +
2.173 +static void check_walign16( int x86reg )
2.174 +{
2.175 + TEST_imm32_r32( 0x00000001, x86reg );
2.176 + JNE_exit(EXIT_DATA_ADDR_WRITE);
2.177 +}
2.178 +
2.179 +static void check_ralign32( int x86reg )
2.180 +{
2.181 + TEST_imm32_r32( 0x00000003, x86reg );
2.182 + JNE_exit(EXIT_DATA_ADDR_READ);
2.183 +}
2.184 +static void check_walign32( int x86reg )
2.185 +{
2.186 + TEST_imm32_r32( 0x00000003, x86reg );
2.187 + JNE_exit(EXIT_DATA_ADDR_WRITE);
2.188 +}
2.189 +
2.190 +
2.191 #define UNDEF()
2.192 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
2.193 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
2.194 @@ -101,30 +247,83 @@
2.195 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
2.196 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
2.197
2.198 +#define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
2.199 +#define CHECKSLOTILLEGAL() if(sh4_x86.in_delay_slot) RAISE_EXCEPTION(EXC_SLOT_ILLEGAL)
2.200 +
2.201 +
2.202
2.203 /**
2.204 * Emit the 'start of block' assembly. Sets up the stack frame and save
2.205 * SI/DI as required
2.206 */
2.207 -void sh4_translate_begin_block() {
2.208 - /* push ebp */
2.209 - *xlat_output++ = 0x50 + R_EBP;
2.210 -
2.211 +void sh4_translate_begin_block()
2.212 +{
2.213 + PUSH_r32(R_EBP);
2.214 + PUSH_r32(R_ESI);
2.215 /* mov &sh4r, ebp */
2.216 load_imm32( R_EBP, (uint32_t)&sh4r );
2.217 + PUSH_r32(R_ESI);
2.218 +
2.219 + sh4_x86.in_delay_slot = FALSE;
2.220 + sh4_x86.priv_checked = FALSE;
2.221 + sh4_x86.fpuen_checked = FALSE;
2.222 + sh4_x86.backpatch_posn = 0;
2.223 +}
2.224
2.225 - /* load carry from SR */
2.226 +/**
2.227 + * Exit the block early (ie branch out), conditionally or otherwise
2.228 + */
2.229 +void exit_block( uint32_t pc )
2.230 +{
2.231 + load_imm32( R_ECX, pc );
2.232 + store_spreg( R_ECX, REG_OFFSET(pc) );
2.233 + MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
2.234 + load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
2.235 + MUL_r32( R_ESI );
2.236 + ADD_r32_r32( R_EAX, R_ECX );
2.237 + store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
2.238 + XOR_r32_r32( R_EAX, R_EAX );
2.239 + RET();
2.240 }
2.241
2.242 /**
2.243 * Flush any open regs back to memory, restore SI/DI/, update PC, etc
2.244 */
2.245 void sh4_translate_end_block( sh4addr_t pc ) {
2.246 - /* pop ebp */
2.247 - *xlat_output++ = 0x58 + R_EBP;
2.248 + assert( !sh4_x86.in_delay_slot ); // should never stop here
2.249 + // Normal termination - save PC, cycle count
2.250 + exit_block( pc );
2.251
2.252 - /* ret */
2.253 - *xlat_output++ = 0xC3;
2.254 + uint8_t *end_ptr = xlat_output;
2.255 + // Exception termination. Jump block for various exception codes:
2.256 + PUSH_imm32( EXC_DATA_ADDR_READ );
2.257 + JMP_rel8( 33 );
2.258 + PUSH_imm32( EXC_DATA_ADDR_WRITE );
2.259 + JMP_rel8( 26 );
2.260 + PUSH_imm32( EXC_ILLEGAL );
2.261 + JMP_rel8( 19 );
2.262 + PUSH_imm32( EXC_SLOT_ILLEGAL );
2.263 + JMP_rel8( 12 );
2.264 + PUSH_imm32( EXC_FPU_DISABLED );
2.265 + JMP_rel8( 5 );
2.266 + PUSH_imm32( EXC_SLOT_FPU_DISABLED );
2.267 + // target
2.268 + load_spreg( R_ECX, REG_OFFSET(pc) );
2.269 + ADD_r32_r32( R_ESI, R_ECX );
2.270 + ADD_r32_r32( R_ESI, R_ECX );
2.271 + store_spreg( R_ECX, REG_OFFSET(pc) );
2.272 + MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
2.273 + load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
2.274 + MUL_r32( R_ESI );
2.275 + ADD_r32_r32( R_EAX, R_ECX );
2.276 + store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
2.277 +
2.278 + load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
2.279 + CALL_r32( R_EAX ); // 2
2.280 + POP_r32(R_EBP);
2.281 + RET();
2.282 +
2.283 + sh4_x86_do_backpatch( end_ptr );
2.284 }
2.285
2.286 /**
2.287 @@ -138,7 +337,7 @@
2.288 uint32_t sh4_x86_translate_instruction( uint32_t pc )
2.289 {
2.290 uint16_t ir = sh4_read_word( pc );
2.291 -
2.292 +
2.293 switch( (ir&0xF000) >> 12 ) {
2.294 case 0x0:
2.295 switch( ir&0xF ) {
2.296 @@ -149,7 +348,8 @@
2.297 case 0x0:
2.298 { /* STC SR, Rn */
2.299 uint32_t Rn = ((ir>>8)&0xF);
2.300 - /* TODO */
2.301 + read_sr( R_EAX );
2.302 + store_reg( R_EAX, Rn );
2.303 }
2.304 break;
2.305 case 0x1:
2.306 @@ -566,6 +766,18 @@
2.307 case 0xC:
2.308 { /* CMP/STR Rm, Rn */
2.309 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
2.310 + load_reg( R_EAX, Rm );
2.311 + load_reg( R_ECX, Rn );
2.312 + XOR_r32_r32( R_ECX, R_EAX );
2.313 + TEST_r8_r8( R_AL, R_AL );
2.314 + JE_rel8(13);
2.315 + TEST_r8_r8( R_AH, R_AH ); // 2
2.316 + JE_rel8(9);
2.317 + SHR_imm8_r32( 16, R_EAX ); // 3
2.318 + TEST_r8_r8( R_AL, R_AL ); // 2
2.319 + JE_rel8(2);
2.320 + TEST_r8_r8( R_AH, R_AH ); // 2
2.321 + SETE_t();
2.322 }
2.323 break;
2.324 case 0xD:
2.325 @@ -880,6 +1092,11 @@
2.326 { /* STC.L SR, @-Rn */
2.327 uint32_t Rn = ((ir>>8)&0xF);
2.328 /* TODO */
2.329 + load_reg( R_ECX, Rn );
2.330 + ADD_imm8s_r32( -4, Rn );
2.331 + store_reg( R_ECX, Rn );
2.332 + read_sr( R_EAX );
2.333 + MEM_WRITE_LONG( R_ECX, R_EAX );
2.334 }
2.335 break;
2.336 case 0x1:
2.337 @@ -1085,6 +1302,12 @@
2.338 case 0x0:
2.339 { /* LDC.L @Rm+, SR */
2.340 uint32_t Rm = ((ir>>8)&0xF);
2.341 + load_reg( R_EAX, Rm );
2.342 + MOV_r32_r32( R_EAX, R_ECX );
2.343 + ADD_imm8s_r32( 4, R_EAX );
2.344 + store_reg( R_EAX, Rm );
2.345 + MEM_READ_LONG( R_ECX, R_EAX );
2.346 + write_sr( R_EAX );
2.347 }
2.348 break;
2.349 case 0x1:
2.350 @@ -1312,6 +1535,16 @@
2.351 case 0xD:
2.352 { /* SHLD Rm, Rn */
2.353 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
2.354 + load_reg( R_EAX, Rn );
2.355 + load_reg( R_ECX, Rm );
2.356 +
2.357 + MOV_r32_r32( R_EAX, R_EDX );
2.358 + SHL_r32_CL( R_EAX );
2.359 + NEG_r32( R_ECX );
2.360 + SHR_r32_CL( R_EDX );
2.361 + CMP_imm8s_r32( 0, R_ECX );
2.362 + CMOVAE_r32_r32( R_EDX, R_EAX );
2.363 + store_reg( R_EAX, Rn );
2.364 }
2.365 break;
2.366 case 0xE:
2.367 @@ -1321,7 +1554,8 @@
2.368 case 0x0:
2.369 { /* LDC Rm, SR */
2.370 uint32_t Rm = ((ir>>8)&0xF);
2.371 - /* We need to be a little careful about SR */
2.372 + load_reg( R_EAX, Rm );
2.373 + write_sr( R_EAX );
2.374 }
2.375 break;
2.376 case 0x1:
2.377 @@ -1590,6 +1824,10 @@
2.378 case 0xB:
2.379 { /* BF disp */
2.380 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
2.381 + CMP_imm8s_ebp( 0, R_T );
2.382 + JNE_rel8( 1 );
2.383 + exit_block( disp + pc + 4 );
2.384 + return 1;
2.385 }
2.386 break;
2.387 case 0xD:
2.388 @@ -1601,6 +1839,10 @@
2.389 case 0xF:
2.390 { /* BF/S disp */
2.391 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
2.392 + CMP_imm8s_ebp( 0, R_T );
2.393 + JNE_rel8( 1 );
2.394 + exit_block( disp + pc + 4 );
2.395 + sh4_x86.in_delay_slot = TRUE;
2.396 }
2.397 break;
2.398 default:
2.399 @@ -1619,6 +1861,7 @@
2.400 case 0xA:
2.401 { /* BRA disp */
2.402 int32_t disp = SIGNEXT12(ir&0xFFF)<<1;
2.403 + exit_block( disp + pc + 4 );
2.404 }
2.405 break;
2.406 case 0xB:
2.407 @@ -1697,6 +1940,9 @@
2.408 case 0x8:
2.409 { /* TST #imm, R0 */
2.410 uint32_t imm = (ir&0xFF);
2.411 + load_reg( R_EAX, 0 );
2.412 + TEST_imm32_r32( imm, R_EAX );
2.413 + SETE_t();
2.414 }
2.415 break;
2.416 case 0x9:
2.417 @@ -1726,6 +1972,12 @@
2.418 case 0xC:
2.419 { /* TST.B #imm, @(R0, GBR) */
2.420 uint32_t imm = (ir&0xFF);
2.421 + load_reg( R_EAX, 0);
2.422 + load_reg( R_ECX, R_GBR);
2.423 + ADD_r32_r32( R_EAX, R_ECX );
2.424 + MEM_READ_BYTE( R_ECX, R_EAX );
2.425 + TEST_imm8_r8( imm, R_EAX );
2.426 + SETE_t();
2.427 }
2.428 break;
2.429 case 0xD:
2.430 @@ -1960,6 +2212,7 @@
2.431 break;
2.432 }
2.433
2.434 + INC_r32(R_ESI);
2.435
2.436 return 0;
2.437 }
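The sh4_x86_add_backpatch()/sh4_x86_do_backpatch() pair above, together with the *_exit macros in x86op.h (below), form a one-pass relocation scheme: each conditional exit writes its EXIT_* stub offset straight into the jcc's rel32 field and records that field's address, and the end-of-block fix-up rebases every recorded field against the start of the exception jump block. A sketch of the arithmetic (names illustrative; each stub is a 5-byte PUSH imm32 plus a 2-byte JMP rel8, hence the EXIT_* spacing of 7):

    /* Rebase each recorded rel32: before the fix-up it holds the EXIT_*
     * offset (0, 7, 14, ...); afterwards it holds
     * EXIT_* + (stub_base - field_address), aiming the jump into the
     * stub block emitted by sh4_translate_end_block(). */
    static void backpatch_sketch( uint32_t **list, unsigned int n, uint8_t *stub_base )
    {
        unsigned int i;
        for( i = 0; i < n; i++ ) {
            *list[i] += (uint32_t)(stub_base - (uint8_t *)list[i]);
        }
    }
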
3.1 --- a/src/sh4/sh4x86.in Tue Sep 04 08:38:33 2007 +0000
3.2 +++ b/src/sh4/sh4x86.in Tue Sep 04 08:40:23 2007 +0000
3.3 @@ -1,5 +1,5 @@
3.4 /**
3.5 - * $Id: sh4x86.in,v 1.2 2007-08-28 08:46:14 nkeynes Exp $
3.6 + * $Id: sh4x86.in,v 1.3 2007-09-04 08:40:23 nkeynes Exp $
3.7 *
3.8 * SH4 => x86 translation. This version does no real optimization, it just
3.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
3.10 @@ -18,9 +18,73 @@
3.11 * GNU General Public License for more details.
3.12 */
3.13
3.14 -#include "sh4core.h"
3.15 -#include "sh4trans.h"
3.16 -#include "x86op.h"
3.17 +#include <assert.h>
3.18 +
3.19 +#include "sh4/sh4core.h"
3.20 +#include "sh4/sh4trans.h"
3.21 +#include "sh4/x86op.h"
3.22 +#include "clock.h"
3.23 +
3.24 +#define DEFAULT_BACKPATCH_SIZE 4096
3.25 +
3.26 +/**
3.27 + * Struct to manage internal translation state. This state is not saved -
3.28 + * it is only valid between calls to sh4_translate_begin_block() and
3.29 + * sh4_translate_end_block()
3.30 + */
3.31 +struct sh4_x86_state {
3.32 + gboolean in_delay_slot;
3.33 + gboolean priv_checked; /* true if we've already checked the cpu mode. */
3.34 + gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
3.35 +
3.36 + /* Allocated memory for the (block-wide) back-patch list */
3.37 + uint32_t **backpatch_list;
3.38 + uint32_t backpatch_posn;
3.39 + uint32_t backpatch_size;
3.40 +};
3.41 +
3.42 +#define EXIT_DATA_ADDR_READ 0
3.43 +#define EXIT_DATA_ADDR_WRITE 7
3.44 +#define EXIT_ILLEGAL 14
3.45 +#define EXIT_SLOT_ILLEGAL 21
3.46 +#define EXIT_FPU_DISABLED 28
3.47 +#define EXIT_SLOT_FPU_DISABLED 35
3.48 +
3.49 +static struct sh4_x86_state sh4_x86;
3.50 +
3.51 +void sh4_x86_init()
3.52 +{
3.53 + sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
3.54 + sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
3.55 +}
3.56 +
3.57 +
3.58 +static void sh4_x86_add_backpatch( uint8_t *ptr )
3.59 +{
3.60 + if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
3.61 + sh4_x86.backpatch_size <<= 1;
3.62 + sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
3.63 + assert( sh4_x86.backpatch_list != NULL );
3.64 + }
3.65 + sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
3.66 +}
3.67 +
3.68 +static void sh4_x86_do_backpatch( uint8_t *reloc_base )
3.69 +{
3.70 + unsigned int i;
3.71 + for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
3.72 + *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]));
3.73 + }
3.74 +}
3.75 +
3.76 +#ifndef NDEBUG
3.77 +#define MARK_JMP(x,n) uint8_t *_mark_jmp_##x = xlat_output + n
3.78 +#define CHECK_JMP(x) assert( _mark_jmp_##x == xlat_output )
3.79 +#else
3.80 +#define MARK_JMP(x,n)
3.81 +#define CHECK_JMP(x)
3.82 +#endif
3.83 +
3.84
3.85 /**
3.86 * Emit an instruction to load an SH4 reg into a real register
3.87 @@ -33,6 +97,36 @@
3.88 OP(REG_OFFSET(r[sh4reg]));
3.89 }
3.90
3.91 +/**
3.92 + * Load the SR register into an x86 register
3.93 + */
3.94 +static inline void read_sr( int x86reg )
3.95 +{
3.96 + MOV_ebp_r32( R_M, x86reg );
3.97 + SHL1_r32( x86reg );
3.98 + OR_ebp_r32( R_Q, x86reg );
3.99 + SHL_imm8_r32( 7, x86reg );
3.100 + OR_ebp_r32( R_S, x86reg );
3.101 + SHL1_r32( x86reg );
3.102 + OR_ebp_r32( R_T, x86reg );
3.103 + OR_ebp_r32( R_SR, x86reg );
3.104 +}
3.105 +
3.106 +static inline void write_sr( int x86reg )
3.107 +{
3.108 + TEST_imm32_r32( SR_M, x86reg );
3.109 + SETNE_ebp(R_M);
3.110 + TEST_imm32_r32( SR_Q, x86reg );
3.111 + SETNE_ebp(R_Q);
3.112 + TEST_imm32_r32( SR_S, x86reg );
3.113 + SETNE_ebp(R_S);
3.114 + TEST_imm32_r32( SR_T, x86reg );
3.115 + SETNE_ebp(R_T);
3.116 + AND_imm32_r32( SR_MQSTMASK, x86reg );
3.117 + MOV_r32_ebp( x86reg, R_SR );
3.118 +}
3.119 +
3.120 +
3.121 static inline void load_spreg( int x86reg, int regoffset )
3.122 {
3.123 /* mov [bp+n], reg */
3.124 @@ -73,8 +167,7 @@
3.125 static inline void call_func0( void *ptr )
3.126 {
3.127 load_imm32(R_EAX, (uint32_t)ptr);
3.128 - OP(0xFF);
3.129 - MODRM_rm32_r32(R_EAX, 2);
3.130 + CALL_r32(R_EAX);
3.131 }
3.132
3.133 static inline void call_func1( void *ptr, int arg1 )
3.134 @@ -92,6 +185,59 @@
3.135 ADD_imm8s_r32( -4, R_ESP );
3.136 }
3.137
3.138 +/* Exception checks - Note that all exception checks will clobber EAX */
3.139 +static void check_priv( )
3.140 +{
3.141 + if( !sh4_x86.priv_checked ) {
3.142 + sh4_x86.priv_checked = TRUE;
3.143 + load_spreg( R_EAX, R_SR );
3.144 + AND_imm32_r32( SR_MD, R_EAX );
3.145 + if( sh4_x86.in_delay_slot ) {
3.146 + JE_exit( EXIT_SLOT_ILLEGAL );
3.147 + } else {
3.148 + JE_exit( EXIT_ILLEGAL );
3.149 + }
3.150 + }
3.151 +}
3.152 +
3.153 +static void check_fpuen( )
3.154 +{
3.155 + if( !sh4_x86.fpuen_checked ) {
3.156 + sh4_x86.fpuen_checked = TRUE;
3.157 + load_spreg( R_EAX, R_SR );
3.158 + AND_imm32_r32( SR_FD, R_EAX );
3.159 + if( sh4_x86.in_delay_slot ) {
3.160 + JNE_exit(EXIT_SLOT_FPU_DISABLED);
3.161 + } else {
3.162 + JNE_exit(EXIT_FPU_DISABLED);
3.163 + }
3.164 + }
3.165 +}
3.166 +
3.167 +static void check_ralign16( int x86reg )
3.168 +{
3.169 + TEST_imm32_r32( 0x00000001, x86reg );
3.170 + JNE_exit(EXIT_DATA_ADDR_READ);
3.171 +}
3.172 +
3.173 +static void check_walign16( int x86reg )
3.174 +{
3.175 + TEST_imm32_r32( 0x00000001, x86reg );
3.176 + JNE_exit(EXIT_DATA_ADDR_WRITE);
3.177 +}
3.178 +
3.179 +static void check_ralign32( int x86reg )
3.180 +{
3.181 + TEST_imm32_r32( 0x00000003, x86reg );
3.182 + JNE_exit(EXIT_DATA_ADDR_READ);
3.183 +}
3.184 +static void check_walign32( int x86reg )
3.185 +{
3.186 + TEST_imm32_r32( 0x00000003, x86reg );
3.187 + JNE_exit(EXIT_DATA_ADDR_WRITE);
3.188 +}
3.189 +
3.190 +
3.191 #define UNDEF()
3.192 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
3.193 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
3.194 @@ -101,30 +247,83 @@
3.195 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
3.196 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
3.197
3.198 +#define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
3.199 +#define CHECKSLOTILLEGAL() if(sh4_x86.in_delay_slot) RAISE_EXCEPTION(EXC_SLOT_ILLEGAL)
3.200 +
3.201 +
3.202
3.203 /**
3.204 * Emit the 'start of block' assembly. Sets up the stack frame and save
3.205 * SI/DI as required
3.206 */
3.207 -void sh4_translate_begin_block() {
3.208 - /* push ebp */
3.209 - *xlat_output++ = 0x50 + R_EBP;
3.210 -
3.211 +void sh4_translate_begin_block()
3.212 +{
3.213 + PUSH_r32(R_EBP);
3.214 + PUSH_r32(R_ESI);
3.215 /* mov &sh4r, ebp */
3.216 load_imm32( R_EBP, (uint32_t)&sh4r );
3.217 + PUSH_r32(R_ESI);
3.218 +
3.219 + sh4_x86.in_delay_slot = FALSE;
3.220 + sh4_x86.priv_checked = FALSE;
3.221 + sh4_x86.fpuen_checked = FALSE;
3.222 + sh4_x86.backpatch_posn = 0;
3.223 +}
3.224
3.225 - /* load carry from SR */
3.226 +/**
3.227 + * Exit the block early (ie branch out), conditionally or otherwise
3.228 + */
3.229 +void exit_block( uint32_t pc )
3.230 +{
3.231 + load_imm32( R_ECX, pc );
3.232 + store_spreg( R_ECX, REG_OFFSET(pc) );
3.233 + MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
3.234 + load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
3.235 + MUL_r32( R_ESI );
3.236 + ADD_r32_r32( R_EAX, R_ECX );
3.237 + store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
3.238 + XOR_r32_r32( R_EAX, R_EAX );
3.239 + RET();
3.240 }
3.241
3.242 /**
3.243 * Flush any open regs back to memory, restore SI/DI/, update PC, etc
3.244 */
3.245 void sh4_translate_end_block( sh4addr_t pc ) {
3.246 - /* pop ebp */
3.247 - *xlat_output++ = 0x58 + R_EBP;
3.248 + assert( !sh4_x86.in_delay_slot ); // should never stop here
3.249 + // Normal termination - save PC, cycle count
3.250 + exit_block( pc );
3.251
3.252 - /* ret */
3.253 - *xlat_output++ = 0xC3;
3.254 + uint8_t *end_ptr = xlat_output;
3.255 + // Exception termination. Jump block for various exception codes:
3.256 + PUSH_imm32( EXC_DATA_ADDR_READ );
3.257 + JMP_rel8( 33 );
3.258 + PUSH_imm32( EXC_DATA_ADDR_WRITE );
3.259 + JMP_rel8( 26 );
3.260 + PUSH_imm32( EXC_ILLEGAL );
3.261 + JMP_rel8( 19 );
3.262 + PUSH_imm32( EXC_SLOT_ILLEGAL );
3.263 + JMP_rel8( 12 );
3.264 + PUSH_imm32( EXC_FPU_DISABLED );
3.265 + JMP_rel8( 5 );
3.266 + PUSH_imm32( EXC_SLOT_FPU_DISABLED );
3.267 + // target
3.268 + load_spreg( R_ECX, REG_OFFSET(pc) );
3.269 + ADD_r32_r32( R_ESI, R_ECX );
3.270 + ADD_r32_r32( R_ESI, R_ECX );
3.271 + store_spreg( R_ECX, REG_OFFSET(pc) );
3.272 + MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
3.273 + load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
3.274 + MUL_r32( R_ESI );
3.275 + ADD_r32_r32( R_EAX, R_ECX );
3.276 + store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
3.277 +
3.278 + load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
3.279 + CALL_r32( R_EAX ); // 2
3.280 + POP_r32(R_EBP);
3.281 + RET();
3.282 +
3.283 + sh4_x86_do_backpatch( end_ptr );
3.284 }
3.285
3.286 /**
3.287 @@ -138,7 +337,7 @@
3.288 uint32_t sh4_x86_translate_instruction( uint32_t pc )
3.289 {
3.290 uint16_t ir = sh4_read_word( pc );
3.291 -
3.292 +
3.293 %%
3.294 /* ALU operations */
3.295 ADD Rm, Rn {:
3.296 @@ -232,6 +431,18 @@
3.297 SETGE_t();
3.298 :}
3.299 CMP/STR Rm, Rn {:
3.300 + load_reg( R_EAX, Rm );
3.301 + load_reg( R_ECX, Rn );
3.302 + XOR_r32_r32( R_ECX, R_EAX );
3.303 + TEST_r8_r8( R_AL, R_AL );
3.304 + JE_rel8(13);
3.305 + TEST_r8_r8( R_AH, R_AH ); // 2
3.306 + JE_rel8(9);
3.307 + SHR_imm8_r32( 16, R_EAX ); // 3
3.308 + TEST_r8_r8( R_AL, R_AL ); // 2
3.309 + JE_rel8(2);
3.310 + TEST_r8_r8( R_AH, R_AH ); // 2
3.311 + SETE_t();
3.312 :}
3.313 DIV0S Rm, Rn {:
3.314 load_reg( R_EAX, Rm );
3.315 @@ -379,6 +590,16 @@
3.316 store_reg( R_EAX, Rn );
3.317 :}
3.318 SHLD Rm, Rn {:
3.319 + load_reg( R_EAX, Rn );
3.320 + load_reg( R_ECX, Rm );
3.321 +
3.322 + MOV_r32_r32( R_EAX, R_EDX );
3.323 + SHL_r32_CL( R_EAX );
3.324 + NEG_r32( R_ECX );
3.325 + SHR_r32_CL( R_EDX );
3.326 + CMP_imm8s_r32( 0, R_ECX );
3.327 + CMOVAE_r32_r32( R_EDX, R_EAX );
3.328 + store_reg( R_EAX, Rn );
3.329 :}
3.330 SHAL Rn {:
3.331 load_reg( R_EAX, Rn );
3.332 @@ -477,8 +698,19 @@
3.333 TEST_r32_r32( R_EAX, R_ECX );
3.334 SETE_t();
3.335 :}
3.336 -TST #imm, R0 {: :}
3.337 -TST.B #imm, @(R0, GBR) {: :}
3.338 +TST #imm, R0 {:
3.339 + load_reg( R_EAX, 0 );
3.340 + TEST_imm32_r32( imm, R_EAX );
3.341 + SETE_t();
3.342 +:}
3.343 +TST.B #imm, @(R0, GBR) {:
3.344 + load_reg( R_EAX, 0);
3.345 + load_reg( R_ECX, R_GBR);
3.346 + ADD_r32_r32( R_EAX, R_ECX );
3.347 + MEM_READ_BYTE( R_ECX, R_EAX );
3.348 + TEST_imm8_r8( imm, R_EAX );
3.349 + SETE_t();
3.350 +:}
3.351 XOR Rm, Rn {:
3.352 load_reg( R_EAX, Rm );
3.353 load_reg( R_ECX, Rn );
3.354 @@ -725,9 +957,21 @@
3.355 :}
3.356
3.357 /* Control transfer instructions */
3.358 -BF disp {: :}
3.359 -BF/S disp {: :}
3.360 -BRA disp {: :}
3.361 +BF disp {:
3.362 + CMP_imm8s_ebp( 0, R_T );
3.363 + JNE_rel8( 1 );
3.364 + exit_block( disp + pc + 4 );
3.365 + return 1;
3.366 +:}
3.367 +BF/S disp {:
3.368 + CMP_imm8s_ebp( 0, R_T );
3.369 + JNE_rel8( 1 );
3.370 + exit_block( disp + pc + 4 );
3.371 + sh4_x86.in_delay_slot = TRUE;
3.372 +:}
3.373 +BRA disp {:
3.374 + exit_block( disp + pc + 4 );
3.375 +:}
3.376 BRAF Rn {: :}
3.377 BSR disp {: :}
3.378 BSRF Rn {: :}
3.379 @@ -785,7 +1029,10 @@
3.380 FTRV XMTRX, FVn {: :}
3.381
3.382 /* Processor control instructions */
3.383 -LDC Rm, SR {: /* We need to be a little careful about SR */ :}
3.384 +LDC Rm, SR {:
3.385 + load_reg( R_EAX, Rm );
3.386 + write_sr( R_EAX );
3.387 +:}
3.388 LDC Rm, GBR {:
3.389 load_reg( R_EAX, Rm );
3.390 store_spreg( R_EAX, R_GBR );
3.391 @@ -819,7 +1066,13 @@
3.392 MEM_READ_LONG( R_ECX, R_EAX );
3.393 store_spreg( R_EAX, R_GBR );
3.394 :}
3.395 -LDC.L @Rm+, SR {:
3.396 +LDC.L @Rm+, SR {:
3.397 + load_reg( R_EAX, Rm );
3.398 + MOV_r32_r32( R_EAX, R_ECX );
3.399 + ADD_imm8s_r32( 4, R_EAX );
3.400 + store_reg( R_EAX, Rm );
3.401 + MEM_READ_LONG( R_ECX, R_EAX );
3.402 + write_sr( R_EAX );
3.403 :}
3.404 LDC.L @Rm+, VBR {:
3.405 load_reg( R_EAX, Rm );
3.406 @@ -929,7 +1182,9 @@
3.407 OCBWB @Rn {: :}
3.408 PREF @Rn {: :}
3.409 SLEEP {: :}
3.410 - STC SR, Rn {: /* TODO */
3.411 + STC SR, Rn {:
3.412 + read_sr( R_EAX );
3.413 + store_reg( R_EAX, Rn );
3.414 :}
3.415 STC GBR, Rn {:
3.416 load_spreg( R_EAX, R_GBR );
3.417 @@ -955,9 +1210,14 @@
3.418 load_spreg( R_EAX, R_DBR );
3.419 store_reg( R_EAX, Rn );
3.420 :}
3.421 - STC Rm_BANK, Rn {: /* TODO */
3.422 +STC Rm_BANK, Rn {: /* TODO */
3.423 :}
3.424 - STC.L SR, @-Rn {: /* TODO */
3.425 +STC.L SR, @-Rn {: /* TODO */
3.426 + load_reg( R_ECX, Rn );
3.427 + ADD_imm8s_r32( -4, Rn );
3.428 + store_reg( R_ECX, Rn );
3.429 + read_sr( R_EAX );
3.430 + MEM_WRITE_LONG( R_ECX, R_EAX );
3.431 :}
3.432 STC.L VBR, @-Rn {:
3.433 load_reg( R_ECX, Rn );
3.434 @@ -1060,6 +1320,7 @@
3.435
3.436 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
3.437 %%
3.438 + INC_r32(R_ESI);
3.439
3.440 return 0;
3.441 }
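Of the opcodes filled in here, CMP/STR is the least self-explanatory: the SH4 defines it as setting T when any of the four corresponding bytes of Rm and Rn are equal, which the emitted XOR / TEST r8,r8 / JE chain checks one byte at a time. The same predicate in plain C (helper name illustrative):

    /* T = 1 iff some byte of rm equals the corresponding byte of rn.
     * XORing the operands turns equal bytes into 0x00; those zero bytes
     * are what the TEST/JE chain looks for in AL, AH and, after the
     * SHR 16, the upper two bytes. */
    static int cmp_str_sketch( uint32_t rm, uint32_t rn )
    {
        uint32_t diff = rm ^ rn;
        return (diff & 0x000000FF) == 0 ||
               (diff & 0x0000FF00) == 0 ||
               (diff & 0x00FF0000) == 0 ||
               (diff & 0xFF000000) == 0;
    }
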
4.1 --- a/src/sh4/x86op.h Tue Sep 04 08:38:33 2007 +0000
4.2 +++ b/src/sh4/x86op.h Tue Sep 04 08:40:23 2007 +0000
4.3 @@ -1,5 +1,5 @@
4.4 /**
4.5 - * $Id: x86op.h,v 1.2 2007-08-28 08:46:14 nkeynes Exp $
4.6 + * $Id: x86op.h,v 1.3 2007-09-04 08:40:23 nkeynes Exp $
4.7 *
4.8 * Definitions of x86 opcodes for use by the translator.
4.9 *
4.10 @@ -49,6 +49,7 @@
4.11 #define R_Q REG_OFFSET(q)
4.12 #define R_S REG_OFFSET(s)
4.13 #define R_M REG_OFFSET(m)
4.14 +#define R_SR REG_OFFSET(sr)
4.15 #define R_GBR REG_OFFSET(gbr)
4.16 #define R_SSR REG_OFFSET(ssr)
4.17 #define R_SPC REG_OFFSET(spc)
4.18 @@ -76,29 +77,45 @@
4.19 /* ebp+disp32 modrm form */
4.20 #define MODRM_r32_ebp32(r1,disp) OP(0x85 | (r1<<3)); OP32(disp)
4.21
4.22 +#define MODRM_r32_ebp(r1,disp) if(disp>127){ MODRM_r32_ebp32(r1,disp);}else{ MODRM_r32_ebp8(r1,(unsigned char)disp); }
4.23 +
4.24 /* Major opcodes */
4.25 #define ADD_r32_r32(r1,r2) OP(0x03); MODRM_rm32_r32(r1,r2)
4.26 #define ADD_imm8s_r32(imm,r1) OP(0x83); MODRM_rm32_r32(r1, 0); OP(imm)
4.27 +#define ADD_imm32_r32(imm32,r1) OP(0x81); MODRM_rm32_r32(r1,0); OP32(imm32)
4.28 #define ADC_r32_r32(r1,r2) OP(0x13); MODRM_rm32_r32(r1,r2)
4.29 #define AND_r32_r32(r1,r2) OP(0x23); MODRM_rm32_r32(r1,r2)
4.30 +#define AND_imm8_r8(imm8, r1) OP(0x80); MODRM_rm32_r32(r1,4); OP(imm8)
4.31 #define AND_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,4); OP32(imm)
4.32 +#define CALL_r32(r1) OP(0xFF); MODRM_rm32_r32(r1,2)
4.33 #define CMC() OP(0xF5)
4.34 #define CMP_r32_r32(r1,r2) OP(0x3B); MODRM_rm32_r32(r1,r2)
4.35 +#define CMP_imm32_r32(imm32, r1) OP(0x81); MODRM_rm32_r32(r1,7); OP32(imm32)
4.36 #define CMP_imm8s_r32(imm,r1) OP(0x83); MODRM_rm32_r32(r1,7); OP(imm)
4.37 +#define CMP_imm8s_ebp(imm,disp) OP(0x83); MODRM_r32_ebp(7,disp) OP(imm)
4.38 +#define DEC_r32(r1) OP(0x48+r1)
4.39 +#define IMUL_r32(r1) OP(0xF7); MODRM_rm32_r32(r1,5)
4.40 +#define INC_r32(r1) OP(0x40+r1)
4.41 #define JMP_rel8(rel) OP(0xEB); OP(rel)
4.42 -#define MOV_r32_ebp8(r1,disp) OP(0x89); MODRM_r32_ebp8(r1,disp)
4.43 +#define MOV_r32_r32(r1,r2) OP(0x89); MODRM_r32_rm32(r1,r2)
4.44 +#define MOV_r32_ebp(r1,disp) OP(0x89); MODRM_r32_ebp(r1,disp)
4.45 #define MOV_r32_ebp32(r1,disp) OP(0x89); MODRM_r32_ebp32(r1,disp)
4.46 -#define MOV_ebp8_r32(r1,disp) OP(0x8B); MODRM_r32_ebp8(r1,disp)
4.47 -#define MOV_ebp32_r32(r1,disp) OP(0x8B); MODRM_r32_ebp32(r1,disp)
4.48 +#define MOV_moff32_EAX(off) OP(0xA1); OP32(off)
4.49 +#define MOV_ebp_r32(disp, r1) OP(0x8B); MODRM_r32_ebp(r1,disp)
4.50 #define MOVSX_r8_r32(r1,r2) OP(0x0F); OP(0xBE); MODRM_rm32_r32(r1,r2)
4.51 #define MOVSX_r16_r32(r1,r2) OP(0x0F); OP(0xBF); MODRM_rm32_r32(r1,r2)
4.52 #define MOVZX_r8_r32(r1,r2) OP(0x0F); OP(0xB6); MODRM_rm32_r32(r1,r2)
4.53 #define MOVZX_r16_r32(r1,r2) OP(0x0F); OP(0xB7); MODRM_rm32_r32(r1,r2)
4.54 +#define MUL_r32(r1) OP(0xF7); MODRM_rm32_r32(r1,4)
4.55 #define NEG_r32(r1) OP(0xF7); MODRM_rm32_r32(r1,3)
4.56 #define NOT_r32(r1) OP(0xF7); MODRM_rm32_r32(r1,2)
4.57 #define OR_r32_r32(r1,r2) OP(0x0B); MODRM_rm32_r32(r1,r2)
4.58 +#define OR_imm8_r8(imm,r1) OP(0x80); MODRM_rm32_r32(r1,1)
4.59 #define OR_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,1); OP32(imm)
4.60 +#define OR_ebp_r32(disp,r1) OP(0x0B); MODRM_r32_ebp(r1,disp)
4.61 +#define POP_r32(r1) OP(0x58 + r1)
4.62 #define PUSH_r32(r1) OP(0x50 + r1)
4.63 +#define PUSH_imm32(imm) OP(0x68); OP32(imm)
4.64 #define RCL1_r32(r1) OP(0xD1); MODRM_rm32_r32(r1,2)
4.65 #define RCR1_r32(r1) OP(0xD1); MODRM_rm32_r32(r1,3)
4.66 #define RET() OP(0xC3)
4.67 @@ -106,29 +123,23 @@
4.68 #define ROR1_r32(r1) OP(0xD1); MODRM_rm32_r32(r1,1)
4.69 #define SAR1_r32(r1) OP(0xD1); MODRM_rm32_r32(r1,7)
4.70 #define SAR_imm8_r32(imm,r1) OP(0xC1); MODRM_rm32_r32(r1,7); OP(imm)
4.71 +#define SAR_r32_CL(r1) OP(0xD3); MODRM_rm32_r32(r1,7)
4.72 #define SBB_r32_r32(r1,r2) OP(0x1B); MODRM_rm32_r32(r1,r2)
4.73 #define SHL1_r32(r1) OP(0xD1); MODRM_rm32_r32(r1,4)
4.74 +#define SHL_r32_CL(r1) OP(0xD3); MODRM_rm32_r32(r1,4)
4.75 #define SHL_imm8_r32(imm,r1) OP(0xC1); MODRM_rm32_r32(r1,4); OP(imm)
4.76 #define SHR1_r32(r1) OP(0xD1); MODRM_rm32_r32(r1,5)
4.77 +#define SHR_r32_CL(r1) OP(0xD3); MODRM_rm32_r32(r1,5)
4.78 #define SHR_imm8_r32(imm,r1) OP(0xC1); MODRM_rm32_r32(r1,5); OP(imm)
4.79 #define SUB_r32_r32(r1,r2) OP(0x2B); MODRM_rm32_r32(r1,r2)
4.80 +#define TEST_r8_r8(r1,r2) OP(0x84); MODRM_r32_rm32(r1,r2)
4.81 #define TEST_r32_r32(r1,r2) OP(0x85); MODRM_rm32_r32(r1,r2)
4.82 +#define TEST_imm8_r8(imm8,r1) OP(0xF6); MODRM_rm32_r32(r1,0); OP(imm8)
4.83 #define TEST_imm32_r32(imm,r1) OP(0xF7); MODRM_rm32_r32(r1,0); OP32(imm)
4.84 +#define XCHG_r8_r8(r1,r2) OP(0x86); MODRM_rm32_r32(r1,r2)
4.85 #define XOR_r32_r32(r1,r2) OP(0x33); MODRM_rm32_r32(r1,r2)
4.86 #define XOR_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,6); OP32(imm)
4.87
4.88 -#define ADD_imm32_r32(imm32,r1) OP(0x81); MODRM_rm32_r32(r1,0); OP32(imm32)
4.89 -#define AND_imm8_r8(imm8, r1) OP(0x80); MODRM_rm32_r32(r1,4); OP(imm8)
4.90 -#define CMP_imm32_r32(imm32, r1) OP(0x81); MODRM_rm32_r32(r1,7); OP32(imm32)
4.91 -#define MOV_r32_r32(r1,r2) OP(0x89); MODRM_r32_rm32(r1,r2)
4.92 -#define MUL_r32(r1) OP(0xF7); MODRM_rm32_r32(r1,4)
4.93 -#define IMUL_r32(r1) OP(0xF7); MODRM_rm32_r32(r1,5)
4.94 -#define OR_imm8_r8(imm,r1) OP(0x80); MODRM_rm32_r32(r1,1)
4.95 -#define TEST_r8_r8(r1,r2) OP(0x84); MODRM_r32_rm32(r1,r2)
4.96 -#define SAR_r32_CL(r1) OP(0xD3); MODRM_rm32_r32(r1,7)
4.97 -#define SHR_r32_CL(r1) OP(0xD3); MODRM_rm32_r32(r1,5)
4.98 -#define SHL_r32_CL(r1) OP(0xD3); MODRM_rm32_r32(r1,4)
4.99 -#define XCHG_r8_r8(r1,r2) OP(0x86); MODRM_rm32_r32(r1,r2)
4.100
4.101 /* Conditional branches */
4.102 #define JE_rel8(rel) OP(0x74); OP(rel)
4.103 @@ -138,8 +149,6 @@
4.104 #define JGE_rel8(rel) OP(0x7D); OP(rel)
4.105 #define JC_rel8(rel) OP(0x72); OP(rel)
4.106 #define JO_rel8(rel) OP(0x70); OP(rel)
4.107 -
4.108 -/* Negated forms */
4.109 #define JNE_rel8(rel) OP(0x75); OP(rel)
4.110 #define JNA_rel8(rel) OP(0x76); OP(rel)
4.111 #define JNAE_rel8(rel) OP(0x72); OP(rel)
4.112 @@ -148,24 +157,59 @@
4.113 #define JNC_rel8(rel) OP(0x73); OP(rel)
4.114 #define JNO_rel8(rel) OP(0x71); OP(rel)
4.115
4.116 +/* 32-bit long forms w/ backpatching to an exit routine */
4.117 +#define JE_exit(rel) OP(0x0F); OP(0x84); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.118 +#define JA_exit(rel) OP(0x0F); OP(0x87); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.119 +#define JAE_exit(rel) OP(0x0F); OP(0x83); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.120 +#define JG_exit(rel) OP(0x0F); OP(0x8F); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.121 +#define JGE_exit(rel) OP(0x0F); OP(0x8D); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.122 +#define JC_exit(rel) OP(0x0F); OP(0x82); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.123 +#define JO_exit(rel) OP(0x0F); OP(0x80); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.124 +#define JNE_exit(rel) OP(0x0F); OP(0x85); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.125 +#define JNA_exit(rel) OP(0x0F); OP(0x86); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.126 +#define JNAE_exit(rel) OP(0x0F);OP(0x82); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.127 +#define JNG_exit(rel) OP(0x0F); OP(0x8E); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.128 +#define JNGE_exit(rel) OP(0x0F);OP(0x8C); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.129 +#define JNC_exit(rel) OP(0x0F); OP(0x83); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.130 +#define JNO_exit(rel) OP(0x0F); OP(0x81); sh4_x86_add_backpatch(xlat_output); OP32(rel)
4.131 +
4.132 +
4.133 +/* Conditional moves ebp-rel */
4.134 +#define CMOVE_r32_r32(r1,r2) OP(0x0F); OP(0x44); MODRM_rm32_r32(r1,r2)
4.135 +#define CMOVA_r32_r32(r1,r2) OP(0x0F); OP(0x47); MODRM_rm32_r32(r1,r2)
4.136 +#define CMOVAE_r32_r32(r1,r2) OP(0x0F); OP(0x43); MODRM_rm32_r32(r1,r2)
4.137 +#define CMOVG_r32_r32(r1,r2) OP(0x0F); OP(0x4F); MODRM_rm32_r32(r1,r2)
4.138 +#define CMOVGE_r32_r32(r1,r2) OP(0x0F); OP(0x4D); MODRM_rm32_r32(r1,r2)
4.139 +#define CMOVC_r32_r32(r1,r2) OP(0x0F); OP(0x42); MODRM_rm32_r32(r1,r2)
4.140 +#define CMOVO_r32_r32(r1,r2) OP(0x0F); OP(0x40); MODRM_rm32_r32(r1,r2)
4.141 +
4.142 +
4.143 /* Conditional setcc - writeback to sh4r.t */
4.144 -#define SETE_t() OP(0x0F); OP(0x94); MODRM_r32_ebp8(0, R_T);
4.145 -#define SETA_t() OP(0x0F); OP(0x97); MODRM_r32_ebp8(0, R_T);
4.146 -#define SETAE_t() OP(0x0F); OP(0x93); MODRM_r32_ebp8(0, R_T);
4.147 -#define SETG_t() OP(0x0F); OP(0x9F); MODRM_r32_ebp8(0, R_T);
4.148 -#define SETGE_t() OP(0x0F); OP(0x9D); MODRM_r32_ebp8(0, R_T);
4.149 -#define SETC_t() OP(0x0F); OP(0x92); MODRM_r32_ebp8(0, R_T);
4.150 -#define SETO_t() OP(0x0F); OP(0x90); MODRM_r32_ebp8(0, R_T);
4.151 +#define SETE_ebp(disp) OP(0x0F); OP(0x94); MODRM_r32_ebp(0, disp);
4.152 +#define SETA_ebp(disp) OP(0x0F); OP(0x97); MODRM_r32_ebp(0, disp);
4.153 +#define SETAE_ebp(disp) OP(0x0F); OP(0x93); MODRM_r32_ebp(0, disp);
4.154 +#define SETG_ebp(disp) OP(0x0F); OP(0x9F); MODRM_r32_ebp(0, disp);
4.155 +#define SETGE_ebp(disp) OP(0x0F); OP(0x9D); MODRM_r32_ebp(0, disp);
4.156 +#define SETC_ebp(disp) OP(0x0F); OP(0x92); MODRM_r32_ebp(0, disp);
4.157 +#define SETO_ebp(disp) OP(0x0F); OP(0x90); MODRM_r32_ebp(0, disp);
4.158
4.159 -#define SETNE_t() OP(0x0F); OP(0x95); MODRM_r32_ebp8(0, R_T);
4.160 -#define SETNA_t() OP(0x0F); OP(0x96); MODRM_r32_ebp8(0, R_T);
4.161 -#define SETNAE_t() OP(0x0F); OP(0x92); MODRM_r32_ebp8(0, R_T);
4.162 -#define SETNG_t() OP(0x0F); OP(0x9E); MODRM_r32_ebp8(0, R_T);
4.163 -#define SETNGE_t() OP(0x0F); OP(0x9C); MODRM_r32_ebp8(0, R_T);
4.164 -#define SETNC_t() OP(0x0F); OP(0x93); MODRM_r32_ebp8(0, R_T);
4.165 -#define SETNO_t() OP(0x0F); OP(0x91); MODRM_r32_ebp8(0, R_T);
4.166 +#define SETNE_ebp(disp) OP(0x0F); OP(0x95); MODRM_r32_ebp(0, disp);
4.167 +#define SETNA_ebp(disp) OP(0x0F); OP(0x96); MODRM_r32_ebp(0, disp);
4.168 +#define SETNAE_ebp(disp) OP(0x0F); OP(0x92); MODRM_r32_ebp(0, disp);
4.169 +#define SETNG_ebp(disp) OP(0x0F); OP(0x9E); MODRM_r32_ebp(0, disp);
4.170 +#define SETNGE_ebp(disp) OP(0x0F); OP(0x9C); MODRM_r32_ebp(0, disp);
4.171 +#define SETNC_ebp(disp) OP(0x0F); OP(0x93); MODRM_r32_ebp(0, disp);
4.172 +#define SETNO_ebp(disp) OP(0x0F); OP(0x91); MODRM_r32_ebp(0, disp);
4.173 +
4.174 +#define SETE_t() SETE_ebp(R_T)
4.175 +#define SETA_t() SETA_ebp(R_T)
4.176 +#define SETAE_t() SETAE_ebp(R_T)
4.177 +#define SETG_t() SETG_ebp(R_T)
4.178 +#define SETGE_t() SETGE_ebp(R_T)
4.179 +#define SETC_t() SETC_ebp(R_T)
4.180 +#define SETO_t() SETO_ebp(R_T)
4.181
4.182 /* Pseudo-op Load carry from T: CMP [EBP+t], #01 ; CMC */
4.183 -#define LDC_t() OP(0x83); MODRM_r32_ebp8(7,R_T); OP(0x01); CMC()
4.184 +#define LDC_t() OP(0x83); MODRM_r32_ebp(7,R_T); OP(0x01); CMC()
4.185
4.186 #endif /* !__lxdream_x86op_H */
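
The new R_SR offset and the ebp-relative SETcc forms above exist to support the read_sr()/write_sr() helpers added in sh4x86.c: sh4r stores M, Q, S and T as separate words, so SR has to be folded together on read and scattered again on write. In C terms, assuming the sh4r field names implied by the REG_OFFSET definitions (m, q, s, t, sr):

    /* Fold the split flags into their architectural SR positions
     * (M = bit 9, Q = bit 8, S = bit 1, T = bit 0) on read; split them
     * back out on write, keeping only the masked bits in sh4r.sr. */
    static uint32_t read_sr_sketch( void )
    {
        return (sh4r.m << 9) | (sh4r.q << 8) | (sh4r.s << 1)
             | sh4r.t | sh4r.sr;
    }

    static void write_sr_sketch( uint32_t val )
    {
        sh4r.m = (val & SR_M) != 0;
        sh4r.q = (val & SR_Q) != 0;
        sh4r.s = (val & SR_S) != 0;
        sh4r.t = (val & SR_T) != 0;
        sh4r.sr = val & SR_MQSTMASK;
    }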