filename     src/sh4/sh4x86.c
changeset    368:36fac4c42322
prev         361:be3de4ecd954
next         374:8f80a795513e
author       nkeynes
date         Tue Sep 04 08:40:23 2007 +0000
permissions  -rw-r--r--
last change  More translator WIP - blocks are approaching something sane
1.1 --- a/src/sh4/sh4x86.c Tue Aug 28 08:46:14 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Tue Sep 04 08:40:23 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.c,v 1.2 2007-08-28 08:46:14 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.3 2007-09-04 08:40:23 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -18,9 +18,73 @@
1.11 * GNU General Public License for more details.
1.12 */
1.13
1.14 -#include "sh4core.h"
1.15 -#include "sh4trans.h"
1.16 -#include "x86op.h"
1.17 +#include <assert.h>
1.18 +
1.19 +#include "sh4/sh4core.h"
1.20 +#include "sh4/sh4trans.h"
1.21 +#include "sh4/x86op.h"
1.22 +#include "clock.h"
1.23 +
1.24 +#define DEFAULT_BACKPATCH_SIZE 4096
1.25 +
1.26 +/**
1.27 + * Struct to manage internal translation state. This state is not saved -
1.28 + * it is only valid between calls to sh4_translate_begin_block() and
1.29 + * sh4_translate_end_block()
1.30 + */
1.31 +struct sh4_x86_state {
1.32 + gboolean in_delay_slot;
1.33 + gboolean priv_checked; /* true if we've already checked the cpu mode. */
1.34 + gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
1.35 +
1.36 + /* Allocated memory for the (block-wide) back-patch list */
1.37 + uint32_t **backpatch_list;
1.38 + uint32_t backpatch_posn;
1.39 + uint32_t backpatch_size;
1.40 +};
1.41 +
1.42 +#define EXIT_DATA_ADDR_READ 0
1.43 +#define EXIT_DATA_ADDR_WRITE 7
1.44 +#define EXIT_ILLEGAL 14
1.45 +#define EXIT_SLOT_ILLEGAL 21
1.46 +#define EXIT_FPU_DISABLED 28
1.47 +#define EXIT_SLOT_FPU_DISABLED 35
1.48 +
1.49 +static struct sh4_x86_state sh4_x86;
1.50 +
1.51 +void sh4_x86_init()
1.52 +{
1.53 + sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
1.54 + sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
1.55 +}
1.56 +
1.57 +
1.58 +static void sh4_x86_add_backpatch( uint8_t *ptr )
1.59 +{
1.60 + if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
1.61 + sh4_x86.backpatch_size <<= 1;
1.62 + sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
1.63 + assert( sh4_x86.backpatch_list != NULL );
1.64 + }
1.65 + sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
1.66 +}
1.67 +
1.68 +static void sh4_x86_do_backpatch( uint8_t *reloc_base )
1.69 +{
1.70 + unsigned int i;
1.71 + for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
1.72 + *sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]));
1.73 + }
1.74 +}
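The JE_exit/JNE_exit macros used by the checks further down are not shown in this diff, but from sh4_x86_do_backpatch they evidently emit a conditional jump whose 32-bit displacement slot starts out holding a value relative to the relocation base; sh4_x86_add_backpatch records each slot's address, and the fixup pass adjusts each slot by (reloc_base minus the slot's own address) once the exception stubs have been emitted. A minimal sketch of the lifecycle in plain C (bp and backpatch_add are illustrative names, not the source's API):

    /* During translation: remember where a 32-bit displacement was emitted. */
    backpatch_add( &bp, xlat_output - 4 );

    /* At end of block: reloc_base points at the exception stubs. */
    for( i = 0; i < bp.posn; i++ )
        *bp.list[i] += reloc_base - (uint8_t *)bp.list[i];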
1.75 +
1.76 +#ifndef NDEBUG
1.77 +#define MARK_JMP(x,n) uint8_t *_mark_jmp_##x = xlat_output + n
1.78 +#define CHECK_JMP(x) assert( _mark_jmp_##x == xlat_output )
1.79 +#else
1.80 +#define MARK_JMP(x,n)
1.81 +#define CHECK_JMP(x)
1.82 +#endif
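These debug-only guards exist for the hand-counted rel8 displacements used below (for instance in the CMP/STR sequence): MARK_JMP records where a short jump is expected to land, and CHECK_JMP asserts that xlat_output actually reached that point. Hypothetical usage, not present in this diff:

    JE_rel8( 13 ); MARK_JMP( done, 13 );
    /* ... emit exactly 13 bytes of code ... */
    CHECK_JMP( done );   /* fires if the byte count above drifts */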
1.83 +
1.84
1.85 /**
1.86 * Emit an instruction to load an SH4 reg into a real register
1.87 @@ -33,6 +97,36 @@
1.88 OP(REG_OFFSET(r[sh4reg]));
1.89 }
1.90
1.91 +/**
1.92 + * Load the SR register into an x86 register
1.93 + */
1.94 +static inline void read_sr( int x86reg )
1.95 +{
1.96 + MOV_ebp_r32( R_M, x86reg );
1.97 + SHL1_r32( x86reg );
1.98 + OR_ebp_r32( R_Q, x86reg );
1.99 + SHL_imm8_r32( 7, x86reg );
1.100 + OR_ebp_r32( R_S, x86reg );
1.101 + SHL1_r32( x86reg );
1.102 + OR_ebp_r32( R_T, x86reg );
1.103 + OR_ebp_r32( R_SR, x86reg );
1.104 +}
1.105 +
1.106 +static inline void write_sr( int x86reg )
1.107 +{
1.108 + TEST_imm32_r32( SR_M, x86reg );
1.109 + SETNE_ebp(R_M);
1.110 + TEST_imm32_r32( SR_Q, x86reg );
1.111 + SETNE_ebp(R_Q);
1.112 + TEST_imm32_r32( SR_S, x86reg );
1.113 + SETNE_ebp(R_S);
1.114 + TEST_imm32_r32( SR_T, x86reg );
1.115 + SETNE_ebp(R_T);
1.116 + AND_imm32_r32( SR_MQSTMASK, x86reg );
1.117 + MOV_r32_ebp( x86reg, R_SR );
1.118 +}
1.119 +
1.120 +
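sh4r evidently keeps SR's M, Q, S and T bits in separate fields (at offsets R_M, R_Q, R_S, R_T) so they can be written with SETcc and tested without masking; read_sr reassembles the architectural value (M = bit 9, Q = bit 8, S = bit 1, T = bit 0) and write_sr scatters it back out. The same logic in C, assuming hypothetical 0/1-valued fields m, q, s, t and an sh4r.sr pre-masked with SR_MQSTMASK:

    uint32_t read_sr_value( void )
    {
        return ((((((sh4r.m << 1) | sh4r.q) << 7) | sh4r.s) << 1) | sh4r.t)
               | sh4r.sr;                  /* remaining SR bits */
    }

    void write_sr_value( uint32_t val )
    {
        sh4r.m  = (val & SR_M) != 0;
        sh4r.q  = (val & SR_Q) != 0;
        sh4r.s  = (val & SR_S) != 0;
        sh4r.t  = (val & SR_T) != 0;
        sh4r.sr = val & SR_MQSTMASK;
    }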
1.121 static inline void load_spreg( int x86reg, int regoffset )
1.122 {
1.123 /* mov [bp+n], reg */
1.124 @@ -73,8 +167,7 @@
1.125 static inline void call_func0( void *ptr )
1.126 {
1.127 load_imm32(R_EAX, (uint32_t)ptr);
1.128 - OP(0xFF);
1.129 - MODRM_rm32_r32(R_EAX, 2);
1.130 + CALL_r32(R_EAX);
1.131 }
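The two raw bytes removed here (0xFF with a /2 ModRM) are the x86 indirect near-call encoding, so the new CALL_r32 macro in x86op.h presumably expands to exactly what it replaces:

    /* Presumed definition, matching the bytes removed above: */
    #define CALL_r32(r1)  OP(0xFF); MODRM_rm32_r32(r1, 2)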
1.132
1.133 static inline void call_func1( void *ptr, int arg1 )
1.134 @@ -92,6 +185,59 @@
1.135 ADD_imm8s_r32( -4, R_ESP );
1.136 }
1.137
1.138 +/* Exception checks - Note that all exception checks will clobber EAX */
1.139 +static void check_priv( )
1.140 +{
1.141 + if( !sh4_x86.priv_checked ) {
1.142 + sh4_x86.priv_checked = TRUE;
1.143 + load_spreg( R_EAX, R_SR );
1.144 + AND_imm32_r32( SR_MD, R_EAX );
1.145 + if( sh4_x86.in_delay_slot ) {
1.146 + JE_exit( EXIT_SLOT_ILLEGAL );
1.147 + } else {
1.148 + JE_exit( EXIT_ILLEGAL );
1.149 + }
1.150 + }
1.151 +}
1.152 +
1.153 +static void check_fpuen( )
1.154 +{
1.155 + if( !sh4_x86.fpuen_checked ) {
1.156 + sh4_x86.fpuen_checked = TRUE;
1.157 + load_spreg( R_EAX, R_SR );
1.158 + AND_imm32_r32( SR_FD, R_EAX );
1.159 + if( sh4_x86.in_delay_slot ) {
1.160 + JNE_exit(EXIT_SLOT_FPU_DISABLED);
1.161 + } else {
1.162 + JNE_exit(EXIT_FPU_DISABLED);
1.163 + }
1.164 + }
1.165 +}
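The priv_checked/fpuen_checked flags ensure each test is emitted at most once per translated block; the test itself still runs every time the block executes. What the emitted code checks, in C terms (SR_MD is the privileged-mode bit, SR_FD the FPU-disable bit):

    /* check_priv: taken when the CPU is in user mode */
    if( (sh4r.sr & SR_MD) == 0 )
        sh4_raise_exception( in_delay_slot ? EXC_SLOT_ILLEGAL : EXC_ILLEGAL );

    /* check_fpuen: taken when FPU access is disabled */
    if( (sh4r.sr & SR_FD) != 0 )
        sh4_raise_exception( in_delay_slot ? EXC_SLOT_FPU_DISABLED
                                           : EXC_FPU_DISABLED );

The exception is actually raised via the EXIT_* stubs that sh4_translate_end_block emits; see below.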
1.166 +
1.167 +static void check_ralign16( int x86reg )
1.168 +{
1.169 + TEST_imm32_r32( 0x00000001, x86reg );
1.170 + JNE_exit(EXIT_DATA_ADDR_READ);
1.171 +}
1.172 +
1.173 +static void check_walign16( int x86reg )
1.174 +{
1.175 + TEST_imm32_r32( 0x00000001, x86reg );
1.176 + JNE_exit(EXIT_DATA_ADDR_WRITE);
1.177 +}
1.178 +
1.179 +static void check_ralign32( int x86reg )
1.180 +{
1.181 + TEST_imm32_r32( 0x00000003, x86reg );
1.182 + JNE_exit(EXIT_DATA_ADDR_READ);
1.183 +}
1.184 +static void check_walign32( int x86reg )
1.185 +{
1.186 + TEST_imm32_r32( 0x00000003, x86reg );
1.187 + JNE_exit(EXIT_DATA_ADDR_WRITE);
1.188 +}
1.189 +
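The alignment guards test the low address bits (0x1 for 16-bit accesses, 0x3 for 32-bit) and branch to the address-error stubs on failure. In C terms, for the 32-bit read case:

    /* check_ralign32 in C terms (sketch): */
    if( (addr & 0x00000003) != 0 )
        sh4_raise_exception( EXC_DATA_ADDR_READ );   /* _WRITE for stores */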
1.190 +
1.191 #define UNDEF()
1.192 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
1.193 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
1.194 @@ -101,30 +247,83 @@
1.195 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
1.196 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
1.197
1.198 +#define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
1.199 +#define CHECKSLOTILLEGAL() if(sh4_x86.in_delay_slot) RAISE_EXCEPTION(EXC_SLOT_ILLEGAL)
1.200 +
1.201 +
1.202
1.203 /**
1.204 * Emit the 'start of block' assembly. Sets up the stack frame and save
1.205 * SI/DI as required
1.206 */
1.207 -void sh4_translate_begin_block() {
1.208 - /* push ebp */
1.209 - *xlat_output++ = 0x50 + R_EBP;
1.210 -
1.211 +void sh4_translate_begin_block()
1.212 +{
1.213 + PUSH_r32(R_EBP);
1.214 + PUSH_r32(R_ESI);
1.215 /* mov &sh4r, ebp */
1.216 load_imm32( R_EBP, (uint32_t)&sh4r );
1.217 + PUSH_r32(R_ESI);
1.218 +
1.219 + sh4_x86.in_delay_slot = FALSE;
1.220 + sh4_x86.priv_checked = FALSE;
1.221 + sh4_x86.fpuen_checked = FALSE;
1.222 + sh4_x86.backpatch_posn = 0;
1.223 +}
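The prologue pins %ebp at &sh4r for the whole block, which is what lets load_reg/load_spreg address any SH4 register as a single disp(%ebp) operand. %esi serves as the block's instruction counter (see the INC_r32(R_ESI) at the end of sh4_x86_translate_instruction); note that in this changeset %esi is pushed twice, nothing shown here zeroes it, and the normal exit path (exit_block below) does not yet pop the saved registers, all consistent with the commit's WIP description.

    /* Emitted prologue (sketch):
     *   push %ebp
     *   push %esi
     *   mov  $&sh4r, %ebp     ; sh4r fields become disp(%ebp) from here on
     *   push %esi             ; duplicated in this changeset
     */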
1.224
1.225 - /* load carry from SR */
1.226 +/**
1.227 + * Exit the block early (ie branch out), conditionally or otherwise
1.228 + */
1.229 +void exit_block( uint32_t pc )
1.230 +{
1.231 + load_imm32( R_ECX, pc );
1.232 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.233 + MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.234 + load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.235 + MUL_r32( R_ESI );
1.236 + ADD_r32_r32( R_EAX, R_ECX );
1.237 + store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.238 + XOR_r32_r32( R_EAX, R_EAX );
1.239 + RET();
1.240 }
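exit_block writes the target PC back and charges the block's elapsed time: EAX is loaded with sh4_cpu_period, MUL_r32(R_ESI) multiplies it by the instruction count accumulated in %esi, and the product is added to slice_cycle. In C terms (instruction_count is an illustrative name for the value in %esi):

    /* exit_block in C terms (sketch): */
    sh4r.pc = pc;
    sh4r.slice_cycle += sh4_cpu_period * instruction_count;
    return 0;                      /* XOR %eax,%eax; RET */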
1.241
1.242 /**
1.243 * Flush any open regs back to memory, restore SI/DI, update PC, etc.
1.244 */
1.245 void sh4_translate_end_block( sh4addr_t pc ) {
1.246 - /* pop ebp */
1.247 - *xlat_output++ = 0x58 + R_EBP;
1.248 + assert( !sh4_x86.in_delay_slot ); // should never stop here
1.249 + // Normal termination - save PC, cycle count
1.250 + exit_block( pc );
1.251
1.252 - /* ret */
1.253 - *xlat_output++ = 0xC3;
1.254 + uint8_t *end_ptr = xlat_output;
1.255 + // Exception termination. Jump block for various exception codes:
1.256 + PUSH_imm32( EXC_DATA_ADDR_READ );
1.257 + JMP_rel8( 33 );
1.258 + PUSH_imm32( EXC_DATA_ADDR_WRITE );
1.259 + JMP_rel8( 26 );
1.260 + PUSH_imm32( EXC_ILLEGAL );
1.261 + JMP_rel8( 19 );
1.262 + PUSH_imm32( EXC_SLOT_ILLEGAL );
1.263 + JMP_rel8( 12 );
1.264 + PUSH_imm32( EXC_FPU_DISABLED );
1.265 + JMP_rel8( 5 );
1.266 + PUSH_imm32( EXC_SLOT_FPU_DISABLED );
1.267 + // target
1.268 + load_spreg( R_ECX, REG_OFFSET(pc) );
1.269 + ADD_r32_r32( R_ESI, R_ECX );
1.270 + ADD_r32_r32( R_ESI, R_ECX );
1.271 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.272 + MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.273 + load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.274 + MUL_r32( R_ESI );
1.275 + ADD_r32_r32( R_EAX, R_ECX );
1.276 + store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.277 +
1.278 + load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
1.279 + CALL_r32( R_EAX ); // 2
1.280 + POP_r32(R_EBP);
1.281 + RET();
1.282 +
1.283 + sh4_x86_do_backpatch( end_ptr );
1.284 }
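Each stub in the jump ladder is 7 bytes (PUSH_imm32 is 5, JMP_rel8 is 2), which is exactly the spacing of the EXIT_* constants (0, 7, 14, 21, 28, 35) that get backpatched into the conditional exits; the rel8 distances 33/26/19/12/5 all land on the common target after the last PUSH. The shared tail then advances PC (presumably from the block's start address) to the faulting instruction, charges the cycles executed so far, and calls sh4_raise_exception with the pushed code as its stack argument. In C terms:

    /* Exception exit path in C terms (sketch): */
    sh4r.pc += 2 * instruction_count;     /* 2 bytes per SH4 instruction */
    sh4r.slice_cycle += sh4_cpu_period * instruction_count;
    sh4_raise_exception( exc );           /* exc = the PUSH_imm32'd code */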
1.285
1.286 /**
1.287 @@ -138,7 +337,7 @@
1.288 uint32_t sh4_x86_translate_instruction( uint32_t pc )
1.289 {
1.290 uint16_t ir = sh4_read_word( pc );
1.291 -
1.292 +
1.293 switch( (ir&0xF000) >> 12 ) {
1.294 case 0x0:
1.295 switch( ir&0xF ) {
1.296 @@ -149,7 +348,8 @@
1.297 case 0x0:
1.298 { /* STC SR, Rn */
1.299 uint32_t Rn = ((ir>>8)&0xF);
1.300 - /* TODO */
1.301 + read_sr( R_EAX );
1.302 + store_reg( R_EAX, Rn );
1.303 }
1.304 break;
1.305 case 0x1:
1.306 @@ -566,6 +766,18 @@
1.307 case 0xC:
1.308 { /* CMP/STR Rm, Rn */
1.309 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.310 + load_reg( R_EAX, Rm );
1.311 + load_reg( R_ECX, Rn );
1.312 + XOR_r32_r32( R_ECX, R_EAX );
1.313 + TEST_r8_r8( R_AL, R_AL );
1.314 + JE_rel8(13);
1.315 + TEST_r8_r8( R_AH, R_AH ); // 2
1.316 + JE_rel8(9);
1.317 + SHR_imm8_r32( 16, R_EAX ); // 3
1.318 + TEST_r8_r8( R_AL, R_AL ); // 2
1.319 + JE_rel8(2);
1.320 + TEST_r8_r8( R_AH, R_AH ); // 2
1.321 + SETE_t();
1.322 }
1.323 break;
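CMP/STR sets T if any of the four corresponding bytes of Rm and Rn are equal, so the XOR'd value is tested a byte at a time; every JE lands on the final SETE_t() with ZF already set (the byte counts in the comments are what MARK_JMP/CHECK_JMP exist to verify). Equivalent C:

    /* CMP/STR Rm, Rn in C terms (sketch): */
    uint32_t tmp = sh4r.r[Rm] ^ sh4r.r[Rn];
    sh4r.t = (tmp & 0x000000FF) == 0 || (tmp & 0x0000FF00) == 0
          || (tmp & 0x00FF0000) == 0 || (tmp & 0xFF000000) == 0;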
1.324 case 0xD:
1.325 @@ -880,6 +1092,11 @@
1.326 { /* STC.L SR, @-Rn */
1.327 uint32_t Rn = ((ir>>8)&0xF);
1.328 /* TODO */
1.329 + load_reg( R_ECX, Rn );
1.330 + ADD_imm8s_r32( -4, Rn );
1.331 + store_reg( R_ECX, Rn );
1.332 + read_sr( R_EAX );
1.333 + MEM_WRITE_LONG( R_ECX, R_EAX );
1.334 }
1.335 break;
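STC.L SR, @-Rn is a pre-decrement store of the reassembled SR value. Note that the second operand of ADD_imm8s_r32 above is the SH4 register index Rn where an x86 register (presumably R_ECX) is expected; together with the leftover /* TODO */, this is still work in progress. The intended semantics in C (read_sr_value as sketched earlier):

    /* STC.L SR, @-Rn in C terms (sketch): */
    sh4r.r[Rn] -= 4;
    sh4_write_long( sh4r.r[Rn], read_sr_value() );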
1.336 case 0x1:
1.337 @@ -1085,6 +1302,12 @@
1.338 case 0x0:
1.339 { /* LDC.L @Rm+, SR */
1.340 uint32_t Rm = ((ir>>8)&0xF);
1.341 + load_reg( R_EAX, Rm );
1.342 + MOV_r32_r32( R_EAX, R_ECX );
1.343 + ADD_imm8s_r32( 4, R_EAX );
1.344 + store_reg( R_EAX, Rm );
1.345 + MEM_READ_LONG( R_ECX, R_EAX );
1.346 + write_sr( R_EAX );
1.347 }
1.348 break;
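LDC.L @Rm+, SR is the post-increment counterpart: the old address is kept in ECX, Rm is bumped past the loaded long, and the fetched value is scattered into the SR fields by write_sr. In C terms (write_sr_value as sketched earlier):

    /* LDC.L @Rm+, SR in C terms (sketch): */
    uint32_t addr = sh4r.r[Rm];
    sh4r.r[Rm] = addr + 4;
    write_sr_value( sh4_read_long( addr ) );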
1.349 case 0x1:
1.350 @@ -1312,6 +1535,16 @@
1.351 case 0xD:
1.352 { /* SHLD Rm, Rn */
1.353 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.354 + load_reg( R_EAX, Rn );
1.355 + load_reg( R_ECX, Rm );
1.356 +
1.357 + MOV_r32_r32( R_EAX, R_EDX );
1.358 + SHL_r32_CL( R_EAX );
1.359 + NEG_r32( R_ECX );
1.360 + SHR_r32_CL( R_EDX );
1.361 + CMP_imm8s_r32( 0, R_ECX );
1.362 + CMOVAE_r32_r32( R_EDX, R_EAX );
1.363 + store_reg( R_EAX, Rn );
1.364 }
1.365 break;
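SHLD shifts left for non-negative Rm and logically right for negative Rm; the emitted sequence computes both candidates (SHL by CL, then SHR by the negated count) and selects between them with a CMOV on the sign test. For reference, the architectural semantics in C; note the negative-Rm, (Rm & 0x1F) == 0 edge case, which a plain SHR by CL cannot express since x86 masks shift counts to 5 bits:

    /* SHLD Rm, Rn per the SH-4 manual, in C terms (sketch): */
    int32_t rm = (int32_t)sh4r.r[Rm];
    if( rm >= 0 )
        sh4r.r[Rn] <<= (rm & 0x1F);
    else if( (rm & 0x1F) == 0 )
        sh4r.r[Rn] = 0;
    else
        sh4r.r[Rn] >>= ((~rm & 0x1F) + 1);   /* logical: r[] is unsigned */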
1.366 case 0xE:
1.367 @@ -1321,7 +1554,8 @@
1.368 case 0x0:
1.369 { /* LDC Rm, SR */
1.370 uint32_t Rm = ((ir>>8)&0xF);
1.371 - /* We need to be a little careful about SR */
1.372 + load_reg( R_EAX, Rm );
1.373 + write_sr( R_EAX );
1.374 }
1.375 break;
1.376 case 0x1:
1.377 @@ -1590,6 +1824,10 @@
1.378 case 0xB:
1.379 { /* BF disp */
1.380 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
1.381 + CMP_imm8s_ebp( 0, R_T );
1.382 + JNE_rel8( 1 );
1.383 + exit_block( disp + pc + 4 );
1.384 + return 1;
1.385 }
1.386 break;
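BF leaves the block when T is clear: the JNE skips the exit_block call when T is set, and returning 1 (rather than the usual 0) presumably tells the translation loop that the block is complete. The rel8 displacement of 1 is plainly provisional (exit_block emits far more than one byte), which again fits the changeset's WIP description. In C terms:

    /* BF disp in C terms (sketch): */
    if( sh4r.t == 0 ) {
        sh4r.pc = pc + 4 + disp;     /* branch taken: exit the block */
        /* ...cycle accounting as in exit_block... */
    }
    /* otherwise fall through to the next instruction */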
1.387 case 0xD:
1.388 @@ -1601,6 +1839,10 @@
1.389 case 0xF:
1.390 { /* BF/S disp */
1.391 int32_t disp = SIGNEXT8(ir&0xFF)<<1;
1.392 + CMP_imm8s_ebp( 0, R_T );
1.393 + JNE_rel8( 1 );
1.394 + exit_block( disp + pc + 4 );
1.395 + sh4_x86.in_delay_slot = TRUE;
1.396 }
1.397 break;
1.398 default:
1.399 @@ -1619,6 +1861,7 @@
1.400 case 0xA:
1.401 { /* BRA disp */
1.402 int32_t disp = SIGNEXT12(ir&0xFFF)<<1;
1.403 + exit_block( disp + pc + 4 );
1.404 }
1.405 break;
1.406 case 0xB:
1.407 @@ -1697,6 +1940,9 @@
1.408 case 0x8:
1.409 { /* TST #imm, R0 */
1.410 uint32_t imm = (ir&0xFF);
1.411 + load_reg( R_EAX, 0 );
1.412 + TEST_imm32_r32( imm, R_EAX );
1.413 + SETE_t();
1.414 }
1.415 break;
1.416 case 0x9:
1.417 @@ -1726,6 +1972,12 @@
1.418 case 0xC:
1.419 { /* TST.B #imm, @(R0, GBR) */
1.420 uint32_t imm = (ir&0xFF);
1.421 + load_reg( R_EAX, 0);
1.422 + load_reg( R_ECX, R_GBR);
1.423 + ADD_r32_r32( R_EAX, R_ECX );
1.424 + MEM_READ_BYTE( R_ECX, R_EAX );
1.425 + TEST_imm8_r8( imm, R_EAX );
1.426 + SETE_t();
1.427 }
1.428 break;
1.429 case 0xD:
1.430 @@ -1960,6 +2212,7 @@
1.431 break;
1.432 }
1.433
1.434 + INC_r32(R_ESI);
1.435
1.436 return 0;
1.437 }