changeset   388:13bae2fb0373
parent      387:38e9fddbf0e3
child       389:3e354da62264
author      nkeynes
date        Tue Sep 18 08:59:00 2007 +0000
More fixes and complete missing instructions
src/sh4/sh4x86.c
src/sh4/sh4x86.in
src/sh4/x86op.h
1.1 --- a/src/sh4/sh4x86.c Tue Sep 18 08:58:23 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Tue Sep 18 08:59:00 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.c,v 1.9 2007-09-16 07:03:23 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.10 2007-09-18 08:59:00 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -19,6 +19,7 @@
1.11 */
1.12
1.13 #include <assert.h>
1.14 +#include <math.h>
1.15
1.16 #ifndef NDEBUG
1.17 #define DEBUG_JUMPS 1
1.18 @@ -26,6 +27,7 @@
1.19
1.20 #include "sh4/sh4core.h"
1.21 #include "sh4/sh4trans.h"
1.22 +#include "sh4/sh4mmio.h"
1.23 #include "sh4/x86op.h"
1.24 #include "clock.h"
1.25
1.26 @@ -40,6 +42,7 @@
1.27 gboolean in_delay_slot;
1.28 gboolean priv_checked; /* true if we've already checked the cpu mode. */
1.29 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
1.30 + int exit_code;
1.31
1.32 /* Allocated memory for the (block-wide) back-patch list */
1.33 uint32_t **backpatch_list;
1.34 @@ -56,6 +59,8 @@
1.35
1.36 static struct sh4_x86_state sh4_x86;
1.37
1.38 +static uint32_t max_int = 0x7FFFFFFF;
1.39 +static uint32_t min_int = 0x80000000;
1.40 void signsat48( void )
1.41 {
1.42 if( ((int64_t)sh4r.mac) < (int64_t)0xFFFF800000000000LL )
1.43 @@ -64,6 +69,40 @@
1.44 sh4r.mac = 0x00007FFFFFFFFFFFLL;
1.45 }
1.46
1.47 +void sh4_fsca( uint32_t anglei, float *fr )
1.48 +{
1.49 + float angle = (((float)(anglei&0xFFFF))/65536.0) * 2 * M_PI;
1.50 + *fr++ = cosf(angle);
1.51 + *fr = sinf(angle);
1.52 +}
1.53 +
1.54 +void sh4_sleep()
1.55 +{
1.56 + if( MMIO_READ( CPG, STBCR ) & 0x80 ) {
1.57 + sh4r.sh4_state = SH4_STATE_STANDBY;
1.58 + } else {
1.59 + sh4r.sh4_state = SH4_STATE_SLEEP;
1.60 + }
1.61 +}
1.62 +
1.63 +/**
1.64 + * Compute the matrix tranform of fv given the matrix xf.
1.65 + * Both fv and xf are word-swapped as per the sh4r.fr banks
1.66 + */
1.67 +void sh4_ftrv( float *target, float *xf )
1.68 +{
1.69 + float fv[4] = { target[1], target[0], target[3], target[2] };
1.70 + target[1] = xf[1] * fv[0] + xf[5]*fv[1] +
1.71 + xf[9]*fv[2] + xf[13]*fv[3];
1.72 + target[0] = xf[0] * fv[0] + xf[4]*fv[1] +
1.73 + xf[8]*fv[2] + xf[12]*fv[3];
1.74 + target[3] = xf[3] * fv[0] + xf[7]*fv[1] +
1.75 + xf[11]*fv[2] + xf[15]*fv[3];
1.76 + target[2] = xf[2] * fv[0] + xf[6]*fv[1] +
1.77 + xf[10]*fv[2] + xf[14]*fv[3];
1.78 +}
1.79 +
1.80 +
1.81
1.82 void sh4_x86_init()
1.83 {
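A minimal stand-alone sketch of the angle encoding the new sh4_fsca() helper relies on: the low 16 bits of FPUL are a binary fraction of a full turn. The fsca_ref() harness and its names below are illustrative only, not part of the changeset (build with -lm).

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as sh4_fsca() above; fr[0]/fr[1] follow the word-swapped
 * FR-pair layout (cosine in the first host word, sine in the second). */
static void fsca_ref( uint32_t anglei, float *fr )
{
    float angle = (((float)(anglei & 0xFFFF)) / 65536.0f) * 2.0f * (float)M_PI;
    fr[0] = cosf( angle );
    fr[1] = sinf( angle );
}

int main( void )
{
    float fr[2];
    fsca_ref( 0x4000, fr );     /* quarter turn: expect cos ~ 0, sin ~ 1 */
    printf( "cos=%f sin=%f\n", fr[0], fr[1] );
    fsca_ref( 0x18000, fr );    /* upper 16 bits ignored: same as 0x8000, a half turn */
    printf( "cos=%f sin=%f\n", fr[0], fr[1] );
    return 0;
}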
1.84 @@ -161,7 +200,7 @@
1.85 /**
1.86 * Load a pointer to the back fp back into the specified x86 register. The
1.87 * bankreg must have been previously loaded with FPSCR.
1.88 - * NB: 10 bytes
1.89 + * NB: 12 bytes
1.90 */
1.91 static inline void load_xf_bank( int bankreg )
1.92 {
1.93 @@ -344,14 +383,6 @@
1.94 JNE_exit(EXIT_DATA_ADDR_WRITE);
1.95 }
1.96
1.97 -static inline void raise_exception( int exc )
1.98 -{
1.99 - PUSH_imm32(exc);
1.100 - call_func0(sh4_raise_exception);
1.101 - ADD_imm8s_r32( 4, R_ESP );
1.102 - sh4_x86.in_delay_slot = FALSE;
1.103 -}
1.104 -
1.105 #define UNDEF()
1.106 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
1.107 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
1.108 @@ -361,7 +392,6 @@
1.109 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
1.110 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
1.111
1.112 -#define RAISE_EXCEPTION( exc ) raise_exception(exc); return 1;
1.113 #define SLOTILLEGAL() JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
1.114
1.115
1.116 @@ -383,6 +413,7 @@
1.117 sh4_x86.priv_checked = FALSE;
1.118 sh4_x86.fpuen_checked = FALSE;
1.119 sh4_x86.backpatch_posn = 0;
1.120 + sh4_x86.exit_code = 1;
1.121 }
1.122
1.123 /**
1.124 @@ -396,7 +427,7 @@
1.125 MUL_r32( R_ESI );
1.126 ADD_r32_r32( R_EAX, R_ECX );
1.127 store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.128 - load_imm32( R_EAX, 1 );
1.129 + load_imm32( R_EAX, sh4_x86.exit_code );
1.130 POP_r32(R_ESI);
1.131 POP_r32(R_EDI);
1.132 POP_r32(R_EBP);
1.133 @@ -411,46 +442,53 @@
1.134 // Normal termination - save PC, cycle count
1.135 exit_block( );
1.136
1.137 - uint8_t *end_ptr = xlat_output;
1.138 - // Exception termination. Jump block for various exception codes:
1.139 - PUSH_imm32( EXC_DATA_ADDR_READ );
1.140 - JMP_rel8( 33, target1 );
1.141 - PUSH_imm32( EXC_DATA_ADDR_WRITE );
1.142 - JMP_rel8( 26, target2 );
1.143 - PUSH_imm32( EXC_ILLEGAL );
1.144 - JMP_rel8( 19, target3 );
1.145 - PUSH_imm32( EXC_SLOT_ILLEGAL );
1.146 - JMP_rel8( 12, target4 );
1.147 - PUSH_imm32( EXC_FPU_DISABLED );
1.148 - JMP_rel8( 5, target5 );
1.149 - PUSH_imm32( EXC_SLOT_FPU_DISABLED );
1.150 - // target
1.151 - JMP_TARGET(target1);
1.152 - JMP_TARGET(target2);
1.153 - JMP_TARGET(target3);
1.154 - JMP_TARGET(target4);
1.155 - JMP_TARGET(target5);
1.156 - load_spreg( R_ECX, REG_OFFSET(pc) );
1.157 - ADD_r32_r32( R_ESI, R_ECX );
1.158 - ADD_r32_r32( R_ESI, R_ECX );
1.159 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.160 - MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.161 - load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.162 - MUL_r32( R_ESI );
1.163 - ADD_r32_r32( R_EAX, R_ECX );
1.164 - store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.165 + if( sh4_x86.backpatch_posn != 0 ) {
1.166 + uint8_t *end_ptr = xlat_output;
1.167 + // Exception termination. Jump block for various exception codes:
1.168 + PUSH_imm32( EXC_DATA_ADDR_READ );
1.169 + JMP_rel8( 33, target1 );
1.170 + PUSH_imm32( EXC_DATA_ADDR_WRITE );
1.171 + JMP_rel8( 26, target2 );
1.172 + PUSH_imm32( EXC_ILLEGAL );
1.173 + JMP_rel8( 19, target3 );
1.174 + PUSH_imm32( EXC_SLOT_ILLEGAL );
1.175 + JMP_rel8( 12, target4 );
1.176 + PUSH_imm32( EXC_FPU_DISABLED );
1.177 + JMP_rel8( 5, target5 );
1.178 + PUSH_imm32( EXC_SLOT_FPU_DISABLED );
1.179 + // target
1.180 + JMP_TARGET(target1);
1.181 + JMP_TARGET(target2);
1.182 + JMP_TARGET(target3);
1.183 + JMP_TARGET(target4);
1.184 + JMP_TARGET(target5);
1.185 + load_spreg( R_ECX, REG_OFFSET(pc) );
1.186 + ADD_r32_r32( R_ESI, R_ECX );
1.187 + ADD_r32_r32( R_ESI, R_ECX );
1.188 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.189 + MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.190 + load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.191 + MUL_r32( R_ESI );
1.192 + ADD_r32_r32( R_EAX, R_ECX );
1.193 + store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.194 +
1.195 + load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
1.196 + CALL_r32( R_EAX ); // 2
1.197 + ADD_imm8s_r32( 4, R_ESP );
1.198 + POP_r32(R_ESI);
1.199 + POP_r32(R_EDI);
1.200 + POP_r32(R_EBP);
1.201 + RET();
1.202
1.203 - load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
1.204 - CALL_r32( R_EAX ); // 2
1.205 - ADD_imm8s_r32( 4, R_ESP );
1.206 - POP_r32(R_ESI);
1.207 - POP_r32(R_EDI);
1.208 - POP_r32(R_EBP);
1.209 - RET();
1.210 + sh4_x86_do_backpatch( end_ptr );
1.211 + }
1.212
1.213 - sh4_x86_do_backpatch( end_ptr );
1.214 }
1.215
1.216 +
1.217 +extern uint16_t *sh4_icache;
1.218 +extern uint32_t sh4_icache_addr;
1.219 +
1.220 /**
1.221 * Translate a single instruction. Delayed branches are handled specially
1.222 * by translating both branch and delayed instruction as a single unit (as
1.223 @@ -461,8 +499,25 @@
1.224 */
1.225 uint32_t sh4_x86_translate_instruction( uint32_t pc )
1.226 {
1.227 - uint16_t ir = sh4_read_word( pc );
1.228 -
1.229 + uint32_t ir;
1.230 + /* Read instruction */
1.231 + uint32_t pageaddr = pc >> 12;
1.232 + if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
1.233 + ir = sh4_icache[(pc&0xFFF)>>1];
1.234 + } else {
1.235 + sh4_icache = (uint16_t *)mem_get_page(pc);
1.236 + if( ((uint32_t)sh4_icache) < MAX_IO_REGIONS ) {
1.237 + /* If someone's actually been so daft as to try to execute out of an IO
1.238 + * region, fallback on the full-blown memory read
1.239 + */
1.240 + sh4_icache = NULL;
1.241 + ir = sh4_read_word(pc);
1.242 + } else {
1.243 + sh4_icache_addr = pageaddr;
1.244 + ir = sh4_icache[(pc&0xFFF)>>1];
1.245 + }
1.246 + }
1.247 +
1.248 switch( (ir&0xF000) >> 12 ) {
1.249 case 0x0:
1.250 switch( ir&0xF ) {
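The instruction fetch above replaces an unconditional sh4_read_word() call with a one-entry page cache: the translator keeps a host pointer for the current 4KB page and only takes the slow path on a page change or when the page turns out to be an I/O region. A minimal sketch of that pattern, with the hypothetical lookup_page()/slow_read16() standing in for mem_get_page()/sh4_read_word():

#include <stddef.h>
#include <stdint.h>

#define PAGE_SHIFT 12                           /* 4KB pages, matching pc >> 12 above */

static uint16_t *cached_page = NULL;
static uint32_t  cached_tag  = ~0u;

extern uint16_t *lookup_page( uint32_t addr );  /* assumed: NULL for I/O regions */
extern uint16_t  slow_read16( uint32_t addr );  /* assumed: full memory read */

static uint16_t fetch16( uint32_t pc )
{
    uint32_t tag = pc >> PAGE_SHIFT;
    if( cached_page != NULL && tag == cached_tag ) {
        return cached_page[(pc & 0xFFF) >> 1];  /* fast path: index into cached page */
    }
    cached_page = lookup_page( pc );
    if( cached_page == NULL ) {                 /* I/O or unmapped: full read */
        return slow_read16( pc );
    }
    cached_tag = tag;
    return cached_page[(pc & 0xFFF) >> 1];
}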
1.251 @@ -779,7 +834,11 @@
1.252 break;
1.253 case 0x1:
1.254 { /* SLEEP */
1.255 - /* TODO */
1.256 + check_priv();
1.257 + call_func0( sh4_sleep );
1.258 + sh4_x86.exit_code = 0;
1.259 + sh4_x86.in_delay_slot = FALSE;
1.260 + return 1;
1.261 }
1.262 break;
1.263 case 0x2:
1.264 @@ -2336,8 +2395,9 @@
1.265 if( sh4_x86.in_delay_slot ) {
1.266 SLOTILLEGAL();
1.267 } else {
1.268 - // TODO: Write TRA
1.269 - RAISE_EXCEPTION(EXC_TRAP);
1.270 + PUSH_imm32( imm );
1.271 + call_func0( sh4_raise_trap );
1.272 + ADD_imm8s_r32( 4, R_ESP );
1.273 }
1.274 }
1.275 break;
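TRAPA now pushes the trap immediate and calls sh4_raise_trap() instead of the old RAISE_EXCEPTION(EXC_TRAP), which never wrote TRA. For reference, the SH4 manual requires roughly the following of such a helper; the struct, the names, and deliver_exception() below are placeholders, not lxdream's actual sh4_raise_trap():

#include <stdint.h>

struct sh4_regs { uint32_t tra; /* ... */ };
extern void deliver_exception( struct sh4_regs *r, int expevt );  /* placeholder */
#define EXPEVT_TRAPA 0x160                    /* SH4 exception event code for TRAPA */

static void trapa_ref( struct sh4_regs *r, uint32_t imm )
{
    r->tra = (imm & 0xFF) << 2;               /* TRA holds the immediate * 4 */
    deliver_exception( r, EXPEVT_TRAPA );
}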
1.276 @@ -2471,8 +2531,14 @@
1.277 if( sh4_x86.in_delay_slot ) {
1.278 SLOTILLEGAL();
1.279 } else {
1.280 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.281 - MEM_READ_LONG( R_ECX, R_EAX );
1.282 + uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1.283 + char *ptr = mem_get_region(target);
1.284 + if( ptr != NULL ) {
1.285 + MOV_moff32_EAX( (uint32_t)ptr );
1.286 + } else {
1.287 + load_imm32( R_ECX, target );
1.288 + MEM_READ_LONG( R_ECX, R_EAX );
1.289 + }
1.290 store_reg( R_EAX, Rn );
1.291 }
1.292 }
1.293 @@ -2517,13 +2583,13 @@
1.294 JNE_rel8(13, doubleprec);
1.295 push_fr(R_EDX, FRn);
1.296 push_fr(R_EDX, FRm);
1.297 - FMULP_st(1);
1.298 + FSUBP_st(1);
1.299 pop_fr(R_EDX, FRn);
1.300 JMP_rel8(11, end);
1.301 JMP_TARGET(doubleprec);
1.302 push_dr(R_EDX, FRn);
1.303 push_dr(R_EDX, FRm);
1.304 - FMULP_st(1);
1.305 + FSUBP_st(1);
1.306 pop_dr(R_EDX, FRn);
1.307 JMP_TARGET(end);
1.308 }
1.309 @@ -2841,8 +2907,8 @@
1.310 store_fr( R_ECX, R_EDX, FRn );
1.311 } else /* FRn&1 == 0 */ {
1.312 load_fr( R_ECX, R_ECX, FRm );
1.313 - store_fr( R_EDX, R_EAX, FRn-1 );
1.314 - store_fr( R_EDX, R_ECX, FRn );
1.315 + store_fr( R_EDX, R_EAX, FRn );
1.316 + store_fr( R_EDX, R_ECX, FRn+1 );
1.317 }
1.318 JMP_TARGET(end);
1.319 } else /* FRm&1 == 0 */ {
1.320 @@ -2905,7 +2971,32 @@
1.321 { /* FTRC FRm, FPUL */
1.322 uint32_t FRm = ((ir>>8)&0xF);
1.323 check_fpuen();
1.324 - // TODO
1.325 + load_spreg( R_ECX, R_FPSCR );
1.326 + load_fr_bank( R_EDX );
1.327 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.328 + JNE_rel8(5, doubleprec);
1.329 + push_fr( R_EDX, FRm );
1.330 + JMP_rel8(3, doop);
1.331 + JMP_TARGET(doubleprec);
1.332 + push_dr( R_EDX, FRm );
1.333 + JMP_TARGET( doop );
1.334 + load_imm32( R_ECX, (uint32_t)&max_int );
1.335 + FILD_r32ind( R_ECX );
1.336 + FCOMIP_st(1);
1.337 + JNA_rel8( 16, sat );
1.338 + load_imm32( R_ECX, (uint32_t)&min_int ); // 5
1.339 + FILD_r32ind( R_ECX ); // 2
1.340 + FCOMIP_st(1); // 2
1.341 + JAE_rel8( 5, sat2 ); // 2
1.342 + FISTP_sh4r(R_FPUL); // 3
1.343 + JMP_rel8( 9, end ); // 2
1.344 +
1.345 + JMP_TARGET(sat);
1.346 + JMP_TARGET(sat2);
1.347 + MOV_r32ind_r32( R_ECX, R_ECX ); // 2
1.348 + store_spreg( R_ECX, R_FPUL );
1.349 + FPOP_st();
1.350 + JMP_TARGET(end);
1.351 }
1.352 break;
1.353 case 0x4:
1.354 @@ -3013,7 +3104,6 @@
1.355 { /* FCNVSD FPUL, FRn */
1.356 uint32_t FRn = ((ir>>8)&0xF);
1.357 check_fpuen();
1.358 - check_fpuen();
1.359 load_spreg( R_ECX, R_FPSCR );
1.360 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.361 JE_rel8(9, end); // only when PR=1
1.362 @@ -3040,6 +3130,28 @@
1.363 { /* FIPR FVm, FVn */
1.364 uint32_t FVn = ((ir>>10)&0x3); uint32_t FVm = ((ir>>8)&0x3);
1.365 check_fpuen();
1.366 + load_spreg( R_ECX, R_FPSCR );
1.367 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.368 + JNE_rel8(44, doubleprec);
1.369 +
1.370 + load_fr_bank( R_ECX );
1.371 + push_fr( R_ECX, FVm<<2 );
1.372 + push_fr( R_ECX, FVn<<2 );
1.373 + FMULP_st(1);
1.374 + push_fr( R_ECX, (FVm<<2)+1);
1.375 + push_fr( R_ECX, (FVn<<2)+1);
1.376 + FMULP_st(1);
1.377 + FADDP_st(1);
1.378 + push_fr( R_ECX, (FVm<<2)+2);
1.379 + push_fr( R_ECX, (FVn<<2)+2);
1.380 + FMULP_st(1);
1.381 + FADDP_st(1);
1.382 + push_fr( R_ECX, (FVm<<2)+3);
1.383 + push_fr( R_ECX, (FVn<<2)+3);
1.384 + FMULP_st(1);
1.385 + FADDP_st(1);
1.386 + pop_fr( R_ECX, (FVn<<2)+3);
1.387 + JMP_TARGET(doubleprec);
1.388 }
1.389 break;
1.390 case 0xF:
1.391 @@ -3048,6 +3160,14 @@
1.392 { /* FSCA FPUL, FRn */
1.393 uint32_t FRn = ((ir>>9)&0x7)<<1;
1.394 check_fpuen();
1.395 + load_spreg( R_ECX, R_FPSCR );
1.396 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.397 + JNE_rel8( 21, doubleprec );
1.398 + load_fr_bank( R_ECX );
1.399 + ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
1.400 + load_spreg( R_EDX, R_FPUL );
1.401 + call_func2( sh4_fsca, R_EDX, R_ECX );
1.402 + JMP_TARGET(doubleprec);
1.403 }
1.404 break;
1.405 case 0x1:
1.406 @@ -3056,6 +3176,14 @@
1.407 { /* FTRV XMTRX, FVn */
1.408 uint32_t FVn = ((ir>>10)&0x3);
1.409 check_fpuen();
1.410 + load_spreg( R_ECX, R_FPSCR );
1.411 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.412 + JNE_rel8( 30, doubleprec );
1.413 + load_fr_bank( R_EDX ); // 3
1.414 + ADD_imm8s_r32( FVn<<4, R_EDX ); // 3
1.415 + load_xf_bank( R_ECX ); // 12
1.416 + call_func2( sh4_ftrv, R_EDX, R_ECX ); // 12
1.417 + JMP_TARGET(doubleprec);
1.418 }
1.419 break;
1.420 case 0x1:
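The new FTRC sequence above clamps against the max_int/min_int statics before the FISTP, matching the SH4's saturating float-to-integer conversion. In plain C the intended result is roughly the following; ftrc_ref() is a semantic reference only, not code from the changeset:

#include <stdint.h>

static int32_t ftrc_ref( double val )
{
    if( val >= 2147483647.0 )                 /* >= max_int: saturate high */
        return INT32_MAX;
    if( val <= -2147483648.0 )                /* <= min_int: saturate low */
        return INT32_MIN;
    return (int32_t)val;                      /* otherwise truncate toward zero */
}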
2.1 --- a/src/sh4/sh4x86.in Tue Sep 18 08:58:23 2007 +0000
2.2 +++ b/src/sh4/sh4x86.in Tue Sep 18 08:59:00 2007 +0000
2.3 @@ -1,5 +1,5 @@
2.4 /**
2.5 - * $Id: sh4x86.in,v 1.10 2007-09-16 07:03:23 nkeynes Exp $
2.6 + * $Id: sh4x86.in,v 1.11 2007-09-18 08:59:00 nkeynes Exp $
2.7 *
2.8 * SH4 => x86 translation. This version does no real optimization, it just
2.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
2.10 @@ -19,6 +19,7 @@
2.11 */
2.12
2.13 #include <assert.h>
2.14 +#include <math.h>
2.15
2.16 #ifndef NDEBUG
2.17 #define DEBUG_JUMPS 1
2.18 @@ -26,6 +27,7 @@
2.19
2.20 #include "sh4/sh4core.h"
2.21 #include "sh4/sh4trans.h"
2.22 +#include "sh4/sh4mmio.h"
2.23 #include "sh4/x86op.h"
2.24 #include "clock.h"
2.25
2.26 @@ -40,6 +42,7 @@
2.27 gboolean in_delay_slot;
2.28 gboolean priv_checked; /* true if we've already checked the cpu mode. */
2.29 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
2.30 + int exit_code;
2.31
2.32 /* Allocated memory for the (block-wide) back-patch list */
2.33 uint32_t **backpatch_list;
2.34 @@ -56,6 +59,8 @@
2.35
2.36 static struct sh4_x86_state sh4_x86;
2.37
2.38 +static uint32_t max_int = 0x7FFFFFFF;
2.39 +static uint32_t min_int = 0x80000000;
2.40 void signsat48( void )
2.41 {
2.42 if( ((int64_t)sh4r.mac) < (int64_t)0xFFFF800000000000LL )
2.43 @@ -64,6 +69,40 @@
2.44 sh4r.mac = 0x00007FFFFFFFFFFFLL;
2.45 }
2.46
2.47 +void sh4_fsca( uint32_t anglei, float *fr )
2.48 +{
2.49 + float angle = (((float)(anglei&0xFFFF))/65536.0) * 2 * M_PI;
2.50 + *fr++ = cosf(angle);
2.51 + *fr = sinf(angle);
2.52 +}
2.53 +
2.54 +void sh4_sleep()
2.55 +{
2.56 + if( MMIO_READ( CPG, STBCR ) & 0x80 ) {
2.57 + sh4r.sh4_state = SH4_STATE_STANDBY;
2.58 + } else {
2.59 + sh4r.sh4_state = SH4_STATE_SLEEP;
2.60 + }
2.61 +}
2.62 +
2.63 +/**
2.64 + * Compute the matrix tranform of fv given the matrix xf.
2.65 + * Both fv and xf are word-swapped as per the sh4r.fr banks
2.66 + */
2.67 +void sh4_ftrv( float *target, float *xf )
2.68 +{
2.69 + float fv[4] = { target[1], target[0], target[3], target[2] };
2.70 + target[1] = xf[1] * fv[0] + xf[5]*fv[1] +
2.71 + xf[9]*fv[2] + xf[13]*fv[3];
2.72 + target[0] = xf[0] * fv[0] + xf[4]*fv[1] +
2.73 + xf[8]*fv[2] + xf[12]*fv[3];
2.74 + target[3] = xf[3] * fv[0] + xf[7]*fv[1] +
2.75 + xf[11]*fv[2] + xf[15]*fv[3];
2.76 + target[2] = xf[2] * fv[0] + xf[6]*fv[1] +
2.77 + xf[10]*fv[2] + xf[14]*fv[3];
2.78 +}
2.79 +
2.80 +
2.81
2.82 void sh4_x86_init()
2.83 {
2.84 @@ -161,7 +200,7 @@
2.85 /**
2.86 * Load a pointer to the back fp back into the specified x86 register. The
2.87 * bankreg must have been previously loaded with FPSCR.
2.88 - * NB: 10 bytes
2.89 + * NB: 12 bytes
2.90 */
2.91 static inline void load_xf_bank( int bankreg )
2.92 {
2.93 @@ -344,14 +383,6 @@
2.94 JNE_exit(EXIT_DATA_ADDR_WRITE);
2.95 }
2.96
2.97 -static inline void raise_exception( int exc )
2.98 -{
2.99 - PUSH_imm32(exc);
2.100 - call_func0(sh4_raise_exception);
2.101 - ADD_imm8s_r32( 4, R_ESP );
2.102 - sh4_x86.in_delay_slot = FALSE;
2.103 -}
2.104 -
2.105 #define UNDEF()
2.106 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
2.107 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
2.108 @@ -361,7 +392,6 @@
2.109 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
2.110 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
2.111
2.112 -#define RAISE_EXCEPTION( exc ) raise_exception(exc); return 1;
2.113 #define SLOTILLEGAL() JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
2.114
2.115
2.116 @@ -383,6 +413,7 @@
2.117 sh4_x86.priv_checked = FALSE;
2.118 sh4_x86.fpuen_checked = FALSE;
2.119 sh4_x86.backpatch_posn = 0;
2.120 + sh4_x86.exit_code = 1;
2.121 }
2.122
2.123 /**
2.124 @@ -396,7 +427,7 @@
2.125 MUL_r32( R_ESI );
2.126 ADD_r32_r32( R_EAX, R_ECX );
2.127 store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
2.128 - load_imm32( R_EAX, 1 );
2.129 + load_imm32( R_EAX, sh4_x86.exit_code );
2.130 POP_r32(R_ESI);
2.131 POP_r32(R_EDI);
2.132 POP_r32(R_EBP);
2.133 @@ -411,46 +442,53 @@
2.134 // Normal termination - save PC, cycle count
2.135 exit_block( );
2.136
2.137 - uint8_t *end_ptr = xlat_output;
2.138 - // Exception termination. Jump block for various exception codes:
2.139 - PUSH_imm32( EXC_DATA_ADDR_READ );
2.140 - JMP_rel8( 33, target1 );
2.141 - PUSH_imm32( EXC_DATA_ADDR_WRITE );
2.142 - JMP_rel8( 26, target2 );
2.143 - PUSH_imm32( EXC_ILLEGAL );
2.144 - JMP_rel8( 19, target3 );
2.145 - PUSH_imm32( EXC_SLOT_ILLEGAL );
2.146 - JMP_rel8( 12, target4 );
2.147 - PUSH_imm32( EXC_FPU_DISABLED );
2.148 - JMP_rel8( 5, target5 );
2.149 - PUSH_imm32( EXC_SLOT_FPU_DISABLED );
2.150 - // target
2.151 - JMP_TARGET(target1);
2.152 - JMP_TARGET(target2);
2.153 - JMP_TARGET(target3);
2.154 - JMP_TARGET(target4);
2.155 - JMP_TARGET(target5);
2.156 - load_spreg( R_ECX, REG_OFFSET(pc) );
2.157 - ADD_r32_r32( R_ESI, R_ECX );
2.158 - ADD_r32_r32( R_ESI, R_ECX );
2.159 - store_spreg( R_ECX, REG_OFFSET(pc) );
2.160 - MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
2.161 - load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
2.162 - MUL_r32( R_ESI );
2.163 - ADD_r32_r32( R_EAX, R_ECX );
2.164 - store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
2.165 + if( sh4_x86.backpatch_posn != 0 ) {
2.166 + uint8_t *end_ptr = xlat_output;
2.167 + // Exception termination. Jump block for various exception codes:
2.168 + PUSH_imm32( EXC_DATA_ADDR_READ );
2.169 + JMP_rel8( 33, target1 );
2.170 + PUSH_imm32( EXC_DATA_ADDR_WRITE );
2.171 + JMP_rel8( 26, target2 );
2.172 + PUSH_imm32( EXC_ILLEGAL );
2.173 + JMP_rel8( 19, target3 );
2.174 + PUSH_imm32( EXC_SLOT_ILLEGAL );
2.175 + JMP_rel8( 12, target4 );
2.176 + PUSH_imm32( EXC_FPU_DISABLED );
2.177 + JMP_rel8( 5, target5 );
2.178 + PUSH_imm32( EXC_SLOT_FPU_DISABLED );
2.179 + // target
2.180 + JMP_TARGET(target1);
2.181 + JMP_TARGET(target2);
2.182 + JMP_TARGET(target3);
2.183 + JMP_TARGET(target4);
2.184 + JMP_TARGET(target5);
2.185 + load_spreg( R_ECX, REG_OFFSET(pc) );
2.186 + ADD_r32_r32( R_ESI, R_ECX );
2.187 + ADD_r32_r32( R_ESI, R_ECX );
2.188 + store_spreg( R_ECX, REG_OFFSET(pc) );
2.189 + MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
2.190 + load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
2.191 + MUL_r32( R_ESI );
2.192 + ADD_r32_r32( R_EAX, R_ECX );
2.193 + store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
2.194 +
2.195 + load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
2.196 + CALL_r32( R_EAX ); // 2
2.197 + ADD_imm8s_r32( 4, R_ESP );
2.198 + POP_r32(R_ESI);
2.199 + POP_r32(R_EDI);
2.200 + POP_r32(R_EBP);
2.201 + RET();
2.202
2.203 - load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
2.204 - CALL_r32( R_EAX ); // 2
2.205 - ADD_imm8s_r32( 4, R_ESP );
2.206 - POP_r32(R_ESI);
2.207 - POP_r32(R_EDI);
2.208 - POP_r32(R_EBP);
2.209 - RET();
2.210 + sh4_x86_do_backpatch( end_ptr );
2.211 + }
2.212
2.213 - sh4_x86_do_backpatch( end_ptr );
2.214 }
2.215
2.216 +
2.217 +extern uint16_t *sh4_icache;
2.218 +extern uint32_t sh4_icache_addr;
2.219 +
2.220 /**
2.221 * Translate a single instruction. Delayed branches are handled specially
2.222 * by translating both branch and delayed instruction as a single unit (as
2.223 @@ -461,8 +499,25 @@
2.224 */
2.225 uint32_t sh4_x86_translate_instruction( uint32_t pc )
2.226 {
2.227 - uint16_t ir = sh4_read_word( pc );
2.228 -
2.229 + uint32_t ir;
2.230 + /* Read instruction */
2.231 + uint32_t pageaddr = pc >> 12;
2.232 + if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
2.233 + ir = sh4_icache[(pc&0xFFF)>>1];
2.234 + } else {
2.235 + sh4_icache = (uint16_t *)mem_get_page(pc);
2.236 + if( ((uint32_t)sh4_icache) < MAX_IO_REGIONS ) {
2.237 + /* If someone's actually been so daft as to try to execute out of an IO
2.238 + * region, fallback on the full-blown memory read
2.239 + */
2.240 + sh4_icache = NULL;
2.241 + ir = sh4_read_word(pc);
2.242 + } else {
2.243 + sh4_icache_addr = pageaddr;
2.244 + ir = sh4_icache[(pc&0xFFF)>>1];
2.245 + }
2.246 + }
2.247 +
2.248 %%
2.249 /* ALU operations */
2.250 ADD Rm, Rn {:
2.251 @@ -1130,8 +1185,14 @@
2.252 if( sh4_x86.in_delay_slot ) {
2.253 SLOTILLEGAL();
2.254 } else {
2.255 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
2.256 - MEM_READ_LONG( R_ECX, R_EAX );
2.257 + uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
2.258 + char *ptr = mem_get_region(target);
2.259 + if( ptr != NULL ) {
2.260 + MOV_moff32_EAX( (uint32_t)ptr );
2.261 + } else {
2.262 + load_imm32( R_ECX, target );
2.263 + MEM_READ_LONG( R_ECX, R_EAX );
2.264 + }
2.265 store_reg( R_EAX, Rn );
2.266 }
2.267 :}
2.268 @@ -1381,8 +1442,9 @@
2.269 if( sh4_x86.in_delay_slot ) {
2.270 SLOTILLEGAL();
2.271 } else {
2.272 - // TODO: Write TRA
2.273 - RAISE_EXCEPTION(EXC_TRAP);
2.274 + PUSH_imm32( imm );
2.275 + call_func0( sh4_raise_trap );
2.276 + ADD_imm8s_r32( 4, R_ESP );
2.277 }
2.278 :}
2.279 UNDEF {:
2.280 @@ -1443,8 +1505,8 @@
2.281 store_fr( R_ECX, R_EDX, FRn );
2.282 } else /* FRn&1 == 0 */ {
2.283 load_fr( R_ECX, R_ECX, FRm );
2.284 - store_fr( R_EDX, R_EAX, FRn-1 );
2.285 - store_fr( R_EDX, R_ECX, FRn );
2.286 + store_fr( R_EDX, R_EAX, FRn );
2.287 + store_fr( R_EDX, R_ECX, FRn+1 );
2.288 }
2.289 JMP_TARGET(end);
2.290 } else /* FRm&1 == 0 */ {
2.291 @@ -1687,7 +1749,32 @@
2.292 :}
2.293 FTRC FRm, FPUL {:
2.294 check_fpuen();
2.295 - // TODO
2.296 + load_spreg( R_ECX, R_FPSCR );
2.297 + load_fr_bank( R_EDX );
2.298 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.299 + JNE_rel8(5, doubleprec);
2.300 + push_fr( R_EDX, FRm );
2.301 + JMP_rel8(3, doop);
2.302 + JMP_TARGET(doubleprec);
2.303 + push_dr( R_EDX, FRm );
2.304 + JMP_TARGET( doop );
2.305 + load_imm32( R_ECX, (uint32_t)&max_int );
2.306 + FILD_r32ind( R_ECX );
2.307 + FCOMIP_st(1);
2.308 + JNA_rel8( 16, sat );
2.309 + load_imm32( R_ECX, (uint32_t)&min_int ); // 5
2.310 + FILD_r32ind( R_ECX ); // 2
2.311 + FCOMIP_st(1); // 2
2.312 + JAE_rel8( 5, sat2 ); // 2
2.313 + FISTP_sh4r(R_FPUL); // 3
2.314 + JMP_rel8( 9, end ); // 2
2.315 +
2.316 + JMP_TARGET(sat);
2.317 + JMP_TARGET(sat2);
2.318 + MOV_r32ind_r32( R_ECX, R_ECX ); // 2
2.319 + store_spreg( R_ECX, R_FPUL );
2.320 + FPOP_st();
2.321 + JMP_TARGET(end);
2.322 :}
2.323 FLDS FRm, FPUL {:
2.324 check_fpuen();
2.325 @@ -1713,7 +1800,6 @@
2.326 :}
2.327 FCNVSD FPUL, FRn {:
2.328 check_fpuen();
2.329 - check_fpuen();
2.330 load_spreg( R_ECX, R_FPSCR );
2.331 TEST_imm32_r32( FPSCR_PR, R_ECX );
2.332 JE_rel8(9, end); // only when PR=1
2.333 @@ -1870,13 +1956,13 @@
2.334 JNE_rel8(13, doubleprec);
2.335 push_fr(R_EDX, FRn);
2.336 push_fr(R_EDX, FRm);
2.337 - FMULP_st(1);
2.338 + FSUBP_st(1);
2.339 pop_fr(R_EDX, FRn);
2.340 JMP_rel8(11, end);
2.341 JMP_TARGET(doubleprec);
2.342 push_dr(R_EDX, FRn);
2.343 push_dr(R_EDX, FRm);
2.344 - FMULP_st(1);
2.345 + FSUBP_st(1);
2.346 pop_dr(R_EDX, FRn);
2.347 JMP_TARGET(end);
2.348 :}
2.349 @@ -1918,12 +2004,50 @@
2.350
2.351 FSCA FPUL, FRn {:
2.352 check_fpuen();
2.353 + load_spreg( R_ECX, R_FPSCR );
2.354 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.355 + JNE_rel8( 21, doubleprec );
2.356 + load_fr_bank( R_ECX );
2.357 + ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
2.358 + load_spreg( R_EDX, R_FPUL );
2.359 + call_func2( sh4_fsca, R_EDX, R_ECX );
2.360 + JMP_TARGET(doubleprec);
2.361 :}
2.362 FIPR FVm, FVn {:
2.363 check_fpuen();
2.364 + load_spreg( R_ECX, R_FPSCR );
2.365 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.366 + JNE_rel8(44, doubleprec);
2.367 +
2.368 + load_fr_bank( R_ECX );
2.369 + push_fr( R_ECX, FVm<<2 );
2.370 + push_fr( R_ECX, FVn<<2 );
2.371 + FMULP_st(1);
2.372 + push_fr( R_ECX, (FVm<<2)+1);
2.373 + push_fr( R_ECX, (FVn<<2)+1);
2.374 + FMULP_st(1);
2.375 + FADDP_st(1);
2.376 + push_fr( R_ECX, (FVm<<2)+2);
2.377 + push_fr( R_ECX, (FVn<<2)+2);
2.378 + FMULP_st(1);
2.379 + FADDP_st(1);
2.380 + push_fr( R_ECX, (FVm<<2)+3);
2.381 + push_fr( R_ECX, (FVn<<2)+3);
2.382 + FMULP_st(1);
2.383 + FADDP_st(1);
2.384 + pop_fr( R_ECX, (FVn<<2)+3);
2.385 + JMP_TARGET(doubleprec);
2.386 :}
2.387 FTRV XMTRX, FVn {:
2.388 check_fpuen();
2.389 + load_spreg( R_ECX, R_FPSCR );
2.390 + TEST_imm32_r32( FPSCR_PR, R_ECX );
2.391 + JNE_rel8( 30, doubleprec );
2.392 + load_fr_bank( R_EDX ); // 3
2.393 + ADD_imm8s_r32( FVn<<4, R_EDX ); // 3
2.394 + load_xf_bank( R_ECX ); // 12
2.395 + call_func2( sh4_ftrv, R_EDX, R_ECX ); // 12
2.396 + JMP_TARGET(doubleprec);
2.397 :}
2.398
2.399 FRCHG {:
2.400 @@ -2139,7 +2263,13 @@
2.401 JMP_TARGET(end);
2.402 ADD_imm8s_r32( 4, R_ESP );
2.403 :}
2.404 -SLEEP {: /* TODO */ :}
2.405 +SLEEP {:
2.406 + check_priv();
2.407 + call_func0( sh4_sleep );
2.408 + sh4_x86.exit_code = 0;
2.409 + sh4_x86.in_delay_slot = FALSE;
2.410 + return 1;
2.411 +:}
2.412 STC SR, Rn {:
2.413 check_priv();
2.414 call_func0(sh4_read_sr);
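The sh4_ftrv() helper added at the top of this file looks scrambled only because both operands live in the word-swapped sh4r.fr bank layout; once the pair swap is undone it is the ordinary column-major XMTRX * FV product. A small sketch of that equivalence, where swap_pairs() and ftrv_plain() are illustrative names rather than lxdream functions:

/* sh4r.fr bank layout: each adjacent pair of floats is stored swapped */
static void swap_pairs( float *dst, const float *src, int n )
{
    for( int i = 0; i < n; i += 2 ) {
        dst[i]   = src[i+1];
        dst[i+1] = src[i];
    }
}

/* Plain column-major 4x4 matrix-vector product: element (r,c) is m[c*4 + r] */
static void ftrv_plain( float *out, const float *v, const float *m )
{
    for( int r = 0; r < 4; r++ ) {
        out[r] = m[r]*v[0] + m[4+r]*v[1] + m[8+r]*v[2] + m[12+r]*v[3];
    }
}

/* Un-swapping fv and xf with swap_pairs(), running ftrv_plain(), and swapping
 * the result back produces the same values sh4_ftrv() writes into target. */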
3.1 --- a/src/sh4/x86op.h Tue Sep 18 08:58:23 2007 +0000
3.2 +++ b/src/sh4/x86op.h Tue Sep 18 08:59:00 2007 +0000
3.3 @@ -1,5 +1,5 @@
3.4 /**
3.5 - * $Id: x86op.h,v 1.8 2007-09-16 07:03:23 nkeynes Exp $
3.6 + * $Id: x86op.h,v 1.9 2007-09-18 08:59:00 nkeynes Exp $
3.7 *
3.8 * Definitions of x86 opcodes for use by the translator.
3.9 *
3.10 @@ -120,6 +120,7 @@
3.11 #define MOV_r32_sh4r(r1,disp) OP(0x89); MODRM_r32_sh4r(r1,disp)
3.12 #define MOV_moff32_EAX(off) OP(0xA1); OP32(off)
3.13 #define MOV_sh4r_r32(disp, r1) OP(0x8B); MODRM_r32_sh4r(r1,disp)
3.14 +#define MOV_r32ind_r32(r1,r2) OP(0x8B); OP(0 + (r2<<3) + r1 )
3.15 #define MOVSX_r8_r32(r1,r2) OP(0x0F); OP(0xBE); MODRM_rm32_r32(r1,r2)
3.16 #define MOVSX_r16_r32(r1,r2) OP(0x0F); OP(0xBF); MODRM_rm32_r32(r1,r2)
3.17 #define MOVZX_r8_r32(r1,r2) OP(0x0F); OP(0xB6); MODRM_rm32_r32(r1,r2)
3.18 @@ -128,7 +129,7 @@
3.19 #define NEG_r32(r1) OP(0xF7); MODRM_rm32_r32(r1,3)
3.20 #define NOT_r32(r1) OP(0xF7); MODRM_rm32_r32(r1,2)
3.21 #define OR_r32_r32(r1,r2) OP(0x0B); MODRM_rm32_r32(r1,r2)
3.22 -#define OR_imm8_r8(imm,r1) OP(0x80); MODRM_rm32_r32(r1,1)
3.23 +#define OR_imm8_r8(imm,r1) OP(0x80); MODRM_rm32_r32(r1,1); OP(imm)
3.24 #define OR_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,1); OP32(imm)
3.25 #define OR_sh4r_r32(disp,r1) OP(0x0B); MODRM_r32_sh4r(r1,disp)
3.26 #define POP_r32(r1) OP(0x58 + r1)
3.27 @@ -171,12 +172,13 @@
3.28 #define FCOMIP_st(st) OP(0xDF); OP(0xF0+st)
3.29 #define FDIVP_st(st) OP(0xDE); OP(0xF8+st)
3.30 #define FILD_sh4r(disp) OP(0xDB); MODRM_r32_sh4r(0, disp)
3.31 -#define FISTTP_shr4(disp) OP(0xDB); MODRM_r32_sh4r(1, disp)
3.32 +#define FILD_r32ind(r32) OP(0xDB); OP(0x00+r32)
3.33 +#define FISTP_sh4r(disp) OP(0xDB); MODRM_r32_sh4r(3, disp)
3.34 #define FLD0_st0() OP(0xD9); OP(0xEE);
3.35 #define FLD1_st0() OP(0xD9); OP(0xE8);
3.36 #define FMULP_st(st) OP(0xDE); OP(0xC8+st)
3.37 #define FPOP_st() OP(0xDD); OP(0xC0); OP(0xD9); OP(0xF7)
3.38 -#define FSUB_st(st) OP(0xDE); OP(0xE8+st)
3.39 +#define FSUBP_st(st) OP(0xDE); OP(0xE8+st)
3.40 #define FSQRT_st0() OP(0xD9); OP(0xFA)
3.41
3.42 /* Conditional branches */