Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 388:13bae2fb0373
prev386:6fb10951326a
next394:7eb172bfeefe
author nkeynes
date Tue Sep 18 08:59:00 2007 +0000 (12 years ago)
permissions -rw-r--r--
last change More fixes and complete missing instructions
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Sun Sep 16 07:03:23 2007 +0000
1.2 +++ b/src/sh4/sh4x86.in Tue Sep 18 08:59:00 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.in,v 1.10 2007-09-16 07:03:23 nkeynes Exp $
1.6 + * $Id: sh4x86.in,v 1.11 2007-09-18 08:59:00 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -19,6 +19,7 @@
1.11 */
1.12
1.13 #include <assert.h>
1.14 +#include <math.h>
1.15
1.16 #ifndef NDEBUG
1.17 #define DEBUG_JUMPS 1
1.18 @@ -26,6 +27,7 @@
1.19
1.20 #include "sh4/sh4core.h"
1.21 #include "sh4/sh4trans.h"
1.22 +#include "sh4/sh4mmio.h"
1.23 #include "sh4/x86op.h"
1.24 #include "clock.h"
1.25
1.26 @@ -40,6 +42,7 @@
1.27 gboolean in_delay_slot;
1.28 gboolean priv_checked; /* true if we've already checked the cpu mode. */
1.29 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
1.30 + int exit_code;
1.31
1.32 /* Allocated memory for the (block-wide) back-patch list */
1.33 uint32_t **backpatch_list;
1.34 @@ -56,6 +59,8 @@
1.35
1.36 static struct sh4_x86_state sh4_x86;
1.37
1.38 +static uint32_t max_int = 0x7FFFFFFF;
1.39 +static uint32_t min_int = 0x80000000;
1.40 void signsat48( void )
1.41 {
1.42 if( ((int64_t)sh4r.mac) < (int64_t)0xFFFF800000000000LL )
1.43 @@ -64,6 +69,40 @@
1.44 sh4r.mac = 0x00007FFFFFFFFFFFLL;
1.45 }
1.46
1.47 +void sh4_fsca( uint32_t anglei, float *fr )
1.48 +{
1.49 + float angle = (((float)(anglei&0xFFFF))/65536.0) * 2 * M_PI;
1.50 + *fr++ = cosf(angle);
1.51 + *fr = sinf(angle);
1.52 +}
1.53 +
1.54 +void sh4_sleep()
1.55 +{
1.56 + if( MMIO_READ( CPG, STBCR ) & 0x80 ) {
1.57 + sh4r.sh4_state = SH4_STATE_STANDBY;
1.58 + } else {
1.59 + sh4r.sh4_state = SH4_STATE_SLEEP;
1.60 + }
1.61 +}
1.62 +
1.63 +/**
1.64 + * Compute the matrix transform of fv given the matrix xf.
1.65 + * Both fv and xf are word-swapped as per the sh4r.fr banks
1.66 + */
1.67 +void sh4_ftrv( float *target, float *xf )
1.68 +{
1.69 + float fv[4] = { target[1], target[0], target[3], target[2] };
1.70 + target[1] = xf[1] * fv[0] + xf[5]*fv[1] +
1.71 + xf[9]*fv[2] + xf[13]*fv[3];
1.72 + target[0] = xf[0] * fv[0] + xf[4]*fv[1] +
1.73 + xf[8]*fv[2] + xf[12]*fv[3];
1.74 + target[3] = xf[3] * fv[0] + xf[7]*fv[1] +
1.75 + xf[11]*fv[2] + xf[15]*fv[3];
1.76 + target[2] = xf[2] * fv[0] + xf[6]*fv[1] +
1.77 + xf[10]*fv[2] + xf[14]*fv[3];
1.78 +}
1.79 +
1.80 +
1.81
1.82 void sh4_x86_init()
1.83 {
1.84 @@ -161,7 +200,7 @@
1.85 /**
1.86 * Load a pointer to the back fp bank into the specified x86 register. The
1.87 * bankreg must have been previously loaded with FPSCR.
1.88 - * NB: 10 bytes
1.89 + * NB: 12 bytes
1.90 */
1.91 static inline void load_xf_bank( int bankreg )
1.92 {
1.93 @@ -344,14 +383,6 @@
1.94 JNE_exit(EXIT_DATA_ADDR_WRITE);
1.95 }
1.96
1.97 -static inline void raise_exception( int exc )
1.98 -{
1.99 - PUSH_imm32(exc);
1.100 - call_func0(sh4_raise_exception);
1.101 - ADD_imm8s_r32( 4, R_ESP );
1.102 - sh4_x86.in_delay_slot = FALSE;
1.103 -}
1.104 -
1.105 #define UNDEF()
1.106 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
1.107 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
1.108 @@ -361,7 +392,6 @@
1.109 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
1.110 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
1.111
1.112 -#define RAISE_EXCEPTION( exc ) raise_exception(exc); return 1;
1.113 #define SLOTILLEGAL() JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
1.114
1.115
1.116 @@ -383,6 +413,7 @@
1.117 sh4_x86.priv_checked = FALSE;
1.118 sh4_x86.fpuen_checked = FALSE;
1.119 sh4_x86.backpatch_posn = 0;
1.120 + sh4_x86.exit_code = 1;
1.121 }
1.122
1.123 /**
1.124 @@ -396,7 +427,7 @@
1.125 MUL_r32( R_ESI );
1.126 ADD_r32_r32( R_EAX, R_ECX );
1.127 store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.128 - load_imm32( R_EAX, 1 );
1.129 + load_imm32( R_EAX, sh4_x86.exit_code );
1.130 POP_r32(R_ESI);
1.131 POP_r32(R_EDI);
1.132 POP_r32(R_EBP);
1.133 @@ -411,46 +442,53 @@
1.134 // Normal termination - save PC, cycle count
1.135 exit_block( );
1.136
1.137 - uint8_t *end_ptr = xlat_output;
1.138 - // Exception termination. Jump block for various exception codes:
1.139 - PUSH_imm32( EXC_DATA_ADDR_READ );
1.140 - JMP_rel8( 33, target1 );
1.141 - PUSH_imm32( EXC_DATA_ADDR_WRITE );
1.142 - JMP_rel8( 26, target2 );
1.143 - PUSH_imm32( EXC_ILLEGAL );
1.144 - JMP_rel8( 19, target3 );
1.145 - PUSH_imm32( EXC_SLOT_ILLEGAL );
1.146 - JMP_rel8( 12, target4 );
1.147 - PUSH_imm32( EXC_FPU_DISABLED );
1.148 - JMP_rel8( 5, target5 );
1.149 - PUSH_imm32( EXC_SLOT_FPU_DISABLED );
1.150 - // target
1.151 - JMP_TARGET(target1);
1.152 - JMP_TARGET(target2);
1.153 - JMP_TARGET(target3);
1.154 - JMP_TARGET(target4);
1.155 - JMP_TARGET(target5);
1.156 - load_spreg( R_ECX, REG_OFFSET(pc) );
1.157 - ADD_r32_r32( R_ESI, R_ECX );
1.158 - ADD_r32_r32( R_ESI, R_ECX );
1.159 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.160 - MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.161 - load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.162 - MUL_r32( R_ESI );
1.163 - ADD_r32_r32( R_EAX, R_ECX );
1.164 - store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.165 + if( sh4_x86.backpatch_posn != 0 ) {
1.166 + uint8_t *end_ptr = xlat_output;
1.167 + // Exception termination. Jump block for various exception codes:
1.168 + PUSH_imm32( EXC_DATA_ADDR_READ );
1.169 + JMP_rel8( 33, target1 );
1.170 + PUSH_imm32( EXC_DATA_ADDR_WRITE );
1.171 + JMP_rel8( 26, target2 );
1.172 + PUSH_imm32( EXC_ILLEGAL );
1.173 + JMP_rel8( 19, target3 );
1.174 + PUSH_imm32( EXC_SLOT_ILLEGAL );
1.175 + JMP_rel8( 12, target4 );
1.176 + PUSH_imm32( EXC_FPU_DISABLED );
1.177 + JMP_rel8( 5, target5 );
1.178 + PUSH_imm32( EXC_SLOT_FPU_DISABLED );
1.179 + // target
1.180 + JMP_TARGET(target1);
1.181 + JMP_TARGET(target2);
1.182 + JMP_TARGET(target3);
1.183 + JMP_TARGET(target4);
1.184 + JMP_TARGET(target5);
1.185 + load_spreg( R_ECX, REG_OFFSET(pc) );
1.186 + ADD_r32_r32( R_ESI, R_ECX );
1.187 + ADD_r32_r32( R_ESI, R_ECX );
1.188 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.189 + MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.190 + load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.191 + MUL_r32( R_ESI );
1.192 + ADD_r32_r32( R_EAX, R_ECX );
1.193 + store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.194 +
1.195 + load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
1.196 + CALL_r32( R_EAX ); // 2
1.197 + ADD_imm8s_r32( 4, R_ESP );
1.198 + POP_r32(R_ESI);
1.199 + POP_r32(R_EDI);
1.200 + POP_r32(R_EBP);
1.201 + RET();
1.202
1.203 - load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
1.204 - CALL_r32( R_EAX ); // 2
1.205 - ADD_imm8s_r32( 4, R_ESP );
1.206 - POP_r32(R_ESI);
1.207 - POP_r32(R_EDI);
1.208 - POP_r32(R_EBP);
1.209 - RET();
1.210 + sh4_x86_do_backpatch( end_ptr );
1.211 + }
1.212
1.213 - sh4_x86_do_backpatch( end_ptr );
1.214 }
1.215
1.216 +
1.217 +extern uint16_t *sh4_icache;
1.218 +extern uint32_t sh4_icache_addr;
1.219 +
1.220 /**
1.221 * Translate a single instruction. Delayed branches are handled specially
1.222 * by translating both branch and delayed instruction as a single unit (as
1.223 @@ -461,8 +499,25 @@
1.224 */
1.225 uint32_t sh4_x86_translate_instruction( uint32_t pc )
1.226 {
1.227 - uint16_t ir = sh4_read_word( pc );
1.228 -
1.229 + uint32_t ir;
1.230 + /* Read instruction */
1.231 + uint32_t pageaddr = pc >> 12;
1.232 + if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
1.233 + ir = sh4_icache[(pc&0xFFF)>>1];
1.234 + } else {
1.235 + sh4_icache = (uint16_t *)mem_get_page(pc);
1.236 + if( ((uint32_t)sh4_icache) < MAX_IO_REGIONS ) {
1.237 + /* If someone's actually been so daft as to try to execute out of an IO
1.238 + * region, fallback on the full-blown memory read
1.239 + */
1.240 + sh4_icache = NULL;
1.241 + ir = sh4_read_word(pc);
1.242 + } else {
1.243 + sh4_icache_addr = pageaddr;
1.244 + ir = sh4_icache[(pc&0xFFF)>>1];
1.245 + }
1.246 + }
1.247 +
1.248 %%
1.249 /* ALU operations */
1.250 ADD Rm, Rn {:
1.251 @@ -1130,8 +1185,14 @@
1.252 if( sh4_x86.in_delay_slot ) {
1.253 SLOTILLEGAL();
1.254 } else {
1.255 - load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.256 - MEM_READ_LONG( R_ECX, R_EAX );
1.257 + uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1.258 + char *ptr = mem_get_region(target);
1.259 + if( ptr != NULL ) {
1.260 + MOV_moff32_EAX( (uint32_t)ptr );
1.261 + } else {
1.262 + load_imm32( R_ECX, target );
1.263 + MEM_READ_LONG( R_ECX, R_EAX );
1.264 + }
1.265 store_reg( R_EAX, Rn );
1.266 }
1.267 :}
1.268 @@ -1381,8 +1442,9 @@
1.269 if( sh4_x86.in_delay_slot ) {
1.270 SLOTILLEGAL();
1.271 } else {
1.272 - // TODO: Write TRA
1.273 - RAISE_EXCEPTION(EXC_TRAP);
1.274 + PUSH_imm32( imm );
1.275 + call_func0( sh4_raise_trap );
1.276 + ADD_imm8s_r32( 4, R_ESP );
1.277 }
1.278 :}
1.279 UNDEF {:
1.280 @@ -1443,8 +1505,8 @@
1.281 store_fr( R_ECX, R_EDX, FRn );
1.282 } else /* FRn&1 == 0 */ {
1.283 load_fr( R_ECX, R_ECX, FRm );
1.284 - store_fr( R_EDX, R_EAX, FRn-1 );
1.285 - store_fr( R_EDX, R_ECX, FRn );
1.286 + store_fr( R_EDX, R_EAX, FRn );
1.287 + store_fr( R_EDX, R_ECX, FRn+1 );
1.288 }
1.289 JMP_TARGET(end);
1.290 } else /* FRm&1 == 0 */ {
1.291 @@ -1687,7 +1749,32 @@
1.292 :}
1.293 FTRC FRm, FPUL {:
1.294 check_fpuen();
1.295 - // TODO
1.296 + load_spreg( R_ECX, R_FPSCR );
1.297 + load_fr_bank( R_EDX );
1.298 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.299 + JNE_rel8(5, doubleprec);
1.300 + push_fr( R_EDX, FRm );
1.301 + JMP_rel8(3, doop);
1.302 + JMP_TARGET(doubleprec);
1.303 + push_dr( R_EDX, FRm );
1.304 + JMP_TARGET( doop );
1.305 + load_imm32( R_ECX, (uint32_t)&max_int );
1.306 + FILD_r32ind( R_ECX );
1.307 + FCOMIP_st(1);
1.308 + JNA_rel8( 16, sat );
1.309 + load_imm32( R_ECX, (uint32_t)&min_int ); // 5
1.310 + FILD_r32ind( R_ECX ); // 2
1.311 + FCOMIP_st(1); // 2
1.312 + JAE_rel8( 5, sat2 ); // 2
1.313 + FISTP_sh4r(R_FPUL); // 3
1.314 + JMP_rel8( 9, end ); // 2
1.315 +
1.316 + JMP_TARGET(sat);
1.317 + JMP_TARGET(sat2);
1.318 + MOV_r32ind_r32( R_ECX, R_ECX ); // 2
1.319 + store_spreg( R_ECX, R_FPUL );
1.320 + FPOP_st();
1.321 + JMP_TARGET(end);
1.322 :}
1.323 FLDS FRm, FPUL {:
1.324 check_fpuen();
1.325 @@ -1713,7 +1800,6 @@
1.326 :}
1.327 FCNVSD FPUL, FRn {:
1.328 check_fpuen();
1.329 - check_fpuen();
1.330 load_spreg( R_ECX, R_FPSCR );
1.331 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.332 JE_rel8(9, end); // only when PR=1
1.333 @@ -1870,13 +1956,13 @@
1.334 JNE_rel8(13, doubleprec);
1.335 push_fr(R_EDX, FRn);
1.336 push_fr(R_EDX, FRm);
1.337 - FMULP_st(1);
1.338 + FSUBP_st(1);
1.339 pop_fr(R_EDX, FRn);
1.340 JMP_rel8(11, end);
1.341 JMP_TARGET(doubleprec);
1.342 push_dr(R_EDX, FRn);
1.343 push_dr(R_EDX, FRm);
1.344 - FMULP_st(1);
1.345 + FSUBP_st(1);
1.346 pop_dr(R_EDX, FRn);
1.347 JMP_TARGET(end);
1.348 :}
1.349 @@ -1918,12 +2004,50 @@
1.350
1.351 FSCA FPUL, FRn {:
1.352 check_fpuen();
1.353 + load_spreg( R_ECX, R_FPSCR );
1.354 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.355 + JNE_rel8( 21, doubleprec );
1.356 + load_fr_bank( R_ECX );
1.357 + ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
1.358 + load_spreg( R_EDX, R_FPUL );
1.359 + call_func2( sh4_fsca, R_EDX, R_ECX );
1.360 + JMP_TARGET(doubleprec);
1.361 :}
1.362 FIPR FVm, FVn {:
1.363 check_fpuen();
1.364 + load_spreg( R_ECX, R_FPSCR );
1.365 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.366 + JNE_rel8(44, doubleprec);
1.367 +
1.368 + load_fr_bank( R_ECX );
1.369 + push_fr( R_ECX, FVm<<2 );
1.370 + push_fr( R_ECX, FVn<<2 );
1.371 + FMULP_st(1);
1.372 + push_fr( R_ECX, (FVm<<2)+1);
1.373 + push_fr( R_ECX, (FVn<<2)+1);
1.374 + FMULP_st(1);
1.375 + FADDP_st(1);
1.376 + push_fr( R_ECX, (FVm<<2)+2);
1.377 + push_fr( R_ECX, (FVn<<2)+2);
1.378 + FMULP_st(1);
1.379 + FADDP_st(1);
1.380 + push_fr( R_ECX, (FVm<<2)+3);
1.381 + push_fr( R_ECX, (FVn<<2)+3);
1.382 + FMULP_st(1);
1.383 + FADDP_st(1);
1.384 + pop_fr( R_ECX, (FVn<<2)+3);
1.385 + JMP_TARGET(doubleprec);
1.386 :}
1.387 FTRV XMTRX, FVn {:
1.388 check_fpuen();
1.389 + load_spreg( R_ECX, R_FPSCR );
1.390 + TEST_imm32_r32( FPSCR_PR, R_ECX );
1.391 + JNE_rel8( 30, doubleprec );
1.392 + load_fr_bank( R_EDX ); // 3
1.393 + ADD_imm8s_r32( FVn<<4, R_EDX ); // 3
1.394 + load_xf_bank( R_ECX ); // 12
1.395 + call_func2( sh4_ftrv, R_EDX, R_ECX ); // 12
1.396 + JMP_TARGET(doubleprec);
1.397 :}
1.398
1.399 FRCHG {:
1.400 @@ -2139,7 +2263,13 @@
1.401 JMP_TARGET(end);
1.402 ADD_imm8s_r32( 4, R_ESP );
1.403 :}
1.404 -SLEEP {: /* TODO */ :}
1.405 +SLEEP {:
1.406 + check_priv();
1.407 + call_func0( sh4_sleep );
1.408 + sh4_x86.exit_code = 0;
1.409 + sh4_x86.in_delay_slot = FALSE;
1.410 + return 1;
1.411 +:}
1.412 STC SR, Rn {:
1.413 check_priv();
1.414 call_func0(sh4_read_sr);
.