Search
lxdream.org :: lxdream :: r386:6fb10951326a
lxdream 0.9.1
released Jun 29
Download Now
changeset 386:6fb10951326a
parent 385:766eca01ef4d
child 387:38e9fddbf0e3
author nkeynes
date Sun Sep 16 07:03:23 2007 +0000 (16 years ago)
Implement MAC.W, MAC.L and DIV1
Correct SHAD/SHLD
Fix privilege and slot illegal checks on LDC/STC opcodes
Fix various other small bugs
src/sh4/sh4x86.c
src/sh4/sh4x86.in
src/sh4/x86op.h
1.1 --- a/src/sh4/sh4x86.c Sun Sep 16 07:01:35 2007 +0000
1.2 +++ b/src/sh4/sh4x86.c Sun Sep 16 07:03:23 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.c,v 1.8 2007-09-12 11:41:43 nkeynes Exp $
1.6 + * $Id: sh4x86.c,v 1.9 2007-09-16 07:03:23 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -56,6 +56,15 @@
1.11
1.12 static struct sh4_x86_state sh4_x86;
1.13
1.14 +void signsat48( void )
1.15 +{
1.16 + if( ((int64_t)sh4r.mac) < (int64_t)0xFFFF800000000000LL )
1.17 + sh4r.mac = 0xFFFF800000000000LL;
1.18 + else if( ((int64_t)sh4r.mac) > (int64_t)0x00007FFFFFFFFFFFLL )
1.19 + sh4r.mac = 0x00007FFFFFFFFFFFLL;
1.20 +}
1.21 +
1.22 +
1.23 void sh4_x86_init()
1.24 {
1.25 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
1.26 @@ -156,12 +165,23 @@
1.27 */
1.28 static inline void load_xf_bank( int bankreg )
1.29 {
1.30 + NOT_r32( bankreg );
1.31 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
1.32 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
1.33 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
1.34 }
1.35
1.36 /**
1.37 + * Update the fr_bank pointer based on the current fpscr value.
1.38 + */
1.39 +static inline void update_fr_bank( int fpscrreg )
1.40 +{
1.41 + SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
1.42 + AND_imm8s_r32( 0x40, fpscrreg ); // Complete extraction
1.43 + OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
1.44 + store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
1.45 +}
1.46 +/**
1.47 * Push FPUL (as a 32-bit float) onto the FPU stack
1.48 */
1.49 static inline void push_fpul( )
1.50 @@ -242,11 +262,11 @@
1.51 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.52 {
1.53 ADD_imm8s_r32( 4, addr );
1.54 + PUSH_r32(arg2b);
1.55 PUSH_r32(addr);
1.56 - PUSH_r32(arg2b);
1.57 ADD_imm8s_r32( -4, addr );
1.58 + PUSH_r32(arg2a);
1.59 PUSH_r32(addr);
1.60 - PUSH_r32(arg2a);
1.61 call_func0(sh4_write_long);
1.62 ADD_imm8s_r32( 8, R_ESP );
1.63 call_func0(sh4_write_long);
1.64 @@ -324,6 +344,13 @@
1.65 JNE_exit(EXIT_DATA_ADDR_WRITE);
1.66 }
1.67
1.68 +static inline void raise_exception( int exc )
1.69 +{
1.70 + PUSH_imm32(exc);
1.71 + call_func0(sh4_raise_exception);
1.72 + ADD_imm8s_r32( 4, R_ESP );
1.73 + sh4_x86.in_delay_slot = FALSE;
1.74 +}
1.75
1.76 #define UNDEF()
1.77 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
1.78 @@ -334,8 +361,8 @@
1.79 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
1.80 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
1.81
1.82 -#define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
1.83 -#define SLOTILLEGAL() RAISE_EXCEPTION(EXC_SLOT_ILLEGAL); return 1
1.84 +#define RAISE_EXCEPTION( exc ) raise_exception(exc); return 1;
1.85 +#define SLOTILLEGAL() JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
1.86
1.87
1.88
1.89 @@ -415,6 +442,9 @@
1.90
1.91 load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
1.92 CALL_r32( R_EAX ); // 2
1.93 + ADD_imm8s_r32( 4, R_ESP );
1.94 + POP_r32(R_ESI);
1.95 + POP_r32(R_EDI);
1.96 POP_r32(R_EBP);
1.97 RET();
1.98
1.99 @@ -443,6 +473,7 @@
1.100 case 0x0:
1.101 { /* STC SR, Rn */
1.102 uint32_t Rn = ((ir>>8)&0xF);
1.103 + check_priv();
1.104 call_func0(sh4_read_sr);
1.105 store_reg( R_EAX, Rn );
1.106 }
1.107 @@ -457,6 +488,7 @@
1.108 case 0x2:
1.109 { /* STC VBR, Rn */
1.110 uint32_t Rn = ((ir>>8)&0xF);
1.111 + check_priv();
1.112 load_spreg( R_EAX, R_VBR );
1.113 store_reg( R_EAX, Rn );
1.114 }
1.115 @@ -464,6 +496,7 @@
1.116 case 0x3:
1.117 { /* STC SSR, Rn */
1.118 uint32_t Rn = ((ir>>8)&0xF);
1.119 + check_priv();
1.120 load_spreg( R_EAX, R_SSR );
1.121 store_reg( R_EAX, Rn );
1.122 }
1.123 @@ -471,6 +504,7 @@
1.124 case 0x4:
1.125 { /* STC SPC, Rn */
1.126 uint32_t Rn = ((ir>>8)&0xF);
1.127 + check_priv();
1.128 load_spreg( R_EAX, R_SPC );
1.129 store_reg( R_EAX, Rn );
1.130 }
1.131 @@ -483,6 +517,7 @@
1.132 case 0x1:
1.133 { /* STC Rm_BANK, Rn */
1.134 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm_BANK = ((ir>>4)&0x7);
1.135 + check_priv();
1.136 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
1.137 store_reg( R_EAX, Rn );
1.138 }
1.139 @@ -502,7 +537,6 @@
1.140 load_reg( R_EDI, Rn );
1.141 ADD_r32_r32( R_EAX, R_EDI );
1.142 sh4_x86.in_delay_slot = TRUE;
1.143 - INC_r32(R_ESI);
1.144 return 0;
1.145 }
1.146 }
1.147 @@ -514,8 +548,8 @@
1.148 SLOTILLEGAL();
1.149 } else {
1.150 load_reg( R_EDI, Rn );
1.151 + ADD_imm32_r32( pc + 4, R_EDI );
1.152 sh4_x86.in_delay_slot = TRUE;
1.153 - INC_r32(R_ESI);
1.154 return 0;
1.155 }
1.156 }
1.157 @@ -698,6 +732,7 @@
1.158 case 0x3:
1.159 { /* STC SGR, Rn */
1.160 uint32_t Rn = ((ir>>8)&0xF);
1.161 + check_priv();
1.162 load_spreg( R_EAX, R_SGR );
1.163 store_reg( R_EAX, Rn );
1.164 }
1.165 @@ -719,6 +754,7 @@
1.166 case 0xF:
1.167 { /* STC DBR, Rn */
1.168 uint32_t Rn = ((ir>>8)&0xF);
1.169 + check_priv();
1.170 load_spreg( R_EAX, R_DBR );
1.171 store_reg( R_EAX, Rn );
1.172 }
1.173 @@ -737,7 +773,6 @@
1.174 } else {
1.175 load_spreg( R_EDI, R_PR );
1.176 sh4_x86.in_delay_slot = TRUE;
1.177 - INC_r32(R_ESI);
1.178 return 0;
1.179 }
1.180 }
1.181 @@ -753,13 +788,12 @@
1.182 if( sh4_x86.in_delay_slot ) {
1.183 SLOTILLEGAL();
1.184 } else {
1.185 - load_spreg( R_EDI, R_PR );
1.186 + load_spreg( R_EDI, R_SPC );
1.187 load_spreg( R_EAX, R_SSR );
1.188 call_func1( sh4_write_sr, R_EAX );
1.189 sh4_x86.in_delay_slot = TRUE;
1.190 sh4_x86.priv_checked = FALSE;
1.191 sh4_x86.fpuen_checked = FALSE;
1.192 - INC_r32(R_ESI);
1.193 return 0;
1.194 }
1.195 }
1.196 @@ -804,6 +838,26 @@
1.197 case 0xF:
1.198 { /* MAC.L @Rm+, @Rn+ */
1.199 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.200 + load_reg( R_ECX, Rm );
1.201 + check_ralign32( R_ECX );
1.202 + load_reg( R_ECX, Rn );
1.203 + check_ralign32( R_ECX );
1.204 + ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
1.205 + MEM_READ_LONG( R_ECX, R_EAX );
1.206 + PUSH_r32( R_EAX );
1.207 + load_reg( R_ECX, Rm );
1.208 + ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1.209 + MEM_READ_LONG( R_ECX, R_EAX );
1.210 + POP_r32( R_ECX );
1.211 + IMUL_r32( R_ECX );
1.212 + ADD_r32_sh4r( R_EAX, R_MACL );
1.213 + ADC_r32_sh4r( R_EDX, R_MACH );
1.214 +
1.215 + load_spreg( R_ECX, R_S );
1.216 + TEST_r32_r32(R_ECX, R_ECX);
1.217 + JE_rel8( 7, nosat );
1.218 + call_func0( signsat48 );
1.219 + JMP_TARGET( nosat );
1.220 }
1.221 break;
1.222 default:
1.223 @@ -836,8 +890,8 @@
1.224 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.225 load_reg( R_ECX, Rn );
1.226 check_walign16( R_ECX );
1.227 - MEM_READ_WORD( R_ECX, R_EAX );
1.228 - store_reg( R_EAX, Rn );
1.229 + load_reg( R_EAX, Rm );
1.230 + MEM_WRITE_WORD( R_ECX, R_EAX );
1.231 }
1.232 break;
1.233 case 0x2:
1.234 @@ -854,7 +908,7 @@
1.235 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.236 load_reg( R_EAX, Rm );
1.237 load_reg( R_ECX, Rn );
1.238 - ADD_imm8s_r32( -1, Rn );
1.239 + ADD_imm8s_r32( -1, R_ECX );
1.240 store_reg( R_ECX, Rn );
1.241 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.242 }
1.243 @@ -866,6 +920,7 @@
1.244 check_walign16( R_ECX );
1.245 load_reg( R_EAX, Rm );
1.246 ADD_imm8s_r32( -2, R_ECX );
1.247 + store_reg( R_ECX, Rn );
1.248 MEM_WRITE_WORD( R_ECX, R_EAX );
1.249 }
1.250 break;
1.251 @@ -884,13 +939,13 @@
1.252 { /* DIV0S Rm, Rn */
1.253 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.254 load_reg( R_EAX, Rm );
1.255 - load_reg( R_ECX, Rm );
1.256 + load_reg( R_ECX, Rn );
1.257 SHR_imm8_r32( 31, R_EAX );
1.258 SHR_imm8_r32( 31, R_ECX );
1.259 store_spreg( R_EAX, R_M );
1.260 store_spreg( R_ECX, R_Q );
1.261 CMP_r32_r32( R_EAX, R_ECX );
1.262 - SETE_t();
1.263 + SETNE_t();
1.264 }
1.265 break;
1.266 case 0x8:
1.267 @@ -1015,19 +1070,26 @@
1.268 case 0x4:
1.269 { /* DIV1 Rm, Rn */
1.270 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.271 - load_reg( R_ECX, Rn );
1.272 + load_spreg( R_ECX, R_M );
1.273 + load_reg( R_EAX, Rn );
1.274 LDC_t();
1.275 - RCL1_r32( R_ECX ); // OP2
1.276 - SETC_r32( R_EDX ); // Q
1.277 - load_spreg( R_EAX, R_Q );
1.278 - CMP_sh4r_r32( R_M, R_EAX );
1.279 - JE_rel8(8,mqequal);
1.280 - ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
1.281 - JMP_rel8(3, mqnotequal);
1.282 + RCL1_r32( R_EAX );
1.283 + SETC_r8( R_DL ); // Q'
1.284 + CMP_sh4r_r32( R_Q, R_ECX );
1.285 + JE_rel8(5, mqequal);
1.286 + ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1.287 + JMP_rel8(3, end);
1.288 JMP_TARGET(mqequal);
1.289 - SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
1.290 - JMP_TARGET(mqnotequal);
1.291 - // TODO
1.292 + SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1.293 + JMP_TARGET(end);
1.294 + store_reg( R_EAX, Rn ); // Done with Rn now
1.295 + SETC_r8(R_AL); // tmp1
1.296 + XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
1.297 + XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
1.298 + store_spreg( R_ECX, R_Q );
1.299 + XOR_imm8s_r32( 1, R_AL ); // T = !Q'
1.300 + MOVZX_r8_r32( R_AL, R_EAX );
1.301 + store_spreg( R_EAX, R_T );
1.302 }
1.303 break;
1.304 case 0x5:
1.305 @@ -1148,7 +1210,7 @@
1.306 { /* DT Rn */
1.307 uint32_t Rn = ((ir>>8)&0xF);
1.308 load_reg( R_EAX, Rn );
1.309 - ADD_imm8s_r32( -1, Rn );
1.310 + ADD_imm8s_r32( -1, R_EAX );
1.311 store_reg( R_EAX, Rn );
1.312 SETE_t();
1.313 }
1.314 @@ -1203,7 +1265,7 @@
1.315 { /* STS.L MACH, @-Rn */
1.316 uint32_t Rn = ((ir>>8)&0xF);
1.317 load_reg( R_ECX, Rn );
1.318 - ADD_imm8s_r32( -4, Rn );
1.319 + ADD_imm8s_r32( -4, R_ECX );
1.320 store_reg( R_ECX, Rn );
1.321 load_spreg( R_EAX, R_MACH );
1.322 MEM_WRITE_LONG( R_ECX, R_EAX );
1.323 @@ -1213,7 +1275,7 @@
1.324 { /* STS.L MACL, @-Rn */
1.325 uint32_t Rn = ((ir>>8)&0xF);
1.326 load_reg( R_ECX, Rn );
1.327 - ADD_imm8s_r32( -4, Rn );
1.328 + ADD_imm8s_r32( -4, R_ECX );
1.329 store_reg( R_ECX, Rn );
1.330 load_spreg( R_EAX, R_MACL );
1.331 MEM_WRITE_LONG( R_ECX, R_EAX );
1.332 @@ -1223,7 +1285,7 @@
1.333 { /* STS.L PR, @-Rn */
1.334 uint32_t Rn = ((ir>>8)&0xF);
1.335 load_reg( R_ECX, Rn );
1.336 - ADD_imm8s_r32( -4, Rn );
1.337 + ADD_imm8s_r32( -4, R_ECX );
1.338 store_reg( R_ECX, Rn );
1.339 load_spreg( R_EAX, R_PR );
1.340 MEM_WRITE_LONG( R_ECX, R_EAX );
1.341 @@ -1232,8 +1294,9 @@
1.342 case 0x3:
1.343 { /* STC.L SGR, @-Rn */
1.344 uint32_t Rn = ((ir>>8)&0xF);
1.345 + check_priv();
1.346 load_reg( R_ECX, Rn );
1.347 - ADD_imm8s_r32( -4, Rn );
1.348 + ADD_imm8s_r32( -4, R_ECX );
1.349 store_reg( R_ECX, Rn );
1.350 load_spreg( R_EAX, R_SGR );
1.351 MEM_WRITE_LONG( R_ECX, R_EAX );
1.352 @@ -1243,7 +1306,7 @@
1.353 { /* STS.L FPUL, @-Rn */
1.354 uint32_t Rn = ((ir>>8)&0xF);
1.355 load_reg( R_ECX, Rn );
1.356 - ADD_imm8s_r32( -4, Rn );
1.357 + ADD_imm8s_r32( -4, R_ECX );
1.358 store_reg( R_ECX, Rn );
1.359 load_spreg( R_EAX, R_FPUL );
1.360 MEM_WRITE_LONG( R_ECX, R_EAX );
1.361 @@ -1253,7 +1316,7 @@
1.362 { /* STS.L FPSCR, @-Rn */
1.363 uint32_t Rn = ((ir>>8)&0xF);
1.364 load_reg( R_ECX, Rn );
1.365 - ADD_imm8s_r32( -4, Rn );
1.366 + ADD_imm8s_r32( -4, R_ECX );
1.367 store_reg( R_ECX, Rn );
1.368 load_spreg( R_EAX, R_FPSCR );
1.369 MEM_WRITE_LONG( R_ECX, R_EAX );
1.370 @@ -1262,8 +1325,9 @@
1.371 case 0xF:
1.372 { /* STC.L DBR, @-Rn */
1.373 uint32_t Rn = ((ir>>8)&0xF);
1.374 + check_priv();
1.375 load_reg( R_ECX, Rn );
1.376 - ADD_imm8s_r32( -4, Rn );
1.377 + ADD_imm8s_r32( -4, R_ECX );
1.378 store_reg( R_ECX, Rn );
1.379 load_spreg( R_EAX, R_DBR );
1.380 MEM_WRITE_LONG( R_ECX, R_EAX );
1.381 @@ -1281,8 +1345,9 @@
1.382 case 0x0:
1.383 { /* STC.L SR, @-Rn */
1.384 uint32_t Rn = ((ir>>8)&0xF);
1.385 + check_priv();
1.386 load_reg( R_ECX, Rn );
1.387 - ADD_imm8s_r32( -4, Rn );
1.388 + ADD_imm8s_r32( -4, R_ECX );
1.389 store_reg( R_ECX, Rn );
1.390 call_func0( sh4_read_sr );
1.391 MEM_WRITE_LONG( R_ECX, R_EAX );
1.392 @@ -1292,7 +1357,7 @@
1.393 { /* STC.L GBR, @-Rn */
1.394 uint32_t Rn = ((ir>>8)&0xF);
1.395 load_reg( R_ECX, Rn );
1.396 - ADD_imm8s_r32( -4, Rn );
1.397 + ADD_imm8s_r32( -4, R_ECX );
1.398 store_reg( R_ECX, Rn );
1.399 load_spreg( R_EAX, R_GBR );
1.400 MEM_WRITE_LONG( R_ECX, R_EAX );
1.401 @@ -1301,8 +1366,9 @@
1.402 case 0x2:
1.403 { /* STC.L VBR, @-Rn */
1.404 uint32_t Rn = ((ir>>8)&0xF);
1.405 + check_priv();
1.406 load_reg( R_ECX, Rn );
1.407 - ADD_imm8s_r32( -4, Rn );
1.408 + ADD_imm8s_r32( -4, R_ECX );
1.409 store_reg( R_ECX, Rn );
1.410 load_spreg( R_EAX, R_VBR );
1.411 MEM_WRITE_LONG( R_ECX, R_EAX );
1.412 @@ -1311,8 +1377,9 @@
1.413 case 0x3:
1.414 { /* STC.L SSR, @-Rn */
1.415 uint32_t Rn = ((ir>>8)&0xF);
1.416 + check_priv();
1.417 load_reg( R_ECX, Rn );
1.418 - ADD_imm8s_r32( -4, Rn );
1.419 + ADD_imm8s_r32( -4, R_ECX );
1.420 store_reg( R_ECX, Rn );
1.421 load_spreg( R_EAX, R_SSR );
1.422 MEM_WRITE_LONG( R_ECX, R_EAX );
1.423 @@ -1321,8 +1388,9 @@
1.424 case 0x4:
1.425 { /* STC.L SPC, @-Rn */
1.426 uint32_t Rn = ((ir>>8)&0xF);
1.427 + check_priv();
1.428 load_reg( R_ECX, Rn );
1.429 - ADD_imm8s_r32( -4, Rn );
1.430 + ADD_imm8s_r32( -4, R_ECX );
1.431 store_reg( R_ECX, Rn );
1.432 load_spreg( R_EAX, R_SPC );
1.433 MEM_WRITE_LONG( R_ECX, R_EAX );
1.434 @@ -1336,8 +1404,9 @@
1.435 case 0x1:
1.436 { /* STC.L Rm_BANK, @-Rn */
1.437 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm_BANK = ((ir>>4)&0x7);
1.438 + check_priv();
1.439 load_reg( R_ECX, Rn );
1.440 - ADD_imm8s_r32( -4, Rn );
1.441 + ADD_imm8s_r32( -4, R_ECX );
1.442 store_reg( R_ECX, Rn );
1.443 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
1.444 MEM_WRITE_LONG( R_ECX, R_EAX );
1.445 @@ -1443,6 +1512,7 @@
1.446 case 0x3:
1.447 { /* LDC.L @Rm+, SGR */
1.448 uint32_t Rm = ((ir>>8)&0xF);
1.449 + check_priv();
1.450 load_reg( R_EAX, Rm );
1.451 MOV_r32_r32( R_EAX, R_ECX );
1.452 ADD_imm8s_r32( 4, R_EAX );
1.453 @@ -1471,11 +1541,13 @@
1.454 store_reg( R_EAX, Rm );
1.455 MEM_READ_LONG( R_ECX, R_EAX );
1.456 store_spreg( R_EAX, R_FPSCR );
1.457 + update_fr_bank( R_EAX );
1.458 }
1.459 break;
1.460 case 0xF:
1.461 { /* LDC.L @Rm+, DBR */
1.462 uint32_t Rm = ((ir>>8)&0xF);
1.463 + check_priv();
1.464 load_reg( R_EAX, Rm );
1.465 MOV_r32_r32( R_EAX, R_ECX );
1.466 ADD_imm8s_r32( 4, R_EAX );
1.467 @@ -1496,14 +1568,19 @@
1.468 case 0x0:
1.469 { /* LDC.L @Rm+, SR */
1.470 uint32_t Rm = ((ir>>8)&0xF);
1.471 - load_reg( R_EAX, Rm );
1.472 - MOV_r32_r32( R_EAX, R_ECX );
1.473 - ADD_imm8s_r32( 4, R_EAX );
1.474 - store_reg( R_EAX, Rm );
1.475 - MEM_READ_LONG( R_ECX, R_EAX );
1.476 - call_func1( sh4_write_sr, R_EAX );
1.477 - sh4_x86.priv_checked = FALSE;
1.478 - sh4_x86.fpuen_checked = FALSE;
1.479 + if( sh4_x86.in_delay_slot ) {
1.480 + SLOTILLEGAL();
1.481 + } else {
1.482 + check_priv();
1.483 + load_reg( R_EAX, Rm );
1.484 + MOV_r32_r32( R_EAX, R_ECX );
1.485 + ADD_imm8s_r32( 4, R_EAX );
1.486 + store_reg( R_EAX, Rm );
1.487 + MEM_READ_LONG( R_ECX, R_EAX );
1.488 + call_func1( sh4_write_sr, R_EAX );
1.489 + sh4_x86.priv_checked = FALSE;
1.490 + sh4_x86.fpuen_checked = FALSE;
1.491 + }
1.492 }
1.493 break;
1.494 case 0x1:
1.495 @@ -1520,6 +1597,7 @@
1.496 case 0x2:
1.497 { /* LDC.L @Rm+, VBR */
1.498 uint32_t Rm = ((ir>>8)&0xF);
1.499 + check_priv();
1.500 load_reg( R_EAX, Rm );
1.501 MOV_r32_r32( R_EAX, R_ECX );
1.502 ADD_imm8s_r32( 4, R_EAX );
1.503 @@ -1531,6 +1609,7 @@
1.504 case 0x3:
1.505 { /* LDC.L @Rm+, SSR */
1.506 uint32_t Rm = ((ir>>8)&0xF);
1.507 + check_priv();
1.508 load_reg( R_EAX, Rm );
1.509 MOV_r32_r32( R_EAX, R_ECX );
1.510 ADD_imm8s_r32( 4, R_EAX );
1.511 @@ -1542,6 +1621,7 @@
1.512 case 0x4:
1.513 { /* LDC.L @Rm+, SPC */
1.514 uint32_t Rm = ((ir>>8)&0xF);
1.515 + check_priv();
1.516 load_reg( R_EAX, Rm );
1.517 MOV_r32_r32( R_EAX, R_ECX );
1.518 ADD_imm8s_r32( 4, R_EAX );
1.519 @@ -1558,6 +1638,7 @@
1.520 case 0x1:
1.521 { /* LDC.L @Rm+, Rn_BANK */
1.522 uint32_t Rm = ((ir>>8)&0xF); uint32_t Rn_BANK = ((ir>>4)&0x7);
1.523 + check_priv();
1.524 load_reg( R_EAX, Rm );
1.525 MOV_r32_r32( R_EAX, R_ECX );
1.526 ADD_imm8s_r32( 4, R_EAX );
1.527 @@ -1656,6 +1737,7 @@
1.528 case 0x3:
1.529 { /* LDC Rm, SGR */
1.530 uint32_t Rm = ((ir>>8)&0xF);
1.531 + check_priv();
1.532 load_reg( R_EAX, Rm );
1.533 store_spreg( R_EAX, R_SGR );
1.534 }
1.535 @@ -1672,11 +1754,13 @@
1.536 uint32_t Rm = ((ir>>8)&0xF);
1.537 load_reg( R_EAX, Rm );
1.538 store_spreg( R_EAX, R_FPSCR );
1.539 + update_fr_bank( R_EAX );
1.540 }
1.541 break;
1.542 case 0xF:
1.543 { /* LDC Rm, DBR */
1.544 uint32_t Rm = ((ir>>8)&0xF);
1.545 + check_priv();
1.546 load_reg( R_EAX, Rm );
1.547 store_spreg( R_EAX, R_DBR );
1.548 }
1.549 @@ -1698,7 +1782,6 @@
1.550 store_spreg( R_EAX, R_PR );
1.551 load_reg( R_EDI, Rn );
1.552 sh4_x86.in_delay_slot = TRUE;
1.553 - INC_r32(R_ESI);
1.554 return 0;
1.555 }
1.556 }
1.557 @@ -1711,6 +1794,7 @@
1.558 TEST_r8_r8( R_AL, R_AL );
1.559 SETE_t();
1.560 OR_imm8_r8( 0x80, R_AL );
1.561 + load_reg( R_ECX, Rn );
1.562 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.563 }
1.564 break;
1.565 @@ -1722,7 +1806,6 @@
1.566 } else {
1.567 load_reg( R_EDI, Rn );
1.568 sh4_x86.in_delay_slot = TRUE;
1.569 - INC_r32(R_ESI);
1.570 return 0;
1.571 }
1.572 }
1.573 @@ -1739,16 +1822,23 @@
1.574 load_reg( R_EAX, Rn );
1.575 load_reg( R_ECX, Rm );
1.576 CMP_imm32_r32( 0, R_ECX );
1.577 - JAE_rel8(9, doshl);
1.578 + JGE_rel8(16, doshl);
1.579
1.580 NEG_r32( R_ECX ); // 2
1.581 AND_imm8_r8( 0x1F, R_CL ); // 3
1.582 + JE_rel8( 4, emptysar); // 2
1.583 SAR_r32_CL( R_EAX ); // 2
1.584 - JMP_rel8(5, end); // 2
1.585 + JMP_rel8(10, end); // 2
1.586 +
1.587 + JMP_TARGET(emptysar);
1.588 + SAR_imm8_r32(31, R_EAX ); // 3
1.589 + JMP_rel8(5, end2);
1.590 +
1.591 JMP_TARGET(doshl);
1.592 AND_imm8_r8( 0x1F, R_CL ); // 3
1.593 SHL_r32_CL( R_EAX ); // 2
1.594 JMP_TARGET(end);
1.595 + JMP_TARGET(end2);
1.596 store_reg( R_EAX, Rn );
1.597 }
1.598 break;
1.599 @@ -1757,13 +1847,24 @@
1.600 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.601 load_reg( R_EAX, Rn );
1.602 load_reg( R_ECX, Rm );
1.603 + CMP_imm32_r32( 0, R_ECX );
1.604 + JGE_rel8(15, doshl);
1.605
1.606 - MOV_r32_r32( R_EAX, R_EDX );
1.607 - SHL_r32_CL( R_EAX );
1.608 - NEG_r32( R_ECX );
1.609 - SHR_r32_CL( R_EDX );
1.610 - CMP_imm8s_r32( 0, R_ECX );
1.611 - CMOVAE_r32_r32( R_EDX, R_EAX );
1.612 + NEG_r32( R_ECX ); // 2
1.613 + AND_imm8_r8( 0x1F, R_CL ); // 3
1.614 + JE_rel8( 4, emptyshr );
1.615 + SHR_r32_CL( R_EAX ); // 2
1.616 + JMP_rel8(9, end); // 2
1.617 +
1.618 + JMP_TARGET(emptyshr);
1.619 + XOR_r32_r32( R_EAX, R_EAX );
1.620 + JMP_rel8(5, end2);
1.621 +
1.622 + JMP_TARGET(doshl);
1.623 + AND_imm8_r8( 0x1F, R_CL ); // 3
1.624 + SHL_r32_CL( R_EAX ); // 2
1.625 + JMP_TARGET(end);
1.626 + JMP_TARGET(end2);
1.627 store_reg( R_EAX, Rn );
1.628 }
1.629 break;
1.630 @@ -1774,10 +1875,15 @@
1.631 case 0x0:
1.632 { /* LDC Rm, SR */
1.633 uint32_t Rm = ((ir>>8)&0xF);
1.634 - load_reg( R_EAX, Rm );
1.635 - call_func1( sh4_write_sr, R_EAX );
1.636 - sh4_x86.priv_checked = FALSE;
1.637 - sh4_x86.fpuen_checked = FALSE;
1.638 + if( sh4_x86.in_delay_slot ) {
1.639 + SLOTILLEGAL();
1.640 + } else {
1.641 + check_priv();
1.642 + load_reg( R_EAX, Rm );
1.643 + call_func1( sh4_write_sr, R_EAX );
1.644 + sh4_x86.priv_checked = FALSE;
1.645 + sh4_x86.fpuen_checked = FALSE;
1.646 + }
1.647 }
1.648 break;
1.649 case 0x1:
1.650 @@ -1790,6 +1896,7 @@
1.651 case 0x2:
1.652 { /* LDC Rm, VBR */
1.653 uint32_t Rm = ((ir>>8)&0xF);
1.654 + check_priv();
1.655 load_reg( R_EAX, Rm );
1.656 store_spreg( R_EAX, R_VBR );
1.657 }
1.658 @@ -1797,6 +1904,7 @@
1.659 case 0x3:
1.660 { /* LDC Rm, SSR */
1.661 uint32_t Rm = ((ir>>8)&0xF);
1.662 + check_priv();
1.663 load_reg( R_EAX, Rm );
1.664 store_spreg( R_EAX, R_SSR );
1.665 }
1.666 @@ -1804,6 +1912,7 @@
1.667 case 0x4:
1.668 { /* LDC Rm, SPC */
1.669 uint32_t Rm = ((ir>>8)&0xF);
1.670 + check_priv();
1.671 load_reg( R_EAX, Rm );
1.672 store_spreg( R_EAX, R_SPC );
1.673 }
1.674 @@ -1816,6 +1925,7 @@
1.675 case 0x1:
1.676 { /* LDC Rm, Rn_BANK */
1.677 uint32_t Rm = ((ir>>8)&0xF); uint32_t Rn_BANK = ((ir>>4)&0x7);
1.678 + check_priv();
1.679 load_reg( R_EAX, Rm );
1.680 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
1.681 }
1.682 @@ -1825,6 +1935,43 @@
1.683 case 0xF:
1.684 { /* MAC.W @Rm+, @Rn+ */
1.685 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.686 + load_reg( R_ECX, Rm );
1.687 + check_ralign16( R_ECX );
1.688 + load_reg( R_ECX, Rn );
1.689 + check_ralign16( R_ECX );
1.690 + ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
1.691 + MEM_READ_WORD( R_ECX, R_EAX );
1.692 + PUSH_r32( R_EAX );
1.693 + load_reg( R_ECX, Rm );
1.694 + ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1.695 + MEM_READ_WORD( R_ECX, R_EAX );
1.696 + POP_r32( R_ECX );
1.697 + IMUL_r32( R_ECX );
1.698 +
1.699 + load_spreg( R_ECX, R_S );
1.700 + TEST_r32_r32( R_ECX, R_ECX );
1.701 + JE_rel8( 47, nosat );
1.702 +
1.703 + ADD_r32_sh4r( R_EAX, R_MACL ); // 6
1.704 + JNO_rel8( 51, end ); // 2
1.705 + load_imm32( R_EDX, 1 ); // 5
1.706 + store_spreg( R_EDX, R_MACH ); // 6
1.707 + JS_rel8( 13, positive ); // 2
1.708 + load_imm32( R_EAX, 0x80000000 );// 5
1.709 + store_spreg( R_EAX, R_MACL ); // 6
1.710 + JMP_rel8( 25, end2 ); // 2
1.711 +
1.712 + JMP_TARGET(positive);
1.713 + load_imm32( R_EAX, 0x7FFFFFFF );// 5
1.714 + store_spreg( R_EAX, R_MACL ); // 6
1.715 + JMP_rel8( 12, end3); // 2
1.716 +
1.717 + JMP_TARGET(nosat);
1.718 + ADD_r32_sh4r( R_EAX, R_MACL ); // 6
1.719 + ADC_r32_sh4r( R_EDX, R_MACH ); // 6
1.720 + JMP_TARGET(end);
1.721 + JMP_TARGET(end2);
1.722 + JMP_TARGET(end3);
1.723 }
1.724 break;
1.725 }
1.726 @@ -1846,7 +1993,7 @@
1.727 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.728 load_reg( R_ECX, Rm );
1.729 MEM_READ_BYTE( R_ECX, R_EAX );
1.730 - store_reg( R_ECX, Rn );
1.731 + store_reg( R_EAX, Rn );
1.732 }
1.733 break;
1.734 case 0x1:
1.735 @@ -1901,7 +2048,7 @@
1.736 { /* MOV.L @Rm+, Rn */
1.737 uint32_t Rn = ((ir>>8)&0xF); uint32_t Rm = ((ir>>4)&0xF);
1.738 load_reg( R_EAX, Rm );
1.739 - check_ralign32( R_ECX );
1.740 + check_ralign32( R_EAX );
1.741 MOV_r32_r32( R_EAX, R_ECX );
1.742 ADD_imm8s_r32( 4, R_EAX );
1.743 store_reg( R_EAX, Rm );
1.744 @@ -2083,13 +2230,12 @@
1.745 if( sh4_x86.in_delay_slot ) {
1.746 SLOTILLEGAL();
1.747 } else {
1.748 - load_imm32( R_EDI, pc + 2 );
1.749 + load_imm32( R_EDI, pc + 4 );
1.750 CMP_imm8s_sh4r( 0, R_T );
1.751 JE_rel8( 5, nottaken );
1.752 load_imm32( R_EDI, disp + pc + 4 );
1.753 JMP_TARGET(nottaken);
1.754 sh4_x86.in_delay_slot = TRUE;
1.755 - INC_r32(R_ESI);
1.756 return 0;
1.757 }
1.758 }
1.759 @@ -2100,13 +2246,12 @@
1.760 if( sh4_x86.in_delay_slot ) {
1.761 SLOTILLEGAL();
1.762 } else {
1.763 - load_imm32( R_EDI, pc + 2 );
1.764 + load_imm32( R_EDI, pc + 4 );
1.765 CMP_imm8s_sh4r( 0, R_T );
1.766 JNE_rel8( 5, nottaken );
1.767 load_imm32( R_EDI, disp + pc + 4 );
1.768 JMP_TARGET(nottaken);
1.769 sh4_x86.in_delay_slot = TRUE;
1.770 - INC_r32(R_ESI);
1.771 return 0;
1.772 }
1.773 }
1.774 @@ -2136,7 +2281,6 @@
1.775 } else {
1.776 load_imm32( R_EDI, disp + pc + 4 );
1.777 sh4_x86.in_delay_slot = TRUE;
1.778 - INC_r32(R_ESI);
1.779 return 0;
1.780 }
1.781 }
1.782 @@ -2151,7 +2295,6 @@
1.783 store_spreg( R_EAX, R_PR );
1.784 load_imm32( R_EDI, disp + pc + 4 );
1.785 sh4_x86.in_delay_slot = TRUE;
1.786 - INC_r32(R_ESI);
1.787 return 0;
1.788 }
1.789 }
1.790 @@ -2287,8 +2430,10 @@
1.791 load_reg( R_EAX, 0 );
1.792 load_spreg( R_ECX, R_GBR );
1.793 ADD_r32_r32( R_EAX, R_ECX );
1.794 - MEM_READ_BYTE( R_ECX, R_EAX );
1.795 - AND_imm32_r32(imm, R_ECX );
1.796 + PUSH_r32(R_ECX);
1.797 + call_func0(sh4_read_byte);
1.798 + POP_r32(R_ECX);
1.799 + AND_imm32_r32(imm, R_EAX );
1.800 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.801 }
1.802 break;
1.803 @@ -2298,7 +2443,9 @@
1.804 load_reg( R_EAX, 0 );
1.805 load_spreg( R_ECX, R_GBR );
1.806 ADD_r32_r32( R_EAX, R_ECX );
1.807 - MEM_READ_BYTE( R_ECX, R_EAX );
1.808 + PUSH_r32(R_ECX);
1.809 + call_func0(sh4_read_byte);
1.810 + POP_r32(R_ECX);
1.811 XOR_imm32_r32( imm, R_EAX );
1.812 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.813 }
1.814 @@ -2309,8 +2456,10 @@
1.815 load_reg( R_EAX, 0 );
1.816 load_spreg( R_ECX, R_GBR );
1.817 ADD_r32_r32( R_EAX, R_ECX );
1.818 - MEM_READ_BYTE( R_ECX, R_EAX );
1.819 - OR_imm32_r32(imm, R_ECX );
1.820 + PUSH_r32(R_ECX);
1.821 + call_func0(sh4_read_byte);
1.822 + POP_r32(R_ECX);
1.823 + OR_imm32_r32(imm, R_EAX );
1.824 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.825 }
1.826 break;
1.827 @@ -2324,7 +2473,7 @@
1.828 } else {
1.829 load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
1.830 MEM_READ_LONG( R_ECX, R_EAX );
1.831 - store_reg( R_EAX, 0 );
1.832 + store_reg( R_EAX, Rn );
1.833 }
1.834 }
1.835 break;
1.836 @@ -2435,10 +2584,10 @@
1.837 JMP_TARGET(doubleprec);
1.838 push_dr(R_EDX, FRm);
1.839 push_dr(R_EDX, FRn);
1.840 + JMP_TARGET(end);
1.841 FCOMIP_st(1);
1.842 SETE_t();
1.843 FPOP_st();
1.844 - JMP_TARGET(end);
1.845 }
1.846 break;
1.847 case 0x5:
1.848 @@ -2475,7 +2624,7 @@
1.849 load_fr_bank( R_ECX );
1.850 store_fr( R_ECX, R_EAX, FRn );
1.851 if( FRn&1 ) {
1.852 - JMP_rel8(46, end);
1.853 + JMP_rel8(48, end);
1.854 JMP_TARGET(doublesize);
1.855 MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.856 load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.857 @@ -2508,7 +2657,7 @@
1.858 load_fr( R_ECX, R_EAX, FRm );
1.859 MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.860 if( FRm&1 ) {
1.861 - JMP_rel8( 46, end );
1.862 + JMP_rel8( 48, end );
1.863 JMP_TARGET(doublesize);
1.864 load_xf_bank( R_ECX );
1.865 load_fr( R_ECX, R_EAX, FRm&0x0E );
1.866 @@ -2539,7 +2688,7 @@
1.867 load_fr_bank( R_ECX );
1.868 store_fr( R_ECX, R_EAX, FRn );
1.869 if( FRn&1 ) {
1.870 - JMP_rel8(46, end);
1.871 + JMP_rel8(48, end);
1.872 JMP_TARGET(doublesize);
1.873 MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.874 load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.875 @@ -2574,7 +2723,7 @@
1.876 load_fr_bank( R_ECX );
1.877 store_fr( R_ECX, R_EAX, FRn );
1.878 if( FRn&1 ) {
1.879 - JMP_rel8(52, end);
1.880 + JMP_rel8(54, end);
1.881 JMP_TARGET(doublesize);
1.882 ADD_imm8s_r32( 8, R_EAX );
1.883 store_reg(R_EAX, Rm);
1.884 @@ -2609,7 +2758,7 @@
1.885 load_fr( R_ECX, R_EAX, FRm );
1.886 MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.887 if( FRm&1 ) {
1.888 - JMP_rel8( 46, end );
1.889 + JMP_rel8( 48, end );
1.890 JMP_TARGET(doublesize);
1.891 load_xf_bank( R_ECX );
1.892 load_fr( R_ECX, R_EAX, FRm&0x0E );
1.893 @@ -2635,14 +2784,14 @@
1.894 check_walign32( R_EDX );
1.895 load_spreg( R_ECX, R_FPSCR );
1.896 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.897 - JNE_rel8(20, doublesize);
1.898 + JNE_rel8(26, doublesize);
1.899 load_fr_bank( R_ECX );
1.900 load_fr( R_ECX, R_EAX, FRm );
1.901 ADD_imm8s_r32(-4,R_EDX);
1.902 store_reg( R_EDX, Rn );
1.903 MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.904 if( FRm&1 ) {
1.905 - JMP_rel8( 46, end );
1.906 + JMP_rel8( 54, end );
1.907 JMP_TARGET(doublesize);
1.908 load_xf_bank( R_ECX );
1.909 load_fr( R_ECX, R_EAX, FRm&0x0E );
1.910 @@ -2652,7 +2801,7 @@
1.911 MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.912 JMP_TARGET(end);
1.913 } else {
1.914 - JMP_rel8( 39, end );
1.915 + JMP_rel8( 45, end );
1.916 JMP_TARGET(doublesize);
1.917 load_fr_bank( R_ECX );
1.918 load_fr( R_ECX, R_EAX, FRm&0x0E );
1.919 @@ -2682,7 +2831,7 @@
1.920 load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
1.921 store_fr( R_EDX, R_EAX, FRn );
1.922 if( FRm&1 ) {
1.923 - JMP_rel8(22, end);
1.924 + JMP_rel8(24, end);
1.925 JMP_TARGET(doublesize);
1.926 load_xf_bank( R_ECX );
1.927 load_fr( R_ECX, R_EAX, FRm-1 );
1.928 @@ -2698,7 +2847,7 @@
1.929 JMP_TARGET(end);
1.930 } else /* FRm&1 == 0 */ {
1.931 if( FRn&1 ) {
1.932 - JMP_rel8(22, end);
1.933 + JMP_rel8(24, end);
1.934 load_xf_bank( R_ECX );
1.935 load_fr( R_EDX, R_EAX, FRm );
1.936 load_fr( R_EDX, R_EDX, FRm+1 );
1.937 @@ -2925,16 +3074,17 @@
1.938 load_spreg( R_ECX, R_FPSCR );
1.939 XOR_imm32_r32( FPSCR_FR, R_ECX );
1.940 store_spreg( R_ECX, R_FPSCR );
1.941 + update_fr_bank( R_ECX );
1.942 }
1.943 break;
1.944 case 0x3:
1.945 { /* UNDEF */
1.946 if( sh4_x86.in_delay_slot ) {
1.947 - RAISE_EXCEPTION(EXC_SLOT_ILLEGAL);
1.948 + SLOTILLEGAL();
1.949 } else {
1.950 - RAISE_EXCEPTION(EXC_ILLEGAL);
1.951 + JMP_exit(EXIT_ILLEGAL);
1.952 + return 1;
1.953 }
1.954 - return 1;
1.955 }
1.956 break;
1.957 default:
1.958 @@ -2983,10 +3133,12 @@
1.959 break;
1.960 }
1.961
1.962 - INC_r32(R_ESI);
1.963 if( sh4_x86.in_delay_slot ) {
1.964 + ADD_imm8s_r32(2,R_ESI);
1.965 sh4_x86.in_delay_slot = FALSE;
1.966 return 1;
1.967 + } else {
1.968 + INC_r32(R_ESI);
1.969 }
1.970 return 0;
1.971 }
2.1 --- a/src/sh4/sh4x86.in Sun Sep 16 07:01:35 2007 +0000
2.2 +++ b/src/sh4/sh4x86.in Sun Sep 16 07:03:23 2007 +0000
2.3 @@ -1,5 +1,5 @@
2.4 /**
2.5 - * $Id: sh4x86.in,v 1.9 2007-09-13 08:28:01 nkeynes Exp $
2.6 + * $Id: sh4x86.in,v 1.10 2007-09-16 07:03:23 nkeynes Exp $
2.7 *
2.8 * SH4 => x86 translation. This version does no real optimization, it just
2.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
2.10 @@ -56,6 +56,15 @@
2.11
2.12 static struct sh4_x86_state sh4_x86;
2.13
2.14 +void signsat48( void )
2.15 +{
2.16 + if( ((int64_t)sh4r.mac) < (int64_t)0xFFFF800000000000LL )
2.17 + sh4r.mac = 0xFFFF800000000000LL;
2.18 + else if( ((int64_t)sh4r.mac) > (int64_t)0x00007FFFFFFFFFFFLL )
2.19 + sh4r.mac = 0x00007FFFFFFFFFFFLL;
2.20 +}
2.21 +
2.22 +
2.23 void sh4_x86_init()
2.24 {
2.25 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
2.26 @@ -156,12 +165,23 @@
2.27 */
2.28 static inline void load_xf_bank( int bankreg )
2.29 {
2.30 + NOT_r32( bankreg );
2.31 SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
2.32 AND_imm8s_r32( 0x40, bankreg ); // Complete extraction
2.33 OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
2.34 }
2.35
2.36 /**
2.37 + * Update the fr_bank pointer based on the current fpscr value.
2.38 + */
2.39 +static inline void update_fr_bank( int fpscrreg )
2.40 +{
2.41 + SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
2.42 + AND_imm8s_r32( 0x40, fpscrreg ); // Complete extraction
2.43 + OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
2.44 + store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
2.45 +}
2.46 +/**
2.47 * Push FPUL (as a 32-bit float) onto the FPU stack
2.48 */
2.49 static inline void push_fpul( )
2.50 @@ -242,11 +262,11 @@
2.51 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
2.52 {
2.53 ADD_imm8s_r32( 4, addr );
2.54 + PUSH_r32(arg2b);
2.55 PUSH_r32(addr);
2.56 - PUSH_r32(arg2b);
2.57 ADD_imm8s_r32( -4, addr );
2.58 + PUSH_r32(arg2a);
2.59 PUSH_r32(addr);
2.60 - PUSH_r32(arg2a);
2.61 call_func0(sh4_write_long);
2.62 ADD_imm8s_r32( 8, R_ESP );
2.63 call_func0(sh4_write_long);
2.64 @@ -324,6 +344,13 @@
2.65 JNE_exit(EXIT_DATA_ADDR_WRITE);
2.66 }
2.67
2.68 +static inline void raise_exception( int exc )
2.69 +{
2.70 + PUSH_imm32(exc);
2.71 + call_func0(sh4_raise_exception);
2.72 + ADD_imm8s_r32( 4, R_ESP );
2.73 + sh4_x86.in_delay_slot = FALSE;
2.74 +}
2.75
2.76 #define UNDEF()
2.77 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
2.78 @@ -334,8 +361,8 @@
2.79 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
2.80 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
2.81
2.82 -#define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
2.83 -#define SLOTILLEGAL() RAISE_EXCEPTION(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1
2.84 +#define RAISE_EXCEPTION( exc ) raise_exception(exc); return 1;
2.85 +#define SLOTILLEGAL() JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
2.86
2.87
2.88
2.89 @@ -479,8 +506,10 @@
2.90 load_reg( R_EAX, 0 );
2.91 load_spreg( R_ECX, R_GBR );
2.92 ADD_r32_r32( R_EAX, R_ECX );
2.93 - MEM_READ_BYTE( R_ECX, R_EAX );
2.94 - AND_imm32_r32(imm, R_ECX );
2.95 + PUSH_r32(R_ECX);
2.96 + call_func0(sh4_read_byte);
2.97 + POP_r32(R_ECX);
2.98 + AND_imm32_r32(imm, R_EAX );
2.99 MEM_WRITE_BYTE( R_ECX, R_EAX );
2.100 :}
2.101 CMP/EQ Rm, Rn {:
2.102 @@ -547,13 +576,13 @@
2.103 :}
2.104 DIV0S Rm, Rn {:
2.105 load_reg( R_EAX, Rm );
2.106 - load_reg( R_ECX, Rm );
2.107 + load_reg( R_ECX, Rn );
2.108 SHR_imm8_r32( 31, R_EAX );
2.109 SHR_imm8_r32( 31, R_ECX );
2.110 store_spreg( R_EAX, R_M );
2.111 store_spreg( R_ECX, R_Q );
2.112 CMP_r32_r32( R_EAX, R_ECX );
2.113 - SETE_t();
2.114 + SETNE_t();
2.115 :}
2.116 DIV0U {:
2.117 XOR_r32_r32( R_EAX, R_EAX );
2.118 @@ -561,20 +590,27 @@
2.119 store_spreg( R_EAX, R_M );
2.120 store_spreg( R_EAX, R_T );
2.121 :}
2.122 -DIV1 Rm, Rn {:
2.123 - load_reg( R_ECX, Rn );
2.124 +DIV1 Rm, Rn {:
2.125 + load_spreg( R_ECX, R_M );
2.126 + load_reg( R_EAX, Rn );
2.127 LDC_t();
2.128 - RCL1_r32( R_ECX ); // OP2
2.129 - SETC_r32( R_EDX ); // Q
2.130 - load_spreg( R_EAX, R_Q );
2.131 - CMP_sh4r_r32( R_M, R_EAX );
2.132 - JE_rel8(8,mqequal);
2.133 - ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
2.134 - JMP_rel8(3, mqnotequal);
2.135 + RCL1_r32( R_EAX );
2.136 + SETC_r8( R_DL ); // Q'
2.137 + CMP_sh4r_r32( R_Q, R_ECX );
2.138 + JE_rel8(5, mqequal);
2.139 + ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
2.140 + JMP_rel8(3, end);
2.141 JMP_TARGET(mqequal);
2.142 - SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
2.143 - JMP_TARGET(mqnotequal);
2.144 - // TODO
2.145 + SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
2.146 + JMP_TARGET(end);
2.147 + store_reg( R_EAX, Rn ); // Done with Rn now
2.148 + SETC_r8(R_AL); // tmp1
2.149 + XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
2.150 + XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
2.151 + store_spreg( R_ECX, R_Q );
2.152 + XOR_imm8s_r32( 1, R_AL ); // T = !Q'
2.153 + MOVZX_r8_r32( R_AL, R_EAX );
2.154 + store_spreg( R_EAX, R_T );
2.155 :}
2.156 DMULS.L Rm, Rn {:
2.157 load_reg( R_EAX, Rm );
2.158 @@ -616,8 +652,67 @@
2.159 MOVZX_r16_r32( R_EAX, R_EAX );
2.160 store_reg( R_EAX, Rn );
2.161 :}
2.162 -MAC.L @Rm+, @Rn+ {: :}
2.163 -MAC.W @Rm+, @Rn+ {: :}
2.164 +MAC.L @Rm+, @Rn+ {:
2.165 + load_reg( R_ECX, Rm );
2.166 + check_ralign32( R_ECX );
2.167 + load_reg( R_ECX, Rn );
2.168 + check_ralign32( R_ECX );
2.169 + ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
2.170 + MEM_READ_LONG( R_ECX, R_EAX );
2.171 + PUSH_r32( R_EAX );
2.172 + load_reg( R_ECX, Rm );
2.173 + ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2.174 + MEM_READ_LONG( R_ECX, R_EAX );
2.175 + POP_r32( R_ECX );
2.176 + IMUL_r32( R_ECX );
2.177 + ADD_r32_sh4r( R_EAX, R_MACL );
2.178 + ADC_r32_sh4r( R_EDX, R_MACH );
2.179 +
2.180 + load_spreg( R_ECX, R_S );
2.181 + TEST_r32_r32(R_ECX, R_ECX);
2.182 + JE_rel8( 7, nosat );
2.183 + call_func0( signsat48 );
2.184 + JMP_TARGET( nosat );
2.185 +:}
2.186 +MAC.W @Rm+, @Rn+ {:
2.187 + load_reg( R_ECX, Rm );
2.188 + check_ralign16( R_ECX );
2.189 + load_reg( R_ECX, Rn );
2.190 + check_ralign16( R_ECX );
2.191 + ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
2.192 + MEM_READ_WORD( R_ECX, R_EAX );
2.193 + PUSH_r32( R_EAX );
2.194 + load_reg( R_ECX, Rm );
2.195 + ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
2.196 + MEM_READ_WORD( R_ECX, R_EAX );
2.197 + POP_r32( R_ECX );
2.198 + IMUL_r32( R_ECX );
2.199 +
2.200 + load_spreg( R_ECX, R_S );
2.201 + TEST_r32_r32( R_ECX, R_ECX );
2.202 + JE_rel8( 47, nosat );
2.203 +
2.204 + ADD_r32_sh4r( R_EAX, R_MACL ); // 6
2.205 + JNO_rel8( 51, end ); // 2
2.206 + load_imm32( R_EDX, 1 ); // 5
2.207 + store_spreg( R_EDX, R_MACH ); // 6
2.208 + JS_rel8( 13, positive ); // 2
2.209 + load_imm32( R_EAX, 0x80000000 );// 5
2.210 + store_spreg( R_EAX, R_MACL ); // 6
2.211 + JMP_rel8( 25, end2 ); // 2
2.212 +
2.213 + JMP_TARGET(positive);
2.214 + load_imm32( R_EAX, 0x7FFFFFFF );// 5
2.215 + store_spreg( R_EAX, R_MACL ); // 6
2.216 + JMP_rel8( 12, end3); // 2
2.217 +
2.218 + JMP_TARGET(nosat);
2.219 + ADD_r32_sh4r( R_EAX, R_MACL ); // 6
2.220 + ADC_r32_sh4r( R_EDX, R_MACH ); // 6
2.221 + JMP_TARGET(end);
2.222 + JMP_TARGET(end2);
2.223 + JMP_TARGET(end3);
2.224 +:}
2.225 MOVT Rn {:
2.226 load_spreg( R_EAX, R_T );
2.227 store_reg( R_EAX, Rn );
2.228 @@ -673,8 +768,10 @@
2.229 load_reg( R_EAX, 0 );
2.230 load_spreg( R_ECX, R_GBR );
2.231 ADD_r32_r32( R_EAX, R_ECX );
2.232 - MEM_READ_BYTE( R_ECX, R_EAX );
2.233 - OR_imm32_r32(imm, R_ECX );
2.234 + PUSH_r32(R_ECX);
2.235 + call_func0(sh4_read_byte);
2.236 + POP_r32(R_ECX);
2.237 + OR_imm32_r32(imm, R_EAX );
2.238 MEM_WRITE_BYTE( R_ECX, R_EAX );
2.239 :}
2.240 ROTCL Rn {:
2.241 @@ -708,34 +805,46 @@
2.242 load_reg( R_EAX, Rn );
2.243 load_reg( R_ECX, Rm );
2.244 CMP_imm32_r32( 0, R_ECX );
2.245 - JGE_rel8(9, doshl);
2.246 + JGE_rel8(16, doshl);
2.247
2.248 NEG_r32( R_ECX ); // 2
2.249 AND_imm8_r8( 0x1F, R_CL ); // 3
2.250 + JE_rel8( 4, emptysar); // 2
2.251 SAR_r32_CL( R_EAX ); // 2
2.252 - JMP_rel8(5, end); // 2
2.253 + JMP_rel8(10, end); // 2
2.254 +
2.255 + JMP_TARGET(emptysar);
2.256 + SAR_imm8_r32(31, R_EAX ); // 3
2.257 + JMP_rel8(5, end2);
2.258
2.259 JMP_TARGET(doshl);
2.260 AND_imm8_r8( 0x1F, R_CL ); // 3
2.261 SHL_r32_CL( R_EAX ); // 2
2.262 JMP_TARGET(end);
2.263 + JMP_TARGET(end2);
2.264 store_reg( R_EAX, Rn );
2.265 :}
2.266 SHLD Rm, Rn {:
2.267 load_reg( R_EAX, Rn );
2.268 load_reg( R_ECX, Rm );
2.269 CMP_imm32_r32( 0, R_ECX );
2.270 - JGE_rel8(9, doshl);
2.271 + JGE_rel8(15, doshl);
2.272
2.273 NEG_r32( R_ECX ); // 2
2.274 AND_imm8_r8( 0x1F, R_CL ); // 3
2.275 + JE_rel8( 4, emptyshr );
2.276 SHR_r32_CL( R_EAX ); // 2
2.277 - JMP_rel8(5, end); // 2
2.278 + JMP_rel8(9, end); // 2
2.279 +
2.280 + JMP_TARGET(emptyshr);
2.281 + XOR_r32_r32( R_EAX, R_EAX );
2.282 + JMP_rel8(5, end2);
2.283
2.284 JMP_TARGET(doshl);
2.285 AND_imm8_r8( 0x1F, R_CL ); // 3
2.286 SHL_r32_CL( R_EAX ); // 2
2.287 JMP_TARGET(end);
2.288 + JMP_TARGET(end2);
2.289 store_reg( R_EAX, Rn );
2.290 :}
2.291 SHAL Rn {:
2.292 @@ -827,6 +936,7 @@
2.293 TEST_r8_r8( R_AL, R_AL );
2.294 SETE_t();
2.295 OR_imm8_r8( 0x80, R_AL );
2.296 + load_reg( R_ECX, Rn );
2.297 MEM_WRITE_BYTE( R_ECX, R_EAX );
2.298 :}
2.299 TST Rm, Rn {:
2.300 @@ -863,7 +973,9 @@
2.301 load_reg( R_EAX, 0 );
2.302 load_spreg( R_ECX, R_GBR );
2.303 ADD_r32_r32( R_EAX, R_ECX );
2.304 - MEM_READ_BYTE( R_ECX, R_EAX );
2.305 + PUSH_r32(R_ECX);
2.306 + call_func0(sh4_read_byte);
2.307 + POP_r32(R_ECX);
2.308 XOR_imm32_r32( imm, R_EAX );
2.309 MEM_WRITE_BYTE( R_ECX, R_EAX );
2.310 :}
2.311 @@ -919,7 +1031,7 @@
2.312 MOV.B @Rm, Rn {:
2.313 load_reg( R_ECX, Rm );
2.314 MEM_READ_BYTE( R_ECX, R_EAX );
2.315 - store_reg( R_ECX, Rn );
2.316 + store_reg( R_EAX, Rn );
2.317 :}
2.318 MOV.B @Rm+, Rn {:
2.319 load_reg( R_ECX, Rm );
2.320 @@ -1145,13 +1257,12 @@
2.321 if( sh4_x86.in_delay_slot ) {
2.322 SLOTILLEGAL();
2.323 } else {
2.324 - load_imm32( R_EDI, pc + 2 );
2.325 + load_imm32( R_EDI, pc + 4 );
2.326 CMP_imm8s_sh4r( 0, R_T );
2.327 JNE_rel8( 5, nottaken );
2.328 load_imm32( R_EDI, disp + pc + 4 );
2.329 JMP_TARGET(nottaken);
2.330 sh4_x86.in_delay_slot = TRUE;
2.331 - INC_r32(R_ESI);
2.332 return 0;
2.333 }
2.334 :}
2.335 @@ -1161,7 +1272,6 @@
2.336 } else {
2.337 load_imm32( R_EDI, disp + pc + 4 );
2.338 sh4_x86.in_delay_slot = TRUE;
2.339 - INC_r32(R_ESI);
2.340 return 0;
2.341 }
2.342 :}
2.343 @@ -1172,7 +1282,6 @@
2.344 load_reg( R_EDI, Rn );
2.345 ADD_imm32_r32( pc + 4, R_EDI );
2.346 sh4_x86.in_delay_slot = TRUE;
2.347 - INC_r32(R_ESI);
2.348 return 0;
2.349 }
2.350 :}
2.351 @@ -1184,7 +1293,6 @@
2.352 store_spreg( R_EAX, R_PR );
2.353 load_imm32( R_EDI, disp + pc + 4 );
2.354 sh4_x86.in_delay_slot = TRUE;
2.355 - INC_r32(R_ESI);
2.356 return 0;
2.357 }
2.358 :}
2.359 @@ -1197,7 +1305,6 @@
2.360 load_reg( R_EDI, Rn );
2.361 ADD_r32_r32( R_EAX, R_EDI );
2.362 sh4_x86.in_delay_slot = TRUE;
2.363 - INC_r32(R_ESI);
2.364 return 0;
2.365 }
2.366 :}
2.367 @@ -1218,13 +1325,12 @@
2.368 if( sh4_x86.in_delay_slot ) {
2.369 SLOTILLEGAL();
2.370 } else {
2.371 - load_imm32( R_EDI, pc + 2 );
2.372 + load_imm32( R_EDI, pc + 4 );
2.373 CMP_imm8s_sh4r( 0, R_T );
2.374 JE_rel8( 5, nottaken );
2.375 load_imm32( R_EDI, disp + pc + 4 );
2.376 JMP_TARGET(nottaken);
2.377 sh4_x86.in_delay_slot = TRUE;
2.378 - INC_r32(R_ESI);
2.379 return 0;
2.380 }
2.381 :}
2.382 @@ -1234,7 +1340,6 @@
2.383 } else {
2.384 load_reg( R_EDI, Rn );
2.385 sh4_x86.in_delay_slot = TRUE;
2.386 - INC_r32(R_ESI);
2.387 return 0;
2.388 }
2.389 :}
2.390 @@ -1246,7 +1351,6 @@
2.391 store_spreg( R_EAX, R_PR );
2.392 load_reg( R_EDI, Rn );
2.393 sh4_x86.in_delay_slot = TRUE;
2.394 - INC_r32(R_ESI);
2.395 return 0;
2.396 }
2.397 :}
2.398 @@ -1255,13 +1359,12 @@
2.399 if( sh4_x86.in_delay_slot ) {
2.400 SLOTILLEGAL();
2.401 } else {
2.402 - load_spreg( R_EDI, R_PR );
2.403 + load_spreg( R_EDI, R_SPC );
2.404 load_spreg( R_EAX, R_SSR );
2.405 call_func1( sh4_write_sr, R_EAX );
2.406 sh4_x86.in_delay_slot = TRUE;
2.407 sh4_x86.priv_checked = FALSE;
2.408 sh4_x86.fpuen_checked = FALSE;
2.409 - INC_r32(R_ESI);
2.410 return 0;
2.411 }
2.412 :}
2.413 @@ -1271,7 +1374,6 @@
2.414 } else {
2.415 load_spreg( R_EDI, R_PR );
2.416 sh4_x86.in_delay_slot = TRUE;
2.417 - INC_r32(R_ESI);
2.418 return 0;
2.419 }
2.420 :}
2.421 @@ -1287,7 +1389,7 @@
2.422 if( sh4_x86.in_delay_slot ) {
2.423 SLOTILLEGAL();
2.424 } else {
2.425 - RAISE_EXCEPTION(EXC_ILLEGAL);
2.426 + JMP_exit(EXIT_ILLEGAL);
2.427 return 1;
2.428 }
2.429 :}
2.430 @@ -1331,7 +1433,7 @@
2.431 load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
2.432 store_fr( R_EDX, R_EAX, FRn );
2.433 if( FRm&1 ) {
2.434 - JMP_rel8(22, end);
2.435 + JMP_rel8(24, end);
2.436 JMP_TARGET(doublesize);
2.437 load_xf_bank( R_ECX );
2.438 load_fr( R_ECX, R_EAX, FRm-1 );
2.439 @@ -1347,7 +1449,7 @@
2.440 JMP_TARGET(end);
2.441 } else /* FRm&1 == 0 */ {
2.442 if( FRn&1 ) {
2.443 - JMP_rel8(22, end);
2.444 + JMP_rel8(24, end);
2.445 load_xf_bank( R_ECX );
2.446 load_fr( R_EDX, R_EAX, FRm );
2.447 load_fr( R_EDX, R_EDX, FRm+1 );
2.448 @@ -1375,7 +1477,7 @@
2.449 load_fr( R_ECX, R_EAX, FRm );
2.450 MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
2.451 if( FRm&1 ) {
2.452 - JMP_rel8( 46, end );
2.453 + JMP_rel8( 48, end );
2.454 JMP_TARGET(doublesize);
2.455 load_xf_bank( R_ECX );
2.456 load_fr( R_ECX, R_EAX, FRm&0x0E );
2.457 @@ -1403,7 +1505,7 @@
2.458 load_fr_bank( R_ECX );
2.459 store_fr( R_ECX, R_EAX, FRn );
2.460 if( FRn&1 ) {
2.461 - JMP_rel8(46, end);
2.462 + JMP_rel8(48, end);
2.463 JMP_TARGET(doublesize);
2.464 MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
2.465 load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
2.466 @@ -1434,7 +1536,7 @@
2.467 store_reg( R_EDX, Rn );
2.468 MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
2.469 if( FRm&1 ) {
2.470 - JMP_rel8( 52, end );
2.471 + JMP_rel8( 54, end );
2.472 JMP_TARGET(doublesize);
2.473 load_xf_bank( R_ECX );
2.474 load_fr( R_ECX, R_EAX, FRm&0x0E );
2.475 @@ -1469,7 +1571,7 @@
2.476 load_fr_bank( R_ECX );
2.477 store_fr( R_ECX, R_EAX, FRn );
2.478 if( FRn&1 ) {
2.479 - JMP_rel8(52, end);
2.480 + JMP_rel8(54, end);
2.481 JMP_TARGET(doublesize);
2.482 ADD_imm8s_r32( 8, R_EAX );
2.483 store_reg(R_EAX, Rm);
2.484 @@ -1502,7 +1604,7 @@
2.485 load_fr( R_ECX, R_EAX, FRm );
2.486 MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
2.487 if( FRm&1 ) {
2.488 - JMP_rel8( 46, end );
2.489 + JMP_rel8( 48, end );
2.490 JMP_TARGET(doublesize);
2.491 load_xf_bank( R_ECX );
2.492 load_fr( R_ECX, R_EAX, FRm&0x0E );
2.493 @@ -1531,7 +1633,7 @@
2.494 load_fr_bank( R_ECX );
2.495 store_fr( R_ECX, R_EAX, FRn );
2.496 if( FRn&1 ) {
2.497 - JMP_rel8(46, end);
2.498 + JMP_rel8(48, end);
2.499 JMP_TARGET(doublesize);
2.500 MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
2.501 load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
2.502 @@ -1829,7 +1931,7 @@
2.503 load_spreg( R_ECX, R_FPSCR );
2.504 XOR_imm32_r32( FPSCR_FR, R_ECX );
2.505 store_spreg( R_ECX, R_FPSCR );
2.506 -
2.507 + update_fr_bank( R_ECX );
2.508 :}
2.509 FSCHG {:
2.510 check_fpuen();
2.511 @@ -1840,36 +1942,47 @@
2.512
2.513 /* Processor control instructions */
2.514 LDC Rm, SR {:
2.515 - load_reg( R_EAX, Rm );
2.516 - call_func1( sh4_write_sr, R_EAX );
2.517 - sh4_x86.priv_checked = FALSE;
2.518 - sh4_x86.fpuen_checked = FALSE;
2.519 + if( sh4_x86.in_delay_slot ) {
2.520 + SLOTILLEGAL();
2.521 + } else {
2.522 + check_priv();
2.523 + load_reg( R_EAX, Rm );
2.524 + call_func1( sh4_write_sr, R_EAX );
2.525 + sh4_x86.priv_checked = FALSE;
2.526 + sh4_x86.fpuen_checked = FALSE;
2.527 + }
2.528 :}
2.529 LDC Rm, GBR {:
2.530 load_reg( R_EAX, Rm );
2.531 store_spreg( R_EAX, R_GBR );
2.532 :}
2.533 LDC Rm, VBR {:
2.534 + check_priv();
2.535 load_reg( R_EAX, Rm );
2.536 store_spreg( R_EAX, R_VBR );
2.537 :}
2.538 LDC Rm, SSR {:
2.539 + check_priv();
2.540 load_reg( R_EAX, Rm );
2.541 store_spreg( R_EAX, R_SSR );
2.542 :}
2.543 LDC Rm, SGR {:
2.544 + check_priv();
2.545 load_reg( R_EAX, Rm );
2.546 store_spreg( R_EAX, R_SGR );
2.547 :}
2.548 LDC Rm, SPC {:
2.549 + check_priv();
2.550 load_reg( R_EAX, Rm );
2.551 store_spreg( R_EAX, R_SPC );
2.552 :}
2.553 LDC Rm, DBR {:
2.554 + check_priv();
2.555 load_reg( R_EAX, Rm );
2.556 store_spreg( R_EAX, R_DBR );
2.557 :}
2.558 LDC Rm, Rn_BANK {:
2.559 + check_priv();
2.560 load_reg( R_EAX, Rm );
2.561 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2.562 :}
2.563 @@ -1882,16 +1995,22 @@
2.564 store_spreg( R_EAX, R_GBR );
2.565 :}
2.566 LDC.L @Rm+, SR {:
2.567 - load_reg( R_EAX, Rm );
2.568 - MOV_r32_r32( R_EAX, R_ECX );
2.569 - ADD_imm8s_r32( 4, R_EAX );
2.570 - store_reg( R_EAX, Rm );
2.571 - MEM_READ_LONG( R_ECX, R_EAX );
2.572 - call_func1( sh4_write_sr, R_EAX );
2.573 - sh4_x86.priv_checked = FALSE;
2.574 - sh4_x86.fpuen_checked = FALSE;
2.575 + if( sh4_x86.in_delay_slot ) {
2.576 + SLOTILLEGAL();
2.577 + } else {
2.578 + check_priv();
2.579 + load_reg( R_EAX, Rm );
2.580 + MOV_r32_r32( R_EAX, R_ECX );
2.581 + ADD_imm8s_r32( 4, R_EAX );
2.582 + store_reg( R_EAX, Rm );
2.583 + MEM_READ_LONG( R_ECX, R_EAX );
2.584 + call_func1( sh4_write_sr, R_EAX );
2.585 + sh4_x86.priv_checked = FALSE;
2.586 + sh4_x86.fpuen_checked = FALSE;
2.587 + }
2.588 :}
2.589 LDC.L @Rm+, VBR {:
2.590 + check_priv();
2.591 load_reg( R_EAX, Rm );
2.592 MOV_r32_r32( R_EAX, R_ECX );
2.593 ADD_imm8s_r32( 4, R_EAX );
2.594 @@ -1900,6 +2019,7 @@
2.595 store_spreg( R_EAX, R_VBR );
2.596 :}
2.597 LDC.L @Rm+, SSR {:
2.598 + check_priv();
2.599 load_reg( R_EAX, Rm );
2.600 MOV_r32_r32( R_EAX, R_ECX );
2.601 ADD_imm8s_r32( 4, R_EAX );
2.602 @@ -1908,6 +2028,7 @@
2.603 store_spreg( R_EAX, R_SSR );
2.604 :}
2.605 LDC.L @Rm+, SGR {:
2.606 + check_priv();
2.607 load_reg( R_EAX, Rm );
2.608 MOV_r32_r32( R_EAX, R_ECX );
2.609 ADD_imm8s_r32( 4, R_EAX );
2.610 @@ -1916,6 +2037,7 @@
2.611 store_spreg( R_EAX, R_SGR );
2.612 :}
2.613 LDC.L @Rm+, SPC {:
2.614 + check_priv();
2.615 load_reg( R_EAX, Rm );
2.616 MOV_r32_r32( R_EAX, R_ECX );
2.617 ADD_imm8s_r32( 4, R_EAX );
2.618 @@ -1924,6 +2046,7 @@
2.619 store_spreg( R_EAX, R_SPC );
2.620 :}
2.621 LDC.L @Rm+, DBR {:
2.622 + check_priv();
2.623 load_reg( R_EAX, Rm );
2.624 MOV_r32_r32( R_EAX, R_ECX );
2.625 ADD_imm8s_r32( 4, R_EAX );
2.626 @@ -1932,6 +2055,7 @@
2.627 store_spreg( R_EAX, R_DBR );
2.628 :}
2.629 LDC.L @Rm+, Rn_BANK {:
2.630 + check_priv();
2.631 load_reg( R_EAX, Rm );
2.632 MOV_r32_r32( R_EAX, R_ECX );
2.633 ADD_imm8s_r32( 4, R_EAX );
2.634 @@ -1942,6 +2066,7 @@
2.635 LDS Rm, FPSCR {:
2.636 load_reg( R_EAX, Rm );
2.637 store_spreg( R_EAX, R_FPSCR );
2.638 + update_fr_bank( R_EAX );
2.639 :}
2.640 LDS.L @Rm+, FPSCR {:
2.641 load_reg( R_EAX, Rm );
2.642 @@ -1950,6 +2075,7 @@
2.643 store_reg( R_EAX, Rm );
2.644 MEM_READ_LONG( R_ECX, R_EAX );
2.645 store_spreg( R_EAX, R_FPSCR );
2.646 + update_fr_bank( R_EAX );
2.647 :}
2.648 LDS Rm, FPUL {:
2.649 load_reg( R_EAX, Rm );
2.650 @@ -2013,40 +2139,48 @@
2.651 JMP_TARGET(end);
2.652 ADD_imm8s_r32( 4, R_ESP );
2.653 :}
2.654 - SLEEP {: /* TODO */ :}
2.655 - STC SR, Rn {:
2.656 - call_func0(sh4_read_sr);
2.657 - store_reg( R_EAX, Rn );
2.658 +SLEEP {: /* TODO */ :}
2.659 +STC SR, Rn {:
2.660 + check_priv();
2.661 + call_func0(sh4_read_sr);
2.662 + store_reg( R_EAX, Rn );
2.663 :}
2.664 STC GBR, Rn {:
2.665 load_spreg( R_EAX, R_GBR );
2.666 store_reg( R_EAX, Rn );
2.667 :}
2.668 STC VBR, Rn {:
2.669 + check_priv();
2.670 load_spreg( R_EAX, R_VBR );
2.671 store_reg( R_EAX, Rn );
2.672 :}
2.673 STC SSR, Rn {:
2.674 + check_priv();
2.675 load_spreg( R_EAX, R_SSR );
2.676 store_reg( R_EAX, Rn );
2.677 :}
2.678 STC SPC, Rn {:
2.679 + check_priv();
2.680 load_spreg( R_EAX, R_SPC );
2.681 store_reg( R_EAX, Rn );
2.682 :}
2.683 STC SGR, Rn {:
2.684 + check_priv();
2.685 load_spreg( R_EAX, R_SGR );
2.686 store_reg( R_EAX, Rn );
2.687 :}
2.688 STC DBR, Rn {:
2.689 + check_priv();
2.690 load_spreg( R_EAX, R_DBR );
2.691 store_reg( R_EAX, Rn );
2.692 :}
2.693 STC Rm_BANK, Rn {:
2.694 + check_priv();
2.695 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2.696 store_reg( R_EAX, Rn );
2.697 :}
2.698 STC.L SR, @-Rn {:
2.699 + check_priv();
2.700 load_reg( R_ECX, Rn );
2.701 ADD_imm8s_r32( -4, R_ECX );
2.702 store_reg( R_ECX, Rn );
2.703 @@ -2054,6 +2188,7 @@
2.704 MEM_WRITE_LONG( R_ECX, R_EAX );
2.705 :}
2.706 STC.L VBR, @-Rn {:
2.707 + check_priv();
2.708 load_reg( R_ECX, Rn );
2.709 ADD_imm8s_r32( -4, R_ECX );
2.710 store_reg( R_ECX, Rn );
2.711 @@ -2061,6 +2196,7 @@
2.712 MEM_WRITE_LONG( R_ECX, R_EAX );
2.713 :}
2.714 STC.L SSR, @-Rn {:
2.715 + check_priv();
2.716 load_reg( R_ECX, Rn );
2.717 ADD_imm8s_r32( -4, R_ECX );
2.718 store_reg( R_ECX, Rn );
2.719 @@ -2068,6 +2204,7 @@
2.720 MEM_WRITE_LONG( R_ECX, R_EAX );
2.721 :}
2.722 STC.L SPC, @-Rn {:
2.723 + check_priv();
2.724 load_reg( R_ECX, Rn );
2.725 ADD_imm8s_r32( -4, R_ECX );
2.726 store_reg( R_ECX, Rn );
2.727 @@ -2075,6 +2212,7 @@
2.728 MEM_WRITE_LONG( R_ECX, R_EAX );
2.729 :}
2.730 STC.L SGR, @-Rn {:
2.731 + check_priv();
2.732 load_reg( R_ECX, Rn );
2.733 ADD_imm8s_r32( -4, R_ECX );
2.734 store_reg( R_ECX, Rn );
2.735 @@ -2082,6 +2220,7 @@
2.736 MEM_WRITE_LONG( R_ECX, R_EAX );
2.737 :}
2.738 STC.L DBR, @-Rn {:
2.739 + check_priv();
2.740 load_reg( R_ECX, Rn );
2.741 ADD_imm8s_r32( -4, R_ECX );
2.742 store_reg( R_ECX, Rn );
2.743 @@ -2089,6 +2228,7 @@
2.744 MEM_WRITE_LONG( R_ECX, R_EAX );
2.745 :}
2.746 STC.L Rm_BANK, @-Rn {:
2.747 + check_priv();
2.748 load_reg( R_ECX, Rn );
2.749 ADD_imm8s_r32( -4, R_ECX );
2.750 store_reg( R_ECX, Rn );
2.751 @@ -2160,10 +2300,12 @@
2.752
2.753 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
2.754 %%
2.755 - INC_r32(R_ESI);
2.756 if( sh4_x86.in_delay_slot ) {
2.757 + ADD_imm8s_r32(2,R_ESI);
2.758 sh4_x86.in_delay_slot = FALSE;
2.759 return 1;
2.760 + } else {
2.761 + INC_r32(R_ESI);
2.762 }
2.763 return 0;
2.764 }
3.1 --- a/src/sh4/x86op.h Sun Sep 16 07:01:35 2007 +0000
3.2 +++ b/src/sh4/x86op.h Sun Sep 16 07:03:23 2007 +0000
3.3 @@ -1,5 +1,5 @@
3.4 /**
3.5 - * $Id: x86op.h,v 1.7 2007-09-12 11:31:16 nkeynes Exp $
3.6 + * $Id: x86op.h,v 1.8 2007-09-16 07:03:23 nkeynes Exp $
3.7 *
3.8 * Definitions of x86 opcodes for use by the translator.
3.9 *
3.10 @@ -92,10 +92,14 @@
3.11
3.12 /* Major opcodes */
3.13 #define ADD_sh4r_r32(disp,r1) OP(0x03); MODRM_r32_sh4r(r1,disp)
3.14 +#define ADD_r32_sh4r(r1,disp) OP(0x01); MODRM_r32_sh4r(r1,disp)
3.15 #define ADD_r32_r32(r1,r2) OP(0x03); MODRM_rm32_r32(r1,r2)
3.16 #define ADD_imm8s_r32(imm,r1) OP(0x83); MODRM_rm32_r32(r1, 0); OP(imm)
3.17 +#define ADD_imm8s_sh4r(imm,disp) OP(0x83); MODRM_r32_sh4r(0,disp); OP(imm)
3.18 #define ADD_imm32_r32(imm32,r1) OP(0x81); MODRM_rm32_r32(r1,0); OP32(imm32)
3.19 #define ADC_r32_r32(r1,r2) OP(0x13); MODRM_rm32_r32(r1,r2)
3.20 +#define ADC_sh4r_r32(disp,r1) OP(0x13); MODRM_r32_sh4r(r1,disp)
3.21 +#define ADC_r32_sh4r(r1,disp) OP(0x11); MODRM_r32_sh4r(r1,disp)
3.22 #define AND_r32_r32(r1,r2) OP(0x23); MODRM_rm32_r32(r1,r2)
3.23 #define AND_imm8_r8(imm8, r1) OP(0x80); MODRM_rm32_r32(r1,4); OP(imm8)
3.24 #define AND_imm8s_r32(imm8,r1) OP(0x83); MODRM_rm32_r32(r1,4); OP(imm8)
3.25 @@ -153,6 +157,8 @@
3.26 #define TEST_imm8_r8(imm8,r1) OP(0xF6); MODRM_rm32_r32(r1,0); OP(imm8)
3.27 #define TEST_imm32_r32(imm,r1) OP(0xF7); MODRM_rm32_r32(r1,0); OP32(imm)
3.28 #define XCHG_r8_r8(r1,r2) OP(0x86); MODRM_rm32_r32(r1,r2)
3.29 +#define XOR_r8_r8(r1,r2) OP(0x32); MODRM_rm32_r32(r1,r2)
3.30 +#define XOR_imm8s_r32(imm,r1) OP(0x83); MODRM_rm32_r32(r1,6); OP(imm)
3.31 #define XOR_r32_r32(r1,r2) OP(0x33); MODRM_rm32_r32(r1,r2)
3.32 #define XOR_sh4r_r32(disp,r1) OP(0x33); MODRM_r32_sh4r(r1,disp)
3.33 #define XOR_imm32_r32(imm,r1) OP(0x81); MODRM_rm32_r32(r1,6); OP32(imm)
3.34 @@ -188,8 +194,12 @@
3.35 #define JNGE_rel8(rel,label) OP(0x7C); OP(rel); MARK_JMP(rel,label)
3.36 #define JNC_rel8(rel,label) OP(0x73); OP(rel); MARK_JMP(rel,label)
3.37 #define JNO_rel8(rel,label) OP(0x71); OP(rel); MARK_JMP(rel,label)
3.38 +#define JNS_rel8(rel,label) OP(0x79); OP(rel); MARK_JMP(rel,label)
3.39 +#define JS_rel8(rel,label) OP(0x78); OP(rel); MARK_JMP(rel,label)
3.40 +
3.41
3.42 /* 32-bit long forms w/ backpatching to an exit routine */
3.43 +#define JMP_exit(rel) OP(0xE9); sh4_x86_add_backpatch(xlat_output); OP32(rel)
3.44 #define JE_exit(rel) OP(0x0F); OP(0x84); sh4_x86_add_backpatch(xlat_output); OP32(rel)
3.45 #define JA_exit(rel) OP(0x0F); OP(0x87); sh4_x86_add_backpatch(xlat_output); OP32(rel)
3.46 #define JAE_exit(rel) OP(0x0F); OP(0x83); sh4_x86_add_backpatch(xlat_output); OP32(rel)
3.47 @@ -240,8 +250,9 @@
3.48 #define SETGE_t() SETGE_sh4r(R_T)
3.49 #define SETC_t() SETC_sh4r(R_T)
3.50 #define SETO_t() SETO_sh4r(R_T)
3.51 +#define SETNE_t() SETNE_sh4r(R_T)
3.52
3.53 -#define SETC_r32(r1) OP(0x0F); OP(0x92); MODRM_rm32_r32(r1, 0)
3.54 +#define SETC_r8(r1) OP(0x0F); OP(0x92); MODRM_rm32_r32(r1, 0)
3.55
3.56 /* Pseudo-op Load carry from T: CMP [EBP+t], #01 ; CMC */
3.57 #define LDC_t() OP(0x83); MODRM_r32_sh4r(7,R_T); OP(0x01); CMC()
.