Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 416:714df603c869
prev 409:549e00835448
next 417:bd927df302a9
author nkeynes
date Wed Oct 03 12:19:03 2007 +0000 (13 years ago)
permissions -rw-r--r--
last change Remove INC %esi (and esi in general), replace with load immediates (faster)
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Sat Sep 29 05:33:02 2007 +0000
1.2 +++ b/src/sh4/sh4x86.in Wed Oct 03 12:19:03 2007 +0000
1.3 @@ -1,5 +1,5 @@
1.4 /**
1.5 - * $Id: sh4x86.in,v 1.17 2007-09-29 05:33:02 nkeynes Exp $
1.6 + * $Id: sh4x86.in,v 1.18 2007-10-03 12:19:03 nkeynes Exp $
1.7 *
1.8 * SH4 => x86 translation. This version does no real optimization, it just
1.9 * outputs straight-line x86 code - it mainly exists to provide a baseline
1.10 @@ -293,7 +293,23 @@
1.11 }
1.12
1.13 /* Exception checks - Note that all exception checks will clobber EAX; checks that run precheck() also overwrite EDX */
1.14 -static void check_priv( )
1.15 +#define precheck() load_imm32(R_EDX, (pc-sh4_x86.block_start_pc-(sh4_x86.in_delay_slot?2:0))>>1) /* EDX = halved byte offset of pc from the block start, taken 2 bytes earlier when in a delay slot */
1.16 +
1.17 +#define check_priv( ) \
1.18 + if( !sh4_x86.priv_checked ) { \
1.19 + sh4_x86.priv_checked = TRUE;\
1.20 + precheck();\
1.21 + load_spreg( R_EAX, R_SR );\
1.22 + AND_imm32_r32( SR_MD, R_EAX );\
1.23 + if( sh4_x86.in_delay_slot ) {\
1.24 + JE_exit( EXIT_SLOT_ILLEGAL );\
1.25 + } else {\
1.26 + JE_exit( EXIT_ILLEGAL );\
1.27 + }\
1.28 + }\
1.29 +
1.30 +
1.31 +static void check_priv_no_precheck()
1.32 {
1.33 if( !sh4_x86.priv_checked ) {
1.34 sh4_x86.priv_checked = TRUE;
1.35 @@ -307,7 +323,20 @@
1.36 }
1.37 }
1.38
1.39 -static void check_fpuen( )
1.40 +#define check_fpuen( ) \
1.41 + if( !sh4_x86.fpuen_checked ) {\
1.42 + sh4_x86.fpuen_checked = TRUE;\
1.43 + precheck();\
1.44 + load_spreg( R_EAX, R_SR );\
1.45 + AND_imm32_r32( SR_FD, R_EAX );\
1.46 + if( sh4_x86.in_delay_slot ) {\
1.47 + JNE_exit(EXIT_SLOT_FPU_DISABLED);\
1.48 + } else {\
1.49 + JNE_exit(EXIT_FPU_DISABLED);\
1.50 + }\
1.51 + }
1.52 +
1.53 +static void check_fpuen_no_precheck()
1.54 {
1.55 if( !sh4_x86.fpuen_checked ) {
1.56 sh4_x86.fpuen_checked = TRUE;
1.57 @@ -319,6 +348,7 @@
1.58 JNE_exit(EXIT_FPU_DISABLED);
1.59 }
1.60 }
1.61 +
1.62 }
1.63
1.64 static void check_ralign16( int x86reg )
1.65 @@ -353,7 +383,7 @@
1.66 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg) /* word store: passes addr_reg and value_reg to sh4_write_word through call_func2 */
1.67 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg) /* long store: passes addr_reg and value_reg to sh4_write_long through call_func2 */
1.68
1.69 -#define SLOTILLEGAL() JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
1.70 +#define SLOTILLEGAL() precheck(); JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
1.71
1.72
1.73
1.74 @@ -366,8 +396,6 @@
1.75 PUSH_r32(R_EBP);
1.76 /* mov &sh4r, ebp */
1.77 load_imm32( R_EBP, (uint32_t)&sh4r );
1.78 - PUSH_r32(R_ESI);
1.79 - XOR_r32_r32(R_ESI, R_ESI);
1.80
1.81 sh4_x86.in_delay_slot = FALSE;
1.82 sh4_x86.priv_checked = FALSE;
1.83 @@ -379,7 +407,7 @@
1.84
1.85 /**
1.86 * Exit the block to an absolute PC
1.87 - * Bytes: 30
1.88 + * Bytes: 29
1.89 */
1.90 void exit_block( sh4addr_t pc, sh4addr_t endpc )
1.91 {
1.92 @@ -389,21 +417,19 @@
1.93 AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.94 load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.95 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.96 - POP_r32(R_ESI);
1.97 POP_r32(R_EBP);
1.98 RET();
1.99 }
1.100
1.101 /**
1.102 * Exit the block with sh4r.pc already written
1.103 - * Bytes: 16
1.104 + * Bytes: 15
1.105 */
1.106 void exit_block_pcset( pc )
1.107 {
1.108 XOR_r32_r32( R_EAX, R_EAX ); // 2
1.109 load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.110 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.111 - POP_r32(R_ESI);
1.112 POP_r32(R_EBP);
1.113 RET();
1.114 }
1.115 @@ -437,12 +463,12 @@
1.116 JMP_TARGET(target4);
1.117 JMP_TARGET(target5);
1.118 load_spreg( R_ECX, REG_OFFSET(pc) );
1.119 - ADD_r32_r32( R_ESI, R_ECX );
1.120 - ADD_r32_r32( R_ESI, R_ECX );
1.121 + ADD_r32_r32( R_EDX, R_ECX );
1.122 + ADD_r32_r32( R_EDX, R_ECX );
1.123 store_spreg( R_ECX, REG_OFFSET(pc) );
1.124 MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.125 load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.126 - MUL_r32( R_ESI );
1.127 + MUL_r32( R_EDX );
1.128 ADD_r32_r32( R_EAX, R_ECX );
1.129 store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
1.130
1.131 @@ -450,7 +476,6 @@
1.132 CALL_r32( R_EAX ); // 2
1.133 ADD_imm8s_r32( 4, R_ESP );
1.134 XOR_r32_r32( R_EAX, R_EAX );
1.135 - POP_r32(R_ESI);
1.136 POP_r32(R_EBP);
1.137 RET();
1.138
1.139 @@ -683,6 +708,7 @@
1.140 :}
1.141 MAC.L @Rm+, @Rn+ {:
1.142 load_reg( R_ECX, Rm );
1.143 + precheck();
1.144 check_ralign32( R_ECX );
1.145 load_reg( R_ECX, Rn );
1.146 check_ralign32( R_ECX );
1.147 @@ -705,6 +731,7 @@
1.148 :}
1.149 MAC.W @Rm+, @Rn+ {:
1.150 load_reg( R_ECX, Rm );
1.151 + precheck();
1.152 check_ralign16( R_ECX );
1.153 load_reg( R_ECX, Rn );
1.154 check_ralign16( R_ECX );
1.155 @@ -1097,12 +1124,14 @@
1.156 MOV.L Rm, @Rn {: /* Write the value of Rm as a long to the address held in Rn */
1.157 load_reg( R_EAX, Rm ); // EAX = value to store
1.158 load_reg( R_ECX, Rn ); // ECX = destination address
1.159 + precheck(); // load EDX ahead of the alignment check
1.160 check_walign32(R_ECX); // NOTE(review): presumably exits the block when ECX is misaligned - helper body not visible here
1.161 MEM_WRITE_LONG( R_ECX, R_EAX ); // expands to call_func2(sh4_write_long, R_ECX, R_EAX)
1.162 :}
1.163 MOV.L Rm, @-Rn {:
1.164 load_reg( R_EAX, Rm );
1.165 load_reg( R_ECX, Rn );
1.166 + precheck();
1.167 check_walign32( R_ECX );
1.168 ADD_imm8s_r32( -4, R_ECX );
1.169 store_reg( R_ECX, Rn );
1.170 @@ -1112,6 +1141,7 @@
1.171 load_reg( R_EAX, 0 );
1.172 load_reg( R_ECX, Rn );
1.173 ADD_r32_r32( R_EAX, R_ECX );
1.174 + precheck();
1.175 check_walign32( R_ECX );
1.176 load_reg( R_EAX, Rm );
1.177 MEM_WRITE_LONG( R_ECX, R_EAX );
1.178 @@ -1120,6 +1150,7 @@
1.179 load_spreg( R_ECX, R_GBR );
1.180 load_reg( R_EAX, 0 );
1.181 ADD_imm32_r32( disp, R_ECX );
1.182 + precheck();
1.183 check_walign32( R_ECX );
1.184 MEM_WRITE_LONG( R_ECX, R_EAX );
1.185 :}
1.186 @@ -1127,17 +1158,20 @@
1.187 load_reg( R_ECX, Rn );
1.188 load_reg( R_EAX, Rm );
1.189 ADD_imm32_r32( disp, R_ECX );
1.190 + precheck();
1.191 check_walign32( R_ECX );
1.192 MEM_WRITE_LONG( R_ECX, R_EAX );
1.193 :}
1.194 MOV.L @Rm, Rn {: /* Read a long from the address held in Rm and store it in Rn */
1.195 load_reg( R_ECX, Rm ); // ECX = source address
1.196 + precheck(); // load EDX ahead of the alignment check
1.197 check_ralign32( R_ECX ); // NOTE(review): presumably exits the block when ECX is misaligned - helper body not visible here
1.198 MEM_READ_LONG( R_ECX, R_EAX ); // presumably leaves the value read in EAX - macro not visible here
1.199 store_reg( R_EAX, Rn ); // Rn = EAX
1.200 :}
1.201 MOV.L @Rm+, Rn {:
1.202 load_reg( R_EAX, Rm );
1.203 + precheck();
1.204 check_ralign32( R_EAX );
1.205 MOV_r32_r32( R_EAX, R_ECX );
1.206 ADD_imm8s_r32( 4, R_EAX );
1.207 @@ -1149,6 +1183,7 @@
1.208 load_reg( R_EAX, 0 );
1.209 load_reg( R_ECX, Rm );
1.210 ADD_r32_r32( R_EAX, R_ECX );
1.211 + precheck();
1.212 check_ralign32( R_ECX );
1.213 MEM_READ_LONG( R_ECX, R_EAX );
1.214 store_reg( R_EAX, Rn );
1.215 @@ -1156,6 +1191,7 @@
1.216 MOV.L @(disp, GBR), R0 {:
1.217 load_spreg( R_ECX, R_GBR );
1.218 ADD_imm32_r32( disp, R_ECX );
1.219 + precheck();
1.220 check_ralign32( R_ECX );
1.221 MEM_READ_LONG( R_ECX, R_EAX );
1.222 store_reg( R_EAX, 0 );
1.223 @@ -1178,18 +1214,21 @@
1.224 MOV.L @(disp, Rm), Rn {:
1.225 load_reg( R_ECX, Rm );
1.226 ADD_imm8s_r32( disp, R_ECX );
1.227 + precheck();
1.228 check_ralign32( R_ECX );
1.229 MEM_READ_LONG( R_ECX, R_EAX );
1.230 store_reg( R_EAX, Rn );
1.231 :}
1.232 MOV.W Rm, @Rn {: /* Write the value of Rm as a word to the address held in Rn */
1.233 load_reg( R_ECX, Rn ); // ECX = destination address
1.234 + precheck(); // load EDX ahead of the alignment check
1.235 check_walign16( R_ECX ); // NOTE(review): presumably exits the block when ECX is misaligned - helper body not visible here
1.236 load_reg( R_EAX, Rm ); // EAX = value to store, loaded only after the check
1.237 MEM_WRITE_WORD( R_ECX, R_EAX ); // expands to call_func2(sh4_write_word, R_ECX, R_EAX)
1.238 :}
1.239 MOV.W Rm, @-Rn {:
1.240 load_reg( R_ECX, Rn );
1.241 + precheck();
1.242 check_walign16( R_ECX );
1.243 load_reg( R_EAX, Rm );
1.244 ADD_imm8s_r32( -2, R_ECX );
1.245 @@ -1200,6 +1239,7 @@
1.246 load_reg( R_EAX, 0 );
1.247 load_reg( R_ECX, Rn );
1.248 ADD_r32_r32( R_EAX, R_ECX );
1.249 + precheck();
1.250 check_walign16( R_ECX );
1.251 load_reg( R_EAX, Rm );
1.252 MEM_WRITE_WORD( R_ECX, R_EAX );
1.253 @@ -1208,6 +1248,7 @@
1.254 load_spreg( R_ECX, R_GBR );
1.255 load_reg( R_EAX, 0 );
1.256 ADD_imm32_r32( disp, R_ECX );
1.257 + precheck();
1.258 check_walign16( R_ECX );
1.259 MEM_WRITE_WORD( R_ECX, R_EAX );
1.260 :}
1.261 @@ -1215,17 +1256,20 @@
1.262 load_reg( R_ECX, Rn );
1.263 load_reg( R_EAX, 0 );
1.264 ADD_imm32_r32( disp, R_ECX );
1.265 + precheck();
1.266 check_walign16( R_ECX );
1.267 MEM_WRITE_WORD( R_ECX, R_EAX );
1.268 :}
1.269 MOV.W @Rm, Rn {: /* Read a word from the address held in Rm and store it in Rn */
1.270 load_reg( R_ECX, Rm ); // ECX = source address
1.271 + precheck(); // load EDX ahead of the alignment check
1.272 check_ralign16( R_ECX ); // NOTE(review): presumably exits the block when ECX is misaligned - helper body not visible here
1.273 MEM_READ_WORD( R_ECX, R_EAX ); // presumably leaves the value read in EAX - macro not visible here
1.274 store_reg( R_EAX, Rn ); // Rn = EAX
1.275 :}
1.276 MOV.W @Rm+, Rn {:
1.277 load_reg( R_EAX, Rm );
1.278 + precheck();
1.279 check_ralign16( R_EAX );
1.280 MOV_r32_r32( R_EAX, R_ECX );
1.281 ADD_imm8s_r32( 2, R_EAX );
1.282 @@ -1237,6 +1281,7 @@
1.283 load_reg( R_EAX, 0 );
1.284 load_reg( R_ECX, Rm );
1.285 ADD_r32_r32( R_EAX, R_ECX );
1.286 + precheck();
1.287 check_ralign16( R_ECX );
1.288 MEM_READ_WORD( R_ECX, R_EAX );
1.289 store_reg( R_EAX, Rn );
1.290 @@ -1244,6 +1289,7 @@
1.291 MOV.W @(disp, GBR), R0 {:
1.292 load_spreg( R_ECX, R_GBR );
1.293 ADD_imm32_r32( disp, R_ECX );
1.294 + precheck();
1.295 check_ralign16( R_ECX );
1.296 MEM_READ_WORD( R_ECX, R_EAX );
1.297 store_reg( R_EAX, 0 );
1.298 @@ -1260,6 +1306,7 @@
1.299 MOV.W @(disp, Rm), R0 {:
1.300 load_reg( R_ECX, Rm );
1.301 ADD_imm32_r32( disp, R_ECX );
1.302 + precheck();
1.303 check_ralign16( R_ECX );
1.304 MEM_READ_WORD( R_ECX, R_EAX );
1.305 store_reg( R_EAX, 0 );
1.306 @@ -1275,6 +1322,7 @@
1.307 MOVCA.L R0, @Rn {: /* Write the value of R0 as a long to the address held in Rn; this action emits only an ordinary long write */
1.308 load_reg( R_EAX, 0 ); // EAX = R0
1.309 load_reg( R_ECX, Rn ); // ECX = destination address
1.310 + precheck(); // load EDX ahead of the alignment check
1.311 check_walign32( R_ECX ); // NOTE(review): presumably exits the block when ECX is misaligned - helper body not visible here
1.312 MEM_WRITE_LONG( R_ECX, R_EAX ); // expands to call_func2(sh4_write_long, R_ECX, R_EAX)
1.313 :}
1.314 @@ -1285,7 +1333,7 @@
1.315 SLOTILLEGAL();
1.316 } else {
1.317 CMP_imm8s_sh4r( 0, R_T );
1.318 - JNE_rel8( 30, nottaken );
1.319 + JNE_rel8( 29, nottaken );
1.320 exit_block( disp + pc + 4, pc+2 );
1.321 JMP_TARGET(nottaken);
1.322 return 2;
1.323 @@ -1364,7 +1412,7 @@
1.324 SLOTILLEGAL();
1.325 } else {
1.326 CMP_imm8s_sh4r( 0, R_T );
1.327 - JE_rel8( 30, nottaken );
1.328 + JE_rel8( 29, nottaken );
1.329 exit_block( disp + pc + 4, pc+2 );
1.330 JMP_TARGET(nottaken);
1.331 return 2;
1.332 @@ -1460,6 +1508,7 @@
1.333 if( sh4_x86.in_delay_slot ) {
1.334 SLOTILLEGAL();
1.335 } else {
1.336 + precheck();
1.337 JMP_exit(EXIT_ILLEGAL);
1.338 return 2;
1.339 }
1.340 @@ -1537,188 +1586,194 @@
1.341 }
1.342 }
1.343 :}
1.344 -FMOV FRm, @Rn {:
1.345 - check_fpuen();
1.346 - load_reg( R_EDX, Rn );
1.347 - check_walign32( R_EDX );
1.348 - load_spreg( R_ECX, R_FPSCR );
1.349 - TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.350 +FMOV FRm, @Rn {:
1.351 + precheck();
1.352 + check_fpuen_no_precheck();
1.353 + load_reg( R_ECX, Rn );
1.354 + check_walign32( R_ECX );
1.355 + load_spreg( R_EDX, R_FPSCR );
1.356 + TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.357 JNE_rel8(20, doublesize);
1.358 - load_fr_bank( R_ECX );
1.359 - load_fr( R_ECX, R_EAX, FRm );
1.360 - MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.361 + load_fr_bank( R_EDX );
1.362 + load_fr( R_EDX, R_EAX, FRm );
1.363 + MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
1.364 if( FRm&1 ) {
1.365 JMP_rel8( 48, end );
1.366 JMP_TARGET(doublesize);
1.367 - load_xf_bank( R_ECX );
1.368 - load_fr( R_ECX, R_EAX, FRm&0x0E );
1.369 - load_fr( R_ECX, R_ECX, FRm|0x01 );
1.370 - MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.371 + load_xf_bank( R_EDX );
1.372 + load_fr( R_EDX, R_EAX, FRm&0x0E );
1.373 + load_fr( R_EDX, R_EDX, FRm|0x01 );
1.374 + MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1.375 JMP_TARGET(end);
1.376 } else {
1.377 JMP_rel8( 39, end );
1.378 JMP_TARGET(doublesize);
1.379 - load_fr_bank( R_ECX );
1.380 - load_fr( R_ECX, R_EAX, FRm&0x0E );
1.381 - load_fr( R_ECX, R_ECX, FRm|0x01 );
1.382 - MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.383 + load_fr_bank( R_EDX );
1.384 + load_fr( R_EDX, R_EAX, FRm&0x0E );
1.385 + load_fr( R_EDX, R_EDX, FRm|0x01 );
1.386 + MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1.387 JMP_TARGET(end);
1.388 }
1.389 :}
1.390 FMOV @Rm, FRn {:
1.391 - check_fpuen();
1.392 - load_reg( R_EDX, Rm );
1.393 - check_ralign32( R_EDX );
1.394 - load_spreg( R_ECX, R_FPSCR );
1.395 - TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.396 + precheck();
1.397 + check_fpuen_no_precheck();
1.398 + load_reg( R_ECX, Rm );
1.399 + check_ralign32( R_ECX );
1.400 + load_spreg( R_EDX, R_FPSCR );
1.401 + TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.402 JNE_rel8(19, doublesize);
1.403 - MEM_READ_LONG( R_EDX, R_EAX );
1.404 - load_fr_bank( R_ECX );
1.405 - store_fr( R_ECX, R_EAX, FRn );
1.406 + MEM_READ_LONG( R_ECX, R_EAX );
1.407 + load_fr_bank( R_EDX );
1.408 + store_fr( R_EDX, R_EAX, FRn );
1.409 if( FRn&1 ) {
1.410 JMP_rel8(48, end);
1.411 JMP_TARGET(doublesize);
1.412 - MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.413 - load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.414 - load_xf_bank( R_ECX );
1.415 - store_fr( R_ECX, R_EAX, FRn&0x0E );
1.416 - store_fr( R_ECX, R_EDX, FRn|0x01 );
1.417 + MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.418 + load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.419 + load_xf_bank( R_EDX );
1.420 + store_fr( R_EDX, R_EAX, FRn&0x0E );
1.421 + store_fr( R_EDX, R_ECX, FRn|0x01 );
1.422 JMP_TARGET(end);
1.423 } else {
1.424 JMP_rel8(36, end);
1.425 JMP_TARGET(doublesize);
1.426 - MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.427 - load_fr_bank( R_ECX );
1.428 - store_fr( R_ECX, R_EAX, FRn&0x0E );
1.429 - store_fr( R_ECX, R_EDX, FRn|0x01 );
1.430 + MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.431 + load_fr_bank( R_EDX );
1.432 + store_fr( R_EDX, R_EAX, FRn&0x0E );
1.433 + store_fr( R_EDX, R_ECX, FRn|0x01 );
1.434 JMP_TARGET(end);
1.435 }
1.436 :}
1.437 FMOV FRm, @-Rn {:
1.438 - check_fpuen();
1.439 - load_reg( R_EDX, Rn );
1.440 - check_walign32( R_EDX );
1.441 - load_spreg( R_ECX, R_FPSCR );
1.442 - TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.443 + precheck();
1.444 + check_fpuen_no_precheck();
1.445 + load_reg( R_ECX, Rn );
1.446 + check_walign32( R_ECX );
1.447 + load_spreg( R_EDX, R_FPSCR );
1.448 + TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.449 JNE_rel8(26, doublesize);
1.450 - load_fr_bank( R_ECX );
1.451 - load_fr( R_ECX, R_EAX, FRm );
1.452 - ADD_imm8s_r32(-4,R_EDX);
1.453 - store_reg( R_EDX, Rn );
1.454 - MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.455 + load_fr_bank( R_EDX );
1.456 + load_fr( R_EDX, R_EAX, FRm );
1.457 + ADD_imm8s_r32(-4,R_ECX);
1.458 + store_reg( R_ECX, Rn );
1.459 + MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
1.460 if( FRm&1 ) {
1.461 JMP_rel8( 54, end );
1.462 JMP_TARGET(doublesize);
1.463 - load_xf_bank( R_ECX );
1.464 - load_fr( R_ECX, R_EAX, FRm&0x0E );
1.465 - load_fr( R_ECX, R_ECX, FRm|0x01 );
1.466 - ADD_imm8s_r32(-8,R_EDX);
1.467 - store_reg( R_EDX, Rn );
1.468 - MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.469 + load_xf_bank( R_EDX );
1.470 + load_fr( R_EDX, R_EAX, FRm&0x0E );
1.471 + load_fr( R_EDX, R_EDX, FRm|0x01 );
1.472 + ADD_imm8s_r32(-8,R_ECX);
1.473 + store_reg( R_ECX, Rn );
1.474 + MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1.475 JMP_TARGET(end);
1.476 } else {
1.477 JMP_rel8( 45, end );
1.478 JMP_TARGET(doublesize);
1.479 - load_fr_bank( R_ECX );
1.480 - load_fr( R_ECX, R_EAX, FRm&0x0E );
1.481 - load_fr( R_ECX, R_ECX, FRm|0x01 );
1.482 - ADD_imm8s_r32(-8,R_EDX);
1.483 - store_reg( R_EDX, Rn );
1.484 - MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.485 + load_fr_bank( R_EDX );
1.486 + load_fr( R_EDX, R_EAX, FRm&0x0E );
1.487 + load_fr( R_EDX, R_EDX, FRm|0x01 );
1.488 + ADD_imm8s_r32(-8,R_ECX);
1.489 + store_reg( R_ECX, Rn );
1.490 + MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1.491 JMP_TARGET(end);
1.492 }
1.493 :}
1.494 -FMOV @Rm+, FRn {:
1.495 - check_fpuen();
1.496 - load_reg( R_EDX, Rm );
1.497 - check_ralign32( R_EDX );
1.498 - MOV_r32_r32( R_EDX, R_EAX );
1.499 - load_spreg( R_ECX, R_FPSCR );
1.500 - TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.501 +FMOV @Rm+, FRn {:
1.502 + precheck();
1.503 + check_fpuen_no_precheck();
1.504 + load_reg( R_ECX, Rm );
1.505 + check_ralign32( R_ECX );
1.506 + MOV_r32_r32( R_ECX, R_EAX );
1.507 + load_spreg( R_EDX, R_FPSCR );
1.508 + TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.509 JNE_rel8(25, doublesize);
1.510 ADD_imm8s_r32( 4, R_EAX );
1.511 store_reg( R_EAX, Rm );
1.512 - MEM_READ_LONG( R_EDX, R_EAX );
1.513 - load_fr_bank( R_ECX );
1.514 - store_fr( R_ECX, R_EAX, FRn );
1.515 + MEM_READ_LONG( R_ECX, R_EAX );
1.516 + load_fr_bank( R_EDX );
1.517 + store_fr( R_EDX, R_EAX, FRn );
1.518 if( FRn&1 ) {
1.519 JMP_rel8(54, end);
1.520 JMP_TARGET(doublesize);
1.521 ADD_imm8s_r32( 8, R_EAX );
1.522 store_reg(R_EAX, Rm);
1.523 - MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.524 - load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.525 - load_xf_bank( R_ECX );
1.526 - store_fr( R_ECX, R_EAX, FRn&0x0E );
1.527 - store_fr( R_ECX, R_EDX, FRn|0x01 );
1.528 + MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.529 + load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.530 + load_xf_bank( R_EDX );
1.531 + store_fr( R_EDX, R_EAX, FRn&0x0E );
1.532 + store_fr( R_EDX, R_ECX, FRn|0x01 );
1.533 JMP_TARGET(end);
1.534 } else {
1.535 JMP_rel8(42, end);
1.536 ADD_imm8s_r32( 8, R_EAX );
1.537 store_reg(R_EAX, Rm);
1.538 - MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.539 - load_fr_bank( R_ECX );
1.540 - store_fr( R_ECX, R_EAX, FRn&0x0E );
1.541 - store_fr( R_ECX, R_EDX, FRn|0x01 );
1.542 + MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.543 + load_fr_bank( R_EDX );
1.544 + store_fr( R_EDX, R_EAX, FRn&0x0E );
1.545 + store_fr( R_EDX, R_ECX, FRn|0x01 );
1.546 JMP_TARGET(end);
1.547 }
1.548 :}
1.549 FMOV FRm, @(R0, Rn) {:
1.550 - check_fpuen();
1.551 - load_reg( R_EDX, Rn );
1.552 - ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
1.553 - check_walign32( R_EDX );
1.554 - load_spreg( R_ECX, R_FPSCR );
1.555 - TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.556 + precheck();
1.557 + check_fpuen_no_precheck();
1.558 + load_reg( R_ECX, Rn );
1.559 + ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
1.560 + check_walign32( R_ECX );
1.561 + load_spreg( R_EDX, R_FPSCR );
1.562 + TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.563 JNE_rel8(20, doublesize);
1.564 - load_fr_bank( R_ECX );
1.565 - load_fr( R_ECX, R_EAX, FRm );
1.566 - MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
1.567 + load_fr_bank( R_EDX );
1.568 + load_fr( R_EDX, R_EAX, FRm );
1.569 + MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
1.570 if( FRm&1 ) {
1.571 JMP_rel8( 48, end );
1.572 JMP_TARGET(doublesize);
1.573 - load_xf_bank( R_ECX );
1.574 - load_fr( R_ECX, R_EAX, FRm&0x0E );
1.575 - load_fr( R_ECX, R_ECX, FRm|0x01 );
1.576 - MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.577 + load_xf_bank( R_EDX );
1.578 + load_fr( R_EDX, R_EAX, FRm&0x0E );
1.579 + load_fr( R_EDX, R_EDX, FRm|0x01 );
1.580 + MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1.581 JMP_TARGET(end);
1.582 } else {
1.583 JMP_rel8( 39, end );
1.584 JMP_TARGET(doublesize);
1.585 - load_fr_bank( R_ECX );
1.586 - load_fr( R_ECX, R_EAX, FRm&0x0E );
1.587 - load_fr( R_ECX, R_ECX, FRm|0x01 );
1.588 - MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
1.589 + load_fr_bank( R_EDX );
1.590 + load_fr( R_EDX, R_EAX, FRm&0x0E );
1.591 + load_fr( R_EDX, R_EDX, FRm|0x01 );
1.592 + MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1.593 JMP_TARGET(end);
1.594 }
1.595 :}
1.596 FMOV @(R0, Rm), FRn {:
1.597 - check_fpuen();
1.598 - load_reg( R_EDX, Rm );
1.599 - ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
1.600 - check_ralign32( R_EDX );
1.601 - load_spreg( R_ECX, R_FPSCR );
1.602 - TEST_imm32_r32( FPSCR_SZ, R_ECX );
1.603 + precheck();
1.604 + check_fpuen_no_precheck();
1.605 + load_reg( R_ECX, Rm );
1.606 + ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
1.607 + check_ralign32( R_ECX );
1.608 + load_spreg( R_EDX, R_FPSCR );
1.609 + TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.610 JNE_rel8(19, doublesize);
1.611 - MEM_READ_LONG( R_EDX, R_EAX );
1.612 - load_fr_bank( R_ECX );
1.613 - store_fr( R_ECX, R_EAX, FRn );
1.614 + MEM_READ_LONG( R_ECX, R_EAX );
1.615 + load_fr_bank( R_EDX );
1.616 + store_fr( R_EDX, R_EAX, FRn );
1.617 if( FRn&1 ) {
1.618 JMP_rel8(48, end);
1.619 JMP_TARGET(doublesize);
1.620 - MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.621 - load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
1.622 - load_xf_bank( R_ECX );
1.623 - store_fr( R_ECX, R_EAX, FRn&0x0E );
1.624 - store_fr( R_ECX, R_EDX, FRn|0x01 );
1.625 + MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.626 + load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.627 + load_xf_bank( R_EDX );
1.628 + store_fr( R_EDX, R_EAX, FRn&0x0E );
1.629 + store_fr( R_EDX, R_ECX, FRn|0x01 );
1.630 JMP_TARGET(end);
1.631 } else {
1.632 JMP_rel8(36, end);
1.633 JMP_TARGET(doublesize);
1.634 - MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
1.635 - load_fr_bank( R_ECX );
1.636 - store_fr( R_ECX, R_EAX, FRn&0x0E );
1.637 - store_fr( R_ECX, R_EDX, FRn|0x01 );
1.638 + MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.639 + load_fr_bank( R_EDX );
1.640 + store_fr( R_EDX, R_EAX, FRn&0x0E );
1.641 + store_fr( R_EDX, R_ECX, FRn|0x01 );
1.642 JMP_TARGET(end);
1.643 }
1.644 :}
1.645 @@ -2126,6 +2181,7 @@
1.646 :}
1.647 LDC.L @Rm+, GBR {:
1.648 load_reg( R_EAX, Rm );
1.649 + precheck();
1.650 check_ralign32( R_EAX );
1.651 MOV_r32_r32( R_EAX, R_ECX );
1.652 ADD_imm8s_r32( 4, R_EAX );
1.653 @@ -2137,7 +2193,8 @@
1.654 if( sh4_x86.in_delay_slot ) {
1.655 SLOTILLEGAL();
1.656 } else {
1.657 - check_priv();
1.658 + precheck();
1.659 + check_priv_no_precheck();
1.660 load_reg( R_EAX, Rm );
1.661 check_ralign32( R_EAX );
1.662 MOV_r32_r32( R_EAX, R_ECX );
1.663 @@ -2150,7 +2207,8 @@
1.664 }
1.665 :}
1.666 LDC.L @Rm+, VBR {:
1.667 - check_priv();
1.668 + precheck();
1.669 + check_priv_no_precheck();
1.670 load_reg( R_EAX, Rm );
1.671 check_ralign32( R_EAX );
1.672 MOV_r32_r32( R_EAX, R_ECX );
1.673 @@ -2160,8 +2218,10 @@
1.674 store_spreg( R_EAX, R_VBR );
1.675 :}
1.676 LDC.L @Rm+, SSR {:
1.677 - check_priv();
1.678 + precheck();
1.679 + check_priv_no_precheck();
1.680 load_reg( R_EAX, Rm );
1.681 + check_ralign32( R_EAX );
1.682 MOV_r32_r32( R_EAX, R_ECX );
1.683 ADD_imm8s_r32( 4, R_EAX );
1.684 store_reg( R_EAX, Rm );
1.685 @@ -2169,7 +2229,8 @@
1.686 store_spreg( R_EAX, R_SSR );
1.687 :}
1.688 LDC.L @Rm+, SGR {:
1.689 - check_priv();
1.690 + precheck();
1.691 + check_priv_no_precheck();
1.692 load_reg( R_EAX, Rm );
1.693 check_ralign32( R_EAX );
1.694 MOV_r32_r32( R_EAX, R_ECX );
1.695 @@ -2179,7 +2240,8 @@
1.696 store_spreg( R_EAX, R_SGR );
1.697 :}
1.698 LDC.L @Rm+, SPC {:
1.699 - check_priv();
1.700 + precheck();
1.701 + check_priv_no_precheck();
1.702 load_reg( R_EAX, Rm );
1.703 check_ralign32( R_EAX );
1.704 MOV_r32_r32( R_EAX, R_ECX );
1.705 @@ -2189,7 +2251,8 @@
1.706 store_spreg( R_EAX, R_SPC );
1.707 :}
1.708 LDC.L @Rm+, DBR {:
1.709 - check_priv();
1.710 + precheck();
1.711 + check_priv_no_precheck();
1.712 load_reg( R_EAX, Rm );
1.713 check_ralign32( R_EAX );
1.714 MOV_r32_r32( R_EAX, R_ECX );
1.715 @@ -2199,7 +2262,8 @@
1.716 store_spreg( R_EAX, R_DBR );
1.717 :}
1.718 LDC.L @Rm+, Rn_BANK {:
1.719 - check_priv();
1.720 + precheck();
1.721 + check_priv_no_precheck();
1.722 load_reg( R_EAX, Rm );
1.723 check_ralign32( R_EAX );
1.724 MOV_r32_r32( R_EAX, R_ECX );
1.725 @@ -2215,6 +2279,7 @@
1.726 :}
1.727 LDS.L @Rm+, FPSCR {:
1.728 load_reg( R_EAX, Rm );
1.729 + precheck();
1.730 check_ralign32( R_EAX );
1.731 MOV_r32_r32( R_EAX, R_ECX );
1.732 ADD_imm8s_r32( 4, R_EAX );
1.733 @@ -2229,6 +2294,7 @@
1.734 :}
1.735 LDS.L @Rm+, FPUL {:
1.736 load_reg( R_EAX, Rm );
1.737 + precheck();
1.738 check_ralign32( R_EAX );
1.739 MOV_r32_r32( R_EAX, R_ECX );
1.740 ADD_imm8s_r32( 4, R_EAX );
1.741 @@ -2242,6 +2308,7 @@
1.742 :}
1.743 LDS.L @Rm+, MACH {:
1.744 load_reg( R_EAX, Rm );
1.745 + precheck();
1.746 check_ralign32( R_EAX );
1.747 MOV_r32_r32( R_EAX, R_ECX );
1.748 ADD_imm8s_r32( 4, R_EAX );
1.749 @@ -2255,6 +2322,7 @@
1.750 :}
1.751 LDS.L @Rm+, MACL {:
1.752 load_reg( R_EAX, Rm );
1.753 + precheck();
1.754 check_ralign32( R_EAX );
1.755 MOV_r32_r32( R_EAX, R_ECX );
1.756 ADD_imm8s_r32( 4, R_EAX );
1.757 @@ -2268,6 +2336,7 @@
1.758 :}
1.759 LDS.L @Rm+, PR {:
1.760 load_reg( R_EAX, Rm );
1.761 + precheck();
1.762 check_ralign32( R_EAX );
1.763 MOV_r32_r32( R_EAX, R_ECX );
1.764 ADD_imm8s_r32( 4, R_EAX );
1.765 @@ -2293,7 +2362,6 @@
1.766 check_priv();
1.767 call_func0( sh4_sleep );
1.768 sh4_x86.in_delay_slot = FALSE;
1.769 - INC_r32(R_ESI);
1.770 return 2;
1.771 :}
1.772 STC SR, Rn {:
1.773 @@ -2336,7 +2404,8 @@
1.774 store_reg( R_EAX, Rn );
1.775 :}
1.776 STC.L SR, @-Rn {:
1.777 - check_priv();
1.778 + precheck();
1.779 + check_priv_no_precheck();
1.780 call_func0( sh4_read_sr );
1.781 load_reg( R_ECX, Rn );
1.782 check_walign32( R_ECX );
1.783 @@ -2345,7 +2414,8 @@
1.784 MEM_WRITE_LONG( R_ECX, R_EAX );
1.785 :}
1.786 STC.L VBR, @-Rn {:
1.787 - check_priv();
1.788 + precheck();
1.789 + check_priv_no_precheck();
1.790 load_reg( R_ECX, Rn );
1.791 check_walign32( R_ECX );
1.792 ADD_imm8s_r32( -4, R_ECX );
1.793 @@ -2354,7 +2424,8 @@
1.794 MEM_WRITE_LONG( R_ECX, R_EAX );
1.795 :}
1.796 STC.L SSR, @-Rn {:
1.797 - check_priv();
1.798 + precheck();
1.799 + check_priv_no_precheck();
1.800 load_reg( R_ECX, Rn );
1.801 check_walign32( R_ECX );
1.802 ADD_imm8s_r32( -4, R_ECX );
1.803 @@ -2362,8 +2433,9 @@
1.804 load_spreg( R_EAX, R_SSR );
1.805 MEM_WRITE_LONG( R_ECX, R_EAX );
1.806 :}
1.807 -STC.L SPC, @-Rn {:
1.808 - check_priv();
1.809 +STC.L SPC, @-Rn {:
1.810 + precheck();
1.811 + check_priv_no_precheck();
1.812 load_reg( R_ECX, Rn );
1.813 check_walign32( R_ECX );
1.814 ADD_imm8s_r32( -4, R_ECX );
1.815 @@ -2372,7 +2444,8 @@
1.816 MEM_WRITE_LONG( R_ECX, R_EAX );
1.817 :}
1.818 STC.L SGR, @-Rn {:
1.819 - check_priv();
1.820 + precheck();
1.821 + check_priv_no_precheck();
1.822 load_reg( R_ECX, Rn );
1.823 check_walign32( R_ECX );
1.824 ADD_imm8s_r32( -4, R_ECX );
1.825 @@ -2381,7 +2454,8 @@
1.826 MEM_WRITE_LONG( R_ECX, R_EAX );
1.827 :}
1.828 STC.L DBR, @-Rn {:
1.829 - check_priv();
1.830 + precheck();
1.831 + check_priv_no_precheck();
1.832 load_reg( R_ECX, Rn );
1.833 check_walign32( R_ECX );
1.834 ADD_imm8s_r32( -4, R_ECX );
1.835 @@ -2390,7 +2464,8 @@
1.836 MEM_WRITE_LONG( R_ECX, R_EAX );
1.837 :}
1.838 STC.L Rm_BANK, @-Rn {:
1.839 - check_priv();
1.840 + precheck();
1.841 + check_priv_no_precheck();
1.842 load_reg( R_ECX, Rn );
1.843 check_walign32( R_ECX );
1.844 ADD_imm8s_r32( -4, R_ECX );
1.845 @@ -2400,6 +2475,7 @@
1.846 :}
1.847 STC.L GBR, @-Rn {:
1.848 load_reg( R_ECX, Rn );
1.849 + precheck();
1.850 check_walign32( R_ECX );
1.851 ADD_imm8s_r32( -4, R_ECX );
1.852 store_reg( R_ECX, Rn );
1.853 @@ -2412,6 +2488,7 @@
1.854 :}
1.855 STS.L FPSCR, @-Rn {:
1.856 load_reg( R_ECX, Rn );
1.857 + precheck();
1.858 check_walign32( R_ECX );
1.859 ADD_imm8s_r32( -4, R_ECX );
1.860 store_reg( R_ECX, Rn );
1.861 @@ -2424,6 +2501,7 @@
1.862 :}
1.863 STS.L FPUL, @-Rn {:
1.864 load_reg( R_ECX, Rn );
1.865 + precheck();
1.866 check_walign32( R_ECX );
1.867 ADD_imm8s_r32( -4, R_ECX );
1.868 store_reg( R_ECX, Rn );
1.869 @@ -2436,6 +2514,7 @@
1.870 :}
1.871 STS.L MACH, @-Rn {:
1.872 load_reg( R_ECX, Rn );
1.873 + precheck();
1.874 check_walign32( R_ECX );
1.875 ADD_imm8s_r32( -4, R_ECX );
1.876 store_reg( R_ECX, Rn );
1.877 @@ -2448,6 +2527,7 @@
1.878 :}
1.879 STS.L MACL, @-Rn {:
1.880 load_reg( R_ECX, Rn );
1.881 + precheck();
1.882 check_walign32( R_ECX );
1.883 ADD_imm8s_r32( -4, R_ECX );
1.884 store_reg( R_ECX, Rn );
1.885 @@ -2460,6 +2540,7 @@
1.886 :}
1.887 STS.L PR, @-Rn {:
1.888 load_reg( R_ECX, Rn );
1.889 + precheck();
1.890 check_walign32( R_ECX );
1.891 ADD_imm8s_r32( -4, R_ECX );
1.892 store_reg( R_ECX, Rn );
1.893 @@ -2469,11 +2550,6 @@
1.894
1.895 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
1.896 %%
1.897 - if( sh4_x86.in_delay_slot ) {
1.898 - ADD_imm8s_r32(2,R_ESI);
1.899 - sh4_x86.in_delay_slot = FALSE;
1.900 - } else {
1.901 - INC_r32(R_ESI);
1.902 - }
1.903 + sh4_x86.in_delay_slot = FALSE;
1.904 return 0;
1.905 }
.