Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 527:14c9489f647e
prev 526:ba3da45b5754
next 532:43653e748030
author nkeynes
date Sun Nov 18 11:12:44 2007 +0000 (12 years ago)
permissions -rw-r--r--
last change x86-64 translator work-in-progress
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Sat Nov 17 06:04:19 2007 +0000
1.2 +++ b/src/sh4/sh4x86.in Sun Nov 18 11:12:44 2007 +0000
1.3 @@ -152,6 +152,17 @@
1.4 }
1.5
1.6 /**
1.7 + * Load an immediate 64-bit quantity (note: x86-64 only)
1.8 + */
1.9 +static inline void load_imm64( int x86reg, uint32_t value ) {
1.10 + /* mov #value, reg */
1.11 + REXW();
1.12 + OP(0xB8 + x86reg);
1.13 + OP64(value);
1.14 +}
1.15 +
1.16 +
1.17 +/**
1.18 * Emit an instruction to store an SH4 reg (RN)
1.19 */
1.20 void static inline store_reg( int x86reg, int sh4reg ) {
1.21 @@ -253,16 +264,161 @@
1.22 OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
1.23 }
1.24
1.25 +#if SH4_TRANSLATOR == TARGET_X86_64
1.26 +/* X86-64 has different calling conventions... */
1.27 +/**
1.28 + * Note: clobbers EAX to make the indirect call - this isn't usually
1.29 + * a problem since the callee will usually clobber it anyway.
1.30 + * Size: 12 bytes
1.31 + */
1.32 +#define CALL_FUNC0_SIZE 12
1.33 +static inline void call_func0( void *ptr )
1.34 +{
1.35 + load_imm64(R_EAX, (uint64_t)ptr);
1.36 + CALL_r32(R_EAX);
1.37 +}
1.38 +
1.39 +#define CALL_FUNC1_SIZE 14
1.40 +static inline void call_func1( void *ptr, int arg1 )
1.41 +{
1.42 + MOV_r32_r32(arg1, R_EDI);
1.43 + call_func0(ptr);
1.44 +}
1.45 +
1.46 +#define CALL_FUNC2_SIZE 16
1.47 +static inline void call_func2( void *ptr, int arg1, int arg2 )
1.48 +{
1.49 + MOV_r32_r32(arg1, R_EDI);
1.50 + MOV_r32_r32(arg2, R_ESI);
1.51 + call_func0(ptr);
1.52 +}
1.53 +
1.54 +#define MEM_WRITE_DOUBLE_SIZE 39
1.55 +/**
1.56 + * Write a double (64-bit) value into memory, with the first word in arg2a, and
1.57 + * the second in arg2b
1.58 + */
1.59 +static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.60 +{
1.61 +/*
1.62 + MOV_r32_r32( addr, R_EDI );
1.63 + MOV_r32_r32( arg2b, R_ESI );
1.64 + REXW(); SHL_imm8_r32( 32, R_ESI );
1.65 + REXW(); MOVZX_r16_r32( arg2a, arg2a );
1.66 + REXW(); OR_r32_r32( arg2a, R_ESI );
1.67 + call_func0(sh4_write_quad);
1.68 +*/
1.69 + PUSH_r32(arg2b);
1.70 + PUSH_r32(addr);
1.71 + call_func2(sh4_write_long, addr, arg2a);
1.72 + POP_r32(addr);
1.73 + POP_r32(arg2b);
1.74 + ADD_imm8s_r32(4, addr);
1.75 + call_func2(sh4_write_long, addr, arg2b);
1.76 +}
1.77 +
1.78 +#define MEM_READ_DOUBLE_SIZE 35
1.79 +/**
1.80 + * Read a double (64-bit) value from memory, writing the first word into arg2a
1.81 + * and the second into arg2b. The addr must not be in EAX
1.82 + */
1.83 +static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.84 +{
1.85 +/*
1.86 + MOV_r32_r32( addr, R_EDI );
1.87 + call_func0(sh4_read_quad);
1.88 + REXW(); MOV_r32_r32( R_EAX, arg2a );
1.89 + REXW(); MOV_r32_r32( R_EAX, arg2b );
1.90 + REXW(); SHR_imm8_r32( 32, arg2b );
1.91 +*/
1.92 + PUSH_r32(addr);
1.93 + call_func1(sh4_read_long, addr);
1.94 + POP_r32(R_EDI);
1.95 + PUSH_r32(R_EAX);
1.96 + ADD_imm8s_r32(4, R_EDI);
1.97 + call_func0(sh4_read_long);
1.98 + MOV_r32_r32(R_EAX, arg2b);
1.99 + POP_r32(arg2a);
1.100 +}
1.101 +
1.102 +#define EXIT_BLOCK_SIZE 35
1.103 +/**
1.104 + * Exit the block to an absolute PC
1.105 + */
1.106 +void exit_block( sh4addr_t pc, sh4addr_t endpc )
1.107 +{
1.108 + load_imm32( R_ECX, pc ); // 5
1.109 + store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
1.110 + REXW(); MOV_moff32_EAX( xlat_get_lut_entry(pc) );
1.111 + REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.112 + load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.113 + ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.114 + POP_r32(R_EBP);
1.115 + RET();
1.116 +}
1.117 +
1.118 +
1.119 +/**
1.120 + * Write the block trailer (exception handling block)
1.121 + */
1.122 +void sh4_translate_end_block( sh4addr_t pc ) {
1.123 + if( sh4_x86.branch_taken == FALSE ) {
1.124 + // Didn't exit unconditionally already, so write the termination here
1.125 + exit_block( pc, pc );
1.126 + }
1.127 + if( sh4_x86.backpatch_posn != 0 ) {
1.128 + uint8_t *end_ptr = xlat_output;
1.129 + // Exception termination. Jump block for various exception codes:
1.130 + load_imm32( R_EDI, EXC_DATA_ADDR_READ );
1.131 + JMP_rel8( 33, target1 );
1.132 + load_imm32( R_EDI, EXC_DATA_ADDR_WRITE );
1.133 + JMP_rel8( 26, target2 );
1.134 + load_imm32( R_EDI, EXC_ILLEGAL );
1.135 + JMP_rel8( 19, target3 );
1.136 + load_imm32( R_EDI, EXC_SLOT_ILLEGAL );
1.137 + JMP_rel8( 12, target4 );
1.138 + load_imm32( R_EDI, EXC_FPU_DISABLED );
1.139 + JMP_rel8( 5, target5 );
1.140 + load_imm32( R_EDI, EXC_SLOT_FPU_DISABLED );
1.141 + // target
1.142 + JMP_TARGET(target1);
1.143 + JMP_TARGET(target2);
1.144 + JMP_TARGET(target3);
1.145 + JMP_TARGET(target4);
1.146 + JMP_TARGET(target5);
1.147 + // Raise exception
1.148 + load_spreg( R_ECX, REG_OFFSET(pc) );
1.149 + ADD_r32_r32( R_EDX, R_ECX );
1.150 + ADD_r32_r32( R_EDX, R_ECX );
1.151 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.152 + MOV_moff32_EAX( &sh4_cpu_period );
1.153 + MUL_r32( R_EDX );
1.154 + ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
1.155 +
1.156 + call_func0( sh4_raise_exception );
1.157 + load_spreg( R_EAX, REG_OFFSET(pc) );
1.158 + call_func1(xlat_get_code,R_EAX);
1.159 + POP_r32(R_EBP);
1.160 + RET();
1.161 +
1.162 + sh4_x86_do_backpatch( end_ptr );
1.163 + }
1.164 +}
1.165 +
1.166 +#else /* SH4_TRANSLATOR == TARGET_X86 */
1.167 +
1.168 /**
1.169 * Note: clobbers EAX to make the indirect call - this isn't usually
1.170 * a problem since the callee will usually clobber it anyway.
1.171 */
1.172 +#define CALL_FUNC0_SIZE 7
1.173 static inline void call_func0( void *ptr )
1.174 {
1.175 load_imm32(R_EAX, (uint32_t)ptr);
1.176 CALL_r32(R_EAX);
1.177 }
1.178
1.179 +#define CALL_FUNC1_SIZE 11
1.180 static inline void call_func1( void *ptr, int arg1 )
1.181 {
1.182 PUSH_r32(arg1);
1.183 @@ -270,6 +426,7 @@
1.184 ADD_imm8s_r32( 4, R_ESP );
1.185 }
1.186
1.187 +#define CALL_FUNC2_SIZE 12
1.188 static inline void call_func2( void *ptr, int arg1, int arg2 )
1.189 {
1.190 PUSH_r32(arg2);
1.191 @@ -283,6 +440,7 @@
1.192 * the second in arg2b
1.193 * NB: 30 bytes
1.194 */
1.195 +#define MEM_WRITE_DOUBLE_SIZE 30
1.196 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
1.197 {
1.198 ADD_imm8s_r32( 4, addr );
1.199 @@ -302,6 +460,7 @@
1.200 * and the second into arg2b. The addr must not be in EAX
1.201 * NB: 27 bytes
1.202 */
1.203 +#define MEM_READ_DOUBLE_SIZE 27
1.204 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
1.205 {
1.206 PUSH_r32(addr);
1.207 @@ -316,6 +475,71 @@
1.208 POP_r32(arg2a);
1.209 }
1.210
1.211 +#define EXIT_BLOCK_SIZE 29
1.212 +/**
1.213 + * Exit the block to an absolute PC
1.214 + */
1.215 +void exit_block( sh4addr_t pc, sh4addr_t endpc )
1.216 +{
1.217 + load_imm32( R_ECX, pc ); // 5
1.218 + store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
1.219 + MOV_moff32_EAX( xlat_get_lut_entry(pc) ); // 5
1.220 + AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.221 + load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.222 + ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.223 + POP_r32(R_EBP);
1.224 + RET();
1.225 +}
1.226 +
1.227 +/**
1.228 + * Write the block trailer (exception handling block)
1.229 + */
1.230 +void sh4_translate_end_block( sh4addr_t pc ) {
1.231 + if( sh4_x86.branch_taken == FALSE ) {
1.232 + // Didn't exit unconditionally already, so write the termination here
1.233 + exit_block( pc, pc );
1.234 + }
1.235 + if( sh4_x86.backpatch_posn != 0 ) {
1.236 + uint8_t *end_ptr = xlat_output;
1.237 + // Exception termination. Jump block for various exception codes:
1.238 + PUSH_imm32( EXC_DATA_ADDR_READ );
1.239 + JMP_rel8( 33, target1 );
1.240 + PUSH_imm32( EXC_DATA_ADDR_WRITE );
1.241 + JMP_rel8( 26, target2 );
1.242 + PUSH_imm32( EXC_ILLEGAL );
1.243 + JMP_rel8( 19, target3 );
1.244 + PUSH_imm32( EXC_SLOT_ILLEGAL );
1.245 + JMP_rel8( 12, target4 );
1.246 + PUSH_imm32( EXC_FPU_DISABLED );
1.247 + JMP_rel8( 5, target5 );
1.248 + PUSH_imm32( EXC_SLOT_FPU_DISABLED );
1.249 + // target
1.250 + JMP_TARGET(target1);
1.251 + JMP_TARGET(target2);
1.252 + JMP_TARGET(target3);
1.253 + JMP_TARGET(target4);
1.254 + JMP_TARGET(target5);
1.255 + // Raise exception
1.256 + load_spreg( R_ECX, REG_OFFSET(pc) );
1.257 + ADD_r32_r32( R_EDX, R_ECX );
1.258 + ADD_r32_r32( R_EDX, R_ECX );
1.259 + store_spreg( R_ECX, REG_OFFSET(pc) );
1.260 + MOV_moff32_EAX( &sh4_cpu_period );
1.261 + MUL_r32( R_EDX );
1.262 + ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
1.263 +
1.264 + call_func0( sh4_raise_exception );
1.265 + ADD_imm8s_r32( 4, R_ESP );
1.266 + load_spreg( R_EAX, REG_OFFSET(pc) );
1.267 + call_func1(xlat_get_code,R_EAX);
1.268 + POP_r32(R_EBP);
1.269 + RET();
1.270 +
1.271 + sh4_x86_do_backpatch( end_ptr );
1.272 + }
1.273 +}
1.274 +#endif
1.275 +
1.276 /* Exception checks - Note that all exception checks will clobber EAX */
1.277 #define precheck() load_imm32(R_EDX, (pc-sh4_x86.block_start_pc-(sh4_x86.in_delay_slot?2:0))>>1)
1.278
1.279 @@ -431,22 +655,6 @@
1.280 }
1.281
1.282 /**
1.283 - * Exit the block to an absolute PC
1.284 - * Bytes: 29
1.285 - */
1.286 -void exit_block( sh4addr_t pc, sh4addr_t endpc )
1.287 -{
1.288 - load_imm32( R_ECX, pc ); // 5
1.289 - store_spreg( R_ECX, REG_OFFSET(pc) ); // 3
1.290 - MOV_moff32_EAX( (uint32_t)xlat_get_lut_entry(pc) ); // 5
1.291 - AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.292 - load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.293 - ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.294 - POP_r32(R_EBP);
1.295 - RET();
1.296 -}
1.297 -
1.298 -/**
1.299 * Exit the block with sh4r.pc already written
1.300 * Bytes: 15
1.301 */
1.302 @@ -460,57 +668,6 @@
1.303 RET();
1.304 }
1.305
1.306 -/**
1.307 - * Write the block trailer (exception handling block)
1.308 - */
1.309 -void sh4_translate_end_block( sh4addr_t pc ) {
1.310 - if( sh4_x86.branch_taken == FALSE ) {
1.311 - // Didn't exit unconditionally already, so write the termination here
1.312 - exit_block( pc, pc );
1.313 - }
1.314 - if( sh4_x86.backpatch_posn != 0 ) {
1.315 - uint8_t *end_ptr = xlat_output;
1.316 - // Exception termination. Jump block for various exception codes:
1.317 - PUSH_imm32( EXC_DATA_ADDR_READ );
1.318 - JMP_rel8( 33, target1 );
1.319 - PUSH_imm32( EXC_DATA_ADDR_WRITE );
1.320 - JMP_rel8( 26, target2 );
1.321 - PUSH_imm32( EXC_ILLEGAL );
1.322 - JMP_rel8( 19, target3 );
1.323 - PUSH_imm32( EXC_SLOT_ILLEGAL );
1.324 - JMP_rel8( 12, target4 );
1.325 - PUSH_imm32( EXC_FPU_DISABLED );
1.326 - JMP_rel8( 5, target5 );
1.327 - PUSH_imm32( EXC_SLOT_FPU_DISABLED );
1.328 - // target
1.329 - JMP_TARGET(target1);
1.330 - JMP_TARGET(target2);
1.331 - JMP_TARGET(target3);
1.332 - JMP_TARGET(target4);
1.333 - JMP_TARGET(target5);
1.334 - // Raise exception
1.335 - load_spreg( R_ECX, REG_OFFSET(pc) );
1.336 - ADD_r32_r32( R_EDX, R_ECX );
1.337 - ADD_r32_r32( R_EDX, R_ECX );
1.338 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.339 - MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
1.340 - MUL_r32( R_EDX );
1.341 - ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
1.342 -
1.343 - load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
1.344 - CALL_r32( R_EAX ); // 2
1.345 - ADD_imm8s_r32( 4, R_ESP );
1.346 - load_spreg( R_EAX, REG_OFFSET(pc) );
1.347 - call_func1(xlat_get_code,R_EAX);
1.348 - POP_r32(R_EBP);
1.349 - RET();
1.350 -
1.351 - sh4_x86_do_backpatch( end_ptr );
1.352 - }
1.353 -
1.354 -}
1.355 -
1.356 -
1.357 extern uint16_t *sh4_icache;
1.358 extern uint32_t sh4_icache_addr;
1.359
1.360 @@ -531,7 +688,7 @@
1.361 ir = sh4_icache[(pc&0xFFF)>>1];
1.362 } else {
1.363 sh4_icache = (uint16_t *)mem_get_page(pc);
1.364 - if( ((uint32_t)sh4_icache) < MAX_IO_REGIONS ) {
1.365 + if( ((uintptr_t)sh4_icache) < MAX_IO_REGIONS ) {
1.366 /* If someone's actually been so daft as to try to execute out of an IO
1.367 * region, fallback on the full-blown memory read
1.368 */
1.369 @@ -595,7 +752,7 @@
1.370 load_spreg( R_ECX, R_GBR );
1.371 ADD_r32_r32( R_EAX, R_ECX );
1.372 PUSH_r32(R_ECX);
1.373 - call_func0(sh4_read_byte);
1.374 + MEM_READ_BYTE( R_ECX, R_EAX );
1.375 POP_r32(R_ECX);
1.376 AND_imm32_r32(imm, R_EAX );
1.377 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.378 @@ -777,7 +934,7 @@
1.379
1.380 load_spreg( R_ECX, R_S );
1.381 TEST_r32_r32(R_ECX, R_ECX);
1.382 - JE_rel8( 7, nosat );
1.383 + JE_rel8( CALL_FUNC0_SIZE, nosat );
1.384 call_func0( signsat48 );
1.385 JMP_TARGET( nosat );
1.386 sh4_x86.tstate = TSTATE_NONE;
1.387 @@ -887,7 +1044,7 @@
1.388 load_spreg( R_ECX, R_GBR );
1.389 ADD_r32_r32( R_EAX, R_ECX );
1.390 PUSH_r32(R_ECX);
1.391 - call_func0(sh4_read_byte);
1.392 + MEM_READ_BYTE( R_ECX, R_EAX );
1.393 POP_r32(R_ECX);
1.394 OR_imm32_r32(imm, R_EAX );
1.395 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.396 @@ -1130,7 +1287,7 @@
1.397 load_spreg( R_ECX, R_GBR );
1.398 ADD_r32_r32( R_EAX, R_ECX );
1.399 PUSH_r32(R_ECX);
1.400 - call_func0(sh4_read_byte);
1.401 + MEM_READ_BYTE(R_ECX, R_EAX);
1.402 POP_r32(R_ECX);
1.403 XOR_imm32_r32( imm, R_EAX );
1.404 MEM_WRITE_BYTE( R_ECX, R_EAX );
1.405 @@ -1319,7 +1476,7 @@
1.406 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1.407 sh4ptr_t ptr = mem_get_region(target);
1.408 if( ptr != NULL ) {
1.409 - MOV_moff32_EAX( (uint32_t)ptr );
1.410 + MOV_moff32_EAX( ptr );
1.411 } else {
1.412 load_imm32( R_ECX, target );
1.413 MEM_READ_LONG( R_ECX, R_EAX );
1.414 @@ -1462,7 +1619,7 @@
1.415 if( sh4_x86.in_delay_slot ) {
1.416 SLOTILLEGAL();
1.417 } else {
1.418 - JT_rel8( 29, nottaken );
1.419 + JT_rel8( EXIT_BLOCK_SIZE, nottaken );
1.420 exit_block( disp + pc + 4, pc+2 );
1.421 JMP_TARGET(nottaken);
1.422 return 2;
1.423 @@ -1545,7 +1702,7 @@
1.424 if( sh4_x86.in_delay_slot ) {
1.425 SLOTILLEGAL();
1.426 } else {
1.427 - JF_rel8( 29, nottaken );
1.428 + JF_rel8( EXIT_BLOCK_SIZE, nottaken );
1.429 exit_block( disp + pc + 4, pc+2 );
1.430 JMP_TARGET(nottaken);
1.431 return 2;
1.432 @@ -1633,8 +1790,8 @@
1.433 if( sh4_x86.in_delay_slot ) {
1.434 SLOTILLEGAL();
1.435 } else {
1.436 - PUSH_imm32( imm );
1.437 - call_func0( sh4_raise_trap );
1.438 + load_imm32( R_EAX, imm );
1.439 + call_func1( sh4_raise_trap, R_EAX );
1.440 ADD_imm8s_r32( 4, R_ESP );
1.441 sh4_x86.tstate = TSTATE_NONE;
1.442 exit_block_pcset(pc);
1.443 @@ -1737,12 +1894,12 @@
1.444 check_walign32( R_ECX );
1.445 load_spreg( R_EDX, R_FPSCR );
1.446 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.447 - JNE_rel8(20, doublesize);
1.448 + JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
1.449 load_fr_bank( R_EDX );
1.450 load_fr( R_EDX, R_EAX, FRm );
1.451 MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
1.452 if( FRm&1 ) {
1.453 - JMP_rel8( 48, end );
1.454 + JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
1.455 JMP_TARGET(doublesize);
1.456 load_xf_bank( R_EDX );
1.457 load_fr( R_EDX, R_EAX, FRm&0x0E );
1.458 @@ -1750,7 +1907,7 @@
1.459 MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1.460 JMP_TARGET(end);
1.461 } else {
1.462 - JMP_rel8( 39, end );
1.463 + JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
1.464 JMP_TARGET(doublesize);
1.465 load_fr_bank( R_EDX );
1.466 load_fr( R_EDX, R_EAX, FRm&0x0E );
1.467 @@ -1767,12 +1924,12 @@
1.468 check_ralign32( R_ECX );
1.469 load_spreg( R_EDX, R_FPSCR );
1.470 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.471 - JNE_rel8(19, doublesize);
1.472 + JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
1.473 MEM_READ_LONG( R_ECX, R_EAX );
1.474 load_fr_bank( R_EDX );
1.475 store_fr( R_EDX, R_EAX, FRn );
1.476 if( FRn&1 ) {
1.477 - JMP_rel8(48, end);
1.478 + JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
1.479 JMP_TARGET(doublesize);
1.480 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.481 load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.482 @@ -1781,7 +1938,7 @@
1.483 store_fr( R_EDX, R_ECX, FRn|0x01 );
1.484 JMP_TARGET(end);
1.485 } else {
1.486 - JMP_rel8(36, end);
1.487 + JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
1.488 JMP_TARGET(doublesize);
1.489 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.490 load_fr_bank( R_EDX );
1.491 @@ -1798,14 +1955,14 @@
1.492 check_walign32( R_ECX );
1.493 load_spreg( R_EDX, R_FPSCR );
1.494 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.495 - JNE_rel8(26, doublesize);
1.496 + JNE_rel8(14 + CALL_FUNC2_SIZE, doublesize);
1.497 load_fr_bank( R_EDX );
1.498 load_fr( R_EDX, R_EAX, FRm );
1.499 ADD_imm8s_r32(-4,R_ECX);
1.500 store_reg( R_ECX, Rn );
1.501 MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
1.502 if( FRm&1 ) {
1.503 - JMP_rel8( 54, end );
1.504 + JMP_rel8( 24 + MEM_WRITE_DOUBLE_SIZE, end );
1.505 JMP_TARGET(doublesize);
1.506 load_xf_bank( R_EDX );
1.507 load_fr( R_EDX, R_EAX, FRm&0x0E );
1.508 @@ -1815,7 +1972,7 @@
1.509 MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1.510 JMP_TARGET(end);
1.511 } else {
1.512 - JMP_rel8( 45, end );
1.513 + JMP_rel8( 15 + MEM_WRITE_DOUBLE_SIZE, end );
1.514 JMP_TARGET(doublesize);
1.515 load_fr_bank( R_EDX );
1.516 load_fr( R_EDX, R_EAX, FRm&0x0E );
1.517 @@ -1835,14 +1992,14 @@
1.518 MOV_r32_r32( R_ECX, R_EAX );
1.519 load_spreg( R_EDX, R_FPSCR );
1.520 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.521 - JNE_rel8(25, doublesize);
1.522 + JNE_rel8(14 + CALL_FUNC1_SIZE, doublesize);
1.523 ADD_imm8s_r32( 4, R_EAX );
1.524 store_reg( R_EAX, Rm );
1.525 MEM_READ_LONG( R_ECX, R_EAX );
1.526 load_fr_bank( R_EDX );
1.527 store_fr( R_EDX, R_EAX, FRn );
1.528 if( FRn&1 ) {
1.529 - JMP_rel8(54, end);
1.530 + JMP_rel8(27 + MEM_READ_DOUBLE_SIZE, end);
1.531 JMP_TARGET(doublesize);
1.532 ADD_imm8s_r32( 8, R_EAX );
1.533 store_reg(R_EAX, Rm);
1.534 @@ -1853,7 +2010,7 @@
1.535 store_fr( R_EDX, R_ECX, FRn|0x01 );
1.536 JMP_TARGET(end);
1.537 } else {
1.538 - JMP_rel8(42, end);
1.539 + JMP_rel8(15 + MEM_READ_DOUBLE_SIZE, end);
1.540 ADD_imm8s_r32( 8, R_EAX );
1.541 store_reg(R_EAX, Rm);
1.542 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.543 @@ -1872,12 +2029,12 @@
1.544 check_walign32( R_ECX );
1.545 load_spreg( R_EDX, R_FPSCR );
1.546 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.547 - JNE_rel8(20, doublesize);
1.548 + JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
1.549 load_fr_bank( R_EDX );
1.550 load_fr( R_EDX, R_EAX, FRm );
1.551 MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
1.552 if( FRm&1 ) {
1.553 - JMP_rel8( 48, end );
1.554 + JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
1.555 JMP_TARGET(doublesize);
1.556 load_xf_bank( R_EDX );
1.557 load_fr( R_EDX, R_EAX, FRm&0x0E );
1.558 @@ -1885,7 +2042,7 @@
1.559 MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
1.560 JMP_TARGET(end);
1.561 } else {
1.562 - JMP_rel8( 39, end );
1.563 + JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
1.564 JMP_TARGET(doublesize);
1.565 load_fr_bank( R_EDX );
1.566 load_fr( R_EDX, R_EAX, FRm&0x0E );
1.567 @@ -1903,12 +2060,12 @@
1.568 check_ralign32( R_ECX );
1.569 load_spreg( R_EDX, R_FPSCR );
1.570 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1.571 - JNE_rel8(19, doublesize);
1.572 + JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
1.573 MEM_READ_LONG( R_ECX, R_EAX );
1.574 load_fr_bank( R_EDX );
1.575 store_fr( R_EDX, R_EAX, FRn );
1.576 if( FRn&1 ) {
1.577 - JMP_rel8(48, end);
1.578 + JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
1.579 JMP_TARGET(doublesize);
1.580 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.581 load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
1.582 @@ -1917,7 +2074,7 @@
1.583 store_fr( R_EDX, R_ECX, FRn|0x01 );
1.584 JMP_TARGET(end);
1.585 } else {
1.586 - JMP_rel8(36, end);
1.587 + JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
1.588 JMP_TARGET(doublesize);
1.589 MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
1.590 load_fr_bank( R_EDX );
1.591 @@ -2244,7 +2401,7 @@
1.592 check_fpuen();
1.593 load_spreg( R_ECX, R_FPSCR );
1.594 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.595 - JNE_rel8( 21, doubleprec );
1.596 + JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
1.597 load_fr_bank( R_ECX );
1.598 ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
1.599 load_spreg( R_EDX, R_FPUL );
1.600 @@ -2282,7 +2439,7 @@
1.601 check_fpuen();
1.602 load_spreg( R_ECX, R_FPSCR );
1.603 TEST_imm32_r32( FPSCR_PR, R_ECX );
1.604 - JNE_rel8( 30, doubleprec );
1.605 + JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
1.606 load_fr_bank( R_EDX ); // 3
1.607 ADD_imm8s_r32( FVn<<4, R_EDX ); // 3
1.608 load_xf_bank( R_ECX ); // 12
1.609 @@ -2548,7 +2705,7 @@
1.610 PUSH_r32( R_EAX );
1.611 AND_imm32_r32( 0xFC000000, R_EAX );
1.612 CMP_imm32_r32( 0xE0000000, R_EAX );
1.613 - JNE_rel8(7, end);
1.614 + JNE_rel8(CALL_FUNC0_SIZE, end);
1.615 call_func0( sh4_flush_store_queue );
1.616 JMP_TARGET(end);
1.617 ADD_imm8s_r32( 4, R_ESP );
.