revision 901:32c5cf5e206f
summary |
tree |
shortlog |
changelog |
graph |
changeset |
raw | bz2 | zip | gz changeset | 901:32c5cf5e206f |
parent | 900:609fa702406b |
child | 902:408568dc97d5 |
author | nkeynes |
date | Sun Oct 26 02:28:29 2008 +0000 (14 years ago) |
Move the precision/size tests to translation-time rather than execution-time,
and flush/retranslate on a mismatch. Shaves a few percent off the core runtime.
1.1 --- a/src/sh4/ia32abi.h Sun Oct 26 00:52:32 2008 +00001.2 +++ b/src/sh4/ia32abi.h Sun Oct 26 02:28:29 2008 +00001.3 @@ -94,20 +94,12 @@1.4 * Emit the 'start of block' assembly. Sets up the stack frame and save1.5 * SI/DI as required1.6 */1.7 -void sh4_translate_begin_block( sh4addr_t pc )1.8 +void enter_block( )1.9 {1.10 PUSH_r32(R_EBP);1.11 /* mov &sh4r, ebp */1.12 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );1.14 - sh4_x86.in_delay_slot = FALSE;1.15 - sh4_x86.priv_checked = FALSE;1.16 - sh4_x86.fpuen_checked = FALSE;1.17 - sh4_x86.branch_taken = FALSE;1.18 - sh4_x86.backpatch_posn = 0;1.19 - sh4_x86.block_start_pc = pc;1.20 - sh4_x86.tlb_on = IS_MMU_ENABLED();1.21 - sh4_x86.tstate = TSTATE_NONE;1.22 #ifdef STACK_ALIGN1.23 sh4_x86.stack_posn = 8;1.24 #endif
2.1 --- a/src/sh4/ia32mac.h Sun Oct 26 00:52:32 2008 +00002.2 +++ b/src/sh4/ia32mac.h Sun Oct 26 02:28:29 2008 +00002.3 @@ -120,20 +120,11 @@2.4 * Emit the 'start of block' assembly. Sets up the stack frame and save2.5 * SI/DI as required2.6 */2.7 -void sh4_translate_begin_block( sh4addr_t pc )2.8 +void enter_block( )2.9 {2.10 PUSH_r32(R_EBP);2.11 /* mov &sh4r, ebp */2.12 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );2.13 -2.14 - sh4_x86.in_delay_slot = FALSE;2.15 - sh4_x86.priv_checked = FALSE;2.16 - sh4_x86.fpuen_checked = FALSE;2.17 - sh4_x86.branch_taken = FALSE;2.18 - sh4_x86.backpatch_posn = 0;2.19 - sh4_x86.block_start_pc = pc;2.20 - sh4_x86.tstate = TSTATE_NONE;2.21 - sh4_x86.tlb_on = IS_MMU_ENABLED();2.22 sh4_x86.stack_posn = 8;2.23 }
3.1 --- a/src/sh4/ia64abi.h Sun Oct 26 00:52:32 2008 +00003.2 +++ b/src/sh4/ia64abi.h Sun Oct 26 02:28:29 2008 +00003.3 @@ -91,22 +91,13 @@3.4 * Emit the 'start of block' assembly. Sets up the stack frame and save3.5 * SI/DI as required3.6 */3.7 -void sh4_translate_begin_block( sh4addr_t pc )3.8 +void enter_block( )3.9 {3.10 PUSH_r32(R_EBP);3.11 /* mov &sh4r, ebp */3.12 load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );3.13 -3.14 - sh4_x86.in_delay_slot = FALSE;3.15 - sh4_x86.priv_checked = FALSE;3.16 - sh4_x86.fpuen_checked = FALSE;3.17 - sh4_x86.branch_taken = FALSE;3.18 - sh4_x86.backpatch_posn = 0;3.19 - sh4_x86.block_start_pc = pc;3.20 - sh4_x86.tlb_on = IS_MMU_ENABLED();3.21 - sh4_x86.tstate = TSTATE_NONE;3.22 }3.23 -3.24 +3.25 /**3.26 * Exit the block with sh4r.pc already written3.27 */
4.1 --- a/src/sh4/sh4trans.c Sun Oct 26 00:52:32 2008 +00004.2 +++ b/src/sh4/sh4trans.c Sun Oct 26 02:28:29 2008 +00004.3 @@ -52,7 +52,7 @@4.4 }4.6 code = xlat_get_code_by_vma( sh4r.pc );4.7 - if( code == NULL ) {4.8 + if( code == NULL || (sh4r.fpscr & (FPSCR_PR|FPSCR_SZ)) != XLAT_BLOCK_FPSCR(code) ) {4.9 code = sh4_translate_basic_block( sh4r.pc );4.10 }4.11 }4.12 @@ -138,6 +138,8 @@4.13 memcpy( xlat_output, xlat_recovery, recovery_size);4.14 xlat_current_block->recover_table_offset = xlat_output - (uint8_t *)xlat_current_block->code;4.15 xlat_current_block->recover_table_size = xlat_recovery_posn;4.16 + xlat_current_block->fpscr = sh4r.fpscr & (FPSCR_PR|FPSCR_SZ);4.17 + xlat_current_block->fpscr_mask = (FPSCR_PR|FPSCR_SZ);4.18 xlat_commit_block( finalsize, pc-start );4.19 return xlat_current_block->code;4.20 }
5.1 --- a/src/sh4/sh4x86.in Sun Oct 26 00:52:32 2008 +00005.2 +++ b/src/sh4/sh4x86.in Sun Oct 26 02:28:29 2008 +00005.3 @@ -55,6 +55,8 @@5.4 gboolean priv_checked; /* true if we've already checked the cpu mode. */5.5 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */5.6 gboolean branch_taken; /* true if we branched unconditionally */5.7 + gboolean double_prec; /* true if FPU is in double-precision mode */5.8 + gboolean double_size; /* true if FPU is in double-size mode */5.9 uint32_t block_start_pc;5.10 uint32_t stack_posn; /* Trace stack height for alignment purposes */5.11 int tstate;5.12 @@ -311,12 +313,28 @@5.13 #endif5.14 #endif5.16 +void sh4_translate_begin_block( sh4addr_t pc )5.17 +{5.18 + enter_block();5.19 + sh4_x86.in_delay_slot = FALSE;5.20 + sh4_x86.priv_checked = FALSE;5.21 + sh4_x86.fpuen_checked = FALSE;5.22 + sh4_x86.branch_taken = FALSE;5.23 + sh4_x86.backpatch_posn = 0;5.24 + sh4_x86.block_start_pc = pc;5.25 + sh4_x86.tlb_on = IS_MMU_ENABLED();5.26 + sh4_x86.tstate = TSTATE_NONE;5.27 + sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;5.28 + sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;5.29 +}5.30 +5.31 +5.32 uint32_t sh4_translate_end_block_size()5.33 {5.34 if( sh4_x86.backpatch_posn <= 3 ) {5.35 - return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);5.36 + return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);5.37 } else {5.38 - return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;5.39 + return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;5.40 }5.41 }5.43 @@ -1812,118 +1830,92 @@5.44 FMOV FRm, FRn {:5.45 COUNT_INST(I_FMOV1);5.46 check_fpuen();5.47 - load_spreg( R_ECX, R_FPSCR );5.48 - TEST_imm32_r32( FPSCR_SZ, R_ECX );5.49 - JNE_rel8(doublesize);5.50 - load_fr( R_EAX, FRm ); // SZ=0 branch5.51 - store_fr( R_EAX, FRn );5.52 - JMP_rel8(end);5.53 - JMP_TARGET(doublesize);5.54 - load_dr0( R_EAX, FRm );5.55 - load_dr1( R_ECX, FRm );5.56 - store_dr0( R_EAX, FRn );5.57 - store_dr1( R_ECX, FRn );5.58 - JMP_TARGET(end);5.59 - 
sh4_x86.tstate = TSTATE_NONE;5.60 + if( sh4_x86.double_size ) {5.61 + load_dr0( R_EAX, FRm );5.62 + load_dr1( R_ECX, FRm );5.63 + store_dr0( R_EAX, FRn );5.64 + store_dr1( R_ECX, FRn );5.65 + } else {5.66 + load_fr( R_EAX, FRm ); // SZ=0 branch5.67 + store_fr( R_EAX, FRn );5.68 + }5.69 :}5.70 FMOV FRm, @Rn {:5.71 COUNT_INST(I_FMOV2);5.72 check_fpuen();5.73 load_reg( R_EAX, Rn );5.74 - load_spreg( R_EDX, R_FPSCR );5.75 - TEST_imm32_r32( FPSCR_SZ, R_EDX );5.76 - JNE_rel8(doublesize);5.77 -5.78 - check_walign32( R_EAX );5.79 - MMU_TRANSLATE_WRITE( R_EAX );5.80 - load_fr( R_ECX, FRm );5.81 - MEM_WRITE_LONG( R_EAX, R_ECX ); // 125.82 - JMP_rel8(end);5.83 -5.84 - JMP_TARGET(doublesize);5.85 - check_walign64( R_EAX );5.86 - MMU_TRANSLATE_WRITE( R_EAX );5.87 - load_dr0( R_ECX, FRm );5.88 - load_dr1( R_EDX, FRm );5.89 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );5.90 - JMP_TARGET(end);5.91 + if( sh4_x86.double_size ) {5.92 + check_walign64( R_EAX );5.93 + MMU_TRANSLATE_WRITE( R_EAX );5.94 + load_dr0( R_ECX, FRm );5.95 + load_dr1( R_EDX, FRm );5.96 + MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );5.97 + } else {5.98 + check_walign32( R_EAX );5.99 + MMU_TRANSLATE_WRITE( R_EAX );5.100 + load_fr( R_ECX, FRm );5.101 + MEM_WRITE_LONG( R_EAX, R_ECX );5.102 + }5.103 sh4_x86.tstate = TSTATE_NONE;5.104 :}5.105 FMOV @Rm, FRn {:5.106 COUNT_INST(I_FMOV5);5.107 check_fpuen();5.108 load_reg( R_EAX, Rm );5.109 - load_spreg( R_EDX, R_FPSCR );5.110 - TEST_imm32_r32( FPSCR_SZ, R_EDX );5.111 - JNE_rel8(doublesize);5.112 -5.113 - check_ralign32( R_EAX );5.114 - MMU_TRANSLATE_READ( R_EAX );5.115 - MEM_READ_LONG( R_EAX, R_EAX );5.116 - store_fr( R_EAX, FRn );5.117 - JMP_rel8(end);5.118 -5.119 - JMP_TARGET(doublesize);5.120 - check_ralign64( R_EAX );5.121 - MMU_TRANSLATE_READ( R_EAX );5.122 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );5.123 - store_dr0( R_ECX, FRn );5.124 - store_dr1( R_EAX, FRn );5.125 - JMP_TARGET(end);5.126 + if( sh4_x86.double_size ) {5.127 + check_ralign64( R_EAX );5.128 + 
MMU_TRANSLATE_READ( R_EAX );5.129 + MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );5.130 + store_dr0( R_ECX, FRn );5.131 + store_dr1( R_EAX, FRn );5.132 + } else {5.133 + check_ralign32( R_EAX );5.134 + MMU_TRANSLATE_READ( R_EAX );5.135 + MEM_READ_LONG( R_EAX, R_EAX );5.136 + store_fr( R_EAX, FRn );5.137 + }5.138 sh4_x86.tstate = TSTATE_NONE;5.139 :}5.140 FMOV FRm, @-Rn {:5.141 COUNT_INST(I_FMOV3);5.142 check_fpuen();5.143 load_reg( R_EAX, Rn );5.144 - load_spreg( R_EDX, R_FPSCR );5.145 - TEST_imm32_r32( FPSCR_SZ, R_EDX );5.146 - JNE_rel8(doublesize);5.147 -5.148 - check_walign32( R_EAX );5.149 - ADD_imm8s_r32( -4, R_EAX );5.150 - MMU_TRANSLATE_WRITE( R_EAX );5.151 - load_fr( R_ECX, FRm );5.152 - ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));5.153 - MEM_WRITE_LONG( R_EAX, R_ECX );5.154 - JMP_rel8(end);5.155 -5.156 - JMP_TARGET(doublesize);5.157 - check_walign64( R_EAX );5.158 - ADD_imm8s_r32(-8,R_EAX);5.159 - MMU_TRANSLATE_WRITE( R_EAX );5.160 - load_dr0( R_ECX, FRm );5.161 - load_dr1( R_EDX, FRm );5.162 - ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));5.163 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );5.164 - JMP_TARGET(end);5.165 -5.166 + if( sh4_x86.double_size ) {5.167 + check_walign64( R_EAX );5.168 + ADD_imm8s_r32(-8,R_EAX);5.169 + MMU_TRANSLATE_WRITE( R_EAX );5.170 + load_dr0( R_ECX, FRm );5.171 + load_dr1( R_EDX, FRm );5.172 + ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));5.173 + MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );5.174 + } else {5.175 + check_walign32( R_EAX );5.176 + ADD_imm8s_r32( -4, R_EAX );5.177 + MMU_TRANSLATE_WRITE( R_EAX );5.178 + load_fr( R_ECX, FRm );5.179 + ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));5.180 + MEM_WRITE_LONG( R_EAX, R_ECX );5.181 + }5.182 sh4_x86.tstate = TSTATE_NONE;5.183 :}5.184 FMOV @Rm+, FRn {:5.185 COUNT_INST(I_FMOV6);5.186 check_fpuen();5.187 load_reg( R_EAX, Rm );5.188 - load_spreg( R_EDX, R_FPSCR );5.189 - TEST_imm32_r32( FPSCR_SZ, R_EDX );5.190 - JNE_rel8(doublesize);5.191 -5.192 - check_ralign32( R_EAX );5.193 - MMU_TRANSLATE_READ( R_EAX );5.194 - 
ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );5.195 - MEM_READ_LONG( R_EAX, R_EAX );5.196 - store_fr( R_EAX, FRn );5.197 - JMP_rel8(end);5.198 -5.199 - JMP_TARGET(doublesize);5.200 - check_ralign64( R_EAX );5.201 - MMU_TRANSLATE_READ( R_EAX );5.202 - ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );5.203 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );5.204 - store_dr0( R_ECX, FRn );5.205 - store_dr1( R_EAX, FRn );5.206 - JMP_TARGET(end);5.207 -5.208 + if( sh4_x86.double_size ) {5.209 + check_ralign64( R_EAX );5.210 + MMU_TRANSLATE_READ( R_EAX );5.211 + ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );5.212 + MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );5.213 + store_dr0( R_ECX, FRn );5.214 + store_dr1( R_EAX, FRn );5.215 + } else {5.216 + check_ralign32( R_EAX );5.217 + MMU_TRANSLATE_READ( R_EAX );5.218 + ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );5.219 + MEM_READ_LONG( R_EAX, R_EAX );5.220 + store_fr( R_EAX, FRn );5.221 + }5.222 sh4_x86.tstate = TSTATE_NONE;5.223 :}5.224 FMOV FRm, @(R0, Rn) {:5.225 @@ -1931,24 +1923,18 @@5.226 check_fpuen();5.227 load_reg( R_EAX, Rn );5.228 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );5.229 - load_spreg( R_EDX, R_FPSCR );5.230 - TEST_imm32_r32( FPSCR_SZ, R_EDX );5.231 - JNE_rel8(doublesize);5.232 -5.233 - check_walign32( R_EAX );5.234 - MMU_TRANSLATE_WRITE( R_EAX );5.235 - load_fr( R_ECX, FRm );5.236 - MEM_WRITE_LONG( R_EAX, R_ECX ); // 125.237 - JMP_rel8(end);5.238 -5.239 - JMP_TARGET(doublesize);5.240 - check_walign64( R_EAX );5.241 - MMU_TRANSLATE_WRITE( R_EAX );5.242 - load_dr0( R_ECX, FRm );5.243 - load_dr1( R_EDX, FRm );5.244 - MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );5.245 - JMP_TARGET(end);5.246 -5.247 + if( sh4_x86.double_size ) {5.248 + check_walign64( R_EAX );5.249 + MMU_TRANSLATE_WRITE( R_EAX );5.250 + load_dr0( R_ECX, FRm );5.251 + load_dr1( R_EDX, FRm );5.252 + MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );5.253 + } else {5.254 + check_walign32( R_EAX );5.255 + MMU_TRANSLATE_WRITE( R_EAX );5.256 + load_fr( R_ECX, FRm );5.257 + MEM_WRITE_LONG( R_EAX, R_ECX ); // 125.258 + 
}5.259 sh4_x86.tstate = TSTATE_NONE;5.260 :}5.261 FMOV @(R0, Rm), FRn {:5.262 @@ -1956,74 +1942,56 @@5.263 check_fpuen();5.264 load_reg( R_EAX, Rm );5.265 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );5.266 - load_spreg( R_EDX, R_FPSCR );5.267 - TEST_imm32_r32( FPSCR_SZ, R_EDX );5.268 - JNE_rel8(doublesize);5.269 -5.270 - check_ralign32( R_EAX );5.271 - MMU_TRANSLATE_READ( R_EAX );5.272 - MEM_READ_LONG( R_EAX, R_EAX );5.273 - store_fr( R_EAX, FRn );5.274 - JMP_rel8(end);5.275 -5.276 - JMP_TARGET(doublesize);5.277 - check_ralign64( R_EAX );5.278 - MMU_TRANSLATE_READ( R_EAX );5.279 - MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );5.280 - store_dr0( R_ECX, FRn );5.281 - store_dr1( R_EAX, FRn );5.282 - JMP_TARGET(end);5.283 -5.284 + if( sh4_x86.double_size ) {5.285 + check_ralign64( R_EAX );5.286 + MMU_TRANSLATE_READ( R_EAX );5.287 + MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );5.288 + store_dr0( R_ECX, FRn );5.289 + store_dr1( R_EAX, FRn );5.290 + } else {5.291 + check_ralign32( R_EAX );5.292 + MMU_TRANSLATE_READ( R_EAX );5.293 + MEM_READ_LONG( R_EAX, R_EAX );5.294 + store_fr( R_EAX, FRn );5.295 + }5.296 sh4_x86.tstate = TSTATE_NONE;5.297 :}5.298 FLDI0 FRn {: /* IFF PR=0 */5.299 COUNT_INST(I_FLDI0);5.300 check_fpuen();5.301 - load_spreg( R_ECX, R_FPSCR );5.302 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.303 - JNE_rel8(end);5.304 - XOR_r32_r32( R_EAX, R_EAX );5.305 - store_fr( R_EAX, FRn );5.306 - JMP_TARGET(end);5.307 + if( sh4_x86.double_prec == 0 ) {5.308 + XOR_r32_r32( R_EAX, R_EAX );5.309 + store_fr( R_EAX, FRn );5.310 + }5.311 sh4_x86.tstate = TSTATE_NONE;5.312 :}5.313 FLDI1 FRn {: /* IFF PR=0 */5.314 COUNT_INST(I_FLDI1);5.315 check_fpuen();5.316 - load_spreg( R_ECX, R_FPSCR );5.317 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.318 - JNE_rel8(end);5.319 - load_imm32(R_EAX, 0x3F800000);5.320 - store_fr( R_EAX, FRn );5.321 - JMP_TARGET(end);5.322 - sh4_x86.tstate = TSTATE_NONE;5.323 + if( sh4_x86.double_prec == 0 ) {5.324 + load_imm32(R_EAX, 0x3F800000);5.325 + store_fr( R_EAX, FRn );5.326 + }5.327 
:}5.329 FLOAT FPUL, FRn {:5.330 COUNT_INST(I_FLOAT);5.331 check_fpuen();5.332 - load_spreg( R_ECX, R_FPSCR );5.333 FILD_sh4r(R_FPUL);5.334 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.335 - JNE_rel8(doubleprec);5.336 - pop_fr( FRn );5.337 - JMP_rel8(end);5.338 - JMP_TARGET(doubleprec);5.339 - pop_dr( FRn );5.340 - JMP_TARGET(end);5.341 - sh4_x86.tstate = TSTATE_NONE;5.342 + if( sh4_x86.double_prec ) {5.343 + pop_dr( FRn );5.344 + } else {5.345 + pop_fr( FRn );5.346 + }5.347 :}5.348 FTRC FRm, FPUL {:5.349 COUNT_INST(I_FTRC);5.350 check_fpuen();5.351 - load_spreg( R_ECX, R_FPSCR );5.352 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.353 - JNE_rel8(doubleprec);5.354 - push_fr( FRm );5.355 - JMP_rel8(doop);5.356 - JMP_TARGET(doubleprec);5.357 - push_dr( FRm );5.358 - JMP_TARGET( doop );5.359 + if( sh4_x86.double_prec ) {5.360 + push_dr( FRm );5.361 + } else {5.362 + push_fr( FRm );5.363 + }5.364 load_ptr( R_ECX, &max_int );5.365 FILD_r32ind( R_ECX );5.366 FCOMIP_st(1);5.367 @@ -2053,289 +2021,232 @@5.368 check_fpuen();5.369 load_fr( R_EAX, FRm );5.370 store_spreg( R_EAX, R_FPUL );5.371 - sh4_x86.tstate = TSTATE_NONE;5.372 :}5.373 FSTS FPUL, FRn {:5.374 COUNT_INST(I_FSTS);5.375 check_fpuen();5.376 load_spreg( R_EAX, R_FPUL );5.377 store_fr( R_EAX, FRn );5.378 - sh4_x86.tstate = TSTATE_NONE;5.379 :}5.380 FCNVDS FRm, FPUL {:5.381 COUNT_INST(I_FCNVDS);5.382 check_fpuen();5.383 - load_spreg( R_ECX, R_FPSCR );5.384 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.385 - JE_rel8(end); // only when PR=15.386 - push_dr( FRm );5.387 - pop_fpul();5.388 - JMP_TARGET(end);5.389 - sh4_x86.tstate = TSTATE_NONE;5.390 + if( sh4_x86.double_prec ) {5.391 + push_dr( FRm );5.392 + pop_fpul();5.393 + }5.394 :}5.395 FCNVSD FPUL, FRn {:5.396 COUNT_INST(I_FCNVSD);5.397 check_fpuen();5.398 - load_spreg( R_ECX, R_FPSCR );5.399 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.400 - JE_rel8(end); // only when PR=15.401 - push_fpul();5.402 - pop_dr( FRn );5.403 - JMP_TARGET(end);5.404 - sh4_x86.tstate = TSTATE_NONE;5.405 + if( 
sh4_x86.double_prec ) {5.406 + push_fpul();5.407 + pop_dr( FRn );5.408 + }5.409 :}5.411 /* Floating point instructions */5.412 FABS FRn {:5.413 COUNT_INST(I_FABS);5.414 check_fpuen();5.415 - load_spreg( R_ECX, R_FPSCR );5.416 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.417 - JNE_rel8(doubleprec);5.418 - push_fr(FRn); // 65.419 - FABS_st0(); // 25.420 - pop_fr(FRn); //65.421 - JMP_rel8(end); // 25.422 - JMP_TARGET(doubleprec);5.423 - push_dr(FRn);5.424 - FABS_st0();5.425 - pop_dr(FRn);5.426 - JMP_TARGET(end);5.427 - sh4_x86.tstate = TSTATE_NONE;5.428 + if( sh4_x86.double_prec ) {5.429 + push_dr(FRn);5.430 + FABS_st0();5.431 + pop_dr(FRn);5.432 + } else {5.433 + push_fr(FRn);5.434 + FABS_st0();5.435 + pop_fr(FRn);5.436 + }5.437 :}5.438 FADD FRm, FRn {:5.439 COUNT_INST(I_FADD);5.440 check_fpuen();5.441 - load_spreg( R_ECX, R_FPSCR );5.442 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.443 - JNE_rel8(doubleprec);5.444 - push_fr(FRm);5.445 - push_fr(FRn);5.446 - FADDP_st(1);5.447 - pop_fr(FRn);5.448 - JMP_rel8(end);5.449 - JMP_TARGET(doubleprec);5.450 - push_dr(FRm);5.451 - push_dr(FRn);5.452 - FADDP_st(1);5.453 - pop_dr(FRn);5.454 - JMP_TARGET(end);5.455 - sh4_x86.tstate = TSTATE_NONE;5.456 + if( sh4_x86.double_prec ) {5.457 + push_dr(FRm);5.458 + push_dr(FRn);5.459 + FADDP_st(1);5.460 + pop_dr(FRn);5.461 + } else {5.462 + push_fr(FRm);5.463 + push_fr(FRn);5.464 + FADDP_st(1);5.465 + pop_fr(FRn);5.466 + }5.467 :}5.468 FDIV FRm, FRn {:5.469 COUNT_INST(I_FDIV);5.470 check_fpuen();5.471 - load_spreg( R_ECX, R_FPSCR );5.472 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.473 - JNE_rel8(doubleprec);5.474 - push_fr(FRn);5.475 - push_fr(FRm);5.476 - FDIVP_st(1);5.477 - pop_fr(FRn);5.478 - JMP_rel8(end);5.479 - JMP_TARGET(doubleprec);5.480 - push_dr(FRn);5.481 - push_dr(FRm);5.482 - FDIVP_st(1);5.483 - pop_dr(FRn);5.484 - JMP_TARGET(end);5.485 - sh4_x86.tstate = TSTATE_NONE;5.486 + if( sh4_x86.double_prec ) {5.487 + push_dr(FRn);5.488 + push_dr(FRm);5.489 + FDIVP_st(1);5.490 + pop_dr(FRn);5.491 + } 
else {5.492 + push_fr(FRn);5.493 + push_fr(FRm);5.494 + FDIVP_st(1);5.495 + pop_fr(FRn);5.496 + }5.497 :}5.498 FMAC FR0, FRm, FRn {:5.499 COUNT_INST(I_FMAC);5.500 check_fpuen();5.501 - load_spreg( R_ECX, R_FPSCR );5.502 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.503 - JNE_rel8(doubleprec);5.504 - push_fr( 0 );5.505 - push_fr( FRm );5.506 - FMULP_st(1);5.507 - push_fr( FRn );5.508 - FADDP_st(1);5.509 - pop_fr( FRn );5.510 - JMP_rel8(end);5.511 - JMP_TARGET(doubleprec);5.512 - push_dr( 0 );5.513 - push_dr( FRm );5.514 - FMULP_st(1);5.515 - push_dr( FRn );5.516 - FADDP_st(1);5.517 - pop_dr( FRn );5.518 - JMP_TARGET(end);5.519 - sh4_x86.tstate = TSTATE_NONE;5.520 + if( sh4_x86.double_prec ) {5.521 + push_dr( 0 );5.522 + push_dr( FRm );5.523 + FMULP_st(1);5.524 + push_dr( FRn );5.525 + FADDP_st(1);5.526 + pop_dr( FRn );5.527 + } else {5.528 + push_fr( 0 );5.529 + push_fr( FRm );5.530 + FMULP_st(1);5.531 + push_fr( FRn );5.532 + FADDP_st(1);5.533 + pop_fr( FRn );5.534 + }5.535 :}5.537 FMUL FRm, FRn {:5.538 COUNT_INST(I_FMUL);5.539 check_fpuen();5.540 - load_spreg( R_ECX, R_FPSCR );5.541 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.542 - JNE_rel8(doubleprec);5.543 - push_fr(FRm);5.544 - push_fr(FRn);5.545 - FMULP_st(1);5.546 - pop_fr(FRn);5.547 - JMP_rel8(end);5.548 - JMP_TARGET(doubleprec);5.549 - push_dr(FRm);5.550 - push_dr(FRn);5.551 - FMULP_st(1);5.552 - pop_dr(FRn);5.553 - JMP_TARGET(end);5.554 - sh4_x86.tstate = TSTATE_NONE;5.555 + if( sh4_x86.double_prec ) {5.556 + push_dr(FRm);5.557 + push_dr(FRn);5.558 + FMULP_st(1);5.559 + pop_dr(FRn);5.560 + } else {5.561 + push_fr(FRm);5.562 + push_fr(FRn);5.563 + FMULP_st(1);5.564 + pop_fr(FRn);5.565 + }5.566 :}5.567 FNEG FRn {:5.568 COUNT_INST(I_FNEG);5.569 check_fpuen();5.570 - load_spreg( R_ECX, R_FPSCR );5.571 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.572 - JNE_rel8(doubleprec);5.573 - push_fr(FRn);5.574 - FCHS_st0();5.575 - pop_fr(FRn);5.576 - JMP_rel8(end);5.577 - JMP_TARGET(doubleprec);5.578 - push_dr(FRn);5.579 - FCHS_st0();5.580 - 
pop_dr(FRn);5.581 - JMP_TARGET(end);5.582 - sh4_x86.tstate = TSTATE_NONE;5.583 + if( sh4_x86.double_prec ) {5.584 + push_dr(FRn);5.585 + FCHS_st0();5.586 + pop_dr(FRn);5.587 + } else {5.588 + push_fr(FRn);5.589 + FCHS_st0();5.590 + pop_fr(FRn);5.591 + }5.592 :}5.593 FSRRA FRn {:5.594 COUNT_INST(I_FSRRA);5.595 check_fpuen();5.596 - load_spreg( R_ECX, R_FPSCR );5.597 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.598 - JNE_rel8(end); // PR=0 only5.599 - FLD1_st0();5.600 - push_fr(FRn);5.601 - FSQRT_st0();5.602 - FDIVP_st(1);5.603 - pop_fr(FRn);5.604 - JMP_TARGET(end);5.605 - sh4_x86.tstate = TSTATE_NONE;5.606 + if( sh4_x86.double_prec == 0 ) {5.607 + FLD1_st0();5.608 + push_fr(FRn);5.609 + FSQRT_st0();5.610 + FDIVP_st(1);5.611 + pop_fr(FRn);5.612 + }5.613 :}5.614 FSQRT FRn {:5.615 COUNT_INST(I_FSQRT);5.616 check_fpuen();5.617 - load_spreg( R_ECX, R_FPSCR );5.618 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.619 - JNE_rel8(doubleprec);5.620 - push_fr(FRn);5.621 - FSQRT_st0();5.622 - pop_fr(FRn);5.623 - JMP_rel8(end);5.624 - JMP_TARGET(doubleprec);5.625 - push_dr(FRn);5.626 - FSQRT_st0();5.627 - pop_dr(FRn);5.628 - JMP_TARGET(end);5.629 - sh4_x86.tstate = TSTATE_NONE;5.630 + if( sh4_x86.double_prec ) {5.631 + push_dr(FRn);5.632 + FSQRT_st0();5.633 + pop_dr(FRn);5.634 + } else {5.635 + push_fr(FRn);5.636 + FSQRT_st0();5.637 + pop_fr(FRn);5.638 + }5.639 :}5.640 FSUB FRm, FRn {:5.641 COUNT_INST(I_FSUB);5.642 check_fpuen();5.643 - load_spreg( R_ECX, R_FPSCR );5.644 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.645 - JNE_rel8(doubleprec);5.646 - push_fr(FRn);5.647 - push_fr(FRm);5.648 - FSUBP_st(1);5.649 - pop_fr(FRn);5.650 - JMP_rel8(end);5.651 - JMP_TARGET(doubleprec);5.652 - push_dr(FRn);5.653 - push_dr(FRm);5.654 - FSUBP_st(1);5.655 - pop_dr(FRn);5.656 - JMP_TARGET(end);5.657 - sh4_x86.tstate = TSTATE_NONE;5.658 + if( sh4_x86.double_prec ) {5.659 + push_dr(FRn);5.660 + push_dr(FRm);5.661 + FSUBP_st(1);5.662 + pop_dr(FRn);5.663 + } else {5.664 + push_fr(FRn);5.665 + push_fr(FRm);5.666 + 
FSUBP_st(1);5.667 + pop_fr(FRn);5.668 + }5.669 :}5.671 FCMP/EQ FRm, FRn {:5.672 COUNT_INST(I_FCMPEQ);5.673 check_fpuen();5.674 - load_spreg( R_ECX, R_FPSCR );5.675 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.676 - JNE_rel8(doubleprec);5.677 - push_fr(FRm);5.678 - push_fr(FRn);5.679 - JMP_rel8(end);5.680 - JMP_TARGET(doubleprec);5.681 - push_dr(FRm);5.682 - push_dr(FRn);5.683 - JMP_TARGET(end);5.684 + if( sh4_x86.double_prec ) {5.685 + push_dr(FRm);5.686 + push_dr(FRn);5.687 + } else {5.688 + push_fr(FRm);5.689 + push_fr(FRn);5.690 + }5.691 FCOMIP_st(1);5.692 SETE_t();5.693 FPOP_st();5.694 - sh4_x86.tstate = TSTATE_NONE;5.695 + sh4_x86.tstate = TSTATE_E;5.696 :}5.697 FCMP/GT FRm, FRn {:5.698 COUNT_INST(I_FCMPGT);5.699 check_fpuen();5.700 - load_spreg( R_ECX, R_FPSCR );5.701 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.702 - JNE_rel8(doubleprec);5.703 - push_fr(FRm);5.704 - push_fr(FRn);5.705 - JMP_rel8(end);5.706 - JMP_TARGET(doubleprec);5.707 - push_dr(FRm);5.708 - push_dr(FRn);5.709 - JMP_TARGET(end);5.710 + if( sh4_x86.double_prec ) {5.711 + push_dr(FRm);5.712 + push_dr(FRn);5.713 + } else {5.714 + push_fr(FRm);5.715 + push_fr(FRn);5.716 + }5.717 FCOMIP_st(1);5.718 SETA_t();5.719 FPOP_st();5.720 - sh4_x86.tstate = TSTATE_NONE;5.721 + sh4_x86.tstate = TSTATE_A;5.722 :}5.724 FSCA FPUL, FRn {:5.725 COUNT_INST(I_FSCA);5.726 check_fpuen();5.727 - load_spreg( R_ECX, R_FPSCR );5.728 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.729 - JNE_rel8(doubleprec );5.730 - LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );5.731 - load_spreg( R_EDX, R_FPUL );5.732 - call_func2( sh4_fsca, R_EDX, R_ECX );5.733 - JMP_TARGET(doubleprec);5.734 + if( sh4_x86.double_prec == 0 ) {5.735 + LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );5.736 + load_spreg( R_EDX, R_FPUL );5.737 + call_func2( sh4_fsca, R_EDX, R_ECX );5.738 + }5.739 sh4_x86.tstate = TSTATE_NONE;5.740 :}5.741 FIPR FVm, FVn {:5.742 COUNT_INST(I_FIPR);5.743 check_fpuen();5.744 - load_spreg( R_ECX, R_FPSCR );5.745 - TEST_imm32_r32( FPSCR_PR, 
R_ECX );5.746 - JNE_rel8( doubleprec);5.747 -5.748 - push_fr( FVm<<2 );5.749 - push_fr( FVn<<2 );5.750 - FMULP_st(1);5.751 - push_fr( (FVm<<2)+1);5.752 - push_fr( (FVn<<2)+1);5.753 - FMULP_st(1);5.754 - FADDP_st(1);5.755 - push_fr( (FVm<<2)+2);5.756 - push_fr( (FVn<<2)+2);5.757 - FMULP_st(1);5.758 - FADDP_st(1);5.759 - push_fr( (FVm<<2)+3);5.760 - push_fr( (FVn<<2)+3);5.761 - FMULP_st(1);5.762 - FADDP_st(1);5.763 - pop_fr( (FVn<<2)+3);5.764 - JMP_TARGET(doubleprec);5.765 - sh4_x86.tstate = TSTATE_NONE;5.766 + if( sh4_x86.double_prec == 0 ) {5.767 + push_fr( FVm<<2 );5.768 + push_fr( FVn<<2 );5.769 + FMULP_st(1);5.770 + push_fr( (FVm<<2)+1);5.771 + push_fr( (FVn<<2)+1);5.772 + FMULP_st(1);5.773 + FADDP_st(1);5.774 + push_fr( (FVm<<2)+2);5.775 + push_fr( (FVn<<2)+2);5.776 + FMULP_st(1);5.777 + FADDP_st(1);5.778 + push_fr( (FVm<<2)+3);5.779 + push_fr( (FVn<<2)+3);5.780 + FMULP_st(1);5.781 + FADDP_st(1);5.782 + pop_fr( (FVn<<2)+3);5.783 + }5.784 :}5.785 FTRV XMTRX, FVn {:5.786 COUNT_INST(I_FTRV);5.787 check_fpuen();5.788 - load_spreg( R_ECX, R_FPSCR );5.789 - TEST_imm32_r32( FPSCR_PR, R_ECX );5.790 - JNE_rel8( doubleprec );5.791 - LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EDX );5.792 - call_func1( sh4_ftrv, R_EDX ); // 125.793 - JMP_TARGET(doubleprec);5.794 + if( sh4_x86.double_prec == 0 ) {5.795 + LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EDX );5.796 + call_func1( sh4_ftrv, R_EDX );5.797 + }5.798 sh4_x86.tstate = TSTATE_NONE;5.799 :}5.801 @@ -2355,6 +2266,7 @@5.802 XOR_imm32_r32( FPSCR_SZ, R_ECX );5.803 store_spreg( R_ECX, R_FPSCR );5.804 sh4_x86.tstate = TSTATE_NONE;5.805 + sh4_x86.double_size = !sh4_x86.double_size;5.806 :}5.808 /* Processor control instructions */5.809 @@ -2517,6 +2429,7 @@5.810 load_reg( R_EAX, Rm );5.811 call_func1( sh4_write_fpscr, R_EAX );5.812 sh4_x86.tstate = TSTATE_NONE;5.813 + return 2;5.814 :}5.815 LDS.L @Rm+, FPSCR {:5.816 COUNT_INST(I_LDSFPSCRM);5.817 @@ -2528,6 +2441,7 @@5.818 MEM_READ_LONG( R_EAX, R_EAX );5.819 call_func1( 
sh4_write_fpscr, R_EAX );5.820 sh4_x86.tstate = TSTATE_NONE;5.821 + return 2;5.822 :}5.823 LDS Rm, FPUL {:5.824 COUNT_INST(I_LDS);
6.1 --- a/src/sh4/x86op.h Sun Oct 26 00:52:32 2008 +00006.2 +++ b/src/sh4/x86op.h Sun Oct 26 02:28:29 2008 +00006.3 @@ -159,6 +159,7 @@6.4 #define MOV_r32_sh4r(r1,disp) OP(0x89); MODRM_r32_sh4r(r1,disp)6.5 #define MOV_moff32_EAX(off) OP(0xA1); OPPTR(off)6.6 #define MOV_sh4r_r32(disp, r1) OP(0x8B); MODRM_r32_sh4r(r1,disp)6.7 +#define MOV_r32_r32ind(r2,r1) OP(0x89); OP(0 + (r2<<3) + r1 )6.8 #define MOV_r32ind_r32(r1,r2) OP(0x8B); OP(0 + (r2<<3) + r1 )6.9 #define MOVSX_r8_r32(r1,r2) OP(0x0F); OP(0xBE); MODRM_rm32_r32(r1,r2)6.10 #define MOVSX_r16_r32(r1,r2) OP(0x0F); OP(0xBF); MODRM_rm32_r32(r1,r2)
7.1 --- a/src/sh4/xltcache.c Sun Oct 26 00:52:32 2008 +00007.2 +++ b/src/sh4/xltcache.c Sun Oct 26 02:28:29 2008 +00007.3 @@ -38,7 +38,6 @@7.4 #define XLAT_LUT_ENTRY_USED (void *)17.6 #define NEXT(block) ( (xlat_cache_block_t)&((block)->code[(block)->size]))7.7 -#define BLOCK_FOR_CODE(code) (((xlat_cache_block_t)code)-1)7.8 #define IS_ENTRY_POINT(ent) (ent > XLAT_LUT_ENTRY_USED)7.9 #define IS_ENTRY_USED(ent) (ent != XLAT_LUT_ENTRY_EMPTY)7.11 @@ -124,7 +123,7 @@7.12 int i;7.13 for( i=0; i<XLAT_LUT_PAGE_ENTRIES; i++ ) {7.14 if( IS_ENTRY_POINT(page[i]) ) {7.15 - BLOCK_FOR_CODE(page[i])->active = 0;7.16 + XLAT_BLOCK_FOR_CODE(page[i])->active = 0;7.17 }7.18 page[i] = NULL;7.19 }7.20 @@ -212,7 +211,7 @@7.21 {7.22 if( code != NULL ) {7.23 uintptr_t pc_offset = ((uint8_t *)native_pc) - ((uint8_t *)code);7.24 - xlat_cache_block_t block = BLOCK_FOR_CODE(code);7.25 + xlat_cache_block_t block = XLAT_BLOCK_FOR_CODE(code);7.26 uint32_t count = block->recover_table_size;7.27 xlat_recovery_record_t records = (xlat_recovery_record_t)(&block->code[block->recover_table_offset]);7.28 uint32_t posn;7.29 @@ -235,7 +234,7 @@7.30 {7.31 if( code != NULL ) {7.32 uintptr_t pc_offset = ((uint8_t *)native_pc) - ((uint8_t *)code);7.33 - xlat_cache_block_t block = BLOCK_FOR_CODE(code);7.34 + xlat_cache_block_t block = XLAT_BLOCK_FOR_CODE(code);7.35 uint32_t count = block->recover_table_size;7.36 xlat_recovery_record_t records = (xlat_recovery_record_t)(&block->code[block->recover_table_offset]);7.37 uint32_t posn;7.38 @@ -334,6 +333,8 @@7.39 start_block->active = 1;7.40 start_block->size = allocation;7.41 start_block->lut_entry = block->lut_entry;7.42 + start_block->fpscr_mask = block->fpscr_mask;7.43 + start_block->fpscr = block->fpscr;7.44 start_block->recover_table_offset = block->recover_table_offset;7.45 start_block->recover_table_size = block->recover_table_size;7.46 *block->lut_entry = &start_block->code;7.47 @@ -379,6 +380,8 @@7.48 start_block->active = 1;7.49 start_block->size = 
allocation;7.50 start_block->lut_entry = block->lut_entry;7.51 + start_block->fpscr_mask = block->fpscr_mask;7.52 + start_block->fpscr = block->fpscr;7.53 start_block->recover_table_offset = block->recover_table_offset;7.54 start_block->recover_table_size = block->recover_table_size;7.55 *block->lut_entry = &start_block->code;7.56 @@ -416,7 +419,7 @@7.57 }7.59 if( IS_ENTRY_POINT(xlat_lut[XLAT_LUT_PAGE(address)][XLAT_LUT_ENTRY(address)]) ) {7.60 - xlat_cache_block_t oldblock = BLOCK_FOR_CODE(xlat_lut[XLAT_LUT_PAGE(address)][XLAT_LUT_ENTRY(address)]);7.61 + xlat_cache_block_t oldblock = XLAT_BLOCK_FOR_CODE(xlat_lut[XLAT_LUT_PAGE(address)][XLAT_LUT_ENTRY(address)]);7.62 oldblock->active = 0;7.63 }
8.1 --- a/src/sh4/xltcache.h Sun Oct 26 00:52:32 2008 +00008.2 +++ b/src/sh4/xltcache.h Sun Oct 26 02:28:29 2008 +00008.3 @@ -41,6 +41,7 @@8.4 int active; /* 0 = deleted, 1 = normal. 2 = accessed (temp-space only) */8.5 uint32_t size;8.6 void **lut_entry; /* For deletion */8.7 + uint32_t fpscr_mask, fpscr; /* fpscr condition check */8.8 uint32_t recover_table_offset; // Offset from code[0] of the recovery table;8.9 uint32_t recover_table_size;8.10 unsigned char code[0];8.11 @@ -48,6 +49,11 @@8.13 typedef struct xlat_cache_block *xlat_cache_block_t;8.15 +#define XLAT_BLOCK_FOR_CODE(code) (((xlat_cache_block_t)code)-1)8.16 +8.17 +#define XLAT_BLOCK_FPSCR_MASK(code) (XLAT_BLOCK_FOR_CODE(code)->fpscr_mask)8.18 +#define XLAT_BLOCK_FPSCR(code) (XLAT_BLOCK_FOR_CODE(code)->fpscr)8.19 +8.20 /**8.21 * Initialize the translation cache8.22 */
.