Search
lxdream.org :: lxdream/src/sh4/sh4x86.in :: diff
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 590:4db6a084ca3c
prev 586:2a3ba82cf243
next 591:7b9612fd2395
author nkeynes
date Wed Jan 16 09:39:16 2008 +0000 (12 years ago)
permissions -rw-r--r--
last change Ensure PC correctness in presence of delay-slot exceptions
file annotate diff log raw
1.1 --- a/src/sh4/sh4x86.in Tue Jan 15 20:50:23 2008 +0000
1.2 +++ b/src/sh4/sh4x86.in Wed Jan 16 09:39:16 2008 +0000
1.3 @@ -42,13 +42,17 @@
1.4
1.5 #define MAX_RECOVERY_SIZE 2048
1.6
1.7 +#define DELAY_NONE 0
1.8 +#define DELAY_PC 1
1.9 +#define DELAY_PC_PR 2
1.10 +
1.11 /**
1.12 * Struct to manage internal translation state. This state is not saved -
1.13 * it is only valid between calls to sh4_translate_begin_block() and
1.14 * sh4_translate_end_block()
1.15 */
1.16 struct sh4_x86_state {
1.17 - gboolean in_delay_slot;
1.18 + int in_delay_slot;
1.19 gboolean priv_checked; /* true if we've already checked the cpu mode. */
1.20 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
1.21 gboolean branch_taken; /* true if we branched unconditionally */
1.22 @@ -342,7 +346,7 @@
1.23 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
1.24 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
1.25
1.26 -#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
1.27 +#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
1.28
1.29 /****** Import appropriate calling conventions ******/
1.30 #if SH4_TRANSLATOR == TARGET_X86_64
1.31 @@ -355,12 +359,41 @@
1.32 #endif
1.33 #endif
1.34
1.35 +/**
1.36 + * Embed a breakpoint into the generated code
1.37 + */
1.38 void sh4_translate_emit_breakpoint( sh4vma_t pc )
1.39 {
1.40 load_imm32( R_EAX, XLAT_EXIT_BREAKPOINT );
1.41 call_func1( sh4_translate_exit, R_EAX );
1.42 }
1.43 +
1.44 +/**
1.45 + * Embed a call to sh4_execute_instruction for situations that we
1.46 + * can't translate (mainly page-crossing delay slots at the moment).
1.47 + * Caller is responsible for setting new_pc.
1.48 + */
1.49 +void sh4_emulator_exit( sh4vma_t endpc )
1.50 +{
1.51 + load_imm32( R_ECX, endpc - sh4_x86.block_start_pc ); // 5
1.52 + ADD_r32_sh4r( R_ECX, R_PC );
1.53
1.54 + load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
1.55 + ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
1.56 + load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
1.57 + store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
1.58 +
1.59 + call_func0( sh4_execute_instruction );
1.60 + load_imm32( R_EAX, R_PC );
1.61 + if( sh4_x86.tlb_on ) {
1.62 + call_func1(xlat_get_code_by_vma,R_EAX);
1.63 + } else {
1.64 + call_func1(xlat_get_code,R_EAX);
1.65 + }
1.66 + AND_imm8s_r32( 0xFC, R_EAX ); // 3
1.67 + POP_r32(R_EBP);
1.68 + RET();
1.69 +}
1.70
1.71 /**
1.72 * Translate a single instruction. Delayed branches are handled specially
1.73 @@ -371,7 +404,7 @@
1.74 * @return true (non-zero) if the instruction marks the end of a basic block
1.75 * (e.g. a branch or another instruction that terminates the block)
1.76 */
1.77 -uint32_t sh4_translate_instruction( sh4addr_t pc )
1.78 +uint32_t sh4_translate_instruction( sh4vma_t pc )
1.79 {
1.80 uint32_t ir;
1.81 /* Read instruction from icache */
1.82 @@ -1389,7 +1422,7 @@
1.83 SLOTILLEGAL();
1.84 } else {
1.85 sh4vma_t target = disp + pc + 4;
1.86 - sh4_x86.in_delay_slot = TRUE;
1.87 + sh4_x86.in_delay_slot = DELAY_PC;
1.88 if( sh4_x86.tstate == TSTATE_NONE ) {
1.89 CMP_imm8s_sh4r( 1, R_T );
1.90 sh4_x86.tstate = TSTATE_E;
1.91 @@ -1407,7 +1440,7 @@
1.92 if( sh4_x86.in_delay_slot ) {
1.93 SLOTILLEGAL();
1.94 } else {
1.95 - sh4_x86.in_delay_slot = TRUE;
1.96 + sh4_x86.in_delay_slot = DELAY_PC;
1.97 sh4_translate_instruction( pc + 2 );
1.98 exit_block_rel( disp + pc + 4, pc+4 );
1.99 sh4_x86.branch_taken = TRUE;
1.100 @@ -1418,13 +1451,14 @@
1.101 if( sh4_x86.in_delay_slot ) {
1.102 SLOTILLEGAL();
1.103 } else {
1.104 - load_reg( R_EAX, Rn );
1.105 - ADD_imm32_r32( pc + 4, R_EAX );
1.106 - store_spreg( R_EAX, REG_OFFSET(pc) );
1.107 - sh4_x86.in_delay_slot = TRUE;
1.108 + load_spreg( R_EAX, R_PC );
1.109 + ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1.110 + ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1.111 + store_spreg( R_EAX, R_NEW_PC );
1.112 + sh4_x86.in_delay_slot = DELAY_PC;
1.113 sh4_x86.tstate = TSTATE_NONE;
1.114 sh4_translate_instruction( pc + 2 );
1.115 - exit_block_pcset(pc+2);
1.116 + exit_block_newpcset(pc+2);
1.117 sh4_x86.branch_taken = TRUE;
1.118 return 4;
1.119 }
1.120 @@ -1433,9 +1467,10 @@
1.121 if( sh4_x86.in_delay_slot ) {
1.122 SLOTILLEGAL();
1.123 } else {
1.124 - load_imm32( R_EAX, pc + 4 );
1.125 + load_spreg( R_EAX, R_PC );
1.126 + ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1.127 store_spreg( R_EAX, R_PR );
1.128 - sh4_x86.in_delay_slot = TRUE;
1.129 + sh4_x86.in_delay_slot = DELAY_PC;
1.130 sh4_translate_instruction( pc + 2 );
1.131 exit_block_rel( disp + pc + 4, pc+4 );
1.132 sh4_x86.branch_taken = TRUE;
1.133 @@ -1446,14 +1481,15 @@
1.134 if( sh4_x86.in_delay_slot ) {
1.135 SLOTILLEGAL();
1.136 } else {
1.137 - load_imm32( R_ECX, pc + 4 );
1.138 - store_spreg( R_ECX, R_PR );
1.139 - ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
1.140 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.141 - sh4_x86.in_delay_slot = TRUE;
1.142 + load_spreg( R_EAX, R_PC );
1.143 + ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1.144 + store_spreg( R_EAX, R_PR );
1.145 + ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1.146 + store_spreg( R_EAX, R_NEW_PC );
1.147 +
1.148 sh4_x86.tstate = TSTATE_NONE;
1.149 sh4_translate_instruction( pc + 2 );
1.150 - exit_block_pcset(pc+2);
1.151 + exit_block_newpcset(pc+2);
1.152 sh4_x86.branch_taken = TRUE;
1.153 return 4;
1.154 }
1.155 @@ -1473,7 +1509,7 @@
1.156 if( sh4_x86.in_delay_slot ) {
1.157 SLOTILLEGAL();
1.158 } else {
1.159 - sh4_x86.in_delay_slot = TRUE;
1.160 + sh4_x86.in_delay_slot = DELAY_PC;
1.161 if( sh4_x86.tstate == TSTATE_NONE ) {
1.162 CMP_imm8s_sh4r( 1, R_T );
1.163 sh4_x86.tstate = TSTATE_E;
1.164 @@ -1492,10 +1528,10 @@
1.165 SLOTILLEGAL();
1.166 } else {
1.167 load_reg( R_ECX, Rn );
1.168 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.169 - sh4_x86.in_delay_slot = TRUE;
1.170 + store_spreg( R_ECX, R_NEW_PC );
1.171 + sh4_x86.in_delay_slot = DELAY_PC;
1.172 sh4_translate_instruction(pc+2);
1.173 - exit_block_pcset(pc+2);
1.174 + exit_block_newpcset(pc+2);
1.175 sh4_x86.branch_taken = TRUE;
1.176 return 4;
1.177 }
1.178 @@ -1504,13 +1540,13 @@
1.179 if( sh4_x86.in_delay_slot ) {
1.180 SLOTILLEGAL();
1.181 } else {
1.182 - load_imm32( R_EAX, pc + 4 );
1.183 + load_spreg( R_EAX, R_PC );
1.184 + ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1.185 store_spreg( R_EAX, R_PR );
1.186 load_reg( R_ECX, Rn );
1.187 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.188 - sh4_x86.in_delay_slot = TRUE;
1.189 + store_spreg( R_ECX, R_NEW_PC );
1.190 sh4_translate_instruction(pc+2);
1.191 - exit_block_pcset(pc+2);
1.192 + exit_block_newpcset(pc+2);
1.193 sh4_x86.branch_taken = TRUE;
1.194 return 4;
1.195 }
1.196 @@ -1521,15 +1557,15 @@
1.197 } else {
1.198 check_priv();
1.199 load_spreg( R_ECX, R_SPC );
1.200 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.201 + store_spreg( R_ECX, R_NEW_PC );
1.202 load_spreg( R_EAX, R_SSR );
1.203 call_func1( sh4_write_sr, R_EAX );
1.204 - sh4_x86.in_delay_slot = TRUE;
1.205 + sh4_x86.in_delay_slot = DELAY_PC;
1.206 sh4_x86.priv_checked = FALSE;
1.207 sh4_x86.fpuen_checked = FALSE;
1.208 sh4_x86.tstate = TSTATE_NONE;
1.209 sh4_translate_instruction(pc+2);
1.210 - exit_block_pcset(pc+2);
1.211 + exit_block_newpcset(pc+2);
1.212 sh4_x86.branch_taken = TRUE;
1.213 return 4;
1.214 }
1.215 @@ -1539,10 +1575,10 @@
1.216 SLOTILLEGAL();
1.217 } else {
1.218 load_spreg( R_ECX, R_PR );
1.219 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.220 - sh4_x86.in_delay_slot = TRUE;
1.221 + store_spreg( R_ECX, R_NEW_PC );
1.222 + sh4_x86.in_delay_slot = DELAY_PC;
1.223 sh4_translate_instruction(pc+2);
1.224 - exit_block_pcset(pc+2);
1.225 + exit_block_newpcset(pc+2);
1.226 sh4_x86.branch_taken = TRUE;
1.227 return 4;
1.228 }
1.229 @@ -1551,8 +1587,8 @@
1.230 if( sh4_x86.in_delay_slot ) {
1.231 SLOTILLEGAL();
1.232 } else {
1.233 - load_imm32( R_ECX, pc+2 );
1.234 - store_spreg( R_ECX, REG_OFFSET(pc) );
1.235 + load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1.236 + ADD_r32_sh4r( R_ECX, R_PC );
1.237 load_imm32( R_EAX, imm );
1.238 call_func1( sh4_raise_trap, R_EAX );
1.239 sh4_x86.tstate = TSTATE_NONE;
1.240 @@ -2451,7 +2487,7 @@
1.241 check_priv();
1.242 call_func0( sh4_sleep );
1.243 sh4_x86.tstate = TSTATE_NONE;
1.244 - sh4_x86.in_delay_slot = FALSE;
1.245 + sh4_x86.in_delay_slot = DELAY_NONE;
1.246 return 2;
1.247 :}
1.248 STC SR, Rn {:
1.249 @@ -2662,6 +2698,6 @@
1.250
1.251 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
1.252 %%
1.253 - sh4_x86.in_delay_slot = FALSE;
1.254 + sh4_x86.in_delay_slot = DELAY_NONE;
1.255 return 0;
1.256 }
.