4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
8 * Copyright (c) 2007 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
28 #include "sh4/xltcache.h"
29 #include "sh4/sh4core.h"
30 #include "sh4/sh4trans.h"
31 #include "sh4/sh4stat.h"
32 #include "sh4/sh4mmio.h"
33 #include "sh4/x86op.h"
36 #define DEFAULT_BACKPATCH_SIZE 4096
/* One pending exception fixup in the generated code for the current block.
 * NOTE(review): this view is an excerpt - the exc_code member (assigned in
 * sh4_x86_add_backpatch below) and the closing "};" are elided here. */
38 struct backpatch_record {
39 uint32_t fixup_offset; /* byte offset of the fixup site within the block's emitted code */
40 uint32_t fixup_icount; /* SH4 instruction index: (fixup_pc - block_start_pc)>>1 */
44 #define MAX_RECOVERY_SIZE 2048
51 * Struct to manage internal translation state. This state is not saved -
52 * it is only valid between calls to sh4_translate_begin_block() and
53 * sh4_translate_end_block()
55 struct sh4_x86_state {
57 gboolean priv_checked; /* true if we've already checked the cpu mode. */
58 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
59 gboolean branch_taken; /* true if we branched unconditionally */
60 uint32_t block_start_pc;
61 uint32_t stack_posn; /* Trace stack height for alignment purposes */
65 gboolean tlb_on; /* True if tlb translation is active */
67 /* Allocated memory for the (block-wide) back-patch list */
68 struct backpatch_record *backpatch_list;
69 uint32_t backpatch_posn;
70 uint32_t backpatch_size; /* capacity of backpatch_list, in records */
/* NOTE(review): excerpt - members referenced elsewhere in this file
 * (e.g. in_delay_slot, tstate) and the closing "};" are elided from view. */
/* tstate caches which x86 condition code (if any) currently mirrors sh4r.t,
 * so conditional branches can test live EFLAGS instead of reloading sh4r.t.
 * TSTATE_NONE means no flags are live.  NOTE(review): excerpt - the other
 * TSTATE_* constants and the #else/#endif of the ENABLE_SH4STATS block are
 * elided from this view. */
73 #define TSTATE_NONE -1
83 #ifdef ENABLE_SH4STATS
84 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
86 #define COUNT_INST(id)
89 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
/* 0x70+cc is the x86 short-form Jcc opcode; tstate is presumably the x86
 * condition code matching T, and the trailing OP(-1) emits a placeholder
 * displacement byte for MARK_JMP8/JMP_TARGET to patch - TODO confirm
 * against x86op.h. */
90 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
91 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
92 OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
94 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
/* cc^1 inverts the sense of an x86 condition code (Jcc opcodes pair up
 * differing only in bit 0). */
95 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
96 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
97 OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
99 static struct sh4_x86_state sh4_x86; /* the per-block translator state (see struct above) */
/* Constants addressed by generated code - presumably used by the FP
 * conversion (FTRC-style) sequences for clamping and x87 control-word
 * switching; TODO confirm against the FP templates. */
101 static uint32_t max_int = 0x7FFFFFFF;
102 static uint32_t min_int = 0x80000000;
103 static uint32_t save_fcw; /* save value for fpu control word */
104 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
/* One-time translator setup: allocate the initial backpatch list.
 * backpatch_size is a capacity in *records*, carved out of an initial
 * DEFAULT_BACKPATCH_SIZE-byte allocation.
 * NOTE(review): malloc() result is not checked; the first backpatch append
 * would fault on OOM.  (Excerpt - the function braces are elided from view.) */
106 void sh4_translate_init(void)
108 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
109 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
/* Append a fixup record: remembers where in the emitted code (fixup_addr)
 * an exception exit must later be patched in, which SH4 instruction it
 * belongs to (fixup_icount), and the exception code to raise.  The list
 * grows geometrically when full.
 * NOTE(review): the `p = realloc(p, ...)` pattern loses the old pointer on
 * failure, and the assert() disappears under NDEBUG - a temp-pointer check
 * would be safer.  (Excerpt - the body of the in_delay_slot branch and the
 * function braces are elided from this view.) */
113 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
115 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
116 sh4_x86.backpatch_size <<= 1;
117 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
118 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
119 assert( sh4_x86.backpatch_list != NULL );
121 if( sh4_x86.in_delay_slot ) {
/* fixup_offset is relative to the start of the current block's code buffer */
124 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
125 ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
126 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
127 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
128 sh4_x86.backpatch_posn++;
132 * Emit an instruction to load an SH4 reg into a real register
134 static inline void load_reg( int x86reg, int sh4reg )
136 /* mov [bp+n], reg */
/* NOTE(review): excerpt - the opcode byte emitted before this ModRM pair
 * (and the function braces) are elided from view.  0x45+(reg<<3) is ModRM
 * mod=01, r/m=EBP with disp8, i.e. an sh4r-relative memory operand. */
138 OP(0x45 + (x86reg<<3));
139 OP(REG_OFFSET(r[sh4reg]));
/* 16-bit load of an SH4 reg - presumably sign-extending (MOVSX); the
 * opcode-emitting lines are elided, TODO confirm.  Used by the 16-bit
 * multiply templates below. */
142 static inline void load_reg16s( int x86reg, int sh4reg )
146 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
/* 16-bit load of an SH4 reg - presumably zero-extending (MOVZX); opcode
 * lines elided, TODO confirm. */
149 static inline void load_reg16u( int x86reg, int sh4reg )
153 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
/* "spreg" = special register: any sh4r field addressed by its byte offset
 * (R_SR, R_GBR, R_MACL, ...), as opposed to the r[0..15] general regs. */
157 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
158 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
160 * Emit an instruction to load an immediate value into a register
162 static inline void load_imm32( int x86reg, uint32_t value ) {
163 /* mov #value, reg */
169 * Load an immediate 64-bit quantity (note: x86-64 only)
/* NOTE(review): the parameter is declared uint32_t although the comment
 * says 64-bit - the upper 32 bits of the immediate can never be set.
 * Likely a latent bug; body is elided here so it is flagged, not fixed. */
171 static inline void load_imm64( int x86reg, uint32_t value ) {
172 /* mov #value, reg */
179 * Emit an instruction to store an SH4 reg (RN)
/* NOTE(review): "void static inline" - unconventional specifier order,
 * legal C but most style guides want "static inline void". */
181 void static inline store_reg( int x86reg, int sh4reg ) {
182 /* mov reg, [bp+n] */
184 OP(0x45 + (x86reg<<3));
185 OP(REG_OFFSET(r[sh4reg]));
189 * Load an FR register (single-precision floating point) into an integer x86
190 * register (eg for register-to-register moves)
/* The (frm)^1 index flips the low bit of the FR number: the fr[bank][]
 * array is presumably stored with each pair of singles word-swapped so a
 * double (DR) occupies contiguous host words - TODO confirm against
 * sh4core.h.  0x8B = x86 "mov r32, r/m32"; 0x89 (below) is the store form. */
192 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
193 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
196 * Load the low half of a DR register (DR or XD) into an integer x86 register
/* frm&1 selects the bank (XD vs DR); frm|0x01 / frm&0x0E pick the two
 * word halves of the 64-bit double - presumed layout, TODO confirm. */
198 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
199 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
202 * Store an FR register (single-precision floating point) from an integer x86+
203 * register (eg for register-to-register moves)
205 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
206 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
208 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
209 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
/* FPU-stack helpers: FLDF/FSTPF move singles, FLDD/FSTPD move doubles,
 * between the host FP stack and sh4r offsets (names suggest x87 fld/fstp -
 * TODO confirm against x86op.h). */
212 #define push_fpul()  FLDF_sh4r(R_FPUL)
213 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
214 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
215 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
216 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
217 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
218 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
219 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
220 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
221 #define pop_xdr(frm) FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
225 /* Exception checks - Note that all exception checks will clobber EAX */
/* Emit a privilege check: tests SR.MD and raises EXC_ILLEGAL (or
 * EXC_SLOT_ILLEGAL in a delay slot) when in user mode.  priv_checked
 * caches the result so only the first privileged instruction in a block
 * pays for the test.  NOTE(review): excerpt - the "} else {" and closing
 * lines of the conditional are elided from this view. */
227 #define check_priv( ) \
228 if( !sh4_x86.priv_checked ) { \
229 sh4_x86.priv_checked = TRUE;\
230 load_spreg( R_EAX, R_SR );\
231 AND_imm32_r32( SR_MD, R_EAX );\
232 if( sh4_x86.in_delay_slot ) {\
233 JE_exc( EXC_SLOT_ILLEGAL );\
235 JE_exc( EXC_ILLEGAL );\
/* Emit an FPU-enable check: tests SR.FD and raises EXC_FPU_DISABLED (or
 * the slot variant) when the FPU is disabled; cached per block via
 * fpuen_checked just like check_priv.  (Same elision note applies.) */
239 #define check_fpuen( ) \
240 if( !sh4_x86.fpuen_checked ) {\
241 sh4_x86.fpuen_checked = TRUE;\
242 load_spreg( R_EAX, R_SR );\
243 AND_imm32_r32( SR_FD, R_EAX );\
244 if( sh4_x86.in_delay_slot ) {\
245 JNE_exc(EXC_SLOT_FPU_DISABLED);\
247 JNE_exc(EXC_FPU_DISABLED);\
/* Address-alignment guards: test the relevant low bit(s) of the address in
 * x86reg and branch to an address-error exception stub on misalignment.
 * NOTE(review): the read variants omit the trailing semicolon while the
 * write variants include one - both are invoked statement-style below, so
 * this is only a stylistic inconsistency, not a bug. */
251 #define check_ralign16( x86reg ) \
252 TEST_imm32_r32( 0x00000001, x86reg ); \
253 JNE_exc(EXC_DATA_ADDR_READ)
255 #define check_walign16( x86reg ) \
256 TEST_imm32_r32( 0x00000001, x86reg ); \
257 JNE_exc(EXC_DATA_ADDR_WRITE);
259 #define check_ralign32( x86reg ) \
260 TEST_imm32_r32( 0x00000003, x86reg ); \
261 JNE_exc(EXC_DATA_ADDR_READ)
263 #define check_walign32( x86reg ) \
264 TEST_imm32_r32( 0x00000003, x86reg ); \
265 JNE_exc(EXC_DATA_ADDR_WRITE);
/* Memory-access helpers.  All go through the sh4_read_*/sh4_write_* C
 * functions, whose result comes back in EAX; MEM_RESULT moves it into the
 * requested register.  NOTE(review): MEM_RESULT is a bare if-statement, not
 * do{...}while(0) - safe as used here, but dangling-else-prone. */
268 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
269 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
270 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
271 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
272 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
273 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
274 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
277 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
278 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
/* JE_exc(-1) presumably means "exception already raised by the mmu call,
 * just exit" (vs. an explicit exc_code) - TODO confirm. */
280 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
282 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
284 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
285 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
287 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
/* Emitted-code size accounting - these must stay in sync with the byte
 * counts of the macros above (the +12 covers the CMP/JE/MOV tail after the
 * call - presumed, TODO verify against the actual encodings). */
289 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
290 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
291 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
/* Raise slot-illegal and abort translation of this instruction.
 * NOTE(review): hides a `return 1` from the enclosing translate function
 * inside a macro - intentional here, but easy to misread at call sites. */
293 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
295 /****** Import appropriate calling conventions ******/
296 #if SH4_TRANSLATOR == TARGET_X86_64
297 #include "sh4/ia64abi.h"
298 #else /* SH4_TRANSLATOR == TARGET_X86 */
300 #include "sh4/ia32mac.h"
302 #include "sh4/ia32abi.h"
/* Worst-case size in bytes of the code sh4_translate_end_block() will emit:
 * the epilogue plus per-backpatch exception stubs.  Up to 3 backpatch
 * records are assumed to cost 12 bytes each; beyond that, a 48-byte shared
 * sequence plus 15 bytes per extra record.  These constants must match the
 * emitter - TODO confirm.  (Excerpt - the else/braces are elided from view.) */
306 uint32_t sh4_translate_end_block_size()
308 if( sh4_x86.backpatch_posn <= 3 ) {
309 return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
311 return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
317 * Embed a breakpoint into the generated code
319 void sh4_translate_emit_breakpoint( sh4vma_t pc )
/* Emits: load the SH4 pc into EAX, then call the breakpoint-hit handler.
 * (Excerpt - function braces elided from this view.) */
321 load_imm32( R_EAX, pc );
322 call_func1( sh4_translate_breakpoint_hit, R_EAX );
/* An instruction can only be translated when its bytes are reachable
 * through the current icache mapping.  NOTE(review): expansion is not
 * parenthesized - `!IS_IN_ICACHE(pc)` is safe in plain if() conditions but
 * could misbind if combined with other operators at a call site. */
326 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
329 * Embed a call to sh4_execute_instruction for situations that we
330 * can't translate (just page-crossing delay slots at the moment).
331 * Caller is responsible for setting new_pc before calling this function.
335 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
336 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
337 * Call sh4_execute_instruction
338 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
340 void exit_block_emu( sh4vma_t endpc )
/* Advance sh4r.pc by the block-relative distance to endpc (numbers in the
 * trailing comments are emitted-byte counts for size accounting). */
342 load_imm32( R_ECX, endpc - sh4_x86.block_start_pc ); // 5
343 ADD_r32_sh4r( R_ECX, R_PC );
/* Charge cycles for every instruction up to and including endpc. */
345 load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
346 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
347 load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
348 store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
/* Emulate the one untranslatable instruction, then look up the next code
 * block from the resulting PC exactly as a normal block exit would. */
350 call_func0( sh4_execute_instruction );
351 load_spreg( R_EAX, R_PC );
352 if( sh4_x86.tlb_on ) {
353 call_func1(xlat_get_code_by_vma,R_EAX);
355 call_func1(xlat_get_code,R_EAX);
/* Masks the low 2 bits of the returned code pointer - presumably strips
 * tag/flag bits before jumping to it; TODO confirm against xltcache.h.
 * (Excerpt - the else/closing braces are elided from this view.) */
357 AND_imm8s_rptr( 0xFC, R_EAX );
363 * Translate a single instruction. Delayed branches are handled specially
364 * by translating both branch and delayed instruction as a single unit (as
366 * The instruction MUST be in the icache (assert check)
368 * @return true if the instruction marks the end of a basic block
371 uint32_t sh4_translate_instruction( sh4vma_t pc )
374 /* Read instruction from icache */
375 assert( IS_IN_ICACHE(pc) );
376 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
378 /* PC is not in the current icache - this usually means we're running
379 * with MMU on, and we've gone past the end of the page. And since
380 * sh4_translate_block is pretty careful about this, it means we're
381 * almost certainly in a delay slot.
383 * Since we can't assume the page is present (and we can't fault it in
384 * at this point, inline a call to sh4_execute_instruction (with a few
385 * small repairs to cope with the different environment).
388 if( !sh4_x86.in_delay_slot ) {
389 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
395 load_reg( R_EAX, Rm );
396 load_reg( R_ECX, Rn );
397 ADD_r32_r32( R_EAX, R_ECX );
398 store_reg( R_ECX, Rn );
399 sh4_x86.tstate = TSTATE_NONE;
403 load_reg( R_EAX, Rn );
404 ADD_imm8s_r32( imm, R_EAX );
405 store_reg( R_EAX, Rn );
406 sh4_x86.tstate = TSTATE_NONE;
410 if( sh4_x86.tstate != TSTATE_C ) {
413 load_reg( R_EAX, Rm );
414 load_reg( R_ECX, Rn );
415 ADC_r32_r32( R_EAX, R_ECX );
416 store_reg( R_ECX, Rn );
418 sh4_x86.tstate = TSTATE_C;
422 load_reg( R_EAX, Rm );
423 load_reg( R_ECX, Rn );
424 ADD_r32_r32( R_EAX, R_ECX );
425 store_reg( R_ECX, Rn );
427 sh4_x86.tstate = TSTATE_O;
431 load_reg( R_EAX, Rm );
432 load_reg( R_ECX, Rn );
433 AND_r32_r32( R_EAX, R_ECX );
434 store_reg( R_ECX, Rn );
435 sh4_x86.tstate = TSTATE_NONE;
439 load_reg( R_EAX, 0 );
440 AND_imm32_r32(imm, R_EAX);
441 store_reg( R_EAX, 0 );
442 sh4_x86.tstate = TSTATE_NONE;
444 AND.B #imm, @(R0, GBR) {:
446 load_reg( R_EAX, 0 );
447 load_spreg( R_ECX, R_GBR );
448 ADD_r32_r32( R_ECX, R_EAX );
449 MMU_TRANSLATE_WRITE( R_EAX );
450 PUSH_realigned_r32(R_EAX);
451 MEM_READ_BYTE( R_EAX, R_EAX );
452 POP_realigned_r32(R_ECX);
453 AND_imm32_r32(imm, R_EAX );
454 MEM_WRITE_BYTE( R_ECX, R_EAX );
455 sh4_x86.tstate = TSTATE_NONE;
459 load_reg( R_EAX, Rm );
460 load_reg( R_ECX, Rn );
461 CMP_r32_r32( R_EAX, R_ECX );
463 sh4_x86.tstate = TSTATE_E;
466 COUNT_INST(I_CMPEQI);
467 load_reg( R_EAX, 0 );
468 CMP_imm8s_r32(imm, R_EAX);
470 sh4_x86.tstate = TSTATE_E;
474 load_reg( R_EAX, Rm );
475 load_reg( R_ECX, Rn );
476 CMP_r32_r32( R_EAX, R_ECX );
478 sh4_x86.tstate = TSTATE_GE;
482 load_reg( R_EAX, Rm );
483 load_reg( R_ECX, Rn );
484 CMP_r32_r32( R_EAX, R_ECX );
486 sh4_x86.tstate = TSTATE_G;
490 load_reg( R_EAX, Rm );
491 load_reg( R_ECX, Rn );
492 CMP_r32_r32( R_EAX, R_ECX );
494 sh4_x86.tstate = TSTATE_A;
498 load_reg( R_EAX, Rm );
499 load_reg( R_ECX, Rn );
500 CMP_r32_r32( R_EAX, R_ECX );
502 sh4_x86.tstate = TSTATE_AE;
506 load_reg( R_EAX, Rn );
507 CMP_imm8s_r32( 0, R_EAX );
509 sh4_x86.tstate = TSTATE_G;
513 load_reg( R_EAX, Rn );
514 CMP_imm8s_r32( 0, R_EAX );
516 sh4_x86.tstate = TSTATE_GE;
519 COUNT_INST(I_CMPSTR);
520 load_reg( R_EAX, Rm );
521 load_reg( R_ECX, Rn );
522 XOR_r32_r32( R_ECX, R_EAX );
523 TEST_r8_r8( R_AL, R_AL );
525 TEST_r8_r8( R_AH, R_AH );
527 SHR_imm8_r32( 16, R_EAX );
528 TEST_r8_r8( R_AL, R_AL );
530 TEST_r8_r8( R_AH, R_AH );
535 sh4_x86.tstate = TSTATE_E;
539 load_reg( R_EAX, Rm );
540 load_reg( R_ECX, Rn );
541 SHR_imm8_r32( 31, R_EAX );
542 SHR_imm8_r32( 31, R_ECX );
543 store_spreg( R_EAX, R_M );
544 store_spreg( R_ECX, R_Q );
545 CMP_r32_r32( R_EAX, R_ECX );
547 sh4_x86.tstate = TSTATE_NE;
551 XOR_r32_r32( R_EAX, R_EAX );
552 store_spreg( R_EAX, R_Q );
553 store_spreg( R_EAX, R_M );
554 store_spreg( R_EAX, R_T );
555 sh4_x86.tstate = TSTATE_C; // works for DIV1
559 load_spreg( R_ECX, R_M );
560 load_reg( R_EAX, Rn );
561 if( sh4_x86.tstate != TSTATE_C ) {
565 SETC_r8( R_DL ); // Q'
566 CMP_sh4r_r32( R_Q, R_ECX );
568 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
571 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
573 store_reg( R_EAX, Rn ); // Done with Rn now
574 SETC_r8(R_AL); // tmp1
575 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
576 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
577 store_spreg( R_ECX, R_Q );
578 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
579 MOVZX_r8_r32( R_AL, R_EAX );
580 store_spreg( R_EAX, R_T );
581 sh4_x86.tstate = TSTATE_NONE;
585 load_reg( R_EAX, Rm );
586 load_reg( R_ECX, Rn );
588 store_spreg( R_EDX, R_MACH );
589 store_spreg( R_EAX, R_MACL );
590 sh4_x86.tstate = TSTATE_NONE;
594 load_reg( R_EAX, Rm );
595 load_reg( R_ECX, Rn );
597 store_spreg( R_EDX, R_MACH );
598 store_spreg( R_EAX, R_MACL );
599 sh4_x86.tstate = TSTATE_NONE;
603 load_reg( R_EAX, Rn );
604 ADD_imm8s_r32( -1, R_EAX );
605 store_reg( R_EAX, Rn );
607 sh4_x86.tstate = TSTATE_E;
611 load_reg( R_EAX, Rm );
612 MOVSX_r8_r32( R_EAX, R_EAX );
613 store_reg( R_EAX, Rn );
617 load_reg( R_EAX, Rm );
618 MOVSX_r16_r32( R_EAX, R_EAX );
619 store_reg( R_EAX, Rn );
623 load_reg( R_EAX, Rm );
624 MOVZX_r8_r32( R_EAX, R_EAX );
625 store_reg( R_EAX, Rn );
629 load_reg( R_EAX, Rm );
630 MOVZX_r16_r32( R_EAX, R_EAX );
631 store_reg( R_EAX, Rn );
636 load_reg( R_EAX, Rm );
637 check_ralign32( R_EAX );
638 MMU_TRANSLATE_READ( R_EAX );
639 PUSH_realigned_r32( R_EAX );
640 load_reg( R_EAX, Rn );
641 ADD_imm8s_r32( 4, R_EAX );
642 MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
643 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
644 // Note translate twice in case of page boundaries. Maybe worth
645 // adding a page-boundary check to skip the second translation
647 load_reg( R_EAX, Rm );
648 check_ralign32( R_EAX );
649 MMU_TRANSLATE_READ( R_EAX );
650 load_reg( R_ECX, Rn );
651 check_ralign32( R_ECX );
652 PUSH_realigned_r32( R_EAX );
653 MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
654 MOV_r32_r32( R_ECX, R_EAX );
655 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
656 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
658 MEM_READ_LONG( R_EAX, R_EAX );
661 MEM_READ_LONG( R_ECX, R_EAX );
662 POP_realigned_r32( R_ECX );
665 ADD_r32_sh4r( R_EAX, R_MACL );
666 ADC_r32_sh4r( R_EDX, R_MACH );
668 load_spreg( R_ECX, R_S );
669 TEST_r32_r32(R_ECX, R_ECX);
671 call_func0( signsat48 );
673 sh4_x86.tstate = TSTATE_NONE;
678 load_reg( R_EAX, Rm );
679 check_ralign16( R_EAX );
680 MMU_TRANSLATE_READ( R_EAX );
681 PUSH_realigned_r32( R_EAX );
682 load_reg( R_EAX, Rn );
683 ADD_imm8s_r32( 2, R_EAX );
684 MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
685 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
686 // Note translate twice in case of page boundaries. Maybe worth
687 // adding a page-boundary check to skip the second translation
689 load_reg( R_EAX, Rm );
690 check_ralign16( R_EAX );
691 MMU_TRANSLATE_READ( R_EAX );
692 load_reg( R_ECX, Rn );
693 check_ralign16( R_ECX );
694 PUSH_realigned_r32( R_EAX );
695 MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
696 MOV_r32_r32( R_ECX, R_EAX );
697 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
698 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
700 MEM_READ_WORD( R_EAX, R_EAX );
703 MEM_READ_WORD( R_ECX, R_EAX );
704 POP_realigned_r32( R_ECX );
707 load_spreg( R_ECX, R_S );
708 TEST_r32_r32( R_ECX, R_ECX );
711 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
712 JNO_rel8( end ); // 2
713 load_imm32( R_EDX, 1 ); // 5
714 store_spreg( R_EDX, R_MACH ); // 6
715 JS_rel8( positive ); // 2
716 load_imm32( R_EAX, 0x80000000 );// 5
717 store_spreg( R_EAX, R_MACL ); // 6
720 JMP_TARGET(positive);
721 load_imm32( R_EAX, 0x7FFFFFFF );// 5
722 store_spreg( R_EAX, R_MACL ); // 6
726 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
727 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
731 sh4_x86.tstate = TSTATE_NONE;
735 load_spreg( R_EAX, R_T );
736 store_reg( R_EAX, Rn );
740 load_reg( R_EAX, Rm );
741 load_reg( R_ECX, Rn );
743 store_spreg( R_EAX, R_MACL );
744 sh4_x86.tstate = TSTATE_NONE;
748 load_reg16s( R_EAX, Rm );
749 load_reg16s( R_ECX, Rn );
751 store_spreg( R_EAX, R_MACL );
752 sh4_x86.tstate = TSTATE_NONE;
756 load_reg16u( R_EAX, Rm );
757 load_reg16u( R_ECX, Rn );
759 store_spreg( R_EAX, R_MACL );
760 sh4_x86.tstate = TSTATE_NONE;
764 load_reg( R_EAX, Rm );
766 store_reg( R_EAX, Rn );
767 sh4_x86.tstate = TSTATE_NONE;
771 load_reg( R_EAX, Rm );
772 XOR_r32_r32( R_ECX, R_ECX );
774 SBB_r32_r32( R_EAX, R_ECX );
775 store_reg( R_ECX, Rn );
777 sh4_x86.tstate = TSTATE_C;
781 load_reg( R_EAX, Rm );
783 store_reg( R_EAX, Rn );
784 sh4_x86.tstate = TSTATE_NONE;
788 load_reg( R_EAX, Rm );
789 load_reg( R_ECX, Rn );
790 OR_r32_r32( R_EAX, R_ECX );
791 store_reg( R_ECX, Rn );
792 sh4_x86.tstate = TSTATE_NONE;
796 load_reg( R_EAX, 0 );
797 OR_imm32_r32(imm, R_EAX);
798 store_reg( R_EAX, 0 );
799 sh4_x86.tstate = TSTATE_NONE;
801 OR.B #imm, @(R0, GBR) {:
803 load_reg( R_EAX, 0 );
804 load_spreg( R_ECX, R_GBR );
805 ADD_r32_r32( R_ECX, R_EAX );
806 MMU_TRANSLATE_WRITE( R_EAX );
807 PUSH_realigned_r32(R_EAX);
808 MEM_READ_BYTE( R_EAX, R_EAX );
809 POP_realigned_r32(R_ECX);
810 OR_imm32_r32(imm, R_EAX );
811 MEM_WRITE_BYTE( R_ECX, R_EAX );
812 sh4_x86.tstate = TSTATE_NONE;
816 load_reg( R_EAX, Rn );
817 if( sh4_x86.tstate != TSTATE_C ) {
821 store_reg( R_EAX, Rn );
823 sh4_x86.tstate = TSTATE_C;
827 load_reg( R_EAX, Rn );
828 if( sh4_x86.tstate != TSTATE_C ) {
832 store_reg( R_EAX, Rn );
834 sh4_x86.tstate = TSTATE_C;
838 load_reg( R_EAX, Rn );
840 store_reg( R_EAX, Rn );
842 sh4_x86.tstate = TSTATE_C;
846 load_reg( R_EAX, Rn );
848 store_reg( R_EAX, Rn );
850 sh4_x86.tstate = TSTATE_C;
854 /* Annoyingly enough, not directly convertible */
855 load_reg( R_EAX, Rn );
856 load_reg( R_ECX, Rm );
857 CMP_imm32_r32( 0, R_ECX );
860 NEG_r32( R_ECX ); // 2
861 AND_imm8_r8( 0x1F, R_CL ); // 3
862 JE_rel8(emptysar); // 2
863 SAR_r32_CL( R_EAX ); // 2
866 JMP_TARGET(emptysar);
867 SAR_imm8_r32(31, R_EAX ); // 3
871 AND_imm8_r8( 0x1F, R_CL ); // 3
872 SHL_r32_CL( R_EAX ); // 2
875 store_reg( R_EAX, Rn );
876 sh4_x86.tstate = TSTATE_NONE;
880 load_reg( R_EAX, Rn );
881 load_reg( R_ECX, Rm );
882 CMP_imm32_r32( 0, R_ECX );
885 NEG_r32( R_ECX ); // 2
886 AND_imm8_r8( 0x1F, R_CL ); // 3
888 SHR_r32_CL( R_EAX ); // 2
891 JMP_TARGET(emptyshr);
892 XOR_r32_r32( R_EAX, R_EAX );
896 AND_imm8_r8( 0x1F, R_CL ); // 3
897 SHL_r32_CL( R_EAX ); // 2
900 store_reg( R_EAX, Rn );
901 sh4_x86.tstate = TSTATE_NONE;
905 load_reg( R_EAX, Rn );
908 store_reg( R_EAX, Rn );
909 sh4_x86.tstate = TSTATE_C;
913 load_reg( R_EAX, Rn );
916 store_reg( R_EAX, Rn );
917 sh4_x86.tstate = TSTATE_C;
921 load_reg( R_EAX, Rn );
924 store_reg( R_EAX, Rn );
925 sh4_x86.tstate = TSTATE_C;
929 load_reg( R_EAX, Rn );
930 SHL_imm8_r32( 2, R_EAX );
931 store_reg( R_EAX, Rn );
932 sh4_x86.tstate = TSTATE_NONE;
936 load_reg( R_EAX, Rn );
937 SHL_imm8_r32( 8, R_EAX );
938 store_reg( R_EAX, Rn );
939 sh4_x86.tstate = TSTATE_NONE;
943 load_reg( R_EAX, Rn );
944 SHL_imm8_r32( 16, R_EAX );
945 store_reg( R_EAX, Rn );
946 sh4_x86.tstate = TSTATE_NONE;
950 load_reg( R_EAX, Rn );
953 store_reg( R_EAX, Rn );
954 sh4_x86.tstate = TSTATE_C;
958 load_reg( R_EAX, Rn );
959 SHR_imm8_r32( 2, R_EAX );
960 store_reg( R_EAX, Rn );
961 sh4_x86.tstate = TSTATE_NONE;
965 load_reg( R_EAX, Rn );
966 SHR_imm8_r32( 8, R_EAX );
967 store_reg( R_EAX, Rn );
968 sh4_x86.tstate = TSTATE_NONE;
972 load_reg( R_EAX, Rn );
973 SHR_imm8_r32( 16, R_EAX );
974 store_reg( R_EAX, Rn );
975 sh4_x86.tstate = TSTATE_NONE;
979 load_reg( R_EAX, Rm );
980 load_reg( R_ECX, Rn );
981 SUB_r32_r32( R_EAX, R_ECX );
982 store_reg( R_ECX, Rn );
983 sh4_x86.tstate = TSTATE_NONE;
987 load_reg( R_EAX, Rm );
988 load_reg( R_ECX, Rn );
989 if( sh4_x86.tstate != TSTATE_C ) {
992 SBB_r32_r32( R_EAX, R_ECX );
993 store_reg( R_ECX, Rn );
995 sh4_x86.tstate = TSTATE_C;
999 load_reg( R_EAX, Rm );
1000 load_reg( R_ECX, Rn );
1001 SUB_r32_r32( R_EAX, R_ECX );
1002 store_reg( R_ECX, Rn );
1004 sh4_x86.tstate = TSTATE_O;
1007 COUNT_INST(I_SWAPB);
1008 load_reg( R_EAX, Rm );
1009 XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
1010 store_reg( R_EAX, Rn );
1013 COUNT_INST(I_SWAPB);
1014 load_reg( R_EAX, Rm );
1015 MOV_r32_r32( R_EAX, R_ECX );
1016 SHL_imm8_r32( 16, R_ECX );
1017 SHR_imm8_r32( 16, R_EAX );
1018 OR_r32_r32( R_EAX, R_ECX );
1019 store_reg( R_ECX, Rn );
1020 sh4_x86.tstate = TSTATE_NONE;
1024 load_reg( R_EAX, Rn );
1025 MMU_TRANSLATE_WRITE( R_EAX );
1026 PUSH_realigned_r32( R_EAX );
1027 MEM_READ_BYTE( R_EAX, R_EAX );
1028 TEST_r8_r8( R_AL, R_AL );
1030 OR_imm8_r8( 0x80, R_AL );
1031 POP_realigned_r32( R_ECX );
1032 MEM_WRITE_BYTE( R_ECX, R_EAX );
1033 sh4_x86.tstate = TSTATE_NONE;
1037 load_reg( R_EAX, Rm );
1038 load_reg( R_ECX, Rn );
1039 TEST_r32_r32( R_EAX, R_ECX );
1041 sh4_x86.tstate = TSTATE_E;
1045 load_reg( R_EAX, 0 );
1046 TEST_imm32_r32( imm, R_EAX );
1048 sh4_x86.tstate = TSTATE_E;
1050 TST.B #imm, @(R0, GBR) {:
1052 load_reg( R_EAX, 0);
1053 load_reg( R_ECX, R_GBR);
1054 ADD_r32_r32( R_ECX, R_EAX );
1055 MMU_TRANSLATE_READ( R_EAX );
1056 MEM_READ_BYTE( R_EAX, R_EAX );
1057 TEST_imm8_r8( imm, R_AL );
1059 sh4_x86.tstate = TSTATE_E;
1063 load_reg( R_EAX, Rm );
1064 load_reg( R_ECX, Rn );
1065 XOR_r32_r32( R_EAX, R_ECX );
1066 store_reg( R_ECX, Rn );
1067 sh4_x86.tstate = TSTATE_NONE;
1071 load_reg( R_EAX, 0 );
1072 XOR_imm32_r32( imm, R_EAX );
1073 store_reg( R_EAX, 0 );
1074 sh4_x86.tstate = TSTATE_NONE;
1076 XOR.B #imm, @(R0, GBR) {:
1078 load_reg( R_EAX, 0 );
1079 load_spreg( R_ECX, R_GBR );
1080 ADD_r32_r32( R_ECX, R_EAX );
1081 MMU_TRANSLATE_WRITE( R_EAX );
1082 PUSH_realigned_r32(R_EAX);
1083 MEM_READ_BYTE(R_EAX, R_EAX);
1084 POP_realigned_r32(R_ECX);
1085 XOR_imm32_r32( imm, R_EAX );
1086 MEM_WRITE_BYTE( R_ECX, R_EAX );
1087 sh4_x86.tstate = TSTATE_NONE;
1090 COUNT_INST(I_XTRCT);
1091 load_reg( R_EAX, Rm );
1092 load_reg( R_ECX, Rn );
1093 SHL_imm8_r32( 16, R_EAX );
1094 SHR_imm8_r32( 16, R_ECX );
1095 OR_r32_r32( R_EAX, R_ECX );
1096 store_reg( R_ECX, Rn );
1097 sh4_x86.tstate = TSTATE_NONE;
1100 /* Data move instructions */
1103 load_reg( R_EAX, Rm );
1104 store_reg( R_EAX, Rn );
1108 load_imm32( R_EAX, imm );
1109 store_reg( R_EAX, Rn );
1113 load_reg( R_EAX, Rn );
1114 MMU_TRANSLATE_WRITE( R_EAX );
1115 load_reg( R_EDX, Rm );
1116 MEM_WRITE_BYTE( R_EAX, R_EDX );
1117 sh4_x86.tstate = TSTATE_NONE;
1121 load_reg( R_EAX, Rn );
1122 ADD_imm8s_r32( -1, R_EAX );
1123 MMU_TRANSLATE_WRITE( R_EAX );
1124 load_reg( R_EDX, Rm );
1125 ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
1126 MEM_WRITE_BYTE( R_EAX, R_EDX );
1127 sh4_x86.tstate = TSTATE_NONE;
1129 MOV.B Rm, @(R0, Rn) {:
1131 load_reg( R_EAX, 0 );
1132 load_reg( R_ECX, Rn );
1133 ADD_r32_r32( R_ECX, R_EAX );
1134 MMU_TRANSLATE_WRITE( R_EAX );
1135 load_reg( R_EDX, Rm );
1136 MEM_WRITE_BYTE( R_EAX, R_EDX );
1137 sh4_x86.tstate = TSTATE_NONE;
1139 MOV.B R0, @(disp, GBR) {:
1141 load_spreg( R_EAX, R_GBR );
1142 ADD_imm32_r32( disp, R_EAX );
1143 MMU_TRANSLATE_WRITE( R_EAX );
1144 load_reg( R_EDX, 0 );
1145 MEM_WRITE_BYTE( R_EAX, R_EDX );
1146 sh4_x86.tstate = TSTATE_NONE;
1148 MOV.B R0, @(disp, Rn) {:
1150 load_reg( R_EAX, Rn );
1151 ADD_imm32_r32( disp, R_EAX );
1152 MMU_TRANSLATE_WRITE( R_EAX );
1153 load_reg( R_EDX, 0 );
1154 MEM_WRITE_BYTE( R_EAX, R_EDX );
1155 sh4_x86.tstate = TSTATE_NONE;
1159 load_reg( R_EAX, Rm );
1160 MMU_TRANSLATE_READ( R_EAX );
1161 MEM_READ_BYTE( R_EAX, R_EAX );
1162 store_reg( R_EAX, Rn );
1163 sh4_x86.tstate = TSTATE_NONE;
1167 load_reg( R_EAX, Rm );
1168 MMU_TRANSLATE_READ( R_EAX );
1169 ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
1170 MEM_READ_BYTE( R_EAX, R_EAX );
1171 store_reg( R_EAX, Rn );
1172 sh4_x86.tstate = TSTATE_NONE;
1174 MOV.B @(R0, Rm), Rn {:
1176 load_reg( R_EAX, 0 );
1177 load_reg( R_ECX, Rm );
1178 ADD_r32_r32( R_ECX, R_EAX );
1179 MMU_TRANSLATE_READ( R_EAX )
1180 MEM_READ_BYTE( R_EAX, R_EAX );
1181 store_reg( R_EAX, Rn );
1182 sh4_x86.tstate = TSTATE_NONE;
1184 MOV.B @(disp, GBR), R0 {:
1186 load_spreg( R_EAX, R_GBR );
1187 ADD_imm32_r32( disp, R_EAX );
1188 MMU_TRANSLATE_READ( R_EAX );
1189 MEM_READ_BYTE( R_EAX, R_EAX );
1190 store_reg( R_EAX, 0 );
1191 sh4_x86.tstate = TSTATE_NONE;
1193 MOV.B @(disp, Rm), R0 {:
1195 load_reg( R_EAX, Rm );
1196 ADD_imm32_r32( disp, R_EAX );
1197 MMU_TRANSLATE_READ( R_EAX );
1198 MEM_READ_BYTE( R_EAX, R_EAX );
1199 store_reg( R_EAX, 0 );
1200 sh4_x86.tstate = TSTATE_NONE;
1204 load_reg( R_EAX, Rn );
1205 check_walign32(R_EAX);
1206 MMU_TRANSLATE_WRITE( R_EAX );
1207 load_reg( R_EDX, Rm );
1208 MEM_WRITE_LONG( R_EAX, R_EDX );
1209 sh4_x86.tstate = TSTATE_NONE;
1213 load_reg( R_EAX, Rn );
1214 ADD_imm8s_r32( -4, R_EAX );
1215 check_walign32( R_EAX );
1216 MMU_TRANSLATE_WRITE( R_EAX );
1217 load_reg( R_EDX, Rm );
1218 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
1219 MEM_WRITE_LONG( R_EAX, R_EDX );
1220 sh4_x86.tstate = TSTATE_NONE;
1222 MOV.L Rm, @(R0, Rn) {:
1224 load_reg( R_EAX, 0 );
1225 load_reg( R_ECX, Rn );
1226 ADD_r32_r32( R_ECX, R_EAX );
1227 check_walign32( R_EAX );
1228 MMU_TRANSLATE_WRITE( R_EAX );
1229 load_reg( R_EDX, Rm );
1230 MEM_WRITE_LONG( R_EAX, R_EDX );
1231 sh4_x86.tstate = TSTATE_NONE;
1233 MOV.L R0, @(disp, GBR) {:
1235 load_spreg( R_EAX, R_GBR );
1236 ADD_imm32_r32( disp, R_EAX );
1237 check_walign32( R_EAX );
1238 MMU_TRANSLATE_WRITE( R_EAX );
1239 load_reg( R_EDX, 0 );
1240 MEM_WRITE_LONG( R_EAX, R_EDX );
1241 sh4_x86.tstate = TSTATE_NONE;
1243 MOV.L Rm, @(disp, Rn) {:
1245 load_reg( R_EAX, Rn );
1246 ADD_imm32_r32( disp, R_EAX );
1247 check_walign32( R_EAX );
1248 MMU_TRANSLATE_WRITE( R_EAX );
1249 load_reg( R_EDX, Rm );
1250 MEM_WRITE_LONG( R_EAX, R_EDX );
1251 sh4_x86.tstate = TSTATE_NONE;
1255 load_reg( R_EAX, Rm );
1256 check_ralign32( R_EAX );
1257 MMU_TRANSLATE_READ( R_EAX );
1258 MEM_READ_LONG( R_EAX, R_EAX );
1259 store_reg( R_EAX, Rn );
1260 sh4_x86.tstate = TSTATE_NONE;
1264 load_reg( R_EAX, Rm );
1265 check_ralign32( R_EAX );
1266 MMU_TRANSLATE_READ( R_EAX );
1267 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1268 MEM_READ_LONG( R_EAX, R_EAX );
1269 store_reg( R_EAX, Rn );
1270 sh4_x86.tstate = TSTATE_NONE;
1272 MOV.L @(R0, Rm), Rn {:
1274 load_reg( R_EAX, 0 );
1275 load_reg( R_ECX, Rm );
1276 ADD_r32_r32( R_ECX, R_EAX );
1277 check_ralign32( R_EAX );
1278 MMU_TRANSLATE_READ( R_EAX );
1279 MEM_READ_LONG( R_EAX, R_EAX );
1280 store_reg( R_EAX, Rn );
1281 sh4_x86.tstate = TSTATE_NONE;
1283 MOV.L @(disp, GBR), R0 {:
1285 load_spreg( R_EAX, R_GBR );
1286 ADD_imm32_r32( disp, R_EAX );
1287 check_ralign32( R_EAX );
1288 MMU_TRANSLATE_READ( R_EAX );
1289 MEM_READ_LONG( R_EAX, R_EAX );
1290 store_reg( R_EAX, 0 );
1291 sh4_x86.tstate = TSTATE_NONE;
1293 MOV.L @(disp, PC), Rn {:
1294 COUNT_INST(I_MOVLPC);
1295 if( sh4_x86.in_delay_slot ) {
1298 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1299 if( IS_IN_ICACHE(target) ) {
1300 // If the target address is in the same page as the code, it's
1301 // pretty safe to just ref it directly and circumvent the whole
1302 // memory subsystem. (this is a big performance win)
1304 // FIXME: There's a corner-case that's not handled here when
1305 // the current code-page is in the ITLB but not in the UTLB.
1306 // (should generate a TLB miss although need to test SH4
1307 // behaviour to confirm) Unlikely to be anyone depending on this
1308 // behaviour though.
1309 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1310 MOV_moff32_EAX( ptr );
1312 // Note: we use sh4r.pc for the calc as we could be running at a
1313 // different virtual address than the translation was done with,
1314 // but we can safely assume that the low bits are the same.
1315 load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1316 ADD_sh4r_r32( R_PC, R_EAX );
1317 MMU_TRANSLATE_READ( R_EAX );
1318 MEM_READ_LONG( R_EAX, R_EAX );
1319 sh4_x86.tstate = TSTATE_NONE;
1321 store_reg( R_EAX, Rn );
1324 MOV.L @(disp, Rm), Rn {:
1326 load_reg( R_EAX, Rm );
1327 ADD_imm8s_r32( disp, R_EAX );
1328 check_ralign32( R_EAX );
1329 MMU_TRANSLATE_READ( R_EAX );
1330 MEM_READ_LONG( R_EAX, R_EAX );
1331 store_reg( R_EAX, Rn );
1332 sh4_x86.tstate = TSTATE_NONE;
1336 load_reg( R_EAX, Rn );
1337 check_walign16( R_EAX );
1338 MMU_TRANSLATE_WRITE( R_EAX )
1339 load_reg( R_EDX, Rm );
1340 MEM_WRITE_WORD( R_EAX, R_EDX );
1341 sh4_x86.tstate = TSTATE_NONE;
1345 load_reg( R_EAX, Rn );
1346 ADD_imm8s_r32( -2, R_EAX );
1347 check_walign16( R_EAX );
1348 MMU_TRANSLATE_WRITE( R_EAX );
1349 load_reg( R_EDX, Rm );
1350 ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
1351 MEM_WRITE_WORD( R_EAX, R_EDX );
1352 sh4_x86.tstate = TSTATE_NONE;
1354 MOV.W Rm, @(R0, Rn) {:
1356 load_reg( R_EAX, 0 );
1357 load_reg( R_ECX, Rn );
1358 ADD_r32_r32( R_ECX, R_EAX );
1359 check_walign16( R_EAX );
1360 MMU_TRANSLATE_WRITE( R_EAX );
1361 load_reg( R_EDX, Rm );
1362 MEM_WRITE_WORD( R_EAX, R_EDX );
1363 sh4_x86.tstate = TSTATE_NONE;
1365 MOV.W R0, @(disp, GBR) {:
1367 load_spreg( R_EAX, R_GBR );
1368 ADD_imm32_r32( disp, R_EAX );
1369 check_walign16( R_EAX );
1370 MMU_TRANSLATE_WRITE( R_EAX );
1371 load_reg( R_EDX, 0 );
1372 MEM_WRITE_WORD( R_EAX, R_EDX );
1373 sh4_x86.tstate = TSTATE_NONE;
1375 MOV.W R0, @(disp, Rn) {:
1377 load_reg( R_EAX, Rn );
1378 ADD_imm32_r32( disp, R_EAX );
1379 check_walign16( R_EAX );
1380 MMU_TRANSLATE_WRITE( R_EAX );
1381 load_reg( R_EDX, 0 );
1382 MEM_WRITE_WORD( R_EAX, R_EDX );
1383 sh4_x86.tstate = TSTATE_NONE;
1387 load_reg( R_EAX, Rm );
1388 check_ralign16( R_EAX );
1389 MMU_TRANSLATE_READ( R_EAX );
1390 MEM_READ_WORD( R_EAX, R_EAX );
1391 store_reg( R_EAX, Rn );
1392 sh4_x86.tstate = TSTATE_NONE;
1396 load_reg( R_EAX, Rm );
1397 check_ralign16( R_EAX );
1398 MMU_TRANSLATE_READ( R_EAX );
1399 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1400 MEM_READ_WORD( R_EAX, R_EAX );
1401 store_reg( R_EAX, Rn );
1402 sh4_x86.tstate = TSTATE_NONE;
1404 MOV.W @(R0, Rm), Rn {:
1406 load_reg( R_EAX, 0 );
1407 load_reg( R_ECX, Rm );
1408 ADD_r32_r32( R_ECX, R_EAX );
1409 check_ralign16( R_EAX );
1410 MMU_TRANSLATE_READ( R_EAX );
1411 MEM_READ_WORD( R_EAX, R_EAX );
1412 store_reg( R_EAX, Rn );
1413 sh4_x86.tstate = TSTATE_NONE;
1415 MOV.W @(disp, GBR), R0 {:
1417 load_spreg( R_EAX, R_GBR );
1418 ADD_imm32_r32( disp, R_EAX );
1419 check_ralign16( R_EAX );
1420 MMU_TRANSLATE_READ( R_EAX );
1421 MEM_READ_WORD( R_EAX, R_EAX );
1422 store_reg( R_EAX, 0 );
1423 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(disp, PC), Rn: PC-relative 16-bit load, sign-extended into Rn.
// NOTE(review): lines orig 1428-1429 (presumably the illegal-slot raise),
// 1436 (the else) and 1442 (closing brace) are elided from this extraction.
1425 MOV.W @(disp, PC), Rn {:
// PC-relative loads are illegal in a delay slot.
1427 if( sh4_x86.in_delay_slot ) {
1430 // See comments for MOV.L @(disp, PC), Rn
1431 uint32_t target = pc + disp + 4;
// Fast path: fetch the constant from the host icache copy at translation
// time, then sign-extend the 16-bit value to 32 bits.
1432 if( IS_IN_ICACHE(target) ) {
1433 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1434 MOV_moff32_EAX( ptr );
1435 MOVSX_r16_r32( R_EAX, R_EAX );
// Slow path: compute the address relative to the runtime PC and read the
// word through the MMU (MEM_READ_WORD sign-extends — TODO confirm).
1437 load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1438 ADD_sh4r_r32( R_PC, R_EAX );
1439 MMU_TRANSLATE_READ( R_EAX );
1440 MEM_READ_WORD( R_EAX, R_EAX );
1441 sh4_x86.tstate = TSTATE_NONE;
1443 store_reg( R_EAX, Rn );
1446 MOV.W @(disp, Rm), R0 {:
1448 load_reg( R_EAX, Rm );
1449 ADD_imm32_r32( disp, R_EAX );
1450 check_ralign16( R_EAX );
1451 MMU_TRANSLATE_READ( R_EAX );
1452 MEM_READ_WORD( R_EAX, R_EAX );
1453 store_reg( R_EAX, 0 );
1454 sh4_x86.tstate = TSTATE_NONE;
// MOVA @(disp, PC), R0: compute the PC-relative effective address
// (pc & ~3) + disp + 4 into R0 (no memory access).
// NOTE(review): lines orig 1459-1460 (presumably the illegal-slot raise and
// closing brace) are elided from this extraction.
1456 MOVA @(disp, PC), R0 {:
// MOVA is illegal in a delay slot.
1458 if( sh4_x86.in_delay_slot ) {
// Computed relative to the runtime PC so the block works at any virtual
// address; the low two bits are masked off per SH4 PC-relative addressing.
1461 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1462 ADD_sh4r_r32( R_PC, R_ECX );
1463 store_reg( R_ECX, 0 );
1464 sh4_x86.tstate = TSTATE_NONE;
// Body of MOVCA.L R0, @Rn: implemented as a plain 32-bit store of R0 to @Rn.
// NOTE(review): the cache-allocate semantics of MOVCA.L are not modelled
// here — it behaves identically to MOV.L R0, @Rn in this translator.
1468 COUNT_INST(I_MOVCA);
1469 load_reg( R_EAX, Rn );
// Raise an address error unless Rn is 4-byte aligned.
1470 check_walign32( R_EAX );
1471 MMU_TRANSLATE_WRITE( R_EAX );
1472 load_reg( R_EDX, 0 );
1473 MEM_WRITE_LONG( R_EAX, R_EDX );
1474 sh4_x86.tstate = TSTATE_NONE;
1477 /* Control transfer instructions */
1480 if( sh4_x86.in_delay_slot ) {
1483 sh4vma_t target = disp + pc + 4;
1484 JT_rel8( nottaken );
1485 exit_block_rel(target, pc+2 );
1486 JMP_TARGET(nottaken);
1492 if( sh4_x86.in_delay_slot ) {
1495 sh4_x86.in_delay_slot = DELAY_PC;
1496 if( UNTRANSLATABLE(pc+2) ) {
1497 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1499 ADD_imm32_r32( disp, R_EAX );
1500 JMP_TARGET(nottaken);
1501 ADD_sh4r_r32( R_PC, R_EAX );
1502 store_spreg( R_EAX, R_NEW_PC );
1503 exit_block_emu(pc+2);
1504 sh4_x86.branch_taken = TRUE;
1507 if( sh4_x86.tstate == TSTATE_NONE ) {
1508 CMP_imm8s_sh4r( 1, R_T );
1509 sh4_x86.tstate = TSTATE_E;
1511 sh4vma_t target = disp + pc + 4;
1512 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
1513 sh4_translate_instruction(pc+2);
1514 exit_block_rel( target, pc+4 );
1517 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1518 sh4_translate_instruction(pc+2);
1525 if( sh4_x86.in_delay_slot ) {
1528 sh4_x86.in_delay_slot = DELAY_PC;
1529 sh4_x86.branch_taken = TRUE;
1530 if( UNTRANSLATABLE(pc+2) ) {
1531 load_spreg( R_EAX, R_PC );
1532 ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
1533 store_spreg( R_EAX, R_NEW_PC );
1534 exit_block_emu(pc+2);
1537 sh4_translate_instruction( pc + 2 );
1538 exit_block_rel( disp + pc + 4, pc+4 );
1545 if( sh4_x86.in_delay_slot ) {
1548 load_spreg( R_EAX, R_PC );
1549 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1550 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1551 store_spreg( R_EAX, R_NEW_PC );
1552 sh4_x86.in_delay_slot = DELAY_PC;
1553 sh4_x86.tstate = TSTATE_NONE;
1554 sh4_x86.branch_taken = TRUE;
1555 if( UNTRANSLATABLE(pc+2) ) {
1556 exit_block_emu(pc+2);
1559 sh4_translate_instruction( pc + 2 );
1560 exit_block_newpcset(pc+2);
1567 if( sh4_x86.in_delay_slot ) {
1570 load_spreg( R_EAX, R_PC );
1571 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1572 store_spreg( R_EAX, R_PR );
1573 sh4_x86.in_delay_slot = DELAY_PC;
1574 sh4_x86.branch_taken = TRUE;
1575 sh4_x86.tstate = TSTATE_NONE;
1576 if( UNTRANSLATABLE(pc+2) ) {
1577 ADD_imm32_r32( disp, R_EAX );
1578 store_spreg( R_EAX, R_NEW_PC );
1579 exit_block_emu(pc+2);
1582 sh4_translate_instruction( pc + 2 );
1583 exit_block_rel( disp + pc + 4, pc+4 );
1590 if( sh4_x86.in_delay_slot ) {
1593 load_spreg( R_EAX, R_PC );
1594 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1595 store_spreg( R_EAX, R_PR );
1596 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1597 store_spreg( R_EAX, R_NEW_PC );
1599 sh4_x86.in_delay_slot = DELAY_PC;
1600 sh4_x86.tstate = TSTATE_NONE;
1601 sh4_x86.branch_taken = TRUE;
1602 if( UNTRANSLATABLE(pc+2) ) {
1603 exit_block_emu(pc+2);
1606 sh4_translate_instruction( pc + 2 );
1607 exit_block_newpcset(pc+2);
1614 if( sh4_x86.in_delay_slot ) {
1617 sh4vma_t target = disp + pc + 4;
1618 JF_rel8( nottaken );
1619 exit_block_rel(target, pc+2 );
1620 JMP_TARGET(nottaken);
1626 if( sh4_x86.in_delay_slot ) {
1629 sh4_x86.in_delay_slot = DELAY_PC;
1630 if( UNTRANSLATABLE(pc+2) ) {
1631 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1633 ADD_imm32_r32( disp, R_EAX );
1634 JMP_TARGET(nottaken);
1635 ADD_sh4r_r32( R_PC, R_EAX );
1636 store_spreg( R_EAX, R_NEW_PC );
1637 exit_block_emu(pc+2);
1638 sh4_x86.branch_taken = TRUE;
1641 if( sh4_x86.tstate == TSTATE_NONE ) {
1642 CMP_imm8s_sh4r( 1, R_T );
1643 sh4_x86.tstate = TSTATE_E;
1645 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
1646 sh4_translate_instruction(pc+2);
1647 exit_block_rel( disp + pc + 4, pc+4 );
1649 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1650 sh4_translate_instruction(pc+2);
// Body of JMP @Rn: unconditional register-indirect branch with delay slot.
// NOTE(review): lines orig 1658-1659 (presumably the illegal-slot raise and
// closing brace) and 1666-1667 (else) are elided from this extraction.
// A branch is itself illegal in a delay slot.
1657 if( sh4_x86.in_delay_slot ) {
// Stash the target in new_pc; the branch only takes effect after the delay
// slot has executed.
1660 load_reg( R_ECX, Rn );
1661 store_spreg( R_ECX, R_NEW_PC );
1662 sh4_x86.in_delay_slot = DELAY_PC;
1663 sh4_x86.branch_taken = TRUE;
// If the delay-slot instruction can't be translated, bail out to the
// emulator core; otherwise translate it inline and exit via new_pc.
1664 if( UNTRANSLATABLE(pc+2) ) {
1665 exit_block_emu(pc+2);
1668 sh4_translate_instruction(pc+2);
1669 exit_block_newpcset(pc+2);
1676 if( sh4_x86.in_delay_slot ) {
1679 load_spreg( R_EAX, R_PC );
1680 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1681 store_spreg( R_EAX, R_PR );
1682 load_reg( R_ECX, Rn );
1683 store_spreg( R_ECX, R_NEW_PC );
1684 sh4_x86.in_delay_slot = DELAY_PC;
1685 sh4_x86.branch_taken = TRUE;
1686 sh4_x86.tstate = TSTATE_NONE;
1687 if( UNTRANSLATABLE(pc+2) ) {
1688 exit_block_emu(pc+2);
1691 sh4_translate_instruction(pc+2);
1692 exit_block_newpcset(pc+2);
1699 if( sh4_x86.in_delay_slot ) {
1703 load_spreg( R_ECX, R_SPC );
1704 store_spreg( R_ECX, R_NEW_PC );
1705 load_spreg( R_EAX, R_SSR );
1706 call_func1( sh4_write_sr, R_EAX );
1707 sh4_x86.in_delay_slot = DELAY_PC;
1708 sh4_x86.priv_checked = FALSE;
1709 sh4_x86.fpuen_checked = FALSE;
1710 sh4_x86.tstate = TSTATE_NONE;
1711 sh4_x86.branch_taken = TRUE;
1712 if( UNTRANSLATABLE(pc+2) ) {
1713 exit_block_emu(pc+2);
1716 sh4_translate_instruction(pc+2);
1717 exit_block_newpcset(pc+2);
1724 if( sh4_x86.in_delay_slot ) {
1727 load_spreg( R_ECX, R_PR );
1728 store_spreg( R_ECX, R_NEW_PC );
1729 sh4_x86.in_delay_slot = DELAY_PC;
1730 sh4_x86.branch_taken = TRUE;
1731 if( UNTRANSLATABLE(pc+2) ) {
1732 exit_block_emu(pc+2);
1735 sh4_translate_instruction(pc+2);
1736 exit_block_newpcset(pc+2);
// Body of TRAPA #imm: raise a trap exception.
// NOTE(review): lines orig 1744-1745 (presumably the illegal-slot raise and
// closing brace) are elided from this extraction.
1742 COUNT_INST(I_TRAPA);
// TRAPA is illegal in a delay slot.
1743 if( sh4_x86.in_delay_slot ) {
// Advance the in-memory PC to the instruction after the TRAPA before raising
// the trap, so the exception state (presumably SPC — TODO confirm in
// sh4_raise_trap) records the correct return address.
1746 load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1747 ADD_r32_sh4r( R_ECX, R_PC );
1748 load_imm32( R_EAX, imm );
1749 call_func1( sh4_raise_trap, R_EAX );
1750 sh4_x86.tstate = TSTATE_NONE;
// sh4r.pc has already been set by the trap, so exit without rewriting it.
1751 exit_block_pcset(pc);
1752 sh4_x86.branch_taken = TRUE;
1757 COUNT_INST(I_UNDEF);
1758 if( sh4_x86.in_delay_slot ) {
1761 JMP_exc(EXC_ILLEGAL);
// Body of CLRMAC: clear both halves of the MAC register.
1767 COUNT_INST(I_CLRMAC);
// xor eax,eax — zero via the idiomatic x86 self-xor, then store to both
// MACL and MACH.
1768 XOR_r32_r32(R_EAX, R_EAX);
1769 store_spreg( R_EAX, R_MACL );
1770 store_spreg( R_EAX, R_MACH );
// The XOR clobbered the host flags, so the cached T-state is invalid.
1771 sh4_x86.tstate = TSTATE_NONE;
1777 sh4_x86.tstate = TSTATE_C;
1783 sh4_x86.tstate = TSTATE_C;
1789 sh4_x86.tstate = TSTATE_C;
1795 sh4_x86.tstate = TSTATE_C;
1798 /* Floating point moves */
1800 COUNT_INST(I_FMOV1);
1801 /* As horrible as this looks, it's actually covering 5 separate cases:
1802 * 1. 32-bit fr-to-fr (PR=0)
1803 * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
1804 * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
1805 * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
1806 * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
1809 load_spreg( R_ECX, R_FPSCR );
1810 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1811 JNE_rel8(doublesize);
1812 load_fr( R_EAX, FRm ); // PR=0 branch
1813 store_fr( R_EAX, FRn );
1815 JMP_TARGET(doublesize);
1816 load_dr0( R_EAX, FRm );
1817 load_dr1( R_ECX, FRm );
1818 store_dr0( R_EAX, FRn );
1819 store_dr1( R_ECX, FRn );
1821 sh4_x86.tstate = TSTATE_NONE;
1824 COUNT_INST(I_FMOV2);
1826 load_reg( R_EAX, Rn );
1827 check_walign32( R_EAX );
1828 MMU_TRANSLATE_WRITE( R_EAX );
1829 load_spreg( R_EDX, R_FPSCR );
1830 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1831 JNE_rel8(doublesize);
1833 load_fr( R_ECX, FRm );
1834 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1837 JMP_TARGET(doublesize);
1838 load_dr0( R_ECX, FRm );
1839 load_dr1( R_EDX, FRm );
1840 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1842 sh4_x86.tstate = TSTATE_NONE;
1845 COUNT_INST(I_FMOV5);
1847 load_reg( R_EAX, Rm );
1848 check_ralign32( R_EAX );
1849 MMU_TRANSLATE_READ( R_EAX );
1850 load_spreg( R_EDX, R_FPSCR );
1851 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1852 JNE_rel8(doublesize);
1854 MEM_READ_LONG( R_EAX, R_EAX );
1855 store_fr( R_EAX, FRn );
1858 JMP_TARGET(doublesize);
1859 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1860 store_dr0( R_ECX, FRn );
1861 store_dr1( R_EAX, FRn );
1863 sh4_x86.tstate = TSTATE_NONE;
1866 COUNT_INST(I_FMOV3);
1868 load_reg( R_EAX, Rn );
1869 check_walign32( R_EAX );
1870 load_spreg( R_EDX, R_FPSCR );
1871 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1872 JNE_rel8(doublesize);
1874 ADD_imm8s_r32( -4, R_EAX );
1875 MMU_TRANSLATE_WRITE( R_EAX );
1876 load_fr( R_ECX, FRm );
1877 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1878 MEM_WRITE_LONG( R_EAX, R_ECX );
1881 JMP_TARGET(doublesize);
1882 ADD_imm8s_r32(-8,R_EAX);
1883 MMU_TRANSLATE_WRITE( R_EAX );
1884 load_dr0( R_ECX, FRm );
1885 load_dr1( R_EDX, FRm );
1886 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1887 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1890 sh4_x86.tstate = TSTATE_NONE;
1893 COUNT_INST(I_FMOV6);
1895 load_reg( R_EAX, Rm );
1896 check_ralign32( R_EAX );
1897 MMU_TRANSLATE_READ( R_EAX );
1898 load_spreg( R_EDX, R_FPSCR );
1899 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1900 JNE_rel8(doublesize);
1902 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1903 MEM_READ_LONG( R_EAX, R_EAX );
1904 store_fr( R_EAX, FRn );
1907 JMP_TARGET(doublesize);
1908 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1909 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1910 store_dr0( R_ECX, FRn );
1911 store_dr1( R_EAX, FRn );
1914 sh4_x86.tstate = TSTATE_NONE;
1916 FMOV FRm, @(R0, Rn) {:
1917 COUNT_INST(I_FMOV4);
1919 load_reg( R_EAX, Rn );
1920 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1921 check_walign32( R_EAX );
1922 MMU_TRANSLATE_WRITE( R_EAX );
1923 load_spreg( R_EDX, R_FPSCR );
1924 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1925 JNE_rel8(doublesize);
1927 load_fr( R_ECX, FRm );
1928 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1931 JMP_TARGET(doublesize);
1932 load_dr0( R_ECX, FRm );
1933 load_dr1( R_EDX, FRm );
1934 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1937 sh4_x86.tstate = TSTATE_NONE;
1939 FMOV @(R0, Rm), FRn {:
1940 COUNT_INST(I_FMOV7);
1942 load_reg( R_EAX, Rm );
1943 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1944 check_ralign32( R_EAX );
1945 MMU_TRANSLATE_READ( R_EAX );
1946 load_spreg( R_EDX, R_FPSCR );
1947 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1948 JNE_rel8(doublesize);
1950 MEM_READ_LONG( R_EAX, R_EAX );
1951 store_fr( R_EAX, FRn );
1954 JMP_TARGET(doublesize);
1955 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1956 store_dr0( R_ECX, FRn );
1957 store_dr1( R_EAX, FRn );
1960 sh4_x86.tstate = TSTATE_NONE;
1962 FLDI0 FRn {: /* IFF PR=0 */
1963 COUNT_INST(I_FLDI0);
1965 load_spreg( R_ECX, R_FPSCR );
1966 TEST_imm32_r32( FPSCR_PR, R_ECX );
1968 XOR_r32_r32( R_EAX, R_EAX );
1969 store_fr( R_EAX, FRn );
1971 sh4_x86.tstate = TSTATE_NONE;
1973 FLDI1 FRn {: /* IFF PR=0 */
1974 COUNT_INST(I_FLDI1);
1976 load_spreg( R_ECX, R_FPSCR );
1977 TEST_imm32_r32( FPSCR_PR, R_ECX );
1979 load_imm32(R_EAX, 0x3F800000);
1980 store_fr( R_EAX, FRn );
1982 sh4_x86.tstate = TSTATE_NONE;
1986 COUNT_INST(I_FLOAT);
1988 load_spreg( R_ECX, R_FPSCR );
1990 TEST_imm32_r32( FPSCR_PR, R_ECX );
1991 JNE_rel8(doubleprec);
1994 JMP_TARGET(doubleprec);
1997 sh4_x86.tstate = TSTATE_NONE;
2002 load_spreg( R_ECX, R_FPSCR );
2003 TEST_imm32_r32( FPSCR_PR, R_ECX );
2004 JNE_rel8(doubleprec);
2007 JMP_TARGET(doubleprec);
2010 load_imm32( R_ECX, (uint32_t)&max_int );
2011 FILD_r32ind( R_ECX );
2014 load_imm32( R_ECX, (uint32_t)&min_int ); // 5
2015 FILD_r32ind( R_ECX ); // 2
2017 JAE_rel8( sat2 ); // 2
2018 load_imm32( R_EAX, (uint32_t)&save_fcw );
2019 FNSTCW_r32ind( R_EAX );
2020 load_imm32( R_EDX, (uint32_t)&trunc_fcw );
2021 FLDCW_r32ind( R_EDX );
2022 FISTP_sh4r(R_FPUL); // 3
2023 FLDCW_r32ind( R_EAX );
2028 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
2029 store_spreg( R_ECX, R_FPUL );
2032 sh4_x86.tstate = TSTATE_NONE;
2037 load_fr( R_EAX, FRm );
2038 store_spreg( R_EAX, R_FPUL );
2039 sh4_x86.tstate = TSTATE_NONE;
2044 load_spreg( R_EAX, R_FPUL );
2045 store_fr( R_EAX, FRn );
2046 sh4_x86.tstate = TSTATE_NONE;
2049 COUNT_INST(I_FCNVDS);
2051 load_spreg( R_ECX, R_FPSCR );
2052 TEST_imm32_r32( FPSCR_PR, R_ECX );
2053 JE_rel8(end); // only when PR=1
2057 sh4_x86.tstate = TSTATE_NONE;
2060 COUNT_INST(I_FCNVSD);
2062 load_spreg( R_ECX, R_FPSCR );
2063 TEST_imm32_r32( FPSCR_PR, R_ECX );
2064 JE_rel8(end); // only when PR=1
2068 sh4_x86.tstate = TSTATE_NONE;
2071 /* Floating point instructions */
2075 load_spreg( R_ECX, R_FPSCR );
2076 TEST_imm32_r32( FPSCR_PR, R_ECX );
2077 JNE_rel8(doubleprec);
2082 JMP_TARGET(doubleprec);
2087 sh4_x86.tstate = TSTATE_NONE;
2092 load_spreg( R_ECX, R_FPSCR );
2093 TEST_imm32_r32( FPSCR_PR, R_ECX );
2094 JNE_rel8(doubleprec);
2100 JMP_TARGET(doubleprec);
2106 sh4_x86.tstate = TSTATE_NONE;
2111 load_spreg( R_ECX, R_FPSCR );
2112 TEST_imm32_r32( FPSCR_PR, R_ECX );
2113 JNE_rel8(doubleprec);
2119 JMP_TARGET(doubleprec);
2125 sh4_x86.tstate = TSTATE_NONE;
2127 FMAC FR0, FRm, FRn {:
2130 load_spreg( R_ECX, R_FPSCR );
2131 TEST_imm32_r32( FPSCR_PR, R_ECX );
2132 JNE_rel8(doubleprec);
2140 JMP_TARGET(doubleprec);
2148 sh4_x86.tstate = TSTATE_NONE;
2154 load_spreg( R_ECX, R_FPSCR );
2155 TEST_imm32_r32( FPSCR_PR, R_ECX );
2156 JNE_rel8(doubleprec);
2162 JMP_TARGET(doubleprec);
2168 sh4_x86.tstate = TSTATE_NONE;
2173 load_spreg( R_ECX, R_FPSCR );
2174 TEST_imm32_r32( FPSCR_PR, R_ECX );
2175 JNE_rel8(doubleprec);
2180 JMP_TARGET(doubleprec);
2185 sh4_x86.tstate = TSTATE_NONE;
2188 COUNT_INST(I_FSRRA);
2190 load_spreg( R_ECX, R_FPSCR );
2191 TEST_imm32_r32( FPSCR_PR, R_ECX );
2192 JNE_rel8(end); // PR=0 only
2199 sh4_x86.tstate = TSTATE_NONE;
2202 COUNT_INST(I_FSQRT);
2204 load_spreg( R_ECX, R_FPSCR );
2205 TEST_imm32_r32( FPSCR_PR, R_ECX );
2206 JNE_rel8(doubleprec);
2211 JMP_TARGET(doubleprec);
2216 sh4_x86.tstate = TSTATE_NONE;
2221 load_spreg( R_ECX, R_FPSCR );
2222 TEST_imm32_r32( FPSCR_PR, R_ECX );
2223 JNE_rel8(doubleprec);
2229 JMP_TARGET(doubleprec);
2235 sh4_x86.tstate = TSTATE_NONE;
2239 COUNT_INST(I_FCMPEQ);
2241 load_spreg( R_ECX, R_FPSCR );
2242 TEST_imm32_r32( FPSCR_PR, R_ECX );
2243 JNE_rel8(doubleprec);
2247 JMP_TARGET(doubleprec);
2254 sh4_x86.tstate = TSTATE_NONE;
2257 COUNT_INST(I_FCMPGT);
2259 load_spreg( R_ECX, R_FPSCR );
2260 TEST_imm32_r32( FPSCR_PR, R_ECX );
2261 JNE_rel8(doubleprec);
2265 JMP_TARGET(doubleprec);
2272 sh4_x86.tstate = TSTATE_NONE;
2278 load_spreg( R_ECX, R_FPSCR );
2279 TEST_imm32_r32( FPSCR_PR, R_ECX );
2280 JNE_rel8(doubleprec );
2281 LEA_sh4r_r32( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
2282 load_spreg( R_EDX, R_FPUL );
2283 call_func2( sh4_fsca, R_EDX, R_ECX );
2284 JMP_TARGET(doubleprec);
2285 sh4_x86.tstate = TSTATE_NONE;
2290 load_spreg( R_ECX, R_FPSCR );
2291 TEST_imm32_r32( FPSCR_PR, R_ECX );
2292 JNE_rel8( doubleprec);
2297 push_fr( (FVm<<2)+1);
2298 push_fr( (FVn<<2)+1);
2301 push_fr( (FVm<<2)+2);
2302 push_fr( (FVn<<2)+2);
2305 push_fr( (FVm<<2)+3);
2306 push_fr( (FVn<<2)+3);
2309 pop_fr( (FVn<<2)+3);
2310 JMP_TARGET(doubleprec);
2311 sh4_x86.tstate = TSTATE_NONE;
2316 load_spreg( R_ECX, R_FPSCR );
2317 TEST_imm32_r32( FPSCR_PR, R_ECX );
2318 JNE_rel8( doubleprec );
2319 LEA_sh4r_r32( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
2320 call_func1( sh4_ftrv, R_EDX ); // 12
2321 JMP_TARGET(doubleprec);
2322 sh4_x86.tstate = TSTATE_NONE;
// Body of FRCHG: toggle FPSCR.FR, switching the active FP register bank.
// NOTE(review): line orig 2327 (presumably check_fpuen) is elided from this
// extraction.
2326 COUNT_INST(I_FRCHG);
2328 load_spreg( R_ECX, R_FPSCR );
2329 XOR_imm32_r32( FPSCR_FR, R_ECX );
2330 store_spreg( R_ECX, R_FPSCR );
// The banks are physically swapped in sh4r by the helper, not lazily.
2331 call_func0( sh4_switch_fr_banks );
2332 sh4_x86.tstate = TSTATE_NONE;
// Body of FSCHG: toggle FPSCR.SZ (single/double transfer size).
// Unlike FRCHG, no register-bank swap is needed — only the flag changes.
// NOTE(review): line orig 2336 (presumably check_fpuen) is elided from this
// extraction.
2335 COUNT_INST(I_FSCHG);
2337 load_spreg( R_ECX, R_FPSCR );
2338 XOR_imm32_r32( FPSCR_SZ, R_ECX );
2339 store_spreg( R_ECX, R_FPSCR );
2340 sh4_x86.tstate = TSTATE_NONE;
2343 /* Processor control instructions */
2345 COUNT_INST(I_LDCSR);
2346 if( sh4_x86.in_delay_slot ) {
2350 load_reg( R_EAX, Rm );
2351 call_func1( sh4_write_sr, R_EAX );
2352 sh4_x86.priv_checked = FALSE;
2353 sh4_x86.fpuen_checked = FALSE;
2354 sh4_x86.tstate = TSTATE_NONE;
2359 load_reg( R_EAX, Rm );
2360 store_spreg( R_EAX, R_GBR );
2365 load_reg( R_EAX, Rm );
2366 store_spreg( R_EAX, R_VBR );
2367 sh4_x86.tstate = TSTATE_NONE;
2372 load_reg( R_EAX, Rm );
2373 store_spreg( R_EAX, R_SSR );
2374 sh4_x86.tstate = TSTATE_NONE;
2379 load_reg( R_EAX, Rm );
2380 store_spreg( R_EAX, R_SGR );
2381 sh4_x86.tstate = TSTATE_NONE;
2386 load_reg( R_EAX, Rm );
2387 store_spreg( R_EAX, R_SPC );
2388 sh4_x86.tstate = TSTATE_NONE;
2393 load_reg( R_EAX, Rm );
2394 store_spreg( R_EAX, R_DBR );
2395 sh4_x86.tstate = TSTATE_NONE;
2400 load_reg( R_EAX, Rm );
2401 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2402 sh4_x86.tstate = TSTATE_NONE;
2406 load_reg( R_EAX, Rm );
2407 check_ralign32( R_EAX );
2408 MMU_TRANSLATE_READ( R_EAX );
2409 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2410 MEM_READ_LONG( R_EAX, R_EAX );
2411 store_spreg( R_EAX, R_GBR );
2412 sh4_x86.tstate = TSTATE_NONE;
2415 COUNT_INST(I_LDCSRM);
2416 if( sh4_x86.in_delay_slot ) {
2420 load_reg( R_EAX, Rm );
2421 check_ralign32( R_EAX );
2422 MMU_TRANSLATE_READ( R_EAX );
2423 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2424 MEM_READ_LONG( R_EAX, R_EAX );
2425 call_func1( sh4_write_sr, R_EAX );
2426 sh4_x86.priv_checked = FALSE;
2427 sh4_x86.fpuen_checked = FALSE;
2428 sh4_x86.tstate = TSTATE_NONE;
2434 load_reg( R_EAX, Rm );
2435 check_ralign32( R_EAX );
2436 MMU_TRANSLATE_READ( R_EAX );
2437 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2438 MEM_READ_LONG( R_EAX, R_EAX );
2439 store_spreg( R_EAX, R_VBR );
2440 sh4_x86.tstate = TSTATE_NONE;
2445 load_reg( R_EAX, Rm );
2446 check_ralign32( R_EAX );
2447 MMU_TRANSLATE_READ( R_EAX );
2448 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2449 MEM_READ_LONG( R_EAX, R_EAX );
2450 store_spreg( R_EAX, R_SSR );
2451 sh4_x86.tstate = TSTATE_NONE;
2456 load_reg( R_EAX, Rm );
2457 check_ralign32( R_EAX );
2458 MMU_TRANSLATE_READ( R_EAX );
2459 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2460 MEM_READ_LONG( R_EAX, R_EAX );
2461 store_spreg( R_EAX, R_SGR );
2462 sh4_x86.tstate = TSTATE_NONE;
2467 load_reg( R_EAX, Rm );
2468 check_ralign32( R_EAX );
2469 MMU_TRANSLATE_READ( R_EAX );
2470 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2471 MEM_READ_LONG( R_EAX, R_EAX );
2472 store_spreg( R_EAX, R_SPC );
2473 sh4_x86.tstate = TSTATE_NONE;
2478 load_reg( R_EAX, Rm );
2479 check_ralign32( R_EAX );
2480 MMU_TRANSLATE_READ( R_EAX );
2481 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2482 MEM_READ_LONG( R_EAX, R_EAX );
2483 store_spreg( R_EAX, R_DBR );
2484 sh4_x86.tstate = TSTATE_NONE;
2486 LDC.L @Rm+, Rn_BANK {:
2489 load_reg( R_EAX, Rm );
2490 check_ralign32( R_EAX );
2491 MMU_TRANSLATE_READ( R_EAX );
2492 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2493 MEM_READ_LONG( R_EAX, R_EAX );
2494 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2495 sh4_x86.tstate = TSTATE_NONE;
2500 load_reg( R_EAX, Rm );
2501 call_func1( sh4_write_fpscr, R_EAX );
2502 sh4_x86.tstate = TSTATE_NONE;
2504 LDS.L @Rm+, FPSCR {:
2507 load_reg( R_EAX, Rm );
2508 check_ralign32( R_EAX );
2509 MMU_TRANSLATE_READ( R_EAX );
2510 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2511 MEM_READ_LONG( R_EAX, R_EAX );
2512 call_func1( sh4_write_fpscr, R_EAX );
2513 sh4_x86.tstate = TSTATE_NONE;
2518 load_reg( R_EAX, Rm );
2519 store_spreg( R_EAX, R_FPUL );
2524 load_reg( R_EAX, Rm );
2525 check_ralign32( R_EAX );
2526 MMU_TRANSLATE_READ( R_EAX );
2527 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2528 MEM_READ_LONG( R_EAX, R_EAX );
2529 store_spreg( R_EAX, R_FPUL );
2530 sh4_x86.tstate = TSTATE_NONE;
2534 load_reg( R_EAX, Rm );
2535 store_spreg( R_EAX, R_MACH );
2539 load_reg( R_EAX, Rm );
2540 check_ralign32( R_EAX );
2541 MMU_TRANSLATE_READ( R_EAX );
2542 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2543 MEM_READ_LONG( R_EAX, R_EAX );
2544 store_spreg( R_EAX, R_MACH );
2545 sh4_x86.tstate = TSTATE_NONE;
2549 load_reg( R_EAX, Rm );
2550 store_spreg( R_EAX, R_MACL );
2554 load_reg( R_EAX, Rm );
2555 check_ralign32( R_EAX );
2556 MMU_TRANSLATE_READ( R_EAX );
2557 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2558 MEM_READ_LONG( R_EAX, R_EAX );
2559 store_spreg( R_EAX, R_MACL );
2560 sh4_x86.tstate = TSTATE_NONE;
2564 load_reg( R_EAX, Rm );
2565 store_spreg( R_EAX, R_PR );
2569 load_reg( R_EAX, Rm );
2570 check_ralign32( R_EAX );
2571 MMU_TRANSLATE_READ( R_EAX );
2572 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2573 MEM_READ_LONG( R_EAX, R_EAX );
2574 store_spreg( R_EAX, R_PR );
2575 sh4_x86.tstate = TSTATE_NONE;
2578 COUNT_INST(I_LDTLB);
2579 call_func0( MMU_ldtlb );
2588 COUNT_INST(I_OCBWB);
2592 load_reg( R_EAX, Rn );
2593 MOV_r32_r32( R_EAX, R_ECX );
2594 AND_imm32_r32( 0xFC000000, R_EAX );
2595 CMP_imm32_r32( 0xE0000000, R_EAX );
2597 call_func1( sh4_flush_store_queue, R_ECX );
2598 TEST_r32_r32( R_EAX, R_EAX );
2601 sh4_x86.tstate = TSTATE_NONE;
// Body of SLEEP: delegate to the core's sleep handler.
// NOTE(review): line orig 2605 (presumably check_priv — SLEEP is privileged)
// is elided from this extraction.
2604 COUNT_INST(I_SLEEP);
2606 call_func0( sh4_sleep );
2607 sh4_x86.tstate = TSTATE_NONE;
// Clear delay-slot state — the elided continuation presumably exits the
// block here.
2608 sh4_x86.in_delay_slot = DELAY_NONE;
2612 COUNT_INST(I_STCSR);
2614 call_func0(sh4_read_sr);
2615 store_reg( R_EAX, Rn );
2616 sh4_x86.tstate = TSTATE_NONE;
2620 load_spreg( R_EAX, R_GBR );
2621 store_reg( R_EAX, Rn );
2626 load_spreg( R_EAX, R_VBR );
2627 store_reg( R_EAX, Rn );
2628 sh4_x86.tstate = TSTATE_NONE;
2633 load_spreg( R_EAX, R_SSR );
2634 store_reg( R_EAX, Rn );
2635 sh4_x86.tstate = TSTATE_NONE;
2640 load_spreg( R_EAX, R_SPC );
2641 store_reg( R_EAX, Rn );
2642 sh4_x86.tstate = TSTATE_NONE;
2647 load_spreg( R_EAX, R_SGR );
2648 store_reg( R_EAX, Rn );
2649 sh4_x86.tstate = TSTATE_NONE;
2654 load_spreg( R_EAX, R_DBR );
2655 store_reg( R_EAX, Rn );
2656 sh4_x86.tstate = TSTATE_NONE;
2661 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2662 store_reg( R_EAX, Rn );
2663 sh4_x86.tstate = TSTATE_NONE;
// Body of STC.L SR, @-Rn: push the composed SR value with pre-decrement.
// NOTE(review): line orig 2667 (presumably check_priv) is elided from this
// extraction.
2666 COUNT_INST(I_STCSRM);
2668 load_reg( R_EAX, Rn );
2669 check_walign32( R_EAX );
2670 ADD_imm8s_r32( -4, R_EAX );
2671 MMU_TRANSLATE_WRITE( R_EAX );
// Preserve the translated address across the sh4_read_sr call using a
// stack-alignment-aware push/pop; the address comes back in R_ECX while the
// SR value arrives in R_EAX — hence the (addr=ECX, value=EAX) operand order
// on the write below, reversed from the other STC.L variants.
2672 PUSH_realigned_r32( R_EAX );
2673 call_func0( sh4_read_sr );
2674 POP_realigned_r32( R_ECX );
// Rn is only decremented after translation succeeded.
2675 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2676 MEM_WRITE_LONG( R_ECX, R_EAX );
2677 sh4_x86.tstate = TSTATE_NONE;
2682 load_reg( R_EAX, Rn );
2683 check_walign32( R_EAX );
2684 ADD_imm8s_r32( -4, R_EAX );
2685 MMU_TRANSLATE_WRITE( R_EAX );
2686 load_spreg( R_EDX, R_VBR );
2687 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2688 MEM_WRITE_LONG( R_EAX, R_EDX );
2689 sh4_x86.tstate = TSTATE_NONE;
2694 load_reg( R_EAX, Rn );
2695 check_walign32( R_EAX );
2696 ADD_imm8s_r32( -4, R_EAX );
2697 MMU_TRANSLATE_WRITE( R_EAX );
2698 load_spreg( R_EDX, R_SSR );
2699 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2700 MEM_WRITE_LONG( R_EAX, R_EDX );
2701 sh4_x86.tstate = TSTATE_NONE;
2706 load_reg( R_EAX, Rn );
2707 check_walign32( R_EAX );
2708 ADD_imm8s_r32( -4, R_EAX );
2709 MMU_TRANSLATE_WRITE( R_EAX );
2710 load_spreg( R_EDX, R_SPC );
2711 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2712 MEM_WRITE_LONG( R_EAX, R_EDX );
2713 sh4_x86.tstate = TSTATE_NONE;
2718 load_reg( R_EAX, Rn );
2719 check_walign32( R_EAX );
2720 ADD_imm8s_r32( -4, R_EAX );
2721 MMU_TRANSLATE_WRITE( R_EAX );
2722 load_spreg( R_EDX, R_SGR );
2723 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2724 MEM_WRITE_LONG( R_EAX, R_EDX );
2725 sh4_x86.tstate = TSTATE_NONE;
2730 load_reg( R_EAX, Rn );
2731 check_walign32( R_EAX );
2732 ADD_imm8s_r32( -4, R_EAX );
2733 MMU_TRANSLATE_WRITE( R_EAX );
2734 load_spreg( R_EDX, R_DBR );
2735 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2736 MEM_WRITE_LONG( R_EAX, R_EDX );
2737 sh4_x86.tstate = TSTATE_NONE;
2739 STC.L Rm_BANK, @-Rn {:
2742 load_reg( R_EAX, Rn );
2743 check_walign32( R_EAX );
2744 ADD_imm8s_r32( -4, R_EAX );
2745 MMU_TRANSLATE_WRITE( R_EAX );
2746 load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
2747 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2748 MEM_WRITE_LONG( R_EAX, R_EDX );
2749 sh4_x86.tstate = TSTATE_NONE;
2753 load_reg( R_EAX, Rn );
2754 check_walign32( R_EAX );
2755 ADD_imm8s_r32( -4, R_EAX );
2756 MMU_TRANSLATE_WRITE( R_EAX );
2757 load_spreg( R_EDX, R_GBR );
2758 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2759 MEM_WRITE_LONG( R_EAX, R_EDX );
2760 sh4_x86.tstate = TSTATE_NONE;
2765 load_spreg( R_EAX, R_FPSCR );
2766 store_reg( R_EAX, Rn );
2768 STS.L FPSCR, @-Rn {:
2771 load_reg( R_EAX, Rn );
2772 check_walign32( R_EAX );
2773 ADD_imm8s_r32( -4, R_EAX );
2774 MMU_TRANSLATE_WRITE( R_EAX );
2775 load_spreg( R_EDX, R_FPSCR );
2776 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2777 MEM_WRITE_LONG( R_EAX, R_EDX );
2778 sh4_x86.tstate = TSTATE_NONE;
2783 load_spreg( R_EAX, R_FPUL );
2784 store_reg( R_EAX, Rn );
2789 load_reg( R_EAX, Rn );
2790 check_walign32( R_EAX );
2791 ADD_imm8s_r32( -4, R_EAX );
2792 MMU_TRANSLATE_WRITE( R_EAX );
2793 load_spreg( R_EDX, R_FPUL );
2794 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2795 MEM_WRITE_LONG( R_EAX, R_EDX );
2796 sh4_x86.tstate = TSTATE_NONE;
2800 load_spreg( R_EAX, R_MACH );
2801 store_reg( R_EAX, Rn );
2805 load_reg( R_EAX, Rn );
2806 check_walign32( R_EAX );
2807 ADD_imm8s_r32( -4, R_EAX );
2808 MMU_TRANSLATE_WRITE( R_EAX );
2809 load_spreg( R_EDX, R_MACH );
2810 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2811 MEM_WRITE_LONG( R_EAX, R_EDX );
2812 sh4_x86.tstate = TSTATE_NONE;
2816 load_spreg( R_EAX, R_MACL );
2817 store_reg( R_EAX, Rn );
2821 load_reg( R_EAX, Rn );
2822 check_walign32( R_EAX );
2823 ADD_imm8s_r32( -4, R_EAX );
2824 MMU_TRANSLATE_WRITE( R_EAX );
2825 load_spreg( R_EDX, R_MACL );
2826 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2827 MEM_WRITE_LONG( R_EAX, R_EDX );
2828 sh4_x86.tstate = TSTATE_NONE;
2832 load_spreg( R_EAX, R_PR );
2833 store_reg( R_EAX, Rn );
2837 load_reg( R_EAX, Rn );
2838 check_walign32( R_EAX );
2839 ADD_imm8s_r32( -4, R_EAX );
2840 MMU_TRANSLATE_WRITE( R_EAX );
2841 load_spreg( R_EDX, R_PR );
2842 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2843 MEM_WRITE_LONG( R_EAX, R_EDX );
2844 sh4_x86.tstate = TSTATE_NONE;
2849 /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
2852 sh4_x86.in_delay_slot = DELAY_NONE;
.