4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
8 * Copyright (c) 2007 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
28 #include "sh4/xltcache.h"
29 #include "sh4/sh4core.h"
30 #include "sh4/sh4trans.h"
31 #include "sh4/sh4stat.h"
32 #include "sh4/sh4mmio.h"
33 #include "sh4/x86op.h"
/* Initial allocation (in bytes) for the per-block backpatch list. */
#define DEFAULT_BACKPATCH_SIZE 4096

/* One record per exception-raising fixup site emitted in the current
 * translation block. NOTE(review): this view is truncated — the struct has
 * at least one further member (exc_code, written in sh4_x86_add_backpatch
 * below). */
struct backpatch_record {
    uint32_t fixup_offset;   /* byte offset of the fixup within the block's code */
    uint32_t fixup_icount;   /* (fixup_pc - block_start_pc)>>1, i.e. instruction index */
49 * Struct to manage internal translation state. This state is not saved -
50 * it is only valid between calls to sh4_translate_begin_block() and
51 * sh4_translate_end_block()
struct sh4_x86_state {
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc; /* SH4 PC of the first instruction in this block */
    uint32_t stack_posn; /* Trace stack height for alignment purposes */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn; /* number of records currently in backpatch_list */
    uint32_t backpatch_size; /* capacity of backpatch_list, in records */
/* Sentinel for sh4_x86.tstate: the x86 condition flags do not currently
 * mirror the SH4 T bit, so a conditional must first reload sh4r.t
 * (see JT_rel8/JF_rel8 below). */
#define TSTATE_NONE -1

#ifdef ENABLE_SH4STATS
/* With stats enabled, COUNT_INST emits a runtime call to sh4_stats_add;
 * the call clobbers EAX, hence tstate is invalidated. */
#define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
/* NOTE(review): the '#else' between the two definitions is elided in this
 * view; with stats disabled COUNT_INST expands to nothing. */
#define COUNT_INST(id)
/** Branch if T is set (either in the current cflags, or in sh4r.t) */
/* If the host flags don't already reflect T (tstate == TSTATE_NONE), emit a
 * compare of sh4r.t against 1 first; then emit the short conditional jump
 * 0x70+cc with a one-byte placeholder displacement (OP(-1)) that is patched
 * when the label is marked. */
#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
/* Same as JT_rel8 but with the inverted condition: XORing an x86 condition
 * code with 1 yields its complement. */
#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
/* Translator state for the block currently being emitted. */
static struct sh4_x86_state sh4_x86;

/* Constants referenced by generated FP code.
 * NOTE(review): their consumers are outside this view — presumably the
 * float->int truncation sequences; confirm before relying on this. */
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
/* One-time setup: allocate the block-wide backpatch list.
 * NOTE(review): the malloc result is not checked here; the allocation is
 * DEFAULT_BACKPATCH_SIZE bytes while backpatch_size counts records — the
 * list is later grown in record units (see sh4_x86_add_backpatch).
 * (Function braces elided in this view.) */
void sh4_translate_init(void)
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
/* Append a backpatch record for an exception-raising fixup.
 * @param fixup_addr  address within the output buffer needing patching
 * @param fixup_pc    SH4 PC of the instruction that may raise the exception
 * @param exc_code    exception to raise; negative values appear to act as
 *                    special markers (see JE_exc(-1) and the -5 used by the
 *                    MAC.L/MAC.W second translation below) — TODO confirm. */
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
	/* Grow geometrically. NOTE(review): realloc overwrites the old
	 * pointer, but on failure the assert below aborts, so no leak in
	 * practice. */
	sh4_x86.backpatch_size <<= 1;
	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
	assert( sh4_x86.backpatch_list != NULL );
    if( sh4_x86.in_delay_slot ) {
	/* (delay-slot adjustment elided in this view) */
    /* Record where the fixup lives (byte offset into the block's code) and
     * which instruction it belongs to, for the epilogue/recovery pass. */
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
    /* mov [bp+n], reg — ModR/M byte + 8-bit displacement.
     * NOTE(review): the 0x8B opcode byte is emitted on a line elided from
     * this view. */
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));

/* Load an SH4 reg sign-extended from 16 bits — presumably a MOVSX; the
 * opcode bytes are elided from this view. */
static inline void load_reg16s( int x86reg, int sh4reg )
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));

/* Load an SH4 reg zero-extended from 16 bits — presumably a MOVZX; the
 * opcode bytes are elided from this view. */
static inline void load_reg16u( int x86reg, int sh4reg )
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));

/* Access a non-GPR sh4r field (SR, GBR, MACL, ...) by struct offset. */
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg (opcode/immediate emission elided in this view) */

/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */

/**
 * Emit an instruction to store an SH4 reg (RN)
 * NOTE(review): 'void static inline' is legal but unconventional ordering;
 * the loads above spell it 'static inline void'.
 */
void static inline store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 * NOTE(review): the (frm)^1 index flip suggests each FP bank stores its
 * 32-bit words pair-swapped in memory — confirm against the sh4r layout.
 */
#define load_fr(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
#define load_xf(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )

/**
 * Load the halves of a DR register (DR or XD) into an integer x86 register:
 * frm&1 selects the bank, |0x01 and &0x0E select the two words of the pair.
 */
#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/* FPU-stack push/pop helpers — presumably x87 FLD/FSTP against the sh4r
 * FPUL and fr banks (FLDF/FSTPF = 32-bit, FLDD/FSTPD = 64-bit). */
#define push_fpul()  FLDF_sh4r(R_FPUL)
#define pop_fpul()   FSTPF_sh4r(R_FPUL)
#define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
/* Exception checks - Note that all exception checks will clobber EAX */

/* Emit a privileged-mode check (SR.MD) once per block; raises
 * EXC_SLOT_ILLEGAL in a delay slot, EXC_ILLEGAL otherwise.
 * NOTE(review): the '} else {' and closing-brace continuation lines of this
 * macro are elided in this view. */
#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
	sh4_x86.priv_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_MD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JE_exc( EXC_SLOT_ILLEGAL );\
	    JE_exc( EXC_ILLEGAL );\
	sh4_x86.tstate = TSTATE_NONE; \

/* Emit an FPU-enabled check (SR.FD) once per block; raises
 * EXC_SLOT_FPU_DISABLED in a delay slot, EXC_FPU_DISABLED otherwise.
 * NOTE(review): continuation lines elided as above. */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_FD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	    JNE_exc(EXC_FPU_DISABLED);\
	sh4_x86.tstate = TSTATE_NONE; \
/* Data-alignment checks: emit code that tests the low bits of the address
 * in x86reg and branches to an address-error exception stub if misaligned
 * (read vs write variants raise the corresponding exception code).
 *
 * Each expands to more than one statement, so the bodies are wrapped in
 * do { } while(0) to behave as a single statement under unbraced if/else.
 * This also removes the stray trailing ';' that the write variants
 * previously embedded (the read variants had none — callers already supply
 * their own semicolon). */
#define check_ralign16( x86reg ) \
    do { TEST_imm32_r32( 0x00000001, x86reg ); \
	 JNE_exc(EXC_DATA_ADDR_READ); } while(0)

#define check_walign16( x86reg ) \
    do { TEST_imm32_r32( 0x00000001, x86reg ); \
	 JNE_exc(EXC_DATA_ADDR_WRITE); } while(0)

#define check_ralign32( x86reg ) \
    do { TEST_imm32_r32( 0x00000003, x86reg ); \
	 JNE_exc(EXC_DATA_ADDR_READ); } while(0)

#define check_walign32( x86reg ) \
    do { TEST_imm32_r32( 0x00000003, x86reg ); \
	 JNE_exc(EXC_DATA_ADDR_WRITE); } while(0)

#define check_ralign64( x86reg ) \
    do { TEST_imm32_r32( 0x00000007, x86reg ); \
	 JNE_exc(EXC_DATA_ADDR_READ); } while(0)

#define check_walign64( x86reg ) \
    do { TEST_imm32_r32( 0x00000007, x86reg ); \
	 JNE_exc(EXC_DATA_ADDR_WRITE); } while(0)
/* Move a memory-call result from EAX into value_reg, when they differ. */
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
/* Emit calls into the SH4 memory subsystem; reads return in EAX and are
 * then moved into value_reg. NOTE(review): these emit C calls, so assume
 * the host ABI's caller-saved registers are clobbered. */
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)

/* Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error. */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

/* As MMU_TRANSLATE_READ but with an explicit exception code for the
 * backpatch (-5 is used by the MAC.L/MAC.W second translation below). */
#define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }

/* Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error. */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

/* Sizes (bytes) of the emitted sequences — presumably used for worst-case
 * block size estimates. */
#define MEM_READ_SIZE (CALL_FUNC1_SIZE)
#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
#define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )

/* Raise EXC_SLOT_ILLEGAL for an instruction that is illegal in a delay
 * slot, clear the delay-slot state, and end this instruction's translation
 * (the 'return 1' exits the enclosing translate function). */
#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
303 /****** Import appropriate calling conventions ******/
304 #if SIZEOF_VOID_P == 8
305 #include "sh4/ia64abi.h"
306 #else /* 32-bit system */
308 #include "sh4/ia32mac.h"
310 #include "sh4/ia32abi.h"
/* Upper bound (bytes) on the end-of-block epilogue, including per-backpatch
 * exception stubs: 12 bytes per record for up to 3 records, otherwise a
 * 48-byte shared sequence plus 15 bytes per extra record.
 * NOTE(review): '{', 'else' and closing lines elided in this view. */
uint32_t sh4_translate_end_block_size()
    if( sh4_x86.backpatch_posn <= 3 ) {
	return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
/**
 * Embed a breakpoint into the generated code: the emitted sequence loads
 * the breakpoint's SH4 PC and calls sh4_translate_breakpoint_hit at
 * runtime. (Function braces elided in this view.)
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
    sh4_x86.tstate = TSTATE_NONE; /* flags no longer reflect T after the call */


/* An instruction is untranslatable iff its PC falls outside the current
 * icache page (e.g. a delay slot that crosses a page boundary). */
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * The emitted sequence:
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 * (Trailing // numbers on lines below are emitted-byte counts.)
 */
void exit_block_emu( sh4vma_t endpc )
    /* Advance sh4r.pc to the end of the block. */
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    /* Account elapsed cycles up to and including endpc+2. */
    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
	call_func1(xlat_get_code_by_vma,R_EAX);
	call_func1(xlat_get_code,R_EAX); /* NOTE(review): 'else' line elided in this view */
    /* Mask the low bits of the returned code pointer — presumably strips
     * tag bits from 4-byte-aligned entries; confirm against xltcache. */
    AND_imm8s_rptr( 0xFC, R_EAX );
372 * Translate a single instruction. Delayed branches are handled specially
373 * by translating both branch and delayed instruction as a single unit (as
375 * The instruction MUST be in the icache (assert check)
377 * @return true if the instruction marks the end of a basic block
380 uint32_t sh4_translate_instruction( sh4vma_t pc )
383 /* Read instruction from icache */
384 assert( IS_IN_ICACHE(pc) );
385 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
387 /* PC is not in the current icache - this usually means we're running
388 * with MMU on, and we've gone past the end of the page. And since
389 * sh4_translate_block is pretty careful about this, it means we're
390 * almost certainly in a delay slot.
392 * Since we can't assume the page is present (and we can't fault it in
393 * at this point, inline a call to sh4_execute_instruction (with a few
394 * small repairs to cope with the different environment).
397 if( !sh4_x86.in_delay_slot ) {
398 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
404 load_reg( R_EAX, Rm );
405 load_reg( R_ECX, Rn );
406 ADD_r32_r32( R_EAX, R_ECX );
407 store_reg( R_ECX, Rn );
408 sh4_x86.tstate = TSTATE_NONE;
412 load_reg( R_EAX, Rn );
413 ADD_imm8s_r32( imm, R_EAX );
414 store_reg( R_EAX, Rn );
415 sh4_x86.tstate = TSTATE_NONE;
419 if( sh4_x86.tstate != TSTATE_C ) {
422 load_reg( R_EAX, Rm );
423 load_reg( R_ECX, Rn );
424 ADC_r32_r32( R_EAX, R_ECX );
425 store_reg( R_ECX, Rn );
427 sh4_x86.tstate = TSTATE_C;
431 load_reg( R_EAX, Rm );
432 load_reg( R_ECX, Rn );
433 ADD_r32_r32( R_EAX, R_ECX );
434 store_reg( R_ECX, Rn );
436 sh4_x86.tstate = TSTATE_O;
440 load_reg( R_EAX, Rm );
441 load_reg( R_ECX, Rn );
442 AND_r32_r32( R_EAX, R_ECX );
443 store_reg( R_ECX, Rn );
444 sh4_x86.tstate = TSTATE_NONE;
448 load_reg( R_EAX, 0 );
449 AND_imm32_r32(imm, R_EAX);
450 store_reg( R_EAX, 0 );
451 sh4_x86.tstate = TSTATE_NONE;
453 AND.B #imm, @(R0, GBR) {:
455 load_reg( R_EAX, 0 );
456 load_spreg( R_ECX, R_GBR );
457 ADD_r32_r32( R_ECX, R_EAX );
458 MMU_TRANSLATE_WRITE( R_EAX );
459 PUSH_realigned_r32(R_EAX);
460 MEM_READ_BYTE( R_EAX, R_EAX );
461 POP_realigned_r32(R_ECX);
462 AND_imm32_r32(imm, R_EAX );
463 MEM_WRITE_BYTE( R_ECX, R_EAX );
464 sh4_x86.tstate = TSTATE_NONE;
468 load_reg( R_EAX, Rm );
469 load_reg( R_ECX, Rn );
470 CMP_r32_r32( R_EAX, R_ECX );
472 sh4_x86.tstate = TSTATE_E;
475 COUNT_INST(I_CMPEQI);
476 load_reg( R_EAX, 0 );
477 CMP_imm8s_r32(imm, R_EAX);
479 sh4_x86.tstate = TSTATE_E;
483 load_reg( R_EAX, Rm );
484 load_reg( R_ECX, Rn );
485 CMP_r32_r32( R_EAX, R_ECX );
487 sh4_x86.tstate = TSTATE_GE;
491 load_reg( R_EAX, Rm );
492 load_reg( R_ECX, Rn );
493 CMP_r32_r32( R_EAX, R_ECX );
495 sh4_x86.tstate = TSTATE_G;
499 load_reg( R_EAX, Rm );
500 load_reg( R_ECX, Rn );
501 CMP_r32_r32( R_EAX, R_ECX );
503 sh4_x86.tstate = TSTATE_A;
507 load_reg( R_EAX, Rm );
508 load_reg( R_ECX, Rn );
509 CMP_r32_r32( R_EAX, R_ECX );
511 sh4_x86.tstate = TSTATE_AE;
515 load_reg( R_EAX, Rn );
516 CMP_imm8s_r32( 0, R_EAX );
518 sh4_x86.tstate = TSTATE_G;
522 load_reg( R_EAX, Rn );
523 CMP_imm8s_r32( 0, R_EAX );
525 sh4_x86.tstate = TSTATE_GE;
528 COUNT_INST(I_CMPSTR);
529 load_reg( R_EAX, Rm );
530 load_reg( R_ECX, Rn );
531 XOR_r32_r32( R_ECX, R_EAX );
532 TEST_r8_r8( R_AL, R_AL );
534 TEST_r8_r8( R_AH, R_AH );
536 SHR_imm8_r32( 16, R_EAX );
537 TEST_r8_r8( R_AL, R_AL );
539 TEST_r8_r8( R_AH, R_AH );
544 sh4_x86.tstate = TSTATE_E;
548 load_reg( R_EAX, Rm );
549 load_reg( R_ECX, Rn );
550 SHR_imm8_r32( 31, R_EAX );
551 SHR_imm8_r32( 31, R_ECX );
552 store_spreg( R_EAX, R_M );
553 store_spreg( R_ECX, R_Q );
554 CMP_r32_r32( R_EAX, R_ECX );
556 sh4_x86.tstate = TSTATE_NE;
560 XOR_r32_r32( R_EAX, R_EAX );
561 store_spreg( R_EAX, R_Q );
562 store_spreg( R_EAX, R_M );
563 store_spreg( R_EAX, R_T );
564 sh4_x86.tstate = TSTATE_C; // works for DIV1
568 load_spreg( R_ECX, R_M );
569 load_reg( R_EAX, Rn );
570 if( sh4_x86.tstate != TSTATE_C ) {
574 SETC_r8( R_DL ); // Q'
575 CMP_sh4r_r32( R_Q, R_ECX );
577 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
580 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
582 store_reg( R_EAX, Rn ); // Done with Rn now
583 SETC_r8(R_AL); // tmp1
584 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
585 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
586 store_spreg( R_ECX, R_Q );
587 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
588 MOVZX_r8_r32( R_AL, R_EAX );
589 store_spreg( R_EAX, R_T );
590 sh4_x86.tstate = TSTATE_NONE;
594 load_reg( R_EAX, Rm );
595 load_reg( R_ECX, Rn );
597 store_spreg( R_EDX, R_MACH );
598 store_spreg( R_EAX, R_MACL );
599 sh4_x86.tstate = TSTATE_NONE;
603 load_reg( R_EAX, Rm );
604 load_reg( R_ECX, Rn );
606 store_spreg( R_EDX, R_MACH );
607 store_spreg( R_EAX, R_MACL );
608 sh4_x86.tstate = TSTATE_NONE;
612 load_reg( R_EAX, Rn );
613 ADD_imm8s_r32( -1, R_EAX );
614 store_reg( R_EAX, Rn );
616 sh4_x86.tstate = TSTATE_E;
620 load_reg( R_EAX, Rm );
621 MOVSX_r8_r32( R_EAX, R_EAX );
622 store_reg( R_EAX, Rn );
626 load_reg( R_EAX, Rm );
627 MOVSX_r16_r32( R_EAX, R_EAX );
628 store_reg( R_EAX, Rn );
632 load_reg( R_EAX, Rm );
633 MOVZX_r8_r32( R_EAX, R_EAX );
634 store_reg( R_EAX, Rn );
638 load_reg( R_EAX, Rm );
639 MOVZX_r16_r32( R_EAX, R_EAX );
640 store_reg( R_EAX, Rn );
645 load_reg( R_EAX, Rm );
646 check_ralign32( R_EAX );
647 MMU_TRANSLATE_READ( R_EAX );
648 PUSH_realigned_r32( R_EAX );
649 load_reg( R_EAX, Rn );
650 ADD_imm8s_r32( 4, R_EAX );
651 MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
652 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
653 // Note translate twice in case of page boundaries. Maybe worth
654 // adding a page-boundary check to skip the second translation
656 load_reg( R_EAX, Rm );
657 check_ralign32( R_EAX );
658 MMU_TRANSLATE_READ( R_EAX );
659 load_reg( R_ECX, Rn );
660 check_ralign32( R_ECX );
661 PUSH_realigned_r32( R_EAX );
662 MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
663 MOV_r32_r32( R_ECX, R_EAX );
664 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
665 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
667 MEM_READ_LONG( R_EAX, R_EAX );
670 MEM_READ_LONG( R_ECX, R_EAX );
671 POP_realigned_r32( R_ECX );
674 ADD_r32_sh4r( R_EAX, R_MACL );
675 ADC_r32_sh4r( R_EDX, R_MACH );
677 load_spreg( R_ECX, R_S );
678 TEST_r32_r32(R_ECX, R_ECX);
680 call_func0( signsat48 );
682 sh4_x86.tstate = TSTATE_NONE;
687 load_reg( R_EAX, Rm );
688 check_ralign16( R_EAX );
689 MMU_TRANSLATE_READ( R_EAX );
690 PUSH_realigned_r32( R_EAX );
691 load_reg( R_EAX, Rn );
692 ADD_imm8s_r32( 2, R_EAX );
693 MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
694 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
695 // Note translate twice in case of page boundaries. Maybe worth
696 // adding a page-boundary check to skip the second translation
698 load_reg( R_EAX, Rm );
699 check_ralign16( R_EAX );
700 MMU_TRANSLATE_READ( R_EAX );
701 load_reg( R_ECX, Rn );
702 check_ralign16( R_ECX );
703 PUSH_realigned_r32( R_EAX );
704 MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
705 MOV_r32_r32( R_ECX, R_EAX );
706 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
707 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
709 MEM_READ_WORD( R_EAX, R_EAX );
712 MEM_READ_WORD( R_ECX, R_EAX );
713 POP_realigned_r32( R_ECX );
716 load_spreg( R_ECX, R_S );
717 TEST_r32_r32( R_ECX, R_ECX );
720 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
721 JNO_rel8( end ); // 2
722 load_imm32( R_EDX, 1 ); // 5
723 store_spreg( R_EDX, R_MACH ); // 6
724 JS_rel8( positive ); // 2
725 load_imm32( R_EAX, 0x80000000 );// 5
726 store_spreg( R_EAX, R_MACL ); // 6
729 JMP_TARGET(positive);
730 load_imm32( R_EAX, 0x7FFFFFFF );// 5
731 store_spreg( R_EAX, R_MACL ); // 6
735 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
736 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
740 sh4_x86.tstate = TSTATE_NONE;
744 load_spreg( R_EAX, R_T );
745 store_reg( R_EAX, Rn );
749 load_reg( R_EAX, Rm );
750 load_reg( R_ECX, Rn );
752 store_spreg( R_EAX, R_MACL );
753 sh4_x86.tstate = TSTATE_NONE;
757 load_reg16s( R_EAX, Rm );
758 load_reg16s( R_ECX, Rn );
760 store_spreg( R_EAX, R_MACL );
761 sh4_x86.tstate = TSTATE_NONE;
765 load_reg16u( R_EAX, Rm );
766 load_reg16u( R_ECX, Rn );
768 store_spreg( R_EAX, R_MACL );
769 sh4_x86.tstate = TSTATE_NONE;
773 load_reg( R_EAX, Rm );
775 store_reg( R_EAX, Rn );
776 sh4_x86.tstate = TSTATE_NONE;
780 load_reg( R_EAX, Rm );
781 XOR_r32_r32( R_ECX, R_ECX );
783 SBB_r32_r32( R_EAX, R_ECX );
784 store_reg( R_ECX, Rn );
786 sh4_x86.tstate = TSTATE_C;
790 load_reg( R_EAX, Rm );
792 store_reg( R_EAX, Rn );
793 sh4_x86.tstate = TSTATE_NONE;
797 load_reg( R_EAX, Rm );
798 load_reg( R_ECX, Rn );
799 OR_r32_r32( R_EAX, R_ECX );
800 store_reg( R_ECX, Rn );
801 sh4_x86.tstate = TSTATE_NONE;
805 load_reg( R_EAX, 0 );
806 OR_imm32_r32(imm, R_EAX);
807 store_reg( R_EAX, 0 );
808 sh4_x86.tstate = TSTATE_NONE;
810 OR.B #imm, @(R0, GBR) {:
812 load_reg( R_EAX, 0 );
813 load_spreg( R_ECX, R_GBR );
814 ADD_r32_r32( R_ECX, R_EAX );
815 MMU_TRANSLATE_WRITE( R_EAX );
816 PUSH_realigned_r32(R_EAX);
817 MEM_READ_BYTE( R_EAX, R_EAX );
818 POP_realigned_r32(R_ECX);
819 OR_imm32_r32(imm, R_EAX );
820 MEM_WRITE_BYTE( R_ECX, R_EAX );
821 sh4_x86.tstate = TSTATE_NONE;
825 load_reg( R_EAX, Rn );
826 if( sh4_x86.tstate != TSTATE_C ) {
830 store_reg( R_EAX, Rn );
832 sh4_x86.tstate = TSTATE_C;
836 load_reg( R_EAX, Rn );
837 if( sh4_x86.tstate != TSTATE_C ) {
841 store_reg( R_EAX, Rn );
843 sh4_x86.tstate = TSTATE_C;
847 load_reg( R_EAX, Rn );
849 store_reg( R_EAX, Rn );
851 sh4_x86.tstate = TSTATE_C;
855 load_reg( R_EAX, Rn );
857 store_reg( R_EAX, Rn );
859 sh4_x86.tstate = TSTATE_C;
863 /* Annoyingly enough, not directly convertible */
864 load_reg( R_EAX, Rn );
865 load_reg( R_ECX, Rm );
866 CMP_imm32_r32( 0, R_ECX );
869 NEG_r32( R_ECX ); // 2
870 AND_imm8_r8( 0x1F, R_CL ); // 3
871 JE_rel8(emptysar); // 2
872 SAR_r32_CL( R_EAX ); // 2
875 JMP_TARGET(emptysar);
876 SAR_imm8_r32(31, R_EAX ); // 3
880 AND_imm8_r8( 0x1F, R_CL ); // 3
881 SHL_r32_CL( R_EAX ); // 2
884 store_reg( R_EAX, Rn );
885 sh4_x86.tstate = TSTATE_NONE;
889 load_reg( R_EAX, Rn );
890 load_reg( R_ECX, Rm );
891 CMP_imm32_r32( 0, R_ECX );
894 NEG_r32( R_ECX ); // 2
895 AND_imm8_r8( 0x1F, R_CL ); // 3
897 SHR_r32_CL( R_EAX ); // 2
900 JMP_TARGET(emptyshr);
901 XOR_r32_r32( R_EAX, R_EAX );
905 AND_imm8_r8( 0x1F, R_CL ); // 3
906 SHL_r32_CL( R_EAX ); // 2
909 store_reg( R_EAX, Rn );
910 sh4_x86.tstate = TSTATE_NONE;
914 load_reg( R_EAX, Rn );
917 store_reg( R_EAX, Rn );
918 sh4_x86.tstate = TSTATE_C;
922 load_reg( R_EAX, Rn );
925 store_reg( R_EAX, Rn );
926 sh4_x86.tstate = TSTATE_C;
930 load_reg( R_EAX, Rn );
933 store_reg( R_EAX, Rn );
934 sh4_x86.tstate = TSTATE_C;
938 load_reg( R_EAX, Rn );
939 SHL_imm8_r32( 2, R_EAX );
940 store_reg( R_EAX, Rn );
941 sh4_x86.tstate = TSTATE_NONE;
945 load_reg( R_EAX, Rn );
946 SHL_imm8_r32( 8, R_EAX );
947 store_reg( R_EAX, Rn );
948 sh4_x86.tstate = TSTATE_NONE;
952 load_reg( R_EAX, Rn );
953 SHL_imm8_r32( 16, R_EAX );
954 store_reg( R_EAX, Rn );
955 sh4_x86.tstate = TSTATE_NONE;
959 load_reg( R_EAX, Rn );
962 store_reg( R_EAX, Rn );
963 sh4_x86.tstate = TSTATE_C;
967 load_reg( R_EAX, Rn );
968 SHR_imm8_r32( 2, R_EAX );
969 store_reg( R_EAX, Rn );
970 sh4_x86.tstate = TSTATE_NONE;
974 load_reg( R_EAX, Rn );
975 SHR_imm8_r32( 8, R_EAX );
976 store_reg( R_EAX, Rn );
977 sh4_x86.tstate = TSTATE_NONE;
981 load_reg( R_EAX, Rn );
982 SHR_imm8_r32( 16, R_EAX );
983 store_reg( R_EAX, Rn );
984 sh4_x86.tstate = TSTATE_NONE;
988 load_reg( R_EAX, Rm );
989 load_reg( R_ECX, Rn );
990 SUB_r32_r32( R_EAX, R_ECX );
991 store_reg( R_ECX, Rn );
992 sh4_x86.tstate = TSTATE_NONE;
996 load_reg( R_EAX, Rm );
997 load_reg( R_ECX, Rn );
998 if( sh4_x86.tstate != TSTATE_C ) {
1001 SBB_r32_r32( R_EAX, R_ECX );
1002 store_reg( R_ECX, Rn );
1004 sh4_x86.tstate = TSTATE_C;
1008 load_reg( R_EAX, Rm );
1009 load_reg( R_ECX, Rn );
1010 SUB_r32_r32( R_EAX, R_ECX );
1011 store_reg( R_ECX, Rn );
1013 sh4_x86.tstate = TSTATE_O;
1016 COUNT_INST(I_SWAPB);
1017 load_reg( R_EAX, Rm );
1018 XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
1019 store_reg( R_EAX, Rn );
1022 COUNT_INST(I_SWAPB);
1023 load_reg( R_EAX, Rm );
1024 MOV_r32_r32( R_EAX, R_ECX );
1025 SHL_imm8_r32( 16, R_ECX );
1026 SHR_imm8_r32( 16, R_EAX );
1027 OR_r32_r32( R_EAX, R_ECX );
1028 store_reg( R_ECX, Rn );
1029 sh4_x86.tstate = TSTATE_NONE;
1033 load_reg( R_EAX, Rn );
1034 MMU_TRANSLATE_WRITE( R_EAX );
1035 PUSH_realigned_r32( R_EAX );
1036 MEM_READ_BYTE( R_EAX, R_EAX );
1037 TEST_r8_r8( R_AL, R_AL );
1039 OR_imm8_r8( 0x80, R_AL );
1040 POP_realigned_r32( R_ECX );
1041 MEM_WRITE_BYTE( R_ECX, R_EAX );
1042 sh4_x86.tstate = TSTATE_NONE;
1046 load_reg( R_EAX, Rm );
1047 load_reg( R_ECX, Rn );
1048 TEST_r32_r32( R_EAX, R_ECX );
1050 sh4_x86.tstate = TSTATE_E;
1054 load_reg( R_EAX, 0 );
1055 TEST_imm32_r32( imm, R_EAX );
1057 sh4_x86.tstate = TSTATE_E;
1059 TST.B #imm, @(R0, GBR) {:
1061 load_reg( R_EAX, 0);
1062 load_reg( R_ECX, R_GBR);
1063 ADD_r32_r32( R_ECX, R_EAX );
1064 MMU_TRANSLATE_READ( R_EAX );
1065 MEM_READ_BYTE( R_EAX, R_EAX );
1066 TEST_imm8_r8( imm, R_AL );
1068 sh4_x86.tstate = TSTATE_E;
1072 load_reg( R_EAX, Rm );
1073 load_reg( R_ECX, Rn );
1074 XOR_r32_r32( R_EAX, R_ECX );
1075 store_reg( R_ECX, Rn );
1076 sh4_x86.tstate = TSTATE_NONE;
1080 load_reg( R_EAX, 0 );
1081 XOR_imm32_r32( imm, R_EAX );
1082 store_reg( R_EAX, 0 );
1083 sh4_x86.tstate = TSTATE_NONE;
1085 XOR.B #imm, @(R0, GBR) {:
1087 load_reg( R_EAX, 0 );
1088 load_spreg( R_ECX, R_GBR );
1089 ADD_r32_r32( R_ECX, R_EAX );
1090 MMU_TRANSLATE_WRITE( R_EAX );
1091 PUSH_realigned_r32(R_EAX);
1092 MEM_READ_BYTE(R_EAX, R_EAX);
1093 POP_realigned_r32(R_ECX);
1094 XOR_imm32_r32( imm, R_EAX );
1095 MEM_WRITE_BYTE( R_ECX, R_EAX );
1096 sh4_x86.tstate = TSTATE_NONE;
1099 COUNT_INST(I_XTRCT);
1100 load_reg( R_EAX, Rm );
1101 load_reg( R_ECX, Rn );
1102 SHL_imm8_r32( 16, R_EAX );
1103 SHR_imm8_r32( 16, R_ECX );
1104 OR_r32_r32( R_EAX, R_ECX );
1105 store_reg( R_ECX, Rn );
1106 sh4_x86.tstate = TSTATE_NONE;
1109 /* Data move instructions */
1112 load_reg( R_EAX, Rm );
1113 store_reg( R_EAX, Rn );
1117 load_imm32( R_EAX, imm );
1118 store_reg( R_EAX, Rn );
1122 load_reg( R_EAX, Rn );
1123 MMU_TRANSLATE_WRITE( R_EAX );
1124 load_reg( R_EDX, Rm );
1125 MEM_WRITE_BYTE( R_EAX, R_EDX );
1126 sh4_x86.tstate = TSTATE_NONE;
1130 load_reg( R_EAX, Rn );
1131 ADD_imm8s_r32( -1, R_EAX );
1132 MMU_TRANSLATE_WRITE( R_EAX );
1133 load_reg( R_EDX, Rm );
1134 ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
1135 MEM_WRITE_BYTE( R_EAX, R_EDX );
1136 sh4_x86.tstate = TSTATE_NONE;
1138 MOV.B Rm, @(R0, Rn) {:
1140 load_reg( R_EAX, 0 );
1141 load_reg( R_ECX, Rn );
1142 ADD_r32_r32( R_ECX, R_EAX );
1143 MMU_TRANSLATE_WRITE( R_EAX );
1144 load_reg( R_EDX, Rm );
1145 MEM_WRITE_BYTE( R_EAX, R_EDX );
1146 sh4_x86.tstate = TSTATE_NONE;
1148 MOV.B R0, @(disp, GBR) {:
1150 load_spreg( R_EAX, R_GBR );
1151 ADD_imm32_r32( disp, R_EAX );
1152 MMU_TRANSLATE_WRITE( R_EAX );
1153 load_reg( R_EDX, 0 );
1154 MEM_WRITE_BYTE( R_EAX, R_EDX );
1155 sh4_x86.tstate = TSTATE_NONE;
1157 MOV.B R0, @(disp, Rn) {:
1159 load_reg( R_EAX, Rn );
1160 ADD_imm32_r32( disp, R_EAX );
1161 MMU_TRANSLATE_WRITE( R_EAX );
1162 load_reg( R_EDX, 0 );
1163 MEM_WRITE_BYTE( R_EAX, R_EDX );
1164 sh4_x86.tstate = TSTATE_NONE;
1168 load_reg( R_EAX, Rm );
1169 MMU_TRANSLATE_READ( R_EAX );
1170 MEM_READ_BYTE( R_EAX, R_EAX );
1171 store_reg( R_EAX, Rn );
1172 sh4_x86.tstate = TSTATE_NONE;
1176 load_reg( R_EAX, Rm );
1177 MMU_TRANSLATE_READ( R_EAX );
1178 ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
1179 MEM_READ_BYTE( R_EAX, R_EAX );
1180 store_reg( R_EAX, Rn );
1181 sh4_x86.tstate = TSTATE_NONE;
1183 MOV.B @(R0, Rm), Rn {:
1185 load_reg( R_EAX, 0 );
1186 load_reg( R_ECX, Rm );
1187 ADD_r32_r32( R_ECX, R_EAX );
1188 MMU_TRANSLATE_READ( R_EAX )
1189 MEM_READ_BYTE( R_EAX, R_EAX );
1190 store_reg( R_EAX, Rn );
1191 sh4_x86.tstate = TSTATE_NONE;
1193 MOV.B @(disp, GBR), R0 {:
1195 load_spreg( R_EAX, R_GBR );
1196 ADD_imm32_r32( disp, R_EAX );
1197 MMU_TRANSLATE_READ( R_EAX );
1198 MEM_READ_BYTE( R_EAX, R_EAX );
1199 store_reg( R_EAX, 0 );
1200 sh4_x86.tstate = TSTATE_NONE;
1202 MOV.B @(disp, Rm), R0 {:
1204 load_reg( R_EAX, Rm );
1205 ADD_imm32_r32( disp, R_EAX );
1206 MMU_TRANSLATE_READ( R_EAX );
1207 MEM_READ_BYTE( R_EAX, R_EAX );
1208 store_reg( R_EAX, 0 );
1209 sh4_x86.tstate = TSTATE_NONE;
1213 load_reg( R_EAX, Rn );
1214 check_walign32(R_EAX);
1215 MMU_TRANSLATE_WRITE( R_EAX );
1216 load_reg( R_EDX, Rm );
1217 MEM_WRITE_LONG( R_EAX, R_EDX );
1218 sh4_x86.tstate = TSTATE_NONE;
1222 load_reg( R_EAX, Rn );
1223 ADD_imm8s_r32( -4, R_EAX );
1224 check_walign32( R_EAX );
1225 MMU_TRANSLATE_WRITE( R_EAX );
1226 load_reg( R_EDX, Rm );
1227 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
1228 MEM_WRITE_LONG( R_EAX, R_EDX );
1229 sh4_x86.tstate = TSTATE_NONE;
1231 MOV.L Rm, @(R0, Rn) {:
1233 load_reg( R_EAX, 0 );
1234 load_reg( R_ECX, Rn );
1235 ADD_r32_r32( R_ECX, R_EAX );
1236 check_walign32( R_EAX );
1237 MMU_TRANSLATE_WRITE( R_EAX );
1238 load_reg( R_EDX, Rm );
1239 MEM_WRITE_LONG( R_EAX, R_EDX );
1240 sh4_x86.tstate = TSTATE_NONE;
1242 MOV.L R0, @(disp, GBR) {:
1244 load_spreg( R_EAX, R_GBR );
1245 ADD_imm32_r32( disp, R_EAX );
1246 check_walign32( R_EAX );
1247 MMU_TRANSLATE_WRITE( R_EAX );
1248 load_reg( R_EDX, 0 );
1249 MEM_WRITE_LONG( R_EAX, R_EDX );
1250 sh4_x86.tstate = TSTATE_NONE;
1252 MOV.L Rm, @(disp, Rn) {:
1254 load_reg( R_EAX, Rn );
1255 ADD_imm32_r32( disp, R_EAX );
1256 check_walign32( R_EAX );
1257 MMU_TRANSLATE_WRITE( R_EAX );
1258 load_reg( R_EDX, Rm );
1259 MEM_WRITE_LONG( R_EAX, R_EDX );
1260 sh4_x86.tstate = TSTATE_NONE;
1264 load_reg( R_EAX, Rm );
1265 check_ralign32( R_EAX );
1266 MMU_TRANSLATE_READ( R_EAX );
1267 MEM_READ_LONG( R_EAX, R_EAX );
1268 store_reg( R_EAX, Rn );
1269 sh4_x86.tstate = TSTATE_NONE;
1273 load_reg( R_EAX, Rm );
1274 check_ralign32( R_EAX );
1275 MMU_TRANSLATE_READ( R_EAX );
1276 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1277 MEM_READ_LONG( R_EAX, R_EAX );
1278 store_reg( R_EAX, Rn );
1279 sh4_x86.tstate = TSTATE_NONE;
1281 MOV.L @(R0, Rm), Rn {:
1283 load_reg( R_EAX, 0 );
1284 load_reg( R_ECX, Rm );
1285 ADD_r32_r32( R_ECX, R_EAX );
1286 check_ralign32( R_EAX );
1287 MMU_TRANSLATE_READ( R_EAX );
1288 MEM_READ_LONG( R_EAX, R_EAX );
1289 store_reg( R_EAX, Rn );
1290 sh4_x86.tstate = TSTATE_NONE;
1292 MOV.L @(disp, GBR), R0 {:
1294 load_spreg( R_EAX, R_GBR );
1295 ADD_imm32_r32( disp, R_EAX );
1296 check_ralign32( R_EAX );
1297 MMU_TRANSLATE_READ( R_EAX );
1298 MEM_READ_LONG( R_EAX, R_EAX );
1299 store_reg( R_EAX, 0 );
1300 sh4_x86.tstate = TSTATE_NONE;
1302 MOV.L @(disp, PC), Rn {:
// NOTE(review): This is an elided listing of SH4->x86 gendec templates
// ("INSN {: ... :}" blocks). Each line carries a stale embedded line number
// and that numbering is non-contiguous, so template openers/closers and some
// interior lines (SLOTILLEGAL() paths, else-branches) are missing here.
// Code is left byte-identical; only comments are added.

// Tail of MOV.L @(disp, PC), Rn: PC-relative 32-bit load. When the target
// lies in the current ICACHE page the value is read directly from host
// memory; otherwise it goes through MMU translation + the memory subsystem.
1303 COUNT_INST(I_MOVLPC);
1304 if( sh4_x86.in_delay_slot ) {
1307 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1308 if( IS_IN_ICACHE(target) ) {
1309 // If the target address is in the same page as the code, it's
1310 // pretty safe to just ref it directly and circumvent the whole
1311 // memory subsystem. (this is a big performance win)
1313 // FIXME: There's a corner-case that's not handled here when
1314 // the current code-page is in the ITLB but not in the UTLB.
1315 // (should generate a TLB miss although need to test SH4
1316 // behaviour to confirm) Unlikely to be anyone depending on this
1317 // behaviour though.
1318 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1319 MOV_moff32_EAX( ptr );
1321 // Note: we use sh4r.pc for the calc as we could be running at a
1322 // different virtual address than the translation was done with,
1323 // but we can safely assume that the low bits are the same.
1324 load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1325 ADD_sh4r_r32( R_PC, R_EAX );
1326 MMU_TRANSLATE_READ( R_EAX );
1327 MEM_READ_LONG( R_EAX, R_EAX );
1328 sh4_x86.tstate = TSTATE_NONE;
1330 store_reg( R_EAX, Rn );
// MOV.L @(disp, Rm), Rn: register+displacement 32-bit load, with read
// alignment check and MMU translation before the memory read.
1333 MOV.L @(disp, Rm), Rn {:
1335 load_reg( R_EAX, Rm );
1336 ADD_imm8s_r32( disp, R_EAX );
1337 check_ralign32( R_EAX );
1338 MMU_TRANSLATE_READ( R_EAX );
1339 MEM_READ_LONG( R_EAX, R_EAX );
1340 store_reg( R_EAX, Rn );
1341 sh4_x86.tstate = TSTATE_NONE;
// Presumably MOV.W Rm, @Rn (opener elided): 16-bit store to @Rn.
// NOTE(review): the MMU_TRANSLATE_WRITE line below lacks a trailing ';'
// in this listing — verify against the original source.
1345 load_reg( R_EAX, Rn );
1346 check_walign16( R_EAX );
1347 MMU_TRANSLATE_WRITE( R_EAX )
1348 load_reg( R_EDX, Rm );
1349 MEM_WRITE_WORD( R_EAX, R_EDX );
1350 sh4_x86.tstate = TSTATE_NONE;
// Presumably MOV.W Rm, @-Rn (opener elided): pre-decrement 16-bit store.
// Rn is only decremented (in sh4r) after translation succeeds, so a TLB
// miss leaves the register unmodified.
1354 load_reg( R_EAX, Rn );
1355 ADD_imm8s_r32( -2, R_EAX );
1356 check_walign16( R_EAX );
1357 MMU_TRANSLATE_WRITE( R_EAX );
1358 load_reg( R_EDX, Rm );
1359 ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
1360 MEM_WRITE_WORD( R_EAX, R_EDX );
1361 sh4_x86.tstate = TSTATE_NONE;
// MOV.W Rm, @(R0, Rn): indexed 16-bit store.
1363 MOV.W Rm, @(R0, Rn) {:
1365 load_reg( R_EAX, 0 );
1366 load_reg( R_ECX, Rn );
1367 ADD_r32_r32( R_ECX, R_EAX );
1368 check_walign16( R_EAX );
1369 MMU_TRANSLATE_WRITE( R_EAX );
1370 load_reg( R_EDX, Rm );
1371 MEM_WRITE_WORD( R_EAX, R_EDX );
1372 sh4_x86.tstate = TSTATE_NONE;
// MOV.W R0, @(disp, GBR): GBR-relative 16-bit store.
1374 MOV.W R0, @(disp, GBR) {:
1376 load_spreg( R_EAX, R_GBR );
1377 ADD_imm32_r32( disp, R_EAX );
1378 check_walign16( R_EAX );
1379 MMU_TRANSLATE_WRITE( R_EAX );
1380 load_reg( R_EDX, 0 );
1381 MEM_WRITE_WORD( R_EAX, R_EDX );
1382 sh4_x86.tstate = TSTATE_NONE;
// MOV.W R0, @(disp, Rn): register+displacement 16-bit store of R0.
1384 MOV.W R0, @(disp, Rn) {:
1386 load_reg( R_EAX, Rn );
1387 ADD_imm32_r32( disp, R_EAX );
1388 check_walign16( R_EAX );
1389 MMU_TRANSLATE_WRITE( R_EAX );
1390 load_reg( R_EDX, 0 );
1391 MEM_WRITE_WORD( R_EAX, R_EDX );
1392 sh4_x86.tstate = TSTATE_NONE;
// Presumably MOV.W @Rm, Rn (opener elided): 16-bit load.
1396 load_reg( R_EAX, Rm );
1397 check_ralign16( R_EAX );
1398 MMU_TRANSLATE_READ( R_EAX );
1399 MEM_READ_WORD( R_EAX, R_EAX );
1400 store_reg( R_EAX, Rn );
1401 sh4_x86.tstate = TSTATE_NONE;
// Presumably MOV.W @Rm+, Rn (opener elided): post-increment 16-bit load;
// Rm is bumped only after successful translation.
1405 load_reg( R_EAX, Rm );
1406 check_ralign16( R_EAX );
1407 MMU_TRANSLATE_READ( R_EAX );
1408 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1409 MEM_READ_WORD( R_EAX, R_EAX );
1410 store_reg( R_EAX, Rn );
1411 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(R0, Rm), Rn: indexed 16-bit load.
1413 MOV.W @(R0, Rm), Rn {:
1415 load_reg( R_EAX, 0 );
1416 load_reg( R_ECX, Rm );
1417 ADD_r32_r32( R_ECX, R_EAX );
1418 check_ralign16( R_EAX );
1419 MMU_TRANSLATE_READ( R_EAX );
1420 MEM_READ_WORD( R_EAX, R_EAX );
1421 store_reg( R_EAX, Rn );
1422 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(disp, GBR), R0: GBR-relative 16-bit load into R0.
1424 MOV.W @(disp, GBR), R0 {:
1426 load_spreg( R_EAX, R_GBR );
1427 ADD_imm32_r32( disp, R_EAX );
1428 check_ralign16( R_EAX );
1429 MMU_TRANSLATE_READ( R_EAX );
1430 MEM_READ_WORD( R_EAX, R_EAX );
1431 store_reg( R_EAX, 0 );
1432 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(disp, PC), Rn: PC-relative 16-bit load, same ICACHE fast path
// as MOV.L @(disp,PC) above (note the extra sign-extension to 32 bits).
1434 MOV.W @(disp, PC), Rn {:
1436 if( sh4_x86.in_delay_slot ) {
1439 // See comments for MOV.L @(disp, PC), Rn
1440 uint32_t target = pc + disp + 4;
1441 if( IS_IN_ICACHE(target) ) {
1442 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1443 MOV_moff32_EAX( ptr );
1444 MOVSX_r16_r32( R_EAX, R_EAX );
1446 load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1447 ADD_sh4r_r32( R_PC, R_EAX );
1448 MMU_TRANSLATE_READ( R_EAX );
1449 MEM_READ_WORD( R_EAX, R_EAX );
1450 sh4_x86.tstate = TSTATE_NONE;
1452 store_reg( R_EAX, Rn );
// MOV.W @(disp, Rm), R0: register+displacement 16-bit load into R0.
1455 MOV.W @(disp, Rm), R0 {:
1457 load_reg( R_EAX, Rm );
1458 ADD_imm32_r32( disp, R_EAX );
1459 check_ralign16( R_EAX );
1460 MMU_TRANSLATE_READ( R_EAX );
1461 MEM_READ_WORD( R_EAX, R_EAX );
1462 store_reg( R_EAX, 0 );
1463 sh4_x86.tstate = TSTATE_NONE;
// MOVA @(disp, PC), R0: computes the effective (long-aligned) PC-relative
// address only — no memory access. Illegal in a delay slot.
1465 MOVA @(disp, PC), R0 {:
1467 if( sh4_x86.in_delay_slot ) {
1470 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1471 ADD_sh4r_r32( R_PC, R_ECX );
1472 store_reg( R_ECX, 0 );
1473 sh4_x86.tstate = TSTATE_NONE;
// Presumably MOVCA.L R0, @Rn (opener elided): treated here as a plain
// 32-bit store of R0 (cache-allocate semantics not modelled).
1477 COUNT_INST(I_MOVCA);
1478 load_reg( R_EAX, Rn );
1479 check_walign32( R_EAX );
1480 MMU_TRANSLATE_WRITE( R_EAX );
1481 load_reg( R_EDX, 0 );
1482 MEM_WRITE_LONG( R_EAX, R_EDX );
1483 sh4_x86.tstate = TSTATE_NONE;
1486 /* Control transfer instructions */
// NOTE(review): elided listing — template openers (e.g. "BF disp {:") and
// SLOTILLEGAL()/else lines are missing; instruction identities below are
// inferred from the emitted code and should be confirmed against the
// original source. Code left byte-identical; comments only.
//
// Presumably BF disp: branch taken when T==0, so emit JT over the
// taken-branch exit. Branches are illegal in delay slots.
1489 if( sh4_x86.in_delay_slot ) {
1492 sh4vma_t target = disp + pc + 4;
1493 JT_rel8( nottaken );
1494 exit_block_rel(target, pc+2 );
1495 JMP_TARGET(nottaken);
// Presumably BF/S disp (delayed branch). If the delay-slot instruction
// cannot be translated, fall back to computing new_pc and exiting to the
// emulator; otherwise emit a patched 32-bit conditional jump around the
// delay slot + taken-path exit.
1501 if( sh4_x86.in_delay_slot ) {
1504 sh4_x86.in_delay_slot = DELAY_PC;
1505 if( UNTRANSLATABLE(pc+2) ) {
1506 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1508 ADD_imm32_r32( disp, R_EAX );
1509 JMP_TARGET(nottaken);
1510 ADD_sh4r_r32( R_PC, R_EAX );
1511 store_spreg( R_EAX, R_NEW_PC );
1512 exit_block_emu(pc+2);
1513 sh4_x86.branch_taken = TRUE;
1516 if( sh4_x86.tstate == TSTATE_NONE ) {
1517 CMP_imm8s_sh4r( 1, R_T );
1518 sh4_x86.tstate = TSTATE_E;
1520 sh4vma_t target = disp + pc + 4;
1521 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
1522 int save_tstate = sh4_x86.tstate;
1523 sh4_translate_instruction(pc+2);
1524 exit_block_rel( target, pc+4 );
// back-patch the rel32 displacement now that the taken path is emitted
1527 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1528 sh4_x86.tstate = save_tstate;
1529 sh4_translate_instruction(pc+2);
// Presumably BRA disp: unconditional delayed branch.
1536 if( sh4_x86.in_delay_slot ) {
1539 sh4_x86.in_delay_slot = DELAY_PC;
1540 sh4_x86.branch_taken = TRUE;
1541 if( UNTRANSLATABLE(pc+2) ) {
1542 load_spreg( R_EAX, R_PC );
1543 ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
1544 store_spreg( R_EAX, R_NEW_PC );
1545 exit_block_emu(pc+2);
1548 sh4_translate_instruction( pc + 2 );
1549 exit_block_rel( disp + pc + 4, pc+4 );
// Presumably BRAF Rn: delayed branch to PC+4+Rn (register-relative), so
// new_pc must be computed at runtime and the block exits via newpcset.
1556 if( sh4_x86.in_delay_slot ) {
1559 load_spreg( R_EAX, R_PC );
1560 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1561 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1562 store_spreg( R_EAX, R_NEW_PC );
1563 sh4_x86.in_delay_slot = DELAY_PC;
1564 sh4_x86.tstate = TSTATE_NONE;
1565 sh4_x86.branch_taken = TRUE;
1566 if( UNTRANSLATABLE(pc+2) ) {
1567 exit_block_emu(pc+2);
1570 sh4_translate_instruction( pc + 2 );
1571 exit_block_newpcset(pc+2);
// Presumably BSR disp: delayed subroutine call — PR := PC+4 first.
1578 if( sh4_x86.in_delay_slot ) {
1581 load_spreg( R_EAX, R_PC );
1582 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1583 store_spreg( R_EAX, R_PR );
1584 sh4_x86.in_delay_slot = DELAY_PC;
1585 sh4_x86.branch_taken = TRUE;
1586 sh4_x86.tstate = TSTATE_NONE;
1587 if( UNTRANSLATABLE(pc+2) ) {
1588 ADD_imm32_r32( disp, R_EAX );
1589 store_spreg( R_EAX, R_NEW_PC );
1590 exit_block_emu(pc+2);
1593 sh4_translate_instruction( pc + 2 );
1594 exit_block_rel( disp + pc + 4, pc+4 );
// Presumably BSRF Rn: delayed call to PC+4+Rn with PR := PC+4.
1601 if( sh4_x86.in_delay_slot ) {
1604 load_spreg( R_EAX, R_PC );
1605 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1606 store_spreg( R_EAX, R_PR );
1607 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1608 store_spreg( R_EAX, R_NEW_PC );
1610 sh4_x86.in_delay_slot = DELAY_PC;
1611 sh4_x86.tstate = TSTATE_NONE;
1612 sh4_x86.branch_taken = TRUE;
1613 if( UNTRANSLATABLE(pc+2) ) {
1614 exit_block_emu(pc+2);
1617 sh4_translate_instruction( pc + 2 );
1618 exit_block_newpcset(pc+2);
// Presumably BT disp: branch taken when T==1 — mirror image of BF (JF
// skips the taken-branch exit).
1625 if( sh4_x86.in_delay_slot ) {
1628 sh4vma_t target = disp + pc + 4;
1629 JF_rel8( nottaken );
1630 exit_block_rel(target, pc+2 );
1631 JMP_TARGET(nottaken);
// Presumably BT/S disp (delayed): same structure as BF/S but with the
// condition inverted (tstate^1 => JF rel32).
1637 if( sh4_x86.in_delay_slot ) {
1640 sh4_x86.in_delay_slot = DELAY_PC;
1641 if( UNTRANSLATABLE(pc+2) ) {
1642 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1644 ADD_imm32_r32( disp, R_EAX );
1645 JMP_TARGET(nottaken);
1646 ADD_sh4r_r32( R_PC, R_EAX );
1647 store_spreg( R_EAX, R_NEW_PC );
1648 exit_block_emu(pc+2);
1649 sh4_x86.branch_taken = TRUE;
1652 if( sh4_x86.tstate == TSTATE_NONE ) {
1653 CMP_imm8s_sh4r( 1, R_T );
1654 sh4_x86.tstate = TSTATE_E;
1656 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
1657 int save_tstate = sh4_x86.tstate;
1658 sh4_translate_instruction(pc+2);
1659 exit_block_rel( disp + pc + 4, pc+4 );
1661 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1662 sh4_x86.tstate = save_tstate;
1663 sh4_translate_instruction(pc+2);
// Presumably JMP @Rn: delayed indirect jump, new_pc := Rn.
1670 if( sh4_x86.in_delay_slot ) {
1673 load_reg( R_ECX, Rn );
1674 store_spreg( R_ECX, R_NEW_PC );
1675 sh4_x86.in_delay_slot = DELAY_PC;
1676 sh4_x86.branch_taken = TRUE;
1677 if( UNTRANSLATABLE(pc+2) ) {
1678 exit_block_emu(pc+2);
1681 sh4_translate_instruction(pc+2);
1682 exit_block_newpcset(pc+2);
// Presumably JSR @Rn: delayed indirect call — PR := PC+4, new_pc := Rn.
1689 if( sh4_x86.in_delay_slot ) {
1692 load_spreg( R_EAX, R_PC );
1693 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1694 store_spreg( R_EAX, R_PR );
1695 load_reg( R_ECX, Rn );
1696 store_spreg( R_ECX, R_NEW_PC );
1697 sh4_x86.in_delay_slot = DELAY_PC;
1698 sh4_x86.branch_taken = TRUE;
1699 sh4_x86.tstate = TSTATE_NONE;
1700 if( UNTRANSLATABLE(pc+2) ) {
1701 exit_block_emu(pc+2);
1704 sh4_translate_instruction(pc+2);
1705 exit_block_newpcset(pc+2);
// Presumably RTE: return from exception — new_pc := SPC, SR := SSR via
// sh4_write_sr (which can change privilege/FPU enable, hence the cached
// priv_checked/fpuen_checked flags are invalidated).
1712 if( sh4_x86.in_delay_slot ) {
1716 load_spreg( R_ECX, R_SPC );
1717 store_spreg( R_ECX, R_NEW_PC );
1718 load_spreg( R_EAX, R_SSR );
1719 call_func1( sh4_write_sr, R_EAX );
1720 sh4_x86.in_delay_slot = DELAY_PC;
1721 sh4_x86.priv_checked = FALSE;
1722 sh4_x86.fpuen_checked = FALSE;
1723 sh4_x86.tstate = TSTATE_NONE;
1724 sh4_x86.branch_taken = TRUE;
1725 if( UNTRANSLATABLE(pc+2) ) {
1726 exit_block_emu(pc+2);
1729 sh4_translate_instruction(pc+2);
1730 exit_block_newpcset(pc+2);
// Presumably RTS: delayed return — new_pc := PR.
1737 if( sh4_x86.in_delay_slot ) {
1740 load_spreg( R_ECX, R_PR );
1741 store_spreg( R_ECX, R_NEW_PC );
1742 sh4_x86.in_delay_slot = DELAY_PC;
1743 sh4_x86.branch_taken = TRUE;
1744 if( UNTRANSLATABLE(pc+2) ) {
1745 exit_block_emu(pc+2);
1748 sh4_translate_instruction(pc+2);
1749 exit_block_newpcset(pc+2);
// TRAPA #imm: commit PC, then raise the trap via sh4_raise_trap and exit.
1755 COUNT_INST(I_TRAPA);
1756 if( sh4_x86.in_delay_slot ) {
1759 load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1760 ADD_r32_sh4r( R_ECX, R_PC );
1761 load_imm32( R_EAX, imm );
1762 call_func1( sh4_raise_trap, R_EAX );
1763 sh4_x86.tstate = TSTATE_NONE;
1764 exit_block_pcset(pc);
1765 sh4_x86.branch_taken = TRUE;
// UNDEF: raise the illegal-instruction exception.
1770 COUNT_INST(I_UNDEF);
1771 if( sh4_x86.in_delay_slot ) {
1774 JMP_exc(EXC_ILLEGAL);
// CLRMAC: zero MACL and MACH.
1780 COUNT_INST(I_CLRMAC);
1781 XOR_r32_r32(R_EAX, R_EAX);
1782 store_spreg( R_EAX, R_MACL );
1783 store_spreg( R_EAX, R_MACH );
1784 sh4_x86.tstate = TSTATE_NONE;
// Fragments of the S/T-bit manipulation templates (presumably CLRS/CLRT/
// SETS/SETT — bodies elided); only the tstate bookkeeping survives here.
1790 sh4_x86.tstate = TSTATE_NONE;
1796 sh4_x86.tstate = TSTATE_C;
1802 sh4_x86.tstate = TSTATE_NONE;
1808 sh4_x86.tstate = TSTATE_C;
1811 /* Floating point moves */
// NOTE(review): elided listing — several openers, else-branches and the
// closing ":}" markers are missing. All FMOV variants test FPSCR.SZ at
// runtime to choose between a single 32-bit transfer and a 64-bit
// double/pair transfer. Code left byte-identical; comments only.
//
// FMOV FRm, FRn (register-to-register, SZ selects 32- vs 64-bit move).
1813 COUNT_INST(I_FMOV1);
1815 load_spreg( R_ECX, R_FPSCR );
1816 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1817 JNE_rel8(doublesize);
1818 load_fr( R_EAX, FRm ); // SZ=0 branch
1819 store_fr( R_EAX, FRn );
1821 JMP_TARGET(doublesize);
1822 load_dr0( R_EAX, FRm );
1823 load_dr1( R_ECX, FRm );
1824 store_dr0( R_EAX, FRn );
1825 store_dr1( R_ECX, FRn );
1827 sh4_x86.tstate = TSTATE_NONE;
// Presumably FMOV FRm, @Rn: FP store, 32- or 64-bit by SZ.
1830 COUNT_INST(I_FMOV2);
1832 load_reg( R_EAX, Rn );
1833 load_spreg( R_EDX, R_FPSCR );
1834 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1835 JNE_rel8(doublesize);
1837 check_walign32( R_EAX );
1838 MMU_TRANSLATE_WRITE( R_EAX );
1839 load_fr( R_ECX, FRm );
1840 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1843 JMP_TARGET(doublesize);
1844 check_walign64( R_EAX );
1845 MMU_TRANSLATE_WRITE( R_EAX );
1846 load_dr0( R_ECX, FRm );
1847 load_dr1( R_EDX, FRm );
1848 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1850 sh4_x86.tstate = TSTATE_NONE;
// Presumably FMOV @Rm, FRn: FP load, 32- or 64-bit by SZ.
1853 COUNT_INST(I_FMOV5);
1855 load_reg( R_EAX, Rm );
1856 load_spreg( R_EDX, R_FPSCR );
1857 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1858 JNE_rel8(doublesize);
1860 check_ralign32( R_EAX );
1861 MMU_TRANSLATE_READ( R_EAX );
1862 MEM_READ_LONG( R_EAX, R_EAX );
1863 store_fr( R_EAX, FRn );
1866 JMP_TARGET(doublesize);
1867 check_ralign64( R_EAX );
1868 MMU_TRANSLATE_READ( R_EAX );
1869 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1870 store_dr0( R_ECX, FRn );
1871 store_dr1( R_EAX, FRn );
1873 sh4_x86.tstate = TSTATE_NONE;
// Presumably FMOV FRm, @-Rn: pre-decrement FP store (Rn decremented by
// 4 or 8 depending on SZ, only after translation succeeds).
1876 COUNT_INST(I_FMOV3);
1878 load_reg( R_EAX, Rn );
1879 load_spreg( R_EDX, R_FPSCR );
1880 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1881 JNE_rel8(doublesize);
1883 check_walign32( R_EAX );
1884 ADD_imm8s_r32( -4, R_EAX );
1885 MMU_TRANSLATE_WRITE( R_EAX );
1886 load_fr( R_ECX, FRm );
1887 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1888 MEM_WRITE_LONG( R_EAX, R_ECX );
1891 JMP_TARGET(doublesize);
1892 check_walign64( R_EAX );
1893 ADD_imm8s_r32(-8,R_EAX);
1894 MMU_TRANSLATE_WRITE( R_EAX );
1895 load_dr0( R_ECX, FRm );
1896 load_dr1( R_EDX, FRm );
1897 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1898 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1901 sh4_x86.tstate = TSTATE_NONE;
// Presumably FMOV @Rm+, FRn: post-increment FP load (+4 or +8 by SZ).
1904 COUNT_INST(I_FMOV6);
1906 load_reg( R_EAX, Rm );
1907 load_spreg( R_EDX, R_FPSCR );
1908 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1909 JNE_rel8(doublesize);
1911 check_ralign32( R_EAX );
1912 MMU_TRANSLATE_READ( R_EAX );
1913 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1914 MEM_READ_LONG( R_EAX, R_EAX );
1915 store_fr( R_EAX, FRn );
1918 JMP_TARGET(doublesize);
1919 check_ralign64( R_EAX );
1920 MMU_TRANSLATE_READ( R_EAX );
1921 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1922 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1923 store_dr0( R_ECX, FRn );
1924 store_dr1( R_EAX, FRn );
1927 sh4_x86.tstate = TSTATE_NONE;
// FMOV FRm, @(R0, Rn): indexed FP store.
1929 FMOV FRm, @(R0, Rn) {:
1930 COUNT_INST(I_FMOV4);
1932 load_reg( R_EAX, Rn );
1933 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1934 load_spreg( R_EDX, R_FPSCR );
1935 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1936 JNE_rel8(doublesize);
1938 check_walign32( R_EAX );
1939 MMU_TRANSLATE_WRITE( R_EAX );
1940 load_fr( R_ECX, FRm );
1941 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1944 JMP_TARGET(doublesize);
1945 check_walign64( R_EAX );
1946 MMU_TRANSLATE_WRITE( R_EAX );
1947 load_dr0( R_ECX, FRm );
1948 load_dr1( R_EDX, FRm );
1949 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1952 sh4_x86.tstate = TSTATE_NONE;
// FMOV @(R0, Rm), FRn: indexed FP load.
1954 FMOV @(R0, Rm), FRn {:
1955 COUNT_INST(I_FMOV7);
1957 load_reg( R_EAX, Rm );
1958 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1959 load_spreg( R_EDX, R_FPSCR );
1960 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1961 JNE_rel8(doublesize);
1963 check_ralign32( R_EAX );
1964 MMU_TRANSLATE_READ( R_EAX );
1965 MEM_READ_LONG( R_EAX, R_EAX );
1966 store_fr( R_EAX, FRn );
1969 JMP_TARGET(doublesize);
1970 check_ralign64( R_EAX );
1971 MMU_TRANSLATE_READ( R_EAX );
1972 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1973 store_dr0( R_ECX, FRn );
1974 store_dr1( R_EAX, FRn );
1977 sh4_x86.tstate = TSTATE_NONE;
// FLDI0: load single-precision +0.0 into FRn; only valid when FPSCR.PR=0.
1979 FLDI0 FRn {: /* IFF PR=0 */
1980 COUNT_INST(I_FLDI0);
1982 load_spreg( R_ECX, R_FPSCR );
1983 TEST_imm32_r32( FPSCR_PR, R_ECX );
1985 XOR_r32_r32( R_EAX, R_EAX );
1986 store_fr( R_EAX, FRn );
1988 sh4_x86.tstate = TSTATE_NONE;
// FLDI1: load 1.0f (0x3F800000) into FRn; only valid when FPSCR.PR=0.
1990 FLDI1 FRn {: /* IFF PR=0 */
1991 COUNT_INST(I_FLDI1);
1993 load_spreg( R_ECX, R_FPSCR );
1994 TEST_imm32_r32( FPSCR_PR, R_ECX );
1996 load_imm32(R_EAX, 0x3F800000);
1997 store_fr( R_EAX, FRn );
1999 sh4_x86.tstate = TSTATE_NONE;
// Presumably FLOAT FPUL, FRn: int->float conversion; precision branch on
// FPSCR.PR (conversion bodies elided from this listing).
2003 COUNT_INST(I_FLOAT);
2005 load_spreg( R_ECX, R_FPSCR );
2007 TEST_imm32_r32( FPSCR_PR, R_ECX );
2008 JNE_rel8(doubleprec);
2011 JMP_TARGET(doubleprec);
2014 sh4_x86.tstate = TSTATE_NONE;
// Presumably FTRC FRm, FPUL: float->int truncation with saturation
// against max_int/min_int, using a temporary x87 control word
// (trunc_fcw) to force round-toward-zero; original FCW is restored.
2019 load_spreg( R_ECX, R_FPSCR );
2020 TEST_imm32_r32( FPSCR_PR, R_ECX );
2021 JNE_rel8(doubleprec);
2024 JMP_TARGET(doubleprec);
2027 load_ptr( R_ECX, &max_int );
2028 FILD_r32ind( R_ECX );
2031 load_ptr( R_ECX, &min_int ); // 5
2032 FILD_r32ind( R_ECX ); // 2
2034 JAE_rel8( sat2 ); // 2
2035 load_ptr( R_EAX, &save_fcw );
2036 FNSTCW_r32ind( R_EAX );
2037 load_ptr( R_EDX, &trunc_fcw );
2038 FLDCW_r32ind( R_EDX );
2039 FISTP_sh4r(R_FPUL); // 3
2040 FLDCW_r32ind( R_EAX );
2045 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
2046 store_spreg( R_ECX, R_FPUL );
2049 sh4_x86.tstate = TSTATE_NONE;
// Presumably FLDS FRm, FPUL: raw 32-bit move FRm -> FPUL.
2054 load_fr( R_EAX, FRm );
2055 store_spreg( R_EAX, R_FPUL );
2056 sh4_x86.tstate = TSTATE_NONE;
// Presumably FSTS FPUL, FRn: raw 32-bit move FPUL -> FRn.
2061 load_spreg( R_EAX, R_FPUL );
2062 store_fr( R_EAX, FRn );
2063 sh4_x86.tstate = TSTATE_NONE;
// FCNVDS: double->single conversion, executed only when PR=1
// (conversion body elided).
2066 COUNT_INST(I_FCNVDS);
2068 load_spreg( R_ECX, R_FPSCR );
2069 TEST_imm32_r32( FPSCR_PR, R_ECX );
2070 JE_rel8(end); // only when PR=1
2074 sh4_x86.tstate = TSTATE_NONE;
// FCNVSD: single->double conversion, PR=1 only (body elided).
2077 COUNT_INST(I_FCNVSD);
2079 load_spreg( R_ECX, R_FPSCR );
2080 TEST_imm32_r32( FPSCR_PR, R_ECX );
2081 JE_rel8(end); // only when PR=1
2085 sh4_x86.tstate = TSTATE_NONE;
2088 /* Floating point instructions */
// The arithmetic templates below (presumably FADD/FDIV/FMUL/FNEG/FSUB
// etc. — openers and x87 bodies elided) share one shape: test FPSCR.PR
// and branch to a double-precision variant.
2092 load_spreg( R_ECX, R_FPSCR );
2093 TEST_imm32_r32( FPSCR_PR, R_ECX );
2094 JNE_rel8(doubleprec);
2099 JMP_TARGET(doubleprec);
2104 sh4_x86.tstate = TSTATE_NONE;
2109 load_spreg( R_ECX, R_FPSCR );
2110 TEST_imm32_r32( FPSCR_PR, R_ECX );
2111 JNE_rel8(doubleprec);
2117 JMP_TARGET(doubleprec);
2123 sh4_x86.tstate = TSTATE_NONE;
2128 load_spreg( R_ECX, R_FPSCR );
2129 TEST_imm32_r32( FPSCR_PR, R_ECX );
2130 JNE_rel8(doubleprec);
2136 JMP_TARGET(doubleprec);
2142 sh4_x86.tstate = TSTATE_NONE;
// FMAC FR0, FRm, FRn: multiply-accumulate (bodies elided).
2144 FMAC FR0, FRm, FRn {:
2147 load_spreg( R_ECX, R_FPSCR );
2148 TEST_imm32_r32( FPSCR_PR, R_ECX );
2149 JNE_rel8(doubleprec);
2157 JMP_TARGET(doubleprec);
2165 sh4_x86.tstate = TSTATE_NONE;
2171 load_spreg( R_ECX, R_FPSCR );
2172 TEST_imm32_r32( FPSCR_PR, R_ECX );
2173 JNE_rel8(doubleprec);
2179 JMP_TARGET(doubleprec);
2185 sh4_x86.tstate = TSTATE_NONE;
2190 load_spreg( R_ECX, R_FPSCR );
2191 TEST_imm32_r32( FPSCR_PR, R_ECX );
2192 JNE_rel8(doubleprec);
2197 JMP_TARGET(doubleprec);
2202 sh4_x86.tstate = TSTATE_NONE;
// FSRRA: 1/sqrt approximation, defined only for PR=0 (body elided).
2205 COUNT_INST(I_FSRRA);
2207 load_spreg( R_ECX, R_FPSCR );
2208 TEST_imm32_r32( FPSCR_PR, R_ECX );
2209 JNE_rel8(end); // PR=0 only
2216 sh4_x86.tstate = TSTATE_NONE;
// FSQRT: square root, single/double by PR (bodies elided).
2219 COUNT_INST(I_FSQRT);
2221 load_spreg( R_ECX, R_FPSCR );
2222 TEST_imm32_r32( FPSCR_PR, R_ECX );
2223 JNE_rel8(doubleprec);
2228 JMP_TARGET(doubleprec);
2233 sh4_x86.tstate = TSTATE_NONE;
2238 load_spreg( R_ECX, R_FPSCR );
2239 TEST_imm32_r32( FPSCR_PR, R_ECX );
2240 JNE_rel8(doubleprec);
2246 JMP_TARGET(doubleprec);
2252 sh4_x86.tstate = TSTATE_NONE;
// FCMP/EQ: FP compare-equal, sets T (comparison body elided).
2256 COUNT_INST(I_FCMPEQ);
2258 load_spreg( R_ECX, R_FPSCR );
2259 TEST_imm32_r32( FPSCR_PR, R_ECX );
2260 JNE_rel8(doubleprec);
2264 JMP_TARGET(doubleprec);
2271 sh4_x86.tstate = TSTATE_NONE;
// FCMP/GT: FP compare-greater, sets T (comparison body elided).
2274 COUNT_INST(I_FCMPGT);
2276 load_spreg( R_ECX, R_FPSCR );
2277 TEST_imm32_r32( FPSCR_PR, R_ECX );
2278 JNE_rel8(doubleprec);
2282 JMP_TARGET(doubleprec);
2289 sh4_x86.tstate = TSTATE_NONE;
// Presumably FSCA FPUL, FRn: sin/cos table lookup via the sh4_fsca
// helper, writing the even-aligned FR pair (FRn&0x0E); PR=0 only.
2295 load_spreg( R_ECX, R_FPSCR );
2296 TEST_imm32_r32( FPSCR_PR, R_ECX );
2297 JNE_rel8(doubleprec );
2298 LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
2299 load_spreg( R_EDX, R_FPUL );
2300 call_func2( sh4_fsca, R_EDX, R_ECX );
2301 JMP_TARGET(doubleprec);
2302 sh4_x86.tstate = TSTATE_NONE;
// Presumably FIPR FVm, FVn: 4-element inner product on the FP stack
// (push/pop sequence partially elided); PR=0 only.
2307 load_spreg( R_ECX, R_FPSCR );
2308 TEST_imm32_r32( FPSCR_PR, R_ECX );
2309 JNE_rel8( doubleprec);
2314 push_fr( (FVm<<2)+1);
2315 push_fr( (FVn<<2)+1);
2318 push_fr( (FVm<<2)+2);
2319 push_fr( (FVn<<2)+2);
2322 push_fr( (FVm<<2)+3);
2323 push_fr( (FVn<<2)+3);
2326 pop_fr( (FVn<<2)+3);
2327 JMP_TARGET(doubleprec);
2328 sh4_x86.tstate = TSTATE_NONE;
// Presumably FTRV XMTRX, FVn: matrix-vector transform via sh4_ftrv
// helper on the FVn vector; PR=0 only.
2333 load_spreg( R_ECX, R_FPSCR );
2334 TEST_imm32_r32( FPSCR_PR, R_ECX );
2335 JNE_rel8( doubleprec );
2336 LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
2337 call_func1( sh4_ftrv, R_EDX ); // 12
2338 JMP_TARGET(doubleprec);
2339 sh4_x86.tstate = TSTATE_NONE;
// FRCHG: toggle FPSCR.FR and swap the FP register banks.
2343 COUNT_INST(I_FRCHG);
2345 load_spreg( R_ECX, R_FPSCR );
2346 XOR_imm32_r32( FPSCR_FR, R_ECX );
2347 store_spreg( R_ECX, R_FPSCR );
2348 call_func0( sh4_switch_fr_banks );
2349 sh4_x86.tstate = TSTATE_NONE;
// FSCHG: toggle FPSCR.SZ (single/double transfer size).
2352 COUNT_INST(I_FSCHG);
2354 load_spreg( R_ECX, R_FPSCR );
2355 XOR_imm32_r32( FPSCR_SZ, R_ECX );
2356 store_spreg( R_ECX, R_FPSCR );
2357 sh4_x86.tstate = TSTATE_NONE;
2360 /* Processor control instructions */
// NOTE(review): elided listing — most template openers and the privilege/
// delay-slot error paths are missing; identifications of the unnamed
// fragments below are inferred from the target registers and should be
// confirmed against the original source. Code left byte-identical.
//
// LDC Rm, SR: writes SR through sh4_write_sr (can change privilege mode
// and FPU enable, so the cached checks are invalidated). Illegal in a
// delay slot.
2362 COUNT_INST(I_LDCSR);
2363 if( sh4_x86.in_delay_slot ) {
2367 load_reg( R_EAX, Rm );
2368 call_func1( sh4_write_sr, R_EAX );
2369 sh4_x86.priv_checked = FALSE;
2370 sh4_x86.fpuen_checked = FALSE;
2371 sh4_x86.tstate = TSTATE_NONE;
// LDC Rm, GBR / VBR / SSR / SGR / SPC / DBR / Rn_BANK: plain register
// to system-register moves (openers elided).
2376 load_reg( R_EAX, Rm );
2377 store_spreg( R_EAX, R_GBR );
2382 load_reg( R_EAX, Rm );
2383 store_spreg( R_EAX, R_VBR );
2384 sh4_x86.tstate = TSTATE_NONE;
2389 load_reg( R_EAX, Rm );
2390 store_spreg( R_EAX, R_SSR );
2391 sh4_x86.tstate = TSTATE_NONE;
2396 load_reg( R_EAX, Rm );
2397 store_spreg( R_EAX, R_SGR );
2398 sh4_x86.tstate = TSTATE_NONE;
2403 load_reg( R_EAX, Rm );
2404 store_spreg( R_EAX, R_SPC );
2405 sh4_x86.tstate = TSTATE_NONE;
2410 load_reg( R_EAX, Rm );
2411 store_spreg( R_EAX, R_DBR );
2412 sh4_x86.tstate = TSTATE_NONE;
2417 load_reg( R_EAX, Rm );
2418 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2419 sh4_x86.tstate = TSTATE_NONE;
// LDC.L @Rm+, <sysreg>: post-increment 32-bit load into a system
// register. Rm is bumped only after MMU translation succeeds, so a TLB
// miss leaves architectural state untouched.
2423 load_reg( R_EAX, Rm );
2424 check_ralign32( R_EAX );
2425 MMU_TRANSLATE_READ( R_EAX );
2426 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2427 MEM_READ_LONG( R_EAX, R_EAX );
2428 store_spreg( R_EAX, R_GBR );
2429 sh4_x86.tstate = TSTATE_NONE;
// LDC.L @Rm+, SR: as above, but through sh4_write_sr; illegal in a
// delay slot.
2432 COUNT_INST(I_LDCSRM);
2433 if( sh4_x86.in_delay_slot ) {
2437 load_reg( R_EAX, Rm );
2438 check_ralign32( R_EAX );
2439 MMU_TRANSLATE_READ( R_EAX );
2440 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2441 MEM_READ_LONG( R_EAX, R_EAX );
2442 call_func1( sh4_write_sr, R_EAX );
2443 sh4_x86.priv_checked = FALSE;
2444 sh4_x86.fpuen_checked = FALSE;
2445 sh4_x86.tstate = TSTATE_NONE;
2451 load_reg( R_EAX, Rm );
2452 check_ralign32( R_EAX );
2453 MMU_TRANSLATE_READ( R_EAX );
2454 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2455 MEM_READ_LONG( R_EAX, R_EAX );
2456 store_spreg( R_EAX, R_VBR );
2457 sh4_x86.tstate = TSTATE_NONE;
2462 load_reg( R_EAX, Rm );
2463 check_ralign32( R_EAX );
2464 MMU_TRANSLATE_READ( R_EAX );
2465 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2466 MEM_READ_LONG( R_EAX, R_EAX );
2467 store_spreg( R_EAX, R_SSR );
2468 sh4_x86.tstate = TSTATE_NONE;
2473 load_reg( R_EAX, Rm );
2474 check_ralign32( R_EAX );
2475 MMU_TRANSLATE_READ( R_EAX );
2476 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2477 MEM_READ_LONG( R_EAX, R_EAX );
2478 store_spreg( R_EAX, R_SGR );
2479 sh4_x86.tstate = TSTATE_NONE;
2484 load_reg( R_EAX, Rm );
2485 check_ralign32( R_EAX );
2486 MMU_TRANSLATE_READ( R_EAX );
2487 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2488 MEM_READ_LONG( R_EAX, R_EAX );
2489 store_spreg( R_EAX, R_SPC );
2490 sh4_x86.tstate = TSTATE_NONE;
2495 load_reg( R_EAX, Rm );
2496 check_ralign32( R_EAX );
2497 MMU_TRANSLATE_READ( R_EAX );
2498 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2499 MEM_READ_LONG( R_EAX, R_EAX );
2500 store_spreg( R_EAX, R_DBR );
2501 sh4_x86.tstate = TSTATE_NONE;
2503 LDC.L @Rm+, Rn_BANK {:
2506 load_reg( R_EAX, Rm );
2507 check_ralign32( R_EAX );
2508 MMU_TRANSLATE_READ( R_EAX );
2509 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2510 MEM_READ_LONG( R_EAX, R_EAX );
2511 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2512 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, FPSCR: via sh4_write_fpscr (FPSCR writes have side effects
// such as bank/size changes, so a helper call is required).
2515 COUNT_INST(I_LDSFPSCR);
2517 load_reg( R_EAX, Rm );
2518 call_func1( sh4_write_fpscr, R_EAX );
2519 sh4_x86.tstate = TSTATE_NONE;
2521 LDS.L @Rm+, FPSCR {:
2522 COUNT_INST(I_LDSFPSCRM);
2524 load_reg( R_EAX, Rm );
2525 check_ralign32( R_EAX );
2526 MMU_TRANSLATE_READ( R_EAX );
2527 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2528 MEM_READ_LONG( R_EAX, R_EAX );
2529 call_func1( sh4_write_fpscr, R_EAX );
2530 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, FPUL / MACH / MACL / PR and their @Rm+ forms (openers elided):
// straightforward moves/loads into the corresponding special registers.
2535 load_reg( R_EAX, Rm );
2536 store_spreg( R_EAX, R_FPUL );
2541 load_reg( R_EAX, Rm );
2542 check_ralign32( R_EAX );
2543 MMU_TRANSLATE_READ( R_EAX );
2544 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2545 MEM_READ_LONG( R_EAX, R_EAX );
2546 store_spreg( R_EAX, R_FPUL );
2547 sh4_x86.tstate = TSTATE_NONE;
2551 load_reg( R_EAX, Rm );
2552 store_spreg( R_EAX, R_MACH );
2556 load_reg( R_EAX, Rm );
2557 check_ralign32( R_EAX );
2558 MMU_TRANSLATE_READ( R_EAX );
2559 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2560 MEM_READ_LONG( R_EAX, R_EAX );
2561 store_spreg( R_EAX, R_MACH );
2562 sh4_x86.tstate = TSTATE_NONE;
2566 load_reg( R_EAX, Rm );
2567 store_spreg( R_EAX, R_MACL );
2571 load_reg( R_EAX, Rm );
2572 check_ralign32( R_EAX );
2573 MMU_TRANSLATE_READ( R_EAX );
2574 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2575 MEM_READ_LONG( R_EAX, R_EAX );
2576 store_spreg( R_EAX, R_MACL );
2577 sh4_x86.tstate = TSTATE_NONE;
2581 load_reg( R_EAX, Rm );
2582 store_spreg( R_EAX, R_PR );
2586 load_reg( R_EAX, Rm );
2587 check_ralign32( R_EAX );
2588 MMU_TRANSLATE_READ( R_EAX );
2589 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2590 MEM_READ_LONG( R_EAX, R_EAX );
2591 store_spreg( R_EAX, R_PR );
2592 sh4_x86.tstate = TSTATE_NONE;
// LDTLB: delegated to the MMU helper.
2595 COUNT_INST(I_LDTLB);
2596 call_func0( MMU_ldtlb );
2597 sh4_x86.tstate = TSTATE_NONE;
// Cache-op templates (bodies elided; only the OCBWB counter survives).
2606 COUNT_INST(I_OCBWB);
// Presumably PREF @Rn (opener elided): addresses in the store-queue
// region 0xE0000000-0xE3FFFFFF trigger sh4_flush_store_queue; other
// addresses are a no-op here.
2610 load_reg( R_EAX, Rn );
2611 MOV_r32_r32( R_EAX, R_ECX );
2612 AND_imm32_r32( 0xFC000000, R_EAX );
2613 CMP_imm32_r32( 0xE0000000, R_EAX );
2615 call_func1( sh4_flush_store_queue, R_ECX );
2616 TEST_r32_r32( R_EAX, R_EAX );
2619 sh4_x86.tstate = TSTATE_NONE;
// SLEEP: delegated to the sh4_sleep helper; ends the block.
2622 COUNT_INST(I_SLEEP);
2624 call_func0( sh4_sleep );
2625 sh4_x86.tstate = TSTATE_NONE;
2626 sh4_x86.in_delay_slot = DELAY_NONE;
// STC SR, Rn: reads SR via sh4_read_sr (T/M/Q/S bits are stored
// separately and must be reassembled).
2630 COUNT_INST(I_STCSR);
2632 call_func0(sh4_read_sr);
2633 store_reg( R_EAX, Rn );
2634 sh4_x86.tstate = TSTATE_NONE;
// STC <sysreg>, Rn family (openers elided): plain system-register reads.
2638 load_spreg( R_EAX, R_GBR );
2639 store_reg( R_EAX, Rn );
2644 load_spreg( R_EAX, R_VBR );
2645 store_reg( R_EAX, Rn );
2646 sh4_x86.tstate = TSTATE_NONE;
2651 load_spreg( R_EAX, R_SSR );
2652 store_reg( R_EAX, Rn );
2653 sh4_x86.tstate = TSTATE_NONE;
2658 load_spreg( R_EAX, R_SPC );
2659 store_reg( R_EAX, Rn );
2660 sh4_x86.tstate = TSTATE_NONE;
2665 load_spreg( R_EAX, R_SGR );
2666 store_reg( R_EAX, Rn );
2667 sh4_x86.tstate = TSTATE_NONE;
2672 load_spreg( R_EAX, R_DBR );
2673 store_reg( R_EAX, Rn );
2674 sh4_x86.tstate = TSTATE_NONE;
2679 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2680 store_reg( R_EAX, Rn );
2681 sh4_x86.tstate = TSTATE_NONE;
// STC.L SR, @-Rn: pre-decrement store of the assembled SR value; the
// write address is preserved across the sh4_read_sr call via push/pop.
2684 COUNT_INST(I_STCSRM);
2686 load_reg( R_EAX, Rn );
2687 check_walign32( R_EAX );
2688 ADD_imm8s_r32( -4, R_EAX );
2689 MMU_TRANSLATE_WRITE( R_EAX );
2690 PUSH_realigned_r32( R_EAX );
2691 call_func0( sh4_read_sr );
2692 POP_realigned_r32( R_ECX );
2693 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2694 MEM_WRITE_LONG( R_ECX, R_EAX );
2695 sh4_x86.tstate = TSTATE_NONE;
// STC.L <sysreg>, @-Rn family (openers elided): pre-decrement stores;
// Rn is decremented only after the write address translates.
2700 load_reg( R_EAX, Rn );
2701 check_walign32( R_EAX );
2702 ADD_imm8s_r32( -4, R_EAX );
2703 MMU_TRANSLATE_WRITE( R_EAX );
2704 load_spreg( R_EDX, R_VBR );
2705 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2706 MEM_WRITE_LONG( R_EAX, R_EDX );
2707 sh4_x86.tstate = TSTATE_NONE;
2712 load_reg( R_EAX, Rn );
2713 check_walign32( R_EAX );
2714 ADD_imm8s_r32( -4, R_EAX );
2715 MMU_TRANSLATE_WRITE( R_EAX );
2716 load_spreg( R_EDX, R_SSR );
2717 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2718 MEM_WRITE_LONG( R_EAX, R_EDX );
2719 sh4_x86.tstate = TSTATE_NONE;
2724 load_reg( R_EAX, Rn );
2725 check_walign32( R_EAX );
2726 ADD_imm8s_r32( -4, R_EAX );
2727 MMU_TRANSLATE_WRITE( R_EAX );
2728 load_spreg( R_EDX, R_SPC );
2729 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2730 MEM_WRITE_LONG( R_EAX, R_EDX );
2731 sh4_x86.tstate = TSTATE_NONE;
2736 load_reg( R_EAX, Rn );
2737 check_walign32( R_EAX );
2738 ADD_imm8s_r32( -4, R_EAX );
2739 MMU_TRANSLATE_WRITE( R_EAX );
2740 load_spreg( R_EDX, R_SGR );
2741 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2742 MEM_WRITE_LONG( R_EAX, R_EDX );
2743 sh4_x86.tstate = TSTATE_NONE;
2748 load_reg( R_EAX, Rn );
2749 check_walign32( R_EAX );
2750 ADD_imm8s_r32( -4, R_EAX );
2751 MMU_TRANSLATE_WRITE( R_EAX );
2752 load_spreg( R_EDX, R_DBR );
2753 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2754 MEM_WRITE_LONG( R_EAX, R_EDX );
2755 sh4_x86.tstate = TSTATE_NONE;
2757 STC.L Rm_BANK, @-Rn {:
2760 load_reg( R_EAX, Rn );
2761 check_walign32( R_EAX );
2762 ADD_imm8s_r32( -4, R_EAX );
2763 MMU_TRANSLATE_WRITE( R_EAX );
2764 load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
2765 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2766 MEM_WRITE_LONG( R_EAX, R_EDX );
2767 sh4_x86.tstate = TSTATE_NONE;
2771 load_reg( R_EAX, Rn );
2772 check_walign32( R_EAX );
2773 ADD_imm8s_r32( -4, R_EAX );
2774 MMU_TRANSLATE_WRITE( R_EAX );
2775 load_spreg( R_EDX, R_GBR );
2776 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2777 MEM_WRITE_LONG( R_EAX, R_EDX );
2778 sh4_x86.tstate = TSTATE_NONE;
// STS FPSCR, Rn and STS.L FPSCR, @-Rn: read-side FPSCR access needs no
// helper (unlike writes).
2781 COUNT_INST(I_STSFPSCR);
2783 load_spreg( R_EAX, R_FPSCR );
2784 store_reg( R_EAX, Rn );
2786 STS.L FPSCR, @-Rn {:
2787 COUNT_INST(I_STSFPSCRM);
2789 load_reg( R_EAX, Rn );
2790 check_walign32( R_EAX );
2791 ADD_imm8s_r32( -4, R_EAX );
2792 MMU_TRANSLATE_WRITE( R_EAX );
2793 load_spreg( R_EDX, R_FPSCR );
2794 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2795 MEM_WRITE_LONG( R_EAX, R_EDX );
2796 sh4_x86.tstate = TSTATE_NONE;
// STS FPUL / MACH / MACL / PR and their @-Rn forms (openers elided).
2801 load_spreg( R_EAX, R_FPUL );
2802 store_reg( R_EAX, Rn );
2807 load_reg( R_EAX, Rn );
2808 check_walign32( R_EAX );
2809 ADD_imm8s_r32( -4, R_EAX );
2810 MMU_TRANSLATE_WRITE( R_EAX );
2811 load_spreg( R_EDX, R_FPUL );
2812 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2813 MEM_WRITE_LONG( R_EAX, R_EDX );
2814 sh4_x86.tstate = TSTATE_NONE;
2818 load_spreg( R_EAX, R_MACH );
2819 store_reg( R_EAX, Rn );
2823 load_reg( R_EAX, Rn );
2824 check_walign32( R_EAX );
2825 ADD_imm8s_r32( -4, R_EAX );
2826 MMU_TRANSLATE_WRITE( R_EAX );
2827 load_spreg( R_EDX, R_MACH );
2828 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2829 MEM_WRITE_LONG( R_EAX, R_EDX );
2830 sh4_x86.tstate = TSTATE_NONE;
2834 load_spreg( R_EAX, R_MACL );
2835 store_reg( R_EAX, Rn );
2839 load_reg( R_EAX, Rn );
2840 check_walign32( R_EAX );
2841 ADD_imm8s_r32( -4, R_EAX );
2842 MMU_TRANSLATE_WRITE( R_EAX );
2843 load_spreg( R_EDX, R_MACL );
2844 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2845 MEM_WRITE_LONG( R_EAX, R_EDX );
2846 sh4_x86.tstate = TSTATE_NONE;
2850 load_spreg( R_EAX, R_PR );
2851 store_reg( R_EAX, Rn );
2855 load_reg( R_EAX, Rn );
2856 check_walign32( R_EAX );
2857 ADD_imm8s_r32( -4, R_EAX );
2858 MMU_TRANSLATE_WRITE( R_EAX );
2859 load_spreg( R_EDX, R_PR );
2860 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2861 MEM_WRITE_LONG( R_EAX, R_EDX );
2862 sh4_x86.tstate = TSTATE_NONE;
// NOP (template continues past this listing).
2867 /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
2870 sh4_x86.in_delay_slot = DELAY_NONE;
.