4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
8 * Copyright (c) 2007 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
28 #include "sh4/xltcache.h"
29 #include "sh4/sh4core.h"
30 #include "sh4/sh4trans.h"
31 #include "sh4/sh4stat.h"
32 #include "sh4/sh4mmio.h"
33 #include "sh4/x86op.h"
/* Initial allocation, in bytes, for the block-wide backpatch buffer. */
36 #define DEFAULT_BACKPATCH_SIZE 4096
/* One deferred fixup: a location in the emitted x86 code that is patched
 * at end-of-block with exception-raising glue. Populated by
 * sh4_x86_add_backpatch() below. */
38 struct backpatch_record {
39 uint32_t fixup_offset; /* byte offset of the fixup within the emitted code block */
40 uint32_t fixup_icount; /* SH4 instruction index from block start ((pc - block_start_pc) >> 1) */
49  * Struct to manage internal translation state. This state is not saved -
50  * it is only valid between calls to sh4_translate_begin_block() and
51  * sh4_translate_end_block()
53 struct sh4_x86_state {
55 gboolean priv_checked; /* true if we've already checked the cpu mode. */
56 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
57 gboolean branch_taken; /* true if we branched unconditionally */
58 uint32_t block_start_pc; /* SH4 PC of the first instruction in this block; base for pc deltas */
59 uint32_t stack_posn; /* Trace stack height for alignment purposes */
63 gboolean tlb_on; /* True if tlb translation is active */
65 /* Allocated memory for the (block-wide) back-patch list */
66 struct backpatch_record *backpatch_list;
67 uint32_t backpatch_posn; /* next free slot in backpatch_list */
68 uint32_t backpatch_size; /* capacity of backpatch_list, in records */
/* tstate tracks which SH4 condition the host EFLAGS currently reflect, so
 * JT/JF can emit a bare Jcc (opcode 0x70+cc) without re-testing sh4r.t.
 * TSTATE_NONE means the flags are unknown/clobbered. */
71 #define TSTATE_NONE -1
/* Per-instruction statistics hook: when ENABLE_SH4STATS is defined, emit a
 * call to sh4_stats_add(id); otherwise compile to nothing. Note the enabled
 * form clobbers EAX and invalidates the cached flag state (tstate). */
81 #ifdef ENABLE_SH4STATS
82 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
84 #define COUNT_INST(id)
87 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
/* If the flags don't already reflect T (tstate == TSTATE_NONE), emit a CMP
 * against sh4r.t first; then emit Jcc rel8 (0x70+cc) with a to-be-marked
 * 8-bit displacement (the OP(-1) placeholder).
 * NOTE(review): multi-statement macro starting with a bare if — not wrapped
 * in do{}while(0), so it is unsafe inside an if/else without braces. */
88 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
89 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
90 OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
92 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
/* Same as JT_rel8 but inverts the condition by toggling the low bit of the
 * x86 condition code (cc^1 is the complementary condition). Same
 * multi-statement-macro caveat applies. */
93 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
94 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
95 OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
/* Single translator instance - translation is not re-entrant. */
97 static struct sh4_x86_state sh4_x86;
/* Memory-resident constants used as operands by emitted FP conversion code. */
99 static uint32_t max_int = 0x7FFFFFFF;
100 static uint32_t min_int = 0x80000000;
101 static uint32_t save_fcw; /* save value for fpu control word */
102 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
/* One-time translator initialisation: allocate the block-wide backpatch
 * list (DEFAULT_BACKPATCH_SIZE bytes, capacity recorded as a record count).
 * NOTE(review): the malloc result is not checked; a failure here leads to a
 * NULL deref on first use in sh4_x86_add_backpatch(). */
104 void sh4_translate_init(void)
106 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
107 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
/* Record a pending fixup at fixup_addr (a location inside the code being
 * emitted for the current block) to be resolved by the end-of-block
 * epilogue: fixup_offset is the byte offset from the block's code start,
 * fixup_icount the SH4 instruction index, exc_code the exception to raise.
 * The list grows by doubling when full. */
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
113 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
114 sh4_x86.backpatch_size <<= 1;
/* NOTE(review): classic realloc anti-pattern - the original pointer is
 * overwritten before the result is checked, so on failure the old buffer
 * leaks; and assert() disappears under NDEBUG, turning OOM into a NULL
 * deref. Prefer: tmp = realloc(...); if (!tmp) handle; else list = tmp. */
115 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
116 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
117 assert( sh4_x86.backpatch_list != NULL );
/* Delay-slot handling (body not visible here - presumably adjusts
 * exc_code/fixup_pc for the slot-illegal case; confirm in full source). */
119 if( sh4_x86.in_delay_slot ) {
122 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
123 ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
124 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
125 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
126 sh4_x86.backpatch_posn++;
130  * Emit an instruction to load an SH4 reg into a real register
132 static inline void load_reg( int x86reg, int sh4reg )
134 /* mov [bp+n], reg */
/* 0x45 + (reg<<3) is the ModR/M byte selecting [ebp+disp8] with the reg
 * field set to the destination register; the disp8 is the offset of
 * sh4r.r[sh4reg]. (The MOV opcode byte is presumably emitted on a line not
 * visible in this view — confirm against full source.) */
136 OP(0x45 + (x86reg<<3));
137 OP(REG_OFFSET(r[sh4reg]));
/* Load the low 16 bits of an SH4 general register into an x86 register with
 * sign extension (per the name; the opcode-emitting lines are not visible in
 * this view — likely MOVSX, confirm against full source). */
140 static inline void load_reg16s( int x86reg, int sh4reg )
144 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
/* Load the low 16 bits of an SH4 general register into an x86 register with
 * zero extension (per the name; the opcode-emitting lines are not visible in
 * this view — likely MOVZX, confirm against full source). */
147 static inline void load_reg16u( int x86reg, int sh4reg )
151 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
/* Load/store an SH4 special register, identified by its byte offset within
 * the sh4r structure (e.g. R_SR, R_GBR), to/from an x86 register. */
155 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
156 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
158  * Emit an instruction to load an immediate value into a register
160 static inline void load_imm32( int x86reg, uint32_t value ) {
161 /* mov #value, reg */
167  * Load an immediate 64-bit quantity (note: x86-64 only)
169 static inline void load_imm64( int x86reg, uint64_t value ) {
170 /* mov #value, reg */
177  * Emit an instruction to store an SH4 reg (RN)
/* NOTE(review): `void static inline` is legal but unconventional ordering;
 * the matching load_reg above uses `static inline void`. */
179 void static inline store_reg( int x86reg, int sh4reg ) {
180 /* mov reg, [bp+n] */
/* ModR/M [ebp+disp8] with reg field = source register, then the disp8
 * offset of sh4r.r[sh4reg]; see load_reg for the encoding scheme. */
182 OP(0x45 + (x86reg<<3));
183 OP(REG_OFFSET(r[sh4reg]));
187  * Load an FR register (single-precision floating point) into an integer x86
188  * register (eg for register-to-register moves)
/* 0x8B = MOV r32, r/m32. The (frm)^1 index flip swaps adjacent singles
 * within a pair — presumably compensating for host little-endian word order
 * within SH4 double-precision pairs (TODO confirm against sh4core layout).
 * Bank 0 = FR, bank 1 = XF. */
190 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
191 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
194  * Load the low half of a DR register (DR or XD) into an integer x86 register
/* frm&1 selects the bank (odd DR numbers address the XD bank); frm|0x01 /
 * frm&0x0E pick the low / high word of the even-aligned pair. */
196 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
197 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
200  * Store an FR register (single-precision floating point) from an integer x86+
201  * register (eg for register-to-register moves)
/* 0x89 = MOV r/m32, r32; same indexing scheme as the loads above. */
203 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
204 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
206 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
207 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
/* x87 FPU-stack helpers: push_* emit FLD (load onto ST0) and pop_* emit
 * FSTP (store and pop) against fields of sh4r. FLDF/FSTPF operate on
 * singles, FLDD/FSTPD on doubles. The (frm)^1 / (frm)&0x0E indexing follows
 * the same banked word-order scheme as load_fr/load_dr above. */
210 #define push_fpul()  FLDF_sh4r(R_FPUL)
211 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
212 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
213 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
214 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
215 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
216 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
217 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
218 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
219 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
223 /* Exception checks - Note that all exception checks will clobber EAX */
/* Privilege check: test SR.MD once per block (priv_checked caches the fact)
 * and raise EXC_ILLEGAL / EXC_SLOT_ILLEGAL if in user mode. The else branch
 * (non-delay-slot case) is on lines not visible in this view. Invalidates
 * the cached flag state. */
225 #define check_priv( ) \
226 if( !sh4_x86.priv_checked ) { \
227 sh4_x86.priv_checked = TRUE;\
228 load_spreg( R_EAX, R_SR );\
229 AND_imm32_r32( SR_MD, R_EAX );\
230 if( sh4_x86.in_delay_slot ) {\
231 JE_exc( EXC_SLOT_ILLEGAL );\
233 JE_exc( EXC_ILLEGAL );\
235 sh4_x86.tstate = TSTATE_NONE; \
/* FPU-enabled check: test SR.FD once per block (fpuen_checked caches it)
 * and raise the FPU-disabled exception if set. Structure mirrors
 * check_priv, including the not-visible else branch. */
238 #define check_fpuen( ) \
239 if( !sh4_x86.fpuen_checked ) {\
240 sh4_x86.fpuen_checked = TRUE;\
241 load_spreg( R_EAX, R_SR );\
242 AND_imm32_r32( SR_FD, R_EAX );\
243 if( sh4_x86.in_delay_slot ) {\
244 JNE_exc(EXC_SLOT_FPU_DISABLED);\
246 JNE_exc(EXC_FPU_DISABLED);\
248 sh4_x86.tstate = TSTATE_NONE; \
/* Alignment checks: test the low address bits and raise a data address
 * error on misalignment (1/3/7 mask for 16/32/64-bit accesses).
 * NOTE(review): the walign variants end with a semicolon inside the macro
 * while the ralign ones don't — harmless at statement level but
 * inconsistent. */
251 #define check_ralign16( x86reg ) \
252 TEST_imm32_r32( 0x00000001, x86reg ); \
253 JNE_exc(EXC_DATA_ADDR_READ)
255 #define check_walign16( x86reg ) \
256 TEST_imm32_r32( 0x00000001, x86reg ); \
257 JNE_exc(EXC_DATA_ADDR_WRITE);
259 #define check_ralign32( x86reg ) \
260 TEST_imm32_r32( 0x00000003, x86reg ); \
261 JNE_exc(EXC_DATA_ADDR_READ)
263 #define check_walign32( x86reg ) \
264 TEST_imm32_r32( 0x00000003, x86reg ); \
265 JNE_exc(EXC_DATA_ADDR_WRITE);
267 #define check_ralign64( x86reg ) \
268 TEST_imm32_r32( 0x00000007, x86reg ); \
269 JNE_exc(EXC_DATA_ADDR_READ)
271 #define check_walign64( x86reg ) \
272 TEST_imm32_r32( 0x00000007, x86reg ); \
273 JNE_exc(EXC_DATA_ADDR_WRITE);
/* Memory access helpers: emit calls into the C memory subsystem
 * (sh4_read_*/sh4_write_*). The C functions return in EAX; MEM_RESULT moves
 * the value to the requested register when it isn't EAX.
 * NOTE(review): MEM_RESULT expands to a bare if-statement — unsafe directly
 * before an else; all visible call sites use it at statement level. */
276 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
277 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
278 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
279 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
280 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
281 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
282 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
285  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
286  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
/* The mmu_vma_to_phys_* call returns the physical address in EAX, or
 * MMU_VMA_ERROR on failure; on error, branch to the exception epilogue
 * (JE_exc). exc_code -1 means "use the exception the MMU already raised". */
288 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
/* Variant with an explicit exception code, used for the second translation
 * of a dual-access instruction (e.g. MAC.L/MAC.W use -5). */
290 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
292  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
293  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
295 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
/* Worst-case emitted-code sizes for the helpers above, used when budgeting
 * block space; the +12 covers the CMP/JE/MOV glue around the call
 * (presumably — verify against the emitters if sizes change). */
297 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
298 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
299 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
/* Emit an unconditional slot-illegal exception and abort translation of the
 * current instruction, ending the basic block (return 1). */
301 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
303 /****** Import appropriate calling conventions ******/
304 #if SIZEOF_VOID_P == 8
305 #include "sh4/ia64abi.h"
306 #else /* 32-bit system */
308 #include "sh4/ia32mac.h"
310 #include "sh4/ia32abi.h"
/* Worst-case size in bytes of the end-of-block epilogue plus backpatch
 * stubs: the first 3 pending fixups cost 12 bytes each, any further ones
 * 15 bytes each (48 = 4*12 covers the first group plus shared glue). */
314 uint32_t sh4_translate_end_block_size()
316 if( sh4_x86.backpatch_posn <= 3 ) {
317 return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
319 return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
325  * Embed a breakpoint into the generated code
/* Emits a call to sh4_translate_breakpoint_hit(pc); clobbers EAX and
 * invalidates the cached flag state. */
327 void sh4_translate_emit_breakpoint( sh4vma_t pc )
329 load_imm32( R_EAX, pc );
330 call_func1( sh4_translate_breakpoint_hit, R_EAX );
331 sh4_x86.tstate = TSTATE_NONE;
/* True if pc cannot be translated (not present in the icache view).
 * NOTE(review): expansion is unparenthesized; (!IS_IN_ICACHE(pc)) would be
 * safer against surprising operator-precedence interactions at use sites. */
335 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
338  * Embed a call to sh4_execute_instruction for situations that we
339  * can't translate (just page-crossing delay slots at the moment).
340  * Caller is responsible for setting new_pc before calling this function.
344  * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
345  * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
346  * Call sh4_execute_instruction
347  * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
/* The trailing "// N" comments are emitted-byte counts for size budgeting. */
349 void exit_block_emu( sh4vma_t endpc )
351 load_imm32( R_ECX, endpc - sh4_x86.block_start_pc ); // 5
352 ADD_r32_sh4r( R_ECX, R_PC );
/* Charge cycles for all instructions up to and including endpc. */
354 load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
355 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
356 load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
357 store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
359 call_func0( sh4_execute_instruction );
/* Look up the next translated block from the (possibly updated) PC, going
 * through the VMA lookup when the TLB is active. */
360 load_spreg( R_EAX, R_PC );
361 if( sh4_x86.tlb_on ) {
362 call_func1(xlat_get_code_by_vma,R_EAX);
364 call_func1(xlat_get_code,R_EAX);
/* Mask the low 2 bits of the returned code pointer (flag bits). */
366 AND_imm8s_rptr( 0xFC, R_EAX );
372 * Translate a single instruction. Delayed branches are handled specially
373 * by translating both branch and delayed instruction as a single unit (as
375 * The instruction MUST be in the icache (assert check)
377 * @return true if the instruction marks the end of a basic block
380 uint32_t sh4_translate_instruction( sh4vma_t pc )
383 /* Read instruction from icache */
384 assert( IS_IN_ICACHE(pc) );
385 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
387 /* PC is not in the current icache - this usually means we're running
388 * with MMU on, and we've gone past the end of the page. And since
389 * sh4_translate_block is pretty careful about this, it means we're
390 * almost certainly in a delay slot.
392 * Since we can't assume the page is present (and we can't fault it in
393 * at this point, inline a call to sh4_execute_instruction (with a few
394 * small repairs to cope with the different environment).
397 if( !sh4_x86.in_delay_slot ) {
398 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
404 load_reg( R_EAX, Rm );
405 load_reg( R_ECX, Rn );
406 ADD_r32_r32( R_EAX, R_ECX );
407 store_reg( R_ECX, Rn );
408 sh4_x86.tstate = TSTATE_NONE;
412 load_reg( R_EAX, Rn );
413 ADD_imm8s_r32( imm, R_EAX );
414 store_reg( R_EAX, Rn );
415 sh4_x86.tstate = TSTATE_NONE;
419 if( sh4_x86.tstate != TSTATE_C ) {
422 load_reg( R_EAX, Rm );
423 load_reg( R_ECX, Rn );
424 ADC_r32_r32( R_EAX, R_ECX );
425 store_reg( R_ECX, Rn );
427 sh4_x86.tstate = TSTATE_C;
431 load_reg( R_EAX, Rm );
432 load_reg( R_ECX, Rn );
433 ADD_r32_r32( R_EAX, R_ECX );
434 store_reg( R_ECX, Rn );
436 sh4_x86.tstate = TSTATE_O;
440 load_reg( R_EAX, Rm );
441 load_reg( R_ECX, Rn );
442 AND_r32_r32( R_EAX, R_ECX );
443 store_reg( R_ECX, Rn );
444 sh4_x86.tstate = TSTATE_NONE;
448 load_reg( R_EAX, 0 );
449 AND_imm32_r32(imm, R_EAX);
450 store_reg( R_EAX, 0 );
451 sh4_x86.tstate = TSTATE_NONE;
453 AND.B #imm, @(R0, GBR) {:
455 load_reg( R_EAX, 0 );
456 load_spreg( R_ECX, R_GBR );
457 ADD_r32_r32( R_ECX, R_EAX );
458 MMU_TRANSLATE_WRITE( R_EAX );
459 PUSH_realigned_r32(R_EAX);
460 MEM_READ_BYTE( R_EAX, R_EAX );
461 POP_realigned_r32(R_ECX);
462 AND_imm32_r32(imm, R_EAX );
463 MEM_WRITE_BYTE( R_ECX, R_EAX );
464 sh4_x86.tstate = TSTATE_NONE;
468 load_reg( R_EAX, Rm );
469 load_reg( R_ECX, Rn );
470 CMP_r32_r32( R_EAX, R_ECX );
472 sh4_x86.tstate = TSTATE_E;
475 COUNT_INST(I_CMPEQI);
476 load_reg( R_EAX, 0 );
477 CMP_imm8s_r32(imm, R_EAX);
479 sh4_x86.tstate = TSTATE_E;
483 load_reg( R_EAX, Rm );
484 load_reg( R_ECX, Rn );
485 CMP_r32_r32( R_EAX, R_ECX );
487 sh4_x86.tstate = TSTATE_GE;
491 load_reg( R_EAX, Rm );
492 load_reg( R_ECX, Rn );
493 CMP_r32_r32( R_EAX, R_ECX );
495 sh4_x86.tstate = TSTATE_G;
499 load_reg( R_EAX, Rm );
500 load_reg( R_ECX, Rn );
501 CMP_r32_r32( R_EAX, R_ECX );
503 sh4_x86.tstate = TSTATE_A;
507 load_reg( R_EAX, Rm );
508 load_reg( R_ECX, Rn );
509 CMP_r32_r32( R_EAX, R_ECX );
511 sh4_x86.tstate = TSTATE_AE;
515 load_reg( R_EAX, Rn );
516 CMP_imm8s_r32( 0, R_EAX );
518 sh4_x86.tstate = TSTATE_G;
522 load_reg( R_EAX, Rn );
523 CMP_imm8s_r32( 0, R_EAX );
525 sh4_x86.tstate = TSTATE_GE;
528 COUNT_INST(I_CMPSTR);
529 load_reg( R_EAX, Rm );
530 load_reg( R_ECX, Rn );
531 XOR_r32_r32( R_ECX, R_EAX );
532 TEST_r8_r8( R_AL, R_AL );
534 TEST_r8_r8( R_AH, R_AH );
536 SHR_imm8_r32( 16, R_EAX );
537 TEST_r8_r8( R_AL, R_AL );
539 TEST_r8_r8( R_AH, R_AH );
544 sh4_x86.tstate = TSTATE_E;
548 load_reg( R_EAX, Rm );
549 load_reg( R_ECX, Rn );
550 SHR_imm8_r32( 31, R_EAX );
551 SHR_imm8_r32( 31, R_ECX );
552 store_spreg( R_EAX, R_M );
553 store_spreg( R_ECX, R_Q );
554 CMP_r32_r32( R_EAX, R_ECX );
556 sh4_x86.tstate = TSTATE_NE;
560 XOR_r32_r32( R_EAX, R_EAX );
561 store_spreg( R_EAX, R_Q );
562 store_spreg( R_EAX, R_M );
563 store_spreg( R_EAX, R_T );
564 sh4_x86.tstate = TSTATE_C; // works for DIV1
568 load_spreg( R_ECX, R_M );
569 load_reg( R_EAX, Rn );
570 if( sh4_x86.tstate != TSTATE_C ) {
574 SETC_r8( R_DL ); // Q'
575 CMP_sh4r_r32( R_Q, R_ECX );
577 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
580 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
582 store_reg( R_EAX, Rn ); // Done with Rn now
583 SETC_r8(R_AL); // tmp1
584 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
585 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
586 store_spreg( R_ECX, R_Q );
587 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
588 MOVZX_r8_r32( R_AL, R_EAX );
589 store_spreg( R_EAX, R_T );
590 sh4_x86.tstate = TSTATE_NONE;
594 load_reg( R_EAX, Rm );
595 load_reg( R_ECX, Rn );
597 store_spreg( R_EDX, R_MACH );
598 store_spreg( R_EAX, R_MACL );
599 sh4_x86.tstate = TSTATE_NONE;
603 load_reg( R_EAX, Rm );
604 load_reg( R_ECX, Rn );
606 store_spreg( R_EDX, R_MACH );
607 store_spreg( R_EAX, R_MACL );
608 sh4_x86.tstate = TSTATE_NONE;
612 load_reg( R_EAX, Rn );
613 ADD_imm8s_r32( -1, R_EAX );
614 store_reg( R_EAX, Rn );
616 sh4_x86.tstate = TSTATE_E;
620 load_reg( R_EAX, Rm );
621 MOVSX_r8_r32( R_EAX, R_EAX );
622 store_reg( R_EAX, Rn );
626 load_reg( R_EAX, Rm );
627 MOVSX_r16_r32( R_EAX, R_EAX );
628 store_reg( R_EAX, Rn );
632 load_reg( R_EAX, Rm );
633 MOVZX_r8_r32( R_EAX, R_EAX );
634 store_reg( R_EAX, Rn );
638 load_reg( R_EAX, Rm );
639 MOVZX_r16_r32( R_EAX, R_EAX );
640 store_reg( R_EAX, Rn );
645 load_reg( R_EAX, Rm );
646 check_ralign32( R_EAX );
647 MMU_TRANSLATE_READ( R_EAX );
648 PUSH_realigned_r32( R_EAX );
649 load_reg( R_EAX, Rn );
650 ADD_imm8s_r32( 4, R_EAX );
651 MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
652 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
653 // Note translate twice in case of page boundaries. Maybe worth
654 // adding a page-boundary check to skip the second translation
656 load_reg( R_EAX, Rm );
657 check_ralign32( R_EAX );
658 MMU_TRANSLATE_READ( R_EAX );
659 load_reg( R_ECX, Rn );
660 check_ralign32( R_ECX );
661 PUSH_realigned_r32( R_EAX );
662 MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
663 MOV_r32_r32( R_ECX, R_EAX );
664 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
665 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
667 MEM_READ_LONG( R_EAX, R_EAX );
670 MEM_READ_LONG( R_ECX, R_EAX );
671 POP_realigned_r32( R_ECX );
674 ADD_r32_sh4r( R_EAX, R_MACL );
675 ADC_r32_sh4r( R_EDX, R_MACH );
677 load_spreg( R_ECX, R_S );
678 TEST_r32_r32(R_ECX, R_ECX);
680 call_func0( signsat48 );
682 sh4_x86.tstate = TSTATE_NONE;
687 load_reg( R_EAX, Rm );
688 check_ralign16( R_EAX );
689 MMU_TRANSLATE_READ( R_EAX );
690 PUSH_realigned_r32( R_EAX );
691 load_reg( R_EAX, Rn );
692 ADD_imm8s_r32( 2, R_EAX );
693 MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
694 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
695 // Note translate twice in case of page boundaries. Maybe worth
696 // adding a page-boundary check to skip the second translation
698 load_reg( R_EAX, Rm );
699 check_ralign16( R_EAX );
700 MMU_TRANSLATE_READ( R_EAX );
701 load_reg( R_ECX, Rn );
702 check_ralign16( R_ECX );
703 PUSH_realigned_r32( R_EAX );
704 MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
705 MOV_r32_r32( R_ECX, R_EAX );
706 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
707 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
709 MEM_READ_WORD( R_EAX, R_EAX );
712 MEM_READ_WORD( R_ECX, R_EAX );
713 POP_realigned_r32( R_ECX );
716 load_spreg( R_ECX, R_S );
717 TEST_r32_r32( R_ECX, R_ECX );
720 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
721 JNO_rel8( end ); // 2
722 load_imm32( R_EDX, 1 ); // 5
723 store_spreg( R_EDX, R_MACH ); // 6
724 JS_rel8( positive ); // 2
725 load_imm32( R_EAX, 0x80000000 );// 5
726 store_spreg( R_EAX, R_MACL ); // 6
729 JMP_TARGET(positive);
730 load_imm32( R_EAX, 0x7FFFFFFF );// 5
731 store_spreg( R_EAX, R_MACL ); // 6
735 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
736 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
740 sh4_x86.tstate = TSTATE_NONE;
744 load_spreg( R_EAX, R_T );
745 store_reg( R_EAX, Rn );
749 load_reg( R_EAX, Rm );
750 load_reg( R_ECX, Rn );
752 store_spreg( R_EAX, R_MACL );
753 sh4_x86.tstate = TSTATE_NONE;
757 load_reg16s( R_EAX, Rm );
758 load_reg16s( R_ECX, Rn );
760 store_spreg( R_EAX, R_MACL );
761 sh4_x86.tstate = TSTATE_NONE;
765 load_reg16u( R_EAX, Rm );
766 load_reg16u( R_ECX, Rn );
768 store_spreg( R_EAX, R_MACL );
769 sh4_x86.tstate = TSTATE_NONE;
773 load_reg( R_EAX, Rm );
775 store_reg( R_EAX, Rn );
776 sh4_x86.tstate = TSTATE_NONE;
780 load_reg( R_EAX, Rm );
781 XOR_r32_r32( R_ECX, R_ECX );
783 SBB_r32_r32( R_EAX, R_ECX );
784 store_reg( R_ECX, Rn );
786 sh4_x86.tstate = TSTATE_C;
790 load_reg( R_EAX, Rm );
792 store_reg( R_EAX, Rn );
793 sh4_x86.tstate = TSTATE_NONE;
797 load_reg( R_EAX, Rm );
798 load_reg( R_ECX, Rn );
799 OR_r32_r32( R_EAX, R_ECX );
800 store_reg( R_ECX, Rn );
801 sh4_x86.tstate = TSTATE_NONE;
805 load_reg( R_EAX, 0 );
806 OR_imm32_r32(imm, R_EAX);
807 store_reg( R_EAX, 0 );
808 sh4_x86.tstate = TSTATE_NONE;
810 OR.B #imm, @(R0, GBR) {:
812 load_reg( R_EAX, 0 );
813 load_spreg( R_ECX, R_GBR );
814 ADD_r32_r32( R_ECX, R_EAX );
815 MMU_TRANSLATE_WRITE( R_EAX );
816 PUSH_realigned_r32(R_EAX);
817 MEM_READ_BYTE( R_EAX, R_EAX );
818 POP_realigned_r32(R_ECX);
819 OR_imm32_r32(imm, R_EAX );
820 MEM_WRITE_BYTE( R_ECX, R_EAX );
821 sh4_x86.tstate = TSTATE_NONE;
825 load_reg( R_EAX, Rn );
826 if( sh4_x86.tstate != TSTATE_C ) {
830 store_reg( R_EAX, Rn );
832 sh4_x86.tstate = TSTATE_C;
836 load_reg( R_EAX, Rn );
837 if( sh4_x86.tstate != TSTATE_C ) {
841 store_reg( R_EAX, Rn );
843 sh4_x86.tstate = TSTATE_C;
847 load_reg( R_EAX, Rn );
849 store_reg( R_EAX, Rn );
851 sh4_x86.tstate = TSTATE_C;
855 load_reg( R_EAX, Rn );
857 store_reg( R_EAX, Rn );
859 sh4_x86.tstate = TSTATE_C;
863 /* Annoyingly enough, not directly convertible */
864 load_reg( R_EAX, Rn );
865 load_reg( R_ECX, Rm );
866 CMP_imm32_r32( 0, R_ECX );
869 NEG_r32( R_ECX ); // 2
870 AND_imm8_r8( 0x1F, R_CL ); // 3
871 JE_rel8(emptysar); // 2
872 SAR_r32_CL( R_EAX ); // 2
875 JMP_TARGET(emptysar);
876 SAR_imm8_r32(31, R_EAX ); // 3
880 AND_imm8_r8( 0x1F, R_CL ); // 3
881 SHL_r32_CL( R_EAX ); // 2
884 store_reg( R_EAX, Rn );
885 sh4_x86.tstate = TSTATE_NONE;
889 load_reg( R_EAX, Rn );
890 load_reg( R_ECX, Rm );
891 CMP_imm32_r32( 0, R_ECX );
894 NEG_r32( R_ECX ); // 2
895 AND_imm8_r8( 0x1F, R_CL ); // 3
897 SHR_r32_CL( R_EAX ); // 2
900 JMP_TARGET(emptyshr);
901 XOR_r32_r32( R_EAX, R_EAX );
905 AND_imm8_r8( 0x1F, R_CL ); // 3
906 SHL_r32_CL( R_EAX ); // 2
909 store_reg( R_EAX, Rn );
910 sh4_x86.tstate = TSTATE_NONE;
914 load_reg( R_EAX, Rn );
917 store_reg( R_EAX, Rn );
918 sh4_x86.tstate = TSTATE_C;
922 load_reg( R_EAX, Rn );
925 store_reg( R_EAX, Rn );
926 sh4_x86.tstate = TSTATE_C;
930 load_reg( R_EAX, Rn );
933 store_reg( R_EAX, Rn );
934 sh4_x86.tstate = TSTATE_C;
938 load_reg( R_EAX, Rn );
939 SHL_imm8_r32( 2, R_EAX );
940 store_reg( R_EAX, Rn );
941 sh4_x86.tstate = TSTATE_NONE;
945 load_reg( R_EAX, Rn );
946 SHL_imm8_r32( 8, R_EAX );
947 store_reg( R_EAX, Rn );
948 sh4_x86.tstate = TSTATE_NONE;
952 load_reg( R_EAX, Rn );
953 SHL_imm8_r32( 16, R_EAX );
954 store_reg( R_EAX, Rn );
955 sh4_x86.tstate = TSTATE_NONE;
959 load_reg( R_EAX, Rn );
962 store_reg( R_EAX, Rn );
963 sh4_x86.tstate = TSTATE_C;
967 load_reg( R_EAX, Rn );
968 SHR_imm8_r32( 2, R_EAX );
969 store_reg( R_EAX, Rn );
970 sh4_x86.tstate = TSTATE_NONE;
974 load_reg( R_EAX, Rn );
975 SHR_imm8_r32( 8, R_EAX );
976 store_reg( R_EAX, Rn );
977 sh4_x86.tstate = TSTATE_NONE;
981 load_reg( R_EAX, Rn );
982 SHR_imm8_r32( 16, R_EAX );
983 store_reg( R_EAX, Rn );
984 sh4_x86.tstate = TSTATE_NONE;
988 load_reg( R_EAX, Rm );
989 load_reg( R_ECX, Rn );
990 SUB_r32_r32( R_EAX, R_ECX );
991 store_reg( R_ECX, Rn );
992 sh4_x86.tstate = TSTATE_NONE;
996 load_reg( R_EAX, Rm );
997 load_reg( R_ECX, Rn );
998 if( sh4_x86.tstate != TSTATE_C ) {
1001 SBB_r32_r32( R_EAX, R_ECX );
1002 store_reg( R_ECX, Rn );
1004 sh4_x86.tstate = TSTATE_C;
1008 load_reg( R_EAX, Rm );
1009 load_reg( R_ECX, Rn );
1010 SUB_r32_r32( R_EAX, R_ECX );
1011 store_reg( R_ECX, Rn );
1013 sh4_x86.tstate = TSTATE_O;
1016 COUNT_INST(I_SWAPB);
1017 load_reg( R_EAX, Rm );
1018 XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
1019 store_reg( R_EAX, Rn );
1022 COUNT_INST(I_SWAPB);
1023 load_reg( R_EAX, Rm );
1024 MOV_r32_r32( R_EAX, R_ECX );
1025 SHL_imm8_r32( 16, R_ECX );
1026 SHR_imm8_r32( 16, R_EAX );
1027 OR_r32_r32( R_EAX, R_ECX );
1028 store_reg( R_ECX, Rn );
1029 sh4_x86.tstate = TSTATE_NONE;
1033 load_reg( R_EAX, Rn );
1034 MMU_TRANSLATE_WRITE( R_EAX );
1035 PUSH_realigned_r32( R_EAX );
1036 MEM_READ_BYTE( R_EAX, R_EAX );
1037 TEST_r8_r8( R_AL, R_AL );
1039 OR_imm8_r8( 0x80, R_AL );
1040 POP_realigned_r32( R_ECX );
1041 MEM_WRITE_BYTE( R_ECX, R_EAX );
1042 sh4_x86.tstate = TSTATE_NONE;
1046 load_reg( R_EAX, Rm );
1047 load_reg( R_ECX, Rn );
1048 TEST_r32_r32( R_EAX, R_ECX );
1050 sh4_x86.tstate = TSTATE_E;
1054 load_reg( R_EAX, 0 );
1055 TEST_imm32_r32( imm, R_EAX );
1057 sh4_x86.tstate = TSTATE_E;
1059 TST.B #imm, @(R0, GBR) {:
1061 load_reg( R_EAX, 0);
1062 load_reg( R_ECX, R_GBR);
1063 ADD_r32_r32( R_ECX, R_EAX );
1064 MMU_TRANSLATE_READ( R_EAX );
1065 MEM_READ_BYTE( R_EAX, R_EAX );
1066 TEST_imm8_r8( imm, R_AL );
1068 sh4_x86.tstate = TSTATE_E;
1072 load_reg( R_EAX, Rm );
1073 load_reg( R_ECX, Rn );
1074 XOR_r32_r32( R_EAX, R_ECX );
1075 store_reg( R_ECX, Rn );
1076 sh4_x86.tstate = TSTATE_NONE;
1080 load_reg( R_EAX, 0 );
1081 XOR_imm32_r32( imm, R_EAX );
1082 store_reg( R_EAX, 0 );
1083 sh4_x86.tstate = TSTATE_NONE;
1085 XOR.B #imm, @(R0, GBR) {:
1087 load_reg( R_EAX, 0 );
1088 load_spreg( R_ECX, R_GBR );
1089 ADD_r32_r32( R_ECX, R_EAX );
1090 MMU_TRANSLATE_WRITE( R_EAX );
1091 PUSH_realigned_r32(R_EAX);
1092 MEM_READ_BYTE(R_EAX, R_EAX);
1093 POP_realigned_r32(R_ECX);
1094 XOR_imm32_r32( imm, R_EAX );
1095 MEM_WRITE_BYTE( R_ECX, R_EAX );
1096 sh4_x86.tstate = TSTATE_NONE;
1099 COUNT_INST(I_XTRCT);
1100 load_reg( R_EAX, Rm );
1101 load_reg( R_ECX, Rn );
1102 SHL_imm8_r32( 16, R_EAX );
1103 SHR_imm8_r32( 16, R_ECX );
1104 OR_r32_r32( R_EAX, R_ECX );
1105 store_reg( R_ECX, Rn );
1106 sh4_x86.tstate = TSTATE_NONE;
1109 /* Data move instructions */
1112 load_reg( R_EAX, Rm );
1113 store_reg( R_EAX, Rn );
1117 load_imm32( R_EAX, imm );
1118 store_reg( R_EAX, Rn );
1122 load_reg( R_EAX, Rn );
1123 MMU_TRANSLATE_WRITE( R_EAX );
1124 load_reg( R_EDX, Rm );
1125 MEM_WRITE_BYTE( R_EAX, R_EDX );
1126 sh4_x86.tstate = TSTATE_NONE;
1130 load_reg( R_EAX, Rn );
1131 ADD_imm8s_r32( -1, R_EAX );
1132 MMU_TRANSLATE_WRITE( R_EAX );
1133 load_reg( R_EDX, Rm );
1134 ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
1135 MEM_WRITE_BYTE( R_EAX, R_EDX );
1136 sh4_x86.tstate = TSTATE_NONE;
1138 MOV.B Rm, @(R0, Rn) {:
1140 load_reg( R_EAX, 0 );
1141 load_reg( R_ECX, Rn );
1142 ADD_r32_r32( R_ECX, R_EAX );
1143 MMU_TRANSLATE_WRITE( R_EAX );
1144 load_reg( R_EDX, Rm );
1145 MEM_WRITE_BYTE( R_EAX, R_EDX );
1146 sh4_x86.tstate = TSTATE_NONE;
1148 MOV.B R0, @(disp, GBR) {:
1150 load_spreg( R_EAX, R_GBR );
1151 ADD_imm32_r32( disp, R_EAX );
1152 MMU_TRANSLATE_WRITE( R_EAX );
1153 load_reg( R_EDX, 0 );
1154 MEM_WRITE_BYTE( R_EAX, R_EDX );
1155 sh4_x86.tstate = TSTATE_NONE;
1157 MOV.B R0, @(disp, Rn) {:
1159 load_reg( R_EAX, Rn );
1160 ADD_imm32_r32( disp, R_EAX );
1161 MMU_TRANSLATE_WRITE( R_EAX );
1162 load_reg( R_EDX, 0 );
1163 MEM_WRITE_BYTE( R_EAX, R_EDX );
1164 sh4_x86.tstate = TSTATE_NONE;
1168 load_reg( R_EAX, Rm );
1169 MMU_TRANSLATE_READ( R_EAX );
1170 MEM_READ_BYTE( R_EAX, R_EAX );
1171 store_reg( R_EAX, Rn );
1172 sh4_x86.tstate = TSTATE_NONE;
1176 load_reg( R_EAX, Rm );
1177 MMU_TRANSLATE_READ( R_EAX );
1178 ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
1179 MEM_READ_BYTE( R_EAX, R_EAX );
1180 store_reg( R_EAX, Rn );
1181 sh4_x86.tstate = TSTATE_NONE;
1183 MOV.B @(R0, Rm), Rn {:
1185 load_reg( R_EAX, 0 );
1186 load_reg( R_ECX, Rm );
1187 ADD_r32_r32( R_ECX, R_EAX );
1188 MMU_TRANSLATE_READ( R_EAX )
1189 MEM_READ_BYTE( R_EAX, R_EAX );
1190 store_reg( R_EAX, Rn );
1191 sh4_x86.tstate = TSTATE_NONE;
1193 MOV.B @(disp, GBR), R0 {:
1195 load_spreg( R_EAX, R_GBR );
1196 ADD_imm32_r32( disp, R_EAX );
1197 MMU_TRANSLATE_READ( R_EAX );
1198 MEM_READ_BYTE( R_EAX, R_EAX );
1199 store_reg( R_EAX, 0 );
1200 sh4_x86.tstate = TSTATE_NONE;
1202 MOV.B @(disp, Rm), R0 {:
1204 load_reg( R_EAX, Rm );
1205 ADD_imm32_r32( disp, R_EAX );
1206 MMU_TRANSLATE_READ( R_EAX );
1207 MEM_READ_BYTE( R_EAX, R_EAX );
1208 store_reg( R_EAX, 0 );
1209 sh4_x86.tstate = TSTATE_NONE;
1213 load_reg( R_EAX, Rn );
1214 check_walign32(R_EAX);
1215 MMU_TRANSLATE_WRITE( R_EAX );
1216 load_reg( R_EDX, Rm );
1217 MEM_WRITE_LONG( R_EAX, R_EDX );
1218 sh4_x86.tstate = TSTATE_NONE;
1222 load_reg( R_EAX, Rn );
1223 ADD_imm8s_r32( -4, R_EAX );
1224 check_walign32( R_EAX );
1225 MMU_TRANSLATE_WRITE( R_EAX );
1226 load_reg( R_EDX, Rm );
1227 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
1228 MEM_WRITE_LONG( R_EAX, R_EDX );
1229 sh4_x86.tstate = TSTATE_NONE;
1231 MOV.L Rm, @(R0, Rn) {:
1233 load_reg( R_EAX, 0 );
1234 load_reg( R_ECX, Rn );
1235 ADD_r32_r32( R_ECX, R_EAX );
1236 check_walign32( R_EAX );
1237 MMU_TRANSLATE_WRITE( R_EAX );
1238 load_reg( R_EDX, Rm );
1239 MEM_WRITE_LONG( R_EAX, R_EDX );
1240 sh4_x86.tstate = TSTATE_NONE;
1242 MOV.L R0, @(disp, GBR) {:
1244 load_spreg( R_EAX, R_GBR );
1245 ADD_imm32_r32( disp, R_EAX );
1246 check_walign32( R_EAX );
1247 MMU_TRANSLATE_WRITE( R_EAX );
1248 load_reg( R_EDX, 0 );
1249 MEM_WRITE_LONG( R_EAX, R_EDX );
1250 sh4_x86.tstate = TSTATE_NONE;
1252 MOV.L Rm, @(disp, Rn) {:
1254 load_reg( R_EAX, Rn );
1255 ADD_imm32_r32( disp, R_EAX );
1256 check_walign32( R_EAX );
1257 MMU_TRANSLATE_WRITE( R_EAX );
1258 load_reg( R_EDX, Rm );
1259 MEM_WRITE_LONG( R_EAX, R_EDX );
1260 sh4_x86.tstate = TSTATE_NONE;
1264 load_reg( R_EAX, Rm );
1265 check_ralign32( R_EAX );
1266 MMU_TRANSLATE_READ( R_EAX );
1267 MEM_READ_LONG( R_EAX, R_EAX );
1268 store_reg( R_EAX, Rn );
1269 sh4_x86.tstate = TSTATE_NONE;
1273 load_reg( R_EAX, Rm );
1274 check_ralign32( R_EAX );
1275 MMU_TRANSLATE_READ( R_EAX );
1276 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1277 MEM_READ_LONG( R_EAX, R_EAX );
1278 store_reg( R_EAX, Rn );
1279 sh4_x86.tstate = TSTATE_NONE;
1281 MOV.L @(R0, Rm), Rn {:
1283 load_reg( R_EAX, 0 );
1284 load_reg( R_ECX, Rm );
1285 ADD_r32_r32( R_ECX, R_EAX );
1286 check_ralign32( R_EAX );
1287 MMU_TRANSLATE_READ( R_EAX );
1288 MEM_READ_LONG( R_EAX, R_EAX );
1289 store_reg( R_EAX, Rn );
1290 sh4_x86.tstate = TSTATE_NONE;
1292 MOV.L @(disp, GBR), R0 {:
1294 load_spreg( R_EAX, R_GBR );
1295 ADD_imm32_r32( disp, R_EAX );
1296 check_ralign32( R_EAX );
1297 MMU_TRANSLATE_READ( R_EAX );
1298 MEM_READ_LONG( R_EAX, R_EAX );
1299 store_reg( R_EAX, 0 );
1300 sh4_x86.tstate = TSTATE_NONE;
1302 MOV.L @(disp, PC), Rn {:
1303 COUNT_INST(I_MOVLPC);
1304 if( sh4_x86.in_delay_slot ) {
1307 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1308 if( IS_IN_ICACHE(target) ) {
1309 // If the target address is in the same page as the code, it's
1310 // pretty safe to just ref it directly and circumvent the whole
1311 // memory subsystem. (this is a big performance win)
1313 // FIXME: There's a corner-case that's not handled here when
1314 // the current code-page is in the ITLB but not in the UTLB.
1315 // (should generate a TLB miss although need to test SH4
1316 // behaviour to confirm) Unlikely to be anyone depending on this
1317 // behaviour though.
1318 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1319 MOV_moff32_EAX( ptr );
1321 // Note: we use sh4r.pc for the calc as we could be running at a
1322 // different virtual address than the translation was done with,
1323 // but we can safely assume that the low bits are the same.
1324 load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1325 ADD_sh4r_r32( R_PC, R_EAX );
1326 MMU_TRANSLATE_READ( R_EAX );
1327 MEM_READ_LONG( R_EAX, R_EAX );
1328 sh4_x86.tstate = TSTATE_NONE;
1330 store_reg( R_EAX, Rn );
1333 MOV.L @(disp, Rm), Rn {:
1335 load_reg( R_EAX, Rm );
1336 ADD_imm8s_r32( disp, R_EAX );
1337 check_ralign32( R_EAX );
1338 MMU_TRANSLATE_READ( R_EAX );
1339 MEM_READ_LONG( R_EAX, R_EAX );
1340 store_reg( R_EAX, Rn );
1341 sh4_x86.tstate = TSTATE_NONE;
1345 load_reg( R_EAX, Rn );
1346 check_walign16( R_EAX );
1347 MMU_TRANSLATE_WRITE( R_EAX )
1348 load_reg( R_EDX, Rm );
1349 MEM_WRITE_WORD( R_EAX, R_EDX );
1350 sh4_x86.tstate = TSTATE_NONE;
// MOV.W Rm, @-Rn: pre-decrement store of the low 16 bits of Rm.
1354 load_reg( R_EAX, Rn );
1355 ADD_imm8s_r32( -2, R_EAX );
1356 check_walign16( R_EAX );
1357 MMU_TRANSLATE_WRITE( R_EAX );
1358 load_reg( R_EDX, Rm );
// Rn is only decremented after translation succeeds, so an MMU
// exception leaves the register file unmodified (restartable).
1359 ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
1360 MEM_WRITE_WORD( R_EAX, R_EDX );
1361 sh4_x86.tstate = TSTATE_NONE;
// MOV.W Rm, @(R0, Rn): indexed 16-bit store at R0 + Rn.
1363 MOV.W Rm, @(R0, Rn) {:
1365 load_reg( R_EAX, 0 );
1366 load_reg( R_ECX, Rn );
1367 ADD_r32_r32( R_ECX, R_EAX );
1368 check_walign16( R_EAX );
1369 MMU_TRANSLATE_WRITE( R_EAX );
1370 load_reg( R_EDX, Rm );
1371 MEM_WRITE_WORD( R_EAX, R_EDX );
1372 sh4_x86.tstate = TSTATE_NONE;
// MOV.W R0, @(disp, GBR): 16-bit store of R0 at GBR + disp.
1374 MOV.W R0, @(disp, GBR) {:
1376 load_spreg( R_EAX, R_GBR );
1377 ADD_imm32_r32( disp, R_EAX );
1378 check_walign16( R_EAX );
1379 MMU_TRANSLATE_WRITE( R_EAX );
1380 load_reg( R_EDX, 0 );
1381 MEM_WRITE_WORD( R_EAX, R_EDX );
1382 sh4_x86.tstate = TSTATE_NONE;
// MOV.W R0, @(disp, Rn): 16-bit store of R0 at Rn + disp.
1384 MOV.W R0, @(disp, Rn) {:
1386 load_reg( R_EAX, Rn );
1387 ADD_imm32_r32( disp, R_EAX );
1388 check_walign16( R_EAX );
1389 MMU_TRANSLATE_WRITE( R_EAX );
1390 load_reg( R_EDX, 0 );
1391 MEM_WRITE_WORD( R_EAX, R_EDX );
1392 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @Rm, Rn: 16-bit (sign-extending) load from [Rm] into Rn.
1396 load_reg( R_EAX, Rm );
1397 check_ralign16( R_EAX );
1398 MMU_TRANSLATE_READ( R_EAX );
1399 MEM_READ_WORD( R_EAX, R_EAX );
1400 store_reg( R_EAX, Rn );
1401 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @Rm+, Rn: post-increment variant; Rm advanced by 2 only after
// the address translation has succeeded.
1405 load_reg( R_EAX, Rm );
1406 check_ralign16( R_EAX );
1407 MMU_TRANSLATE_READ( R_EAX );
1408 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1409 MEM_READ_WORD( R_EAX, R_EAX );
1410 store_reg( R_EAX, Rn );
1411 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(R0, Rm), Rn: indexed 16-bit load from R0 + Rm.
1413 MOV.W @(R0, Rm), Rn {:
1415 load_reg( R_EAX, 0 );
1416 load_reg( R_ECX, Rm );
1417 ADD_r32_r32( R_ECX, R_EAX );
1418 check_ralign16( R_EAX );
1419 MMU_TRANSLATE_READ( R_EAX );
1420 MEM_READ_WORD( R_EAX, R_EAX );
1421 store_reg( R_EAX, Rn );
1422 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(disp, GBR), R0: 16-bit load from GBR + disp into R0.
1424 MOV.W @(disp, GBR), R0 {:
1426 load_spreg( R_EAX, R_GBR );
1427 ADD_imm32_r32( disp, R_EAX );
1428 check_ralign16( R_EAX );
1429 MMU_TRANSLATE_READ( R_EAX );
1430 MEM_READ_WORD( R_EAX, R_EAX );
1431 store_reg( R_EAX, 0 );
1432 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(disp, PC), Rn: PC-relative 16-bit constant load. When the
// target is inside the current icache page the value is read directly
// from host memory; otherwise a runtime PC-relative read is emitted.
1434 MOV.W @(disp, PC), Rn {:
// PC-relative loads are illegal in a delay slot (slot-illegal exception;
// the handling branch is not visible in this chunk).
1436 if( sh4_x86.in_delay_slot ) {
1439 // See comments for MOV.L @(disp, PC), Rn
1440 uint32_t target = pc + disp + 4;
1441 if( IS_IN_ICACHE(target) ) {
1442 sh4ptr_t ptr = GET_ICACHE_PTR(target);
// Read the 16-bit constant straight from the host copy of the page...
1443 MOV_moff32_EAX( ptr );
// ...and sign-extend the low 16 bits to 32.
1444 MOVSX_r16_r32( R_EAX, R_EAX );
// Slow path: compute the address relative to the runtime sh4r.pc.
1446 load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1447 ADD_sh4r_r32( R_PC, R_EAX );
1448 MMU_TRANSLATE_READ( R_EAX );
1449 MEM_READ_WORD( R_EAX, R_EAX );
1450 sh4_x86.tstate = TSTATE_NONE;
1452 store_reg( R_EAX, Rn );
// MOV.W @(disp, Rm), R0: 16-bit load from Rm + disp into R0.
1455 MOV.W @(disp, Rm), R0 {:
1457 load_reg( R_EAX, Rm );
1458 ADD_imm32_r32( disp, R_EAX );
1459 check_ralign16( R_EAX );
1460 MMU_TRANSLATE_READ( R_EAX );
1461 MEM_READ_WORD( R_EAX, R_EAX );
1462 store_reg( R_EAX, 0 );
1463 sh4_x86.tstate = TSTATE_NONE;
// MOVA @(disp, PC), R0: compute the PC-relative effective address (no
// memory access). "- (pc&0x03)" masks PC down to a 4-byte boundary.
1465 MOVA @(disp, PC), R0 {:
1467 if( sh4_x86.in_delay_slot ) {
1470 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1471 ADD_sh4r_r32( R_PC, R_ECX );
1472 store_reg( R_ECX, 0 );
1473 sh4_x86.tstate = TSTATE_NONE;
// MOVCA.L R0, @Rn: store R0 with cache-block allocation. Implemented
// here as a plain 32-bit store (no cache model in this translator).
1477 COUNT_INST(I_MOVCA);
1478 load_reg( R_EAX, Rn );
1479 check_walign32( R_EAX );
1480 MMU_TRANSLATE_WRITE( R_EAX );
1481 load_reg( R_EDX, 0 );
1482 MEM_WRITE_LONG( R_EAX, R_EDX );
1483 sh4_x86.tstate = TSTATE_NONE;
1486 /* Control transfer instructions */
// BF disp: branch if T clear. Emits the inverse test (JT skips the
// exit) so the taken path falls through to an exit_block.
1489 if( sh4_x86.in_delay_slot ) {
1492 sh4vma_t target = disp + pc + 4;
1493 JT_rel8( nottaken );
1494 exit_block_rel(target, pc+2 );
1495 JMP_TARGET(nottaken);
// BF/S disp: branch-if-false with delay slot.
1501 if( sh4_x86.in_delay_slot ) {
1504 sh4_x86.in_delay_slot = DELAY_PC;
// If the delay-slot instruction can't be translated, fall back to
// computing NEW_PC at runtime and exiting to the emulator core.
1505 if( UNTRANSLATABLE(pc+2) ) {
1506 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1508 ADD_imm32_r32( disp, R_EAX );
1509 JMP_TARGET(nottaken);
1510 ADD_sh4r_r32( R_PC, R_EAX );
1511 store_spreg( R_EAX, R_NEW_PC );
1512 exit_block_emu(pc+2);
1513 sh4_x86.branch_taken = TRUE;
// Fast path: reuse the cached condition flags if available, otherwise
// re-test sh4r.t explicitly.
1516 if( sh4_x86.tstate == TSTATE_NONE ) {
1517 CMP_imm8s_sh4r( 1, R_T );
1518 sh4_x86.tstate = TSTATE_E;
1520 sh4vma_t target = disp + pc + 4;
// Emit a Jcc rel32 (0x0F 0x80+cc) with a zero placeholder; the rel32
// is back-patched below once the fall-through code has been emitted.
1521 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
1522 sh4_translate_instruction(pc+2);
1523 exit_block_rel( target, pc+4 );
// Patch the placeholder with the distance from the end of the Jcc.
1526 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1527 sh4_translate_instruction(pc+2);
// BRA disp: unconditional PC-relative branch with delay slot.
1534 if( sh4_x86.in_delay_slot ) {
1537 sh4_x86.in_delay_slot = DELAY_PC;
1538 sh4_x86.branch_taken = TRUE;
1539 if( UNTRANSLATABLE(pc+2) ) {
// Untranslatable delay slot: store the target in NEW_PC and exit to
// the emulator core to execute the slot instruction.
1540 load_spreg( R_EAX, R_PC );
1541 ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
1542 store_spreg( R_EAX, R_NEW_PC );
1543 exit_block_emu(pc+2);
// Normal case: translate the delay slot inline, then exit to target.
1546 sh4_translate_instruction( pc + 2 );
1547 exit_block_rel( disp + pc + 4, pc+4 );
// BRAF Rn: branch to PC + 4 + Rn (register-relative, delay slot).
1554 if( sh4_x86.in_delay_slot ) {
1557 load_spreg( R_EAX, R_PC );
1558 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1559 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1560 store_spreg( R_EAX, R_NEW_PC );
1561 sh4_x86.in_delay_slot = DELAY_PC;
1562 sh4_x86.tstate = TSTATE_NONE;
1563 sh4_x86.branch_taken = TRUE;
1564 if( UNTRANSLATABLE(pc+2) ) {
1565 exit_block_emu(pc+2);
1568 sh4_translate_instruction( pc + 2 );
1569 exit_block_newpcset(pc+2);
// BSR disp: branch-to-subroutine; saves the return address (PC+4) in PR
// before branching, delay slot applies.
1576 if( sh4_x86.in_delay_slot ) {
1579 load_spreg( R_EAX, R_PC );
1580 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1581 store_spreg( R_EAX, R_PR );
1582 sh4_x86.in_delay_slot = DELAY_PC;
1583 sh4_x86.branch_taken = TRUE;
1584 sh4_x86.tstate = TSTATE_NONE;
1585 if( UNTRANSLATABLE(pc+2) ) {
// EAX still holds the return address; add disp to form the target.
1586 ADD_imm32_r32( disp, R_EAX );
1587 store_spreg( R_EAX, R_NEW_PC );
1588 exit_block_emu(pc+2);
1591 sh4_translate_instruction( pc + 2 );
1592 exit_block_rel( disp + pc + 4, pc+4 );
// BSRF Rn: register-relative branch-to-subroutine (PR = PC+4,
// target = PC + 4 + Rn).
1599 if( sh4_x86.in_delay_slot ) {
1602 load_spreg( R_EAX, R_PC );
1603 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1604 store_spreg( R_EAX, R_PR );
1605 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1606 store_spreg( R_EAX, R_NEW_PC );
1608 sh4_x86.in_delay_slot = DELAY_PC;
1609 sh4_x86.tstate = TSTATE_NONE;
1610 sh4_x86.branch_taken = TRUE;
1611 if( UNTRANSLATABLE(pc+2) ) {
1612 exit_block_emu(pc+2);
1615 sh4_translate_instruction( pc + 2 );
1616 exit_block_newpcset(pc+2);
// BT disp: branch if T set. Inverse jump (JF) skips the taken exit.
1623 if( sh4_x86.in_delay_slot ) {
1626 sh4vma_t target = disp + pc + 4;
1627 JF_rel8( nottaken );
1628 exit_block_rel(target, pc+2 );
1629 JMP_TARGET(nottaken);
// BT/S disp: branch-if-true with delay slot; mirrors BF/S above with
// the condition inverted (note the tstate^1 in the Jcc opcode byte).
1635 if( sh4_x86.in_delay_slot ) {
1638 sh4_x86.in_delay_slot = DELAY_PC;
1639 if( UNTRANSLATABLE(pc+2) ) {
1640 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1642 ADD_imm32_r32( disp, R_EAX );
1643 JMP_TARGET(nottaken);
1644 ADD_sh4r_r32( R_PC, R_EAX );
1645 store_spreg( R_EAX, R_NEW_PC );
1646 exit_block_emu(pc+2);
1647 sh4_x86.branch_taken = TRUE;
1650 if( sh4_x86.tstate == TSTATE_NONE ) {
1651 CMP_imm8s_sh4r( 1, R_T );
1652 sh4_x86.tstate = TSTATE_E;
// Jcc rel32 with placeholder, back-patched after emitting the
// fall-through code (same scheme as BF/S).
1654 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
1655 sh4_translate_instruction(pc+2);
1656 exit_block_rel( disp + pc + 4, pc+4 );
1658 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1659 sh4_translate_instruction(pc+2);
// JMP @Rn: absolute register-indirect branch with delay slot.
1666 if( sh4_x86.in_delay_slot ) {
1669 load_reg( R_ECX, Rn );
1670 store_spreg( R_ECX, R_NEW_PC );
1671 sh4_x86.in_delay_slot = DELAY_PC;
1672 sh4_x86.branch_taken = TRUE;
1673 if( UNTRANSLATABLE(pc+2) ) {
1674 exit_block_emu(pc+2);
1677 sh4_translate_instruction(pc+2);
1678 exit_block_newpcset(pc+2);
// JSR @Rn: register-indirect subroutine call; PR = PC+4, NEW_PC = Rn.
1685 if( sh4_x86.in_delay_slot ) {
1688 load_spreg( R_EAX, R_PC );
1689 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1690 store_spreg( R_EAX, R_PR );
1691 load_reg( R_ECX, Rn );
1692 store_spreg( R_ECX, R_NEW_PC );
1693 sh4_x86.in_delay_slot = DELAY_PC;
1694 sh4_x86.branch_taken = TRUE;
1695 sh4_x86.tstate = TSTATE_NONE;
1696 if( UNTRANSLATABLE(pc+2) ) {
1697 exit_block_emu(pc+2);
1700 sh4_translate_instruction(pc+2);
1701 exit_block_newpcset(pc+2);
// RTE: return from exception -- restores PC from SPC and SR from SSR.
1708 if( sh4_x86.in_delay_slot ) {
1712 load_spreg( R_ECX, R_SPC );
1713 store_spreg( R_ECX, R_NEW_PC );
1714 load_spreg( R_EAX, R_SSR );
// sh4_write_sr may change processor mode / register banks, so the
// cached privilege and FPU-enable checks must be redone.
1715 call_func1( sh4_write_sr, R_EAX );
1716 sh4_x86.in_delay_slot = DELAY_PC;
1717 sh4_x86.priv_checked = FALSE;
1718 sh4_x86.fpuen_checked = FALSE;
1719 sh4_x86.tstate = TSTATE_NONE;
1720 sh4_x86.branch_taken = TRUE;
1721 if( UNTRANSLATABLE(pc+2) ) {
1722 exit_block_emu(pc+2);
1725 sh4_translate_instruction(pc+2);
1726 exit_block_newpcset(pc+2);
// RTS: return from subroutine -- branch to PR with delay slot.
1733 if( sh4_x86.in_delay_slot ) {
1736 load_spreg( R_ECX, R_PR );
1737 store_spreg( R_ECX, R_NEW_PC );
1738 sh4_x86.in_delay_slot = DELAY_PC;
1739 sh4_x86.branch_taken = TRUE;
1740 if( UNTRANSLATABLE(pc+2) ) {
1741 exit_block_emu(pc+2);
1744 sh4_translate_instruction(pc+2);
1745 exit_block_newpcset(pc+2);
// TRAPA #imm: software trap. Commits the (post-instruction) PC, then
// calls the trap handler and exits the block.
1751 COUNT_INST(I_TRAPA);
1752 if( sh4_x86.in_delay_slot ) {
1755 load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1756 ADD_r32_sh4r( R_ECX, R_PC );
1757 load_imm32( R_EAX, imm );
1758 call_func1( sh4_raise_trap, R_EAX );
1759 sh4_x86.tstate = TSTATE_NONE;
1760 exit_block_pcset(pc);
1761 sh4_x86.branch_taken = TRUE;
// UNDEF: undefined opcode -- raise the illegal-instruction exception.
1766 COUNT_INST(I_UNDEF);
1767 if( sh4_x86.in_delay_slot ) {
1770 JMP_exc(EXC_ILLEGAL);
// CLRMAC: zero both halves of the MAC register.
1776 COUNT_INST(I_CLRMAC);
1777 XOR_r32_r32(R_EAX, R_EAX);
1778 store_spreg( R_EAX, R_MACL );
1779 store_spreg( R_EAX, R_MACH );
1780 sh4_x86.tstate = TSTATE_NONE;
// Flag-manipulation ops (CLRS/CLRT/SETS/SETT -- headers not visible in
// this chunk); only the resulting cached-tstate values remain here.
1786 sh4_x86.tstate = TSTATE_NONE;
1792 sh4_x86.tstate = TSTATE_C;
1798 sh4_x86.tstate = TSTATE_NONE;
1804 sh4_x86.tstate = TSTATE_C;
1807 /* Floating point moves */
// FMOV FRm, FRn: register-to-register FP move. FPSCR.SZ selects single
// (32-bit) vs. paired/double (64-bit) transfer size at runtime.
1809 COUNT_INST(I_FMOV1);
1811 load_spreg( R_ECX, R_FPSCR );
1812 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1813 JNE_rel8(doublesize);
1814 load_fr( R_EAX, FRm ); // SZ=0 branch
1815 store_fr( R_EAX, FRn );
1817 JMP_TARGET(doublesize);
// SZ=1: move both 32-bit halves of the register pair.
1818 load_dr0( R_EAX, FRm );
1819 load_dr1( R_ECX, FRm );
1820 store_dr0( R_EAX, FRn );
1821 store_dr1( R_ECX, FRn );
1823 sh4_x86.tstate = TSTATE_NONE;
// FMOV FRm, @Rn: FP store; 32- or 64-bit depending on FPSCR.SZ.
1826 COUNT_INST(I_FMOV2);
1828 load_reg( R_EAX, Rn );
1829 load_spreg( R_EDX, R_FPSCR );
1830 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1831 JNE_rel8(doublesize);
1833 check_walign32( R_EAX );
1834 MMU_TRANSLATE_WRITE( R_EAX );
1835 load_fr( R_ECX, FRm );
1836 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1839 JMP_TARGET(doublesize);
// SZ=1: 8-byte store with 64-bit alignment check.
1840 check_walign64( R_EAX );
1841 MMU_TRANSLATE_WRITE( R_EAX );
1842 load_dr0( R_ECX, FRm );
1843 load_dr1( R_EDX, FRm );
1844 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1846 sh4_x86.tstate = TSTATE_NONE;
// FMOV @Rm, FRn: FP load; 32- or 64-bit depending on FPSCR.SZ.
1849 COUNT_INST(I_FMOV5);
1851 load_reg( R_EAX, Rm );
1852 load_spreg( R_EDX, R_FPSCR );
1853 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1854 JNE_rel8(doublesize);
1856 check_ralign32( R_EAX );
1857 MMU_TRANSLATE_READ( R_EAX );
1858 MEM_READ_LONG( R_EAX, R_EAX );
1859 store_fr( R_EAX, FRn );
1862 JMP_TARGET(doublesize);
1863 check_ralign64( R_EAX );
1864 MMU_TRANSLATE_READ( R_EAX );
1865 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1866 store_dr0( R_ECX, FRn );
1867 store_dr1( R_EAX, FRn );
1869 sh4_x86.tstate = TSTATE_NONE;
// FMOV FRm, @-Rn: pre-decrement FP store (by 4 or 8 per FPSCR.SZ).
// Rn is only decremented after address translation succeeds.
1872 COUNT_INST(I_FMOV3);
1874 load_reg( R_EAX, Rn );
1875 load_spreg( R_EDX, R_FPSCR );
1876 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1877 JNE_rel8(doublesize);
1879 check_walign32( R_EAX );
1880 ADD_imm8s_r32( -4, R_EAX );
1881 MMU_TRANSLATE_WRITE( R_EAX );
1882 load_fr( R_ECX, FRm );
1883 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1884 MEM_WRITE_LONG( R_EAX, R_ECX );
1887 JMP_TARGET(doublesize);
1888 check_walign64( R_EAX );
1889 ADD_imm8s_r32(-8,R_EAX);
1890 MMU_TRANSLATE_WRITE( R_EAX );
1891 load_dr0( R_ECX, FRm );
1892 load_dr1( R_EDX, FRm );
1893 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1894 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1897 sh4_x86.tstate = TSTATE_NONE;
// FMOV @Rm+, FRn: post-increment FP load (by 4 or 8 per FPSCR.SZ);
// Rm advanced only after translation succeeds.
1900 COUNT_INST(I_FMOV6);
1902 load_reg( R_EAX, Rm );
1903 load_spreg( R_EDX, R_FPSCR );
1904 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1905 JNE_rel8(doublesize);
1907 check_ralign32( R_EAX );
1908 MMU_TRANSLATE_READ( R_EAX );
1909 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1910 MEM_READ_LONG( R_EAX, R_EAX );
1911 store_fr( R_EAX, FRn );
1914 JMP_TARGET(doublesize);
1915 check_ralign64( R_EAX );
1916 MMU_TRANSLATE_READ( R_EAX );
1917 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1918 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1919 store_dr0( R_ECX, FRn );
1920 store_dr1( R_EAX, FRn );
1923 sh4_x86.tstate = TSTATE_NONE;
// FMOV FRm, @(R0, Rn): R0-indexed FP store; size selected by FPSCR.SZ.
1925 FMOV FRm, @(R0, Rn) {:
1926 COUNT_INST(I_FMOV4);
1928 load_reg( R_EAX, Rn );
1929 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1930 load_spreg( R_EDX, R_FPSCR );
1931 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1932 JNE_rel8(doublesize);
1934 check_walign32( R_EAX );
1935 MMU_TRANSLATE_WRITE( R_EAX );
1936 load_fr( R_ECX, FRm );
1937 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1940 JMP_TARGET(doublesize);
1941 check_walign64( R_EAX );
1942 MMU_TRANSLATE_WRITE( R_EAX );
1943 load_dr0( R_ECX, FRm );
1944 load_dr1( R_EDX, FRm );
1945 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1948 sh4_x86.tstate = TSTATE_NONE;
// FMOV @(R0, Rm), FRn: R0-indexed FP load; size selected by FPSCR.SZ.
1950 FMOV @(R0, Rm), FRn {:
1951 COUNT_INST(I_FMOV7);
1953 load_reg( R_EAX, Rm );
1954 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1955 load_spreg( R_EDX, R_FPSCR );
1956 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1957 JNE_rel8(doublesize);
1959 check_ralign32( R_EAX );
1960 MMU_TRANSLATE_READ( R_EAX );
1961 MEM_READ_LONG( R_EAX, R_EAX );
1962 store_fr( R_EAX, FRn );
1965 JMP_TARGET(doublesize);
1966 check_ralign64( R_EAX );
1967 MMU_TRANSLATE_READ( R_EAX );
1968 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1969 store_dr0( R_ECX, FRn );
1970 store_dr1( R_EAX, FRn );
1973 sh4_x86.tstate = TSTATE_NONE;
// FLDI0 FRn: load constant 0.0f into FRn (single precision only; the
// PR test presumably skips the store when PR=1 -- branch target line is
// not visible in this chunk).
1975 FLDI0 FRn {: /* IFF PR=0 */
1976 COUNT_INST(I_FLDI0);
1978 load_spreg( R_ECX, R_FPSCR );
1979 TEST_imm32_r32( FPSCR_PR, R_ECX );
1981 XOR_r32_r32( R_EAX, R_EAX );
1982 store_fr( R_EAX, FRn );
1984 sh4_x86.tstate = TSTATE_NONE;
// FLDI1 FRn: load constant 1.0f (IEEE-754 bit pattern 0x3F800000).
1986 FLDI1 FRn {: /* IFF PR=0 */
1987 COUNT_INST(I_FLDI1);
1989 load_spreg( R_ECX, R_FPSCR );
1990 TEST_imm32_r32( FPSCR_PR, R_ECX );
1992 load_imm32(R_EAX, 0x3F800000);
1993 store_fr( R_EAX, FRn );
1995 sh4_x86.tstate = TSTATE_NONE;
// FLOAT FPUL, FRn: int->float conversion, single/double per FPSCR.PR
// (the x87 conversion lines are not visible in this chunk).
1999 COUNT_INST(I_FLOAT);
2001 load_spreg( R_ECX, R_FPSCR );
2003 TEST_imm32_r32( FPSCR_PR, R_ECX );
2004 JNE_rel8(doubleprec);
2007 JMP_TARGET(doubleprec);
2010 sh4_x86.tstate = TSTATE_NONE;
// FTRC FRm, FPUL: float->int truncation with saturation at INT_MAX /
// INT_MIN; temporarily switches the x87 control word to round-to-zero.
2015 load_spreg( R_ECX, R_FPSCR );
2016 TEST_imm32_r32( FPSCR_PR, R_ECX );
2017 JNE_rel8(doubleprec);
2020 JMP_TARGET(doubleprec);
2023 load_ptr( R_ECX, &max_int );
2024 FILD_r32ind( R_ECX );
2027 load_ptr( R_ECX, &min_int ); // 5
2028 FILD_r32ind( R_ECX ); // 2
2030 JAE_rel8( sat2 ); // 2
// Save the current FPU control word, force truncation mode, convert,
// then restore the original control word.
2031 load_ptr( R_EAX, &save_fcw );
2032 FNSTCW_r32ind( R_EAX );
2033 load_ptr( R_EDX, &trunc_fcw );
2034 FLDCW_r32ind( R_EDX );
2035 FISTP_sh4r(R_FPUL); // 3
2036 FLDCW_r32ind( R_EAX );
// Saturated path: store the clamp value directly into FPUL.
2041 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
2042 store_spreg( R_ECX, R_FPUL );
2045 sh4_x86.tstate = TSTATE_NONE;
// FLDS FRm, FPUL: raw bit copy FRm -> FPUL.
2050 load_fr( R_EAX, FRm );
2051 store_spreg( R_EAX, R_FPUL );
2052 sh4_x86.tstate = TSTATE_NONE;
// FSTS FPUL, FRn: raw bit copy FPUL -> FRn.
2057 load_spreg( R_EAX, R_FPUL );
2058 store_fr( R_EAX, FRn );
2059 sh4_x86.tstate = TSTATE_NONE;
// FCNVDS DRm, FPUL: double->single conversion, valid only when PR=1.
2062 COUNT_INST(I_FCNVDS);
2064 load_spreg( R_ECX, R_FPSCR );
2065 TEST_imm32_r32( FPSCR_PR, R_ECX );
2066 JE_rel8(end); // only when PR=1
2070 sh4_x86.tstate = TSTATE_NONE;
// FCNVSD FPUL, DRn: single->double conversion, valid only when PR=1.
2073 COUNT_INST(I_FCNVSD);
2075 load_spreg( R_ECX, R_FPSCR );
2076 TEST_imm32_r32( FPSCR_PR, R_ECX );
2077 JE_rel8(end); // only when PR=1
2081 sh4_x86.tstate = TSTATE_NONE;
2084 /* Floating point instructions */
// The FP arithmetic templates below all share one shape: test FPSCR.PR
// and branch to a double-precision variant; the actual x87 arithmetic
// lines are largely not visible in this chunk, so only the dispatch
// skeletons remain. Opcode headers for most of them (FABS/FADD/FDIV/
// FMUL/FNEG/FSUB by position -- TODO confirm) are also not visible.
2088 load_spreg( R_ECX, R_FPSCR );
2089 TEST_imm32_r32( FPSCR_PR, R_ECX );
2090 JNE_rel8(doubleprec);
2095 JMP_TARGET(doubleprec);
2100 sh4_x86.tstate = TSTATE_NONE;
2105 load_spreg( R_ECX, R_FPSCR );
2106 TEST_imm32_r32( FPSCR_PR, R_ECX );
2107 JNE_rel8(doubleprec);
2113 JMP_TARGET(doubleprec);
2119 sh4_x86.tstate = TSTATE_NONE;
2124 load_spreg( R_ECX, R_FPSCR );
2125 TEST_imm32_r32( FPSCR_PR, R_ECX );
2126 JNE_rel8(doubleprec);
2132 JMP_TARGET(doubleprec);
2138 sh4_x86.tstate = TSTATE_NONE;
// FMAC FR0, FRm, FRn: fused multiply-accumulate (FRn += FR0 * FRm).
2140 FMAC FR0, FRm, FRn {:
2143 load_spreg( R_ECX, R_FPSCR );
2144 TEST_imm32_r32( FPSCR_PR, R_ECX );
2145 JNE_rel8(doubleprec);
2153 JMP_TARGET(doubleprec);
2161 sh4_x86.tstate = TSTATE_NONE;
2167 load_spreg( R_ECX, R_FPSCR );
2168 TEST_imm32_r32( FPSCR_PR, R_ECX );
2169 JNE_rel8(doubleprec);
2175 JMP_TARGET(doubleprec);
2181 sh4_x86.tstate = TSTATE_NONE;
2186 load_spreg( R_ECX, R_FPSCR );
2187 TEST_imm32_r32( FPSCR_PR, R_ECX );
2188 JNE_rel8(doubleprec);
2193 JMP_TARGET(doubleprec);
2198 sh4_x86.tstate = TSTATE_NONE;
// FSRRA FRn: reciprocal square-root approximation, PR=0 only.
2201 COUNT_INST(I_FSRRA);
2203 load_spreg( R_ECX, R_FPSCR );
2204 TEST_imm32_r32( FPSCR_PR, R_ECX );
2205 JNE_rel8(end); // PR=0 only
2212 sh4_x86.tstate = TSTATE_NONE;
// FSQRT FRn: square root, single or double per FPSCR.PR.
2215 COUNT_INST(I_FSQRT);
2217 load_spreg( R_ECX, R_FPSCR );
2218 TEST_imm32_r32( FPSCR_PR, R_ECX );
2219 JNE_rel8(doubleprec);
2224 JMP_TARGET(doubleprec);
2229 sh4_x86.tstate = TSTATE_NONE;
2234 load_spreg( R_ECX, R_FPSCR );
2235 TEST_imm32_r32( FPSCR_PR, R_ECX );
2236 JNE_rel8(doubleprec);
2242 JMP_TARGET(doubleprec);
2248 sh4_x86.tstate = TSTATE_NONE;
// FCMP/EQ FRm, FRn: FP equality compare, result goes to T.
2252 COUNT_INST(I_FCMPEQ);
2254 load_spreg( R_ECX, R_FPSCR );
2255 TEST_imm32_r32( FPSCR_PR, R_ECX );
2256 JNE_rel8(doubleprec);
2260 JMP_TARGET(doubleprec);
2267 sh4_x86.tstate = TSTATE_NONE;
// FCMP/GT FRm, FRn: FP greater-than compare, result goes to T.
2270 COUNT_INST(I_FCMPGT);
2272 load_spreg( R_ECX, R_FPSCR );
2273 TEST_imm32_r32( FPSCR_PR, R_ECX );
2274 JNE_rel8(doubleprec);
2278 JMP_TARGET(doubleprec);
2285 sh4_x86.tstate = TSTATE_NONE;
// FSCA FPUL, DRn: sine/cosine approximation. Delegated to the C helper
// sh4_fsca, which writes the result pair into fr[0][FRn & ~1].
2291 load_spreg( R_ECX, R_FPSCR );
2292 TEST_imm32_r32( FPSCR_PR, R_ECX );
2293 JNE_rel8(doubleprec );
2294 LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
2295 load_spreg( R_EDX, R_FPUL );
2296 call_func2( sh4_fsca, R_EDX, R_ECX );
2297 JMP_TARGET(doubleprec);
2298 sh4_x86.tstate = TSTATE_NONE;
// FIPR FVm, FVn: 4-element inner product of vector registers, computed
// on the x87 stack (the surrounding FMUL/FADD lines are not visible in
// this chunk); result lands in element 3 of FVn. PR=0 only.
2303 load_spreg( R_ECX, R_FPSCR );
2304 TEST_imm32_r32( FPSCR_PR, R_ECX );
2305 JNE_rel8( doubleprec);
2310 push_fr( (FVm<<2)+1);
2311 push_fr( (FVn<<2)+1);
2314 push_fr( (FVm<<2)+2);
2315 push_fr( (FVn<<2)+2);
2318 push_fr( (FVm<<2)+3);
2319 push_fr( (FVn<<2)+3);
2322 pop_fr( (FVn<<2)+3);
2323 JMP_TARGET(doubleprec);
2324 sh4_x86.tstate = TSTATE_NONE;
// FTRV XMTRX, FVn: matrix-vector transform, delegated to the C helper
// sh4_ftrv with a pointer to the vector register block. PR=0 only.
2329 load_spreg( R_ECX, R_FPSCR );
2330 TEST_imm32_r32( FPSCR_PR, R_ECX );
2331 JNE_rel8( doubleprec );
2332 LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
2333 call_func1( sh4_ftrv, R_EDX ); // 12
2334 JMP_TARGET(doubleprec);
2335 sh4_x86.tstate = TSTATE_NONE;
// FRCHG: toggle FPSCR.FR and swap the FP register banks.
2339 COUNT_INST(I_FRCHG);
2341 load_spreg( R_ECX, R_FPSCR );
2342 XOR_imm32_r32( FPSCR_FR, R_ECX );
2343 store_spreg( R_ECX, R_FPSCR );
2344 call_func0( sh4_switch_fr_banks );
2345 sh4_x86.tstate = TSTATE_NONE;
// FSCHG: toggle FPSCR.SZ (single/double transfer size); no bank swap.
2348 COUNT_INST(I_FSCHG);
2350 load_spreg( R_ECX, R_FPSCR );
2351 XOR_imm32_r32( FPSCR_SZ, R_ECX );
2352 store_spreg( R_ECX, R_FPSCR );
2353 sh4_x86.tstate = TSTATE_NONE;
2356 /* Processor control instructions */
// LDC Rm, SR: privileged write to the status register via the
// sh4_write_sr helper (may switch banks / interrupt masks), so the
// cached privilege and FPU-enable state must be re-validated.
2358 COUNT_INST(I_LDCSR);
2359 if( sh4_x86.in_delay_slot ) {
2363 load_reg( R_EAX, Rm );
2364 call_func1( sh4_write_sr, R_EAX );
2365 sh4_x86.priv_checked = FALSE;
2366 sh4_x86.fpuen_checked = FALSE;
2367 sh4_x86.tstate = TSTATE_NONE;
// LDC Rm, <reg>: simple register-to-control-register copies.
2372 load_reg( R_EAX, Rm );
2373 store_spreg( R_EAX, R_GBR );
2378 load_reg( R_EAX, Rm );
2379 store_spreg( R_EAX, R_VBR );
2380 sh4_x86.tstate = TSTATE_NONE;
2385 load_reg( R_EAX, Rm );
2386 store_spreg( R_EAX, R_SSR );
2387 sh4_x86.tstate = TSTATE_NONE;
2392 load_reg( R_EAX, Rm );
2393 store_spreg( R_EAX, R_SGR );
2394 sh4_x86.tstate = TSTATE_NONE;
2399 load_reg( R_EAX, Rm );
2400 store_spreg( R_EAX, R_SPC );
2401 sh4_x86.tstate = TSTATE_NONE;
2406 load_reg( R_EAX, Rm );
2407 store_spreg( R_EAX, R_DBR );
2408 sh4_x86.tstate = TSTATE_NONE;
// LDC Rm, Rn_BANK: write to the alternate register bank.
2413 load_reg( R_EAX, Rm );
2414 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2415 sh4_x86.tstate = TSTATE_NONE;
// LDC.L @Rm+, <reg>: post-increment 32-bit loads into control
// registers. All variants share the same pattern: align-check,
// translate, bump Rm by 4 (only after translation succeeds), read,
// store. Target register differs per fragment.
// -> GBR
2419 load_reg( R_EAX, Rm );
2420 check_ralign32( R_EAX );
2421 MMU_TRANSLATE_READ( R_EAX );
2422 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2423 MEM_READ_LONG( R_EAX, R_EAX );
2424 store_spreg( R_EAX, R_GBR );
2425 sh4_x86.tstate = TSTATE_NONE;
// -> SR (privileged; goes through sh4_write_sr and invalidates the
// cached privilege/FPU-enable checks).
2428 COUNT_INST(I_LDCSRM);
2429 if( sh4_x86.in_delay_slot ) {
2433 load_reg( R_EAX, Rm );
2434 check_ralign32( R_EAX );
2435 MMU_TRANSLATE_READ( R_EAX );
2436 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2437 MEM_READ_LONG( R_EAX, R_EAX );
2438 call_func1( sh4_write_sr, R_EAX );
2439 sh4_x86.priv_checked = FALSE;
2440 sh4_x86.fpuen_checked = FALSE;
2441 sh4_x86.tstate = TSTATE_NONE;
// -> VBR
2447 load_reg( R_EAX, Rm );
2448 check_ralign32( R_EAX );
2449 MMU_TRANSLATE_READ( R_EAX );
2450 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2451 MEM_READ_LONG( R_EAX, R_EAX );
2452 store_spreg( R_EAX, R_VBR );
2453 sh4_x86.tstate = TSTATE_NONE;
// -> SSR
2458 load_reg( R_EAX, Rm );
2459 check_ralign32( R_EAX );
2460 MMU_TRANSLATE_READ( R_EAX );
2461 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2462 MEM_READ_LONG( R_EAX, R_EAX );
2463 store_spreg( R_EAX, R_SSR );
2464 sh4_x86.tstate = TSTATE_NONE;
// -> SGR
2469 load_reg( R_EAX, Rm );
2470 check_ralign32( R_EAX );
2471 MMU_TRANSLATE_READ( R_EAX );
2472 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2473 MEM_READ_LONG( R_EAX, R_EAX );
2474 store_spreg( R_EAX, R_SGR );
2475 sh4_x86.tstate = TSTATE_NONE;
// -> SPC
2480 load_reg( R_EAX, Rm );
2481 check_ralign32( R_EAX );
2482 MMU_TRANSLATE_READ( R_EAX );
2483 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2484 MEM_READ_LONG( R_EAX, R_EAX );
2485 store_spreg( R_EAX, R_SPC );
2486 sh4_x86.tstate = TSTATE_NONE;
// -> DBR
2491 load_reg( R_EAX, Rm );
2492 check_ralign32( R_EAX );
2493 MMU_TRANSLATE_READ( R_EAX );
2494 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2495 MEM_READ_LONG( R_EAX, R_EAX );
2496 store_spreg( R_EAX, R_DBR );
2497 sh4_x86.tstate = TSTATE_NONE;
// -> Rn_BANK (alternate register bank)
2499 LDC.L @Rm+, Rn_BANK {:
2502 load_reg( R_EAX, Rm );
2503 check_ralign32( R_EAX );
2504 MMU_TRANSLATE_READ( R_EAX );
2505 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2506 MEM_READ_LONG( R_EAX, R_EAX );
2507 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2508 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, FPSCR: FPSCR writes go through sh4_write_fpscr since they
// can change bank/size/precision state.
2511 COUNT_INST(I_LDSFPSCR);
2513 load_reg( R_EAX, Rm );
2514 call_func1( sh4_write_fpscr, R_EAX );
2515 sh4_x86.tstate = TSTATE_NONE;
// LDS.L @Rm+, FPSCR: post-increment load into FPSCR via the helper.
2517 LDS.L @Rm+, FPSCR {:
2518 COUNT_INST(I_LDSFPSCRM);
2520 load_reg( R_EAX, Rm );
2521 check_ralign32( R_EAX );
2522 MMU_TRANSLATE_READ( R_EAX );
2523 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2524 MEM_READ_LONG( R_EAX, R_EAX );
2525 call_func1( sh4_write_fpscr, R_EAX );
2526 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, FPUL / LDS.L @Rm+, FPUL
2531 load_reg( R_EAX, Rm );
2532 store_spreg( R_EAX, R_FPUL );
2537 load_reg( R_EAX, Rm );
2538 check_ralign32( R_EAX );
2539 MMU_TRANSLATE_READ( R_EAX );
2540 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2541 MEM_READ_LONG( R_EAX, R_EAX );
2542 store_spreg( R_EAX, R_FPUL );
2543 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, MACH / LDS.L @Rm+, MACH
2547 load_reg( R_EAX, Rm );
2548 store_spreg( R_EAX, R_MACH );
2552 load_reg( R_EAX, Rm );
2553 check_ralign32( R_EAX );
2554 MMU_TRANSLATE_READ( R_EAX );
2555 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2556 MEM_READ_LONG( R_EAX, R_EAX );
2557 store_spreg( R_EAX, R_MACH );
2558 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, MACL / LDS.L @Rm+, MACL
2562 load_reg( R_EAX, Rm );
2563 store_spreg( R_EAX, R_MACL );
2567 load_reg( R_EAX, Rm );
2568 check_ralign32( R_EAX );
2569 MMU_TRANSLATE_READ( R_EAX );
2570 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2571 MEM_READ_LONG( R_EAX, R_EAX );
2572 store_spreg( R_EAX, R_MACL );
2573 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, PR / LDS.L @Rm+, PR
2577 load_reg( R_EAX, Rm );
2578 store_spreg( R_EAX, R_PR );
2582 load_reg( R_EAX, Rm );
2583 check_ralign32( R_EAX );
2584 MMU_TRANSLATE_READ( R_EAX );
2585 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2586 MEM_READ_LONG( R_EAX, R_EAX );
2587 store_spreg( R_EAX, R_PR );
2588 sh4_x86.tstate = TSTATE_NONE;
// LDTLB: load the UTLB entry from PTEH/PTEL/PTEA -- delegated to the
// MMU module.
2591 COUNT_INST(I_LDTLB);
2592 call_func0( MMU_ldtlb );
2593 sh4_x86.tstate = TSTATE_NONE;
2602 COUNT_INST(I_OCBWB);
// PREF @Rn (fragment): detect a store-queue address (top bits ==
// 0xE0000000 -- presumably the SQ region; the conditional jump lines
// are not visible in this chunk) and flush the queue via the helper.
2606 load_reg( R_EAX, Rn );
2607 MOV_r32_r32( R_EAX, R_ECX );
2608 AND_imm32_r32( 0xFC000000, R_EAX );
2609 CMP_imm32_r32( 0xE0000000, R_EAX );
2611 call_func1( sh4_flush_store_queue, R_ECX );
2612 TEST_r32_r32( R_EAX, R_EAX );
2615 sh4_x86.tstate = TSTATE_NONE;
// SLEEP: halt until interrupt; handled entirely by the C helper, and
// terminates the translation block.
2618 COUNT_INST(I_SLEEP);
2620 call_func0( sh4_sleep );
2621 sh4_x86.tstate = TSTATE_NONE;
2622 sh4_x86.in_delay_slot = DELAY_NONE;
// STC SR, Rn: SR is assembled by the sh4_read_sr helper (its bits are
// kept unpacked in sh4r), then stored to Rn.
2626 COUNT_INST(I_STCSR);
2628 call_func0(sh4_read_sr);
2629 store_reg( R_EAX, Rn );
2630 sh4_x86.tstate = TSTATE_NONE;
// STC <reg>, Rn: simple control-register-to-Rn copies.
2634 load_spreg( R_EAX, R_GBR );
2635 store_reg( R_EAX, Rn );
2640 load_spreg( R_EAX, R_VBR );
2641 store_reg( R_EAX, Rn );
2642 sh4_x86.tstate = TSTATE_NONE;
2647 load_spreg( R_EAX, R_SSR );
2648 store_reg( R_EAX, Rn );
2649 sh4_x86.tstate = TSTATE_NONE;
2654 load_spreg( R_EAX, R_SPC );
2655 store_reg( R_EAX, Rn );
2656 sh4_x86.tstate = TSTATE_NONE;
2661 load_spreg( R_EAX, R_SGR );
2662 store_reg( R_EAX, Rn );
2663 sh4_x86.tstate = TSTATE_NONE;
2668 load_spreg( R_EAX, R_DBR );
2669 store_reg( R_EAX, Rn );
2670 sh4_x86.tstate = TSTATE_NONE;
// STC Rm_BANK, Rn: read from the alternate register bank.
2675 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2676 store_reg( R_EAX, Rn );
2677 sh4_x86.tstate = TSTATE_NONE;
// STC.L SR, @-Rn: pre-decrement store of SR. The translated address is
// preserved across the sh4_read_sr call via a stack push/pop; note the
// operand order MEM_WRITE_LONG(addr=ECX, value=EAX) here, unlike the
// other variants below.
2680 COUNT_INST(I_STCSRM);
2682 load_reg( R_EAX, Rn );
2683 check_walign32( R_EAX );
2684 ADD_imm8s_r32( -4, R_EAX );
2685 MMU_TRANSLATE_WRITE( R_EAX );
2686 PUSH_realigned_r32( R_EAX );
2687 call_func0( sh4_read_sr );
2688 POP_realigned_r32( R_ECX );
2689 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2690 MEM_WRITE_LONG( R_ECX, R_EAX );
2691 sh4_x86.tstate = TSTATE_NONE;
// The remaining STC.L variants share one pattern: compute Rn-4,
// translate for write, decrement Rn (only after translation succeeds),
// then store the control register. Source register differs.
// VBR
2696 load_reg( R_EAX, Rn );
2697 check_walign32( R_EAX );
2698 ADD_imm8s_r32( -4, R_EAX );
2699 MMU_TRANSLATE_WRITE( R_EAX );
2700 load_spreg( R_EDX, R_VBR );
2701 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2702 MEM_WRITE_LONG( R_EAX, R_EDX );
2703 sh4_x86.tstate = TSTATE_NONE;
// SSR
2708 load_reg( R_EAX, Rn );
2709 check_walign32( R_EAX );
2710 ADD_imm8s_r32( -4, R_EAX );
2711 MMU_TRANSLATE_WRITE( R_EAX );
2712 load_spreg( R_EDX, R_SSR );
2713 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2714 MEM_WRITE_LONG( R_EAX, R_EDX );
2715 sh4_x86.tstate = TSTATE_NONE;
// SPC
2720 load_reg( R_EAX, Rn );
2721 check_walign32( R_EAX );
2722 ADD_imm8s_r32( -4, R_EAX );
2723 MMU_TRANSLATE_WRITE( R_EAX );
2724 load_spreg( R_EDX, R_SPC );
2725 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2726 MEM_WRITE_LONG( R_EAX, R_EDX );
2727 sh4_x86.tstate = TSTATE_NONE;
// SGR
2732 load_reg( R_EAX, Rn );
2733 check_walign32( R_EAX );
2734 ADD_imm8s_r32( -4, R_EAX );
2735 MMU_TRANSLATE_WRITE( R_EAX );
2736 load_spreg( R_EDX, R_SGR );
2737 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2738 MEM_WRITE_LONG( R_EAX, R_EDX );
2739 sh4_x86.tstate = TSTATE_NONE;
// DBR
2744 load_reg( R_EAX, Rn );
2745 check_walign32( R_EAX );
2746 ADD_imm8s_r32( -4, R_EAX );
2747 MMU_TRANSLATE_WRITE( R_EAX );
2748 load_spreg( R_EDX, R_DBR );
2749 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2750 MEM_WRITE_LONG( R_EAX, R_EDX );
2751 sh4_x86.tstate = TSTATE_NONE;
// Rm_BANK (alternate bank register)
2753 STC.L Rm_BANK, @-Rn {:
2756 load_reg( R_EAX, Rn );
2757 check_walign32( R_EAX );
2758 ADD_imm8s_r32( -4, R_EAX );
2759 MMU_TRANSLATE_WRITE( R_EAX );
2760 load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
2761 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2762 MEM_WRITE_LONG( R_EAX, R_EDX );
2763 sh4_x86.tstate = TSTATE_NONE;
// GBR
2767 load_reg( R_EAX, Rn );
2768 check_walign32( R_EAX );
2769 ADD_imm8s_r32( -4, R_EAX );
2770 MMU_TRANSLATE_WRITE( R_EAX );
2771 load_spreg( R_EDX, R_GBR );
2772 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2773 MEM_WRITE_LONG( R_EAX, R_EDX );
2774 sh4_x86.tstate = TSTATE_NONE;
// STS FPSCR, Rn: copy FPSCR into Rn.
2777 COUNT_INST(I_STSFPSCR);
2779 load_spreg( R_EAX, R_FPSCR );
2780 store_reg( R_EAX, Rn );
// STS.L FPSCR, @-Rn: pre-decrement store of FPSCR; same deferred
// Rn-decrement pattern as the STC.L templates above.
2782 STS.L FPSCR, @-Rn {:
2783 COUNT_INST(I_STSFPSCRM);
2785 load_reg( R_EAX, Rn );
2786 check_walign32( R_EAX );
2787 ADD_imm8s_r32( -4, R_EAX );
2788 MMU_TRANSLATE_WRITE( R_EAX );
2789 load_spreg( R_EDX, R_FPSCR );
2790 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2791 MEM_WRITE_LONG( R_EAX, R_EDX );
2792 sh4_x86.tstate = TSTATE_NONE;
// STS FPUL, Rn / STS.L FPUL, @-Rn
2797 load_spreg( R_EAX, R_FPUL );
2798 store_reg( R_EAX, Rn );
2803 load_reg( R_EAX, Rn );
2804 check_walign32( R_EAX );
2805 ADD_imm8s_r32( -4, R_EAX );
2806 MMU_TRANSLATE_WRITE( R_EAX );
2807 load_spreg( R_EDX, R_FPUL );
2808 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2809 MEM_WRITE_LONG( R_EAX, R_EDX );
2810 sh4_x86.tstate = TSTATE_NONE;
// STS MACH, Rn / STS.L MACH, @-Rn
2814 load_spreg( R_EAX, R_MACH );
2815 store_reg( R_EAX, Rn );
2819 load_reg( R_EAX, Rn );
2820 check_walign32( R_EAX );
2821 ADD_imm8s_r32( -4, R_EAX );
2822 MMU_TRANSLATE_WRITE( R_EAX );
2823 load_spreg( R_EDX, R_MACH );
2824 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2825 MEM_WRITE_LONG( R_EAX, R_EDX );
2826 sh4_x86.tstate = TSTATE_NONE;
// STS MACL, Rn / STS.L MACL, @-Rn
2830 load_spreg( R_EAX, R_MACL );
2831 store_reg( R_EAX, Rn );
2835 load_reg( R_EAX, Rn );
2836 check_walign32( R_EAX );
2837 ADD_imm8s_r32( -4, R_EAX );
2838 MMU_TRANSLATE_WRITE( R_EAX );
2839 load_spreg( R_EDX, R_MACL );
2840 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2841 MEM_WRITE_LONG( R_EAX, R_EDX );
2842 sh4_x86.tstate = TSTATE_NONE;
// STS PR, Rn / STS.L PR, @-Rn
2846 load_spreg( R_EAX, R_PR );
2847 store_reg( R_EAX, Rn );
2851 load_reg( R_EAX, Rn );
2852 check_walign32( R_EAX );
2853 ADD_imm8s_r32( -4, R_EAX );
2854 MMU_TRANSLATE_WRITE( R_EAX );
2855 load_spreg( R_EDX, R_PR );
2856 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2857 MEM_WRITE_LONG( R_EAX, R_EDX );
2858 sh4_x86.tstate = TSTATE_NONE;
2863 /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
2866 sh4_x86.in_delay_slot = DELAY_NONE;
.