4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
8 * Copyright (c) 2007 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
28 #include "sh4/xltcache.h"
29 #include "sh4/sh4core.h"
30 #include "sh4/sh4trans.h"
31 #include "sh4/sh4stat.h"
32 #include "sh4/sh4mmio.h"
33 #include "sh4/x86op.h"
/* Initial allocation (in bytes) for the per-block backpatch list; the list
 * is grown by doubling in sh4_x86_add_backpatch() when it fills up. */
36 #define DEFAULT_BACKPATCH_SIZE 4096
/* One pending fixup: a location in the emitted x86 code that must be patched
 * at end-of-block (exception stubs etc).  NOTE(review): the exc_code field
 * assigned in sh4_x86_add_backpatch() and the closing brace are not visible
 * in this listing. */
38 struct backpatch_record {
39 uint32_t fixup_offset; /* byte offset of the fixup within the block's code buffer */
40 uint32_t fixup_icount; /* SH4 instruction count from block start ((pc - block_start_pc)>>1) */
49 * Struct to manage internal translation state. This state is not saved -
50 * it is only valid between calls to sh4_translate_begin_block() and
51 * sh4_translate_end_block()
53 struct sh4_x86_state {
55 gboolean priv_checked; /* true if we've already checked the cpu mode. */
56 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
57 gboolean branch_taken; /* true if we branched unconditionally */
58 uint32_t block_start_pc; /* SH4 PC of the first instruction in the current block */
59 uint32_t stack_posn; /* Trace stack height for alignment purposes */
63 gboolean tlb_on; /* True if tlb translation is active */
65 /* Allocated memory for the (block-wide) back-patch list */
66 struct backpatch_record *backpatch_list;
67 uint32_t backpatch_posn; /* number of entries currently in backpatch_list */
68 uint32_t backpatch_size; /* allocated capacity of backpatch_list, in records */
/* sh4_x86.tstate caches which x86 condition code currently reflects the SH4
 * T flag (TSTATE_NONE = no live flags).  The value is used directly as the
 * low nibble of the 0x70+cc short-Jcc opcode below, and XOR 1 inverts the
 * condition. */
71 #define TSTATE_NONE -1
81 #ifdef ENABLE_SH4STATS
/* Record one execution of instruction `id` in the stats table.
 * Clobbers EAX and invalidates the cached tstate. */
82 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
/* NOTE(review): the #else/#endif lines of this conditional are not visible
 * in this listing. */
84 #define COUNT_INST(id)
87 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
88 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
89 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
90 OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
92 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
93 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
94 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
95 OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
/* Translator state; only valid between sh4_translate_begin_block() and
 * sh4_translate_end_block() (see struct comment above its definition). */
97 static struct sh4_x86_state sh4_x86;
/* Memory-resident 32-bit constants.  Presumably used as operands for
 * saturation/clamping in the FP translation paths — usage is not visible
 * in this listing; confirm before changing. */
99 static uint32_t max_int = 0x7FFFFFFF;
100 static uint32_t min_int = 0x80000000;
101 static uint32_t save_fcw; /* save value for fpu control word */
102 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
104 void sh4_translate_init(void)
106 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
107 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
/* Append a fixup record for code just emitted at fixup_addr.  fixup_pc is
 * the SH4 PC of the guest instruction the fixup belongs to; exc_code is the
 * exception code to deliver when the fixup path is taken.  The list grows
 * by doubling when full. */
111 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
113 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
114 sh4_x86.backpatch_size <<= 1;
/* NOTE(review): `p = realloc(p, ...)` overwrites the only pointer to the
 * old list; if realloc fails and NDEBUG disables the assert below, the old
 * allocation leaks and NULL is dereferenced.  Prefer a temporary pointer. */
115 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
116 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
117 assert( sh4_x86.backpatch_list != NULL );
/* NOTE(review): the body of this delay-slot adjustment is not visible in
 * this listing. */
119 if( sh4_x86.in_delay_slot ) {
/* Offset is relative to the start of the current block's code buffer. */
122 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
123 ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
/* Instruction count from block start (SH4 instructions are 2 bytes). */
124 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
125 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
126 sh4_x86.backpatch_posn++;
130 * Emit an instruction to load an SH4 reg into a real register
/* Emit "mov sh4r.r[sh4reg], x86reg".  The 0x8B opcode byte is emitted on a
 * line elided from this listing; 0x45+(reg<<3) is the ModR/M byte selecting
 * [ebp+disp8] addressing with x86reg as destination. */
132 static inline void load_reg( int x86reg, int sh4reg )
134 /* mov [bp+n], reg */
136 OP(0x45 + (x86reg<<3));
137 OP(REG_OFFSET(r[sh4reg]));
/* Load the low 16 bits of an SH4 reg into x86reg — presumably sign-extended
 * (MOVSX; the opcode-emission lines are elided from this listing, confirm). */
140 static inline void load_reg16s( int x86reg, int sh4reg )
144 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
/* As load_reg16s but presumably zero-extended (MOVZX; opcode lines elided). */
147 static inline void load_reg16u( int x86reg, int sh4reg )
151 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
/* Load/store a single "special" sh4r field (R_PC, R_GBR, R_T, ...). */
155 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
156 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
158 * Emit an instruction to load an immediate value into a register
160 static inline void load_imm32( int x86reg, uint32_t value ) {
161 /* mov #value, reg */
167 * Load an immediate 64-bit quantity (note: x86-64 only)
169 static inline void load_imm64( int x86reg, uint64_t value ) {
170 /* mov #value, reg */
177 * Emit an instruction to store an SH4 reg (RN)
179 void static inline store_reg( int x86reg, int sh4reg ) {
180 /* mov reg, [bp+n] */
182 OP(0x45 + (x86reg<<3));
183 OP(REG_OFFSET(r[sh4reg]));
187 * Load an FR register (single-precision floating point) into an integer x86
188 * register (eg for register-to-register moves)
/* NOTE(review): the (frm)^1 index swaps each pair of FR slots — presumably
 * to match host word ordering within double-precision pairs; confirm
 * against the sh4r.fr layout before changing. */
190 #define load_fr(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
191 #define load_xf(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
194 * Load the low half of a DR register (DR or XD) into an integer x86 register
196 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
197 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
200 * Store an FR register (single-precision floating point) from an integer x86+
201 * register (eg for register-to-register moves)
203 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
204 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
206 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
207 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
/* x87 stack transfers: push (FLD) / pop (FSTP) sh4r floating-point fields,
 * single-precision (F) or double-precision (D) variants. */
210 #define push_fpul() FLDF_sh4r(R_FPUL)
211 #define pop_fpul() FSTPF_sh4r(R_FPUL)
212 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
213 #define pop_fr(frm) FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
214 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
215 #define pop_xf(frm) FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
216 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
217 #define pop_dr(frm) FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
218 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
219 #define pop_xdr(frm) FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
223 /* Exception checks - Note that all exception checks will clobber EAX */
/* Verify supervisor mode (SR.MD set); otherwise raise an illegal-instruction
 * exception (slot-illegal inside a delay slot).  Emitted at most once per
 * block via priv_checked.  NOTE(review): the else/closing lines of this
 * macro are not visible in this listing. */
225 #define check_priv( ) \
226 if( !sh4_x86.priv_checked ) { \
227 sh4_x86.priv_checked = TRUE;\
228 load_spreg( R_EAX, R_SR );\
229 AND_imm32_r32( SR_MD, R_EAX );\
230 if( sh4_x86.in_delay_slot ) {\
231 JE_exc( EXC_SLOT_ILLEGAL );\
233 JE_exc( EXC_ILLEGAL );\
/* Verify the FPU is enabled (SR.FD clear); otherwise raise an FPU-disabled
 * exception (slot variant inside a delay slot).  Emitted at most once per
 * block via fpuen_checked.  NOTE(review): closing lines elided as above. */
237 #define check_fpuen( ) \
238 if( !sh4_x86.fpuen_checked ) {\
239 sh4_x86.fpuen_checked = TRUE;\
240 load_spreg( R_EAX, R_SR );\
241 AND_imm32_r32( SR_FD, R_EAX );\
242 if( sh4_x86.in_delay_slot ) {\
243 JNE_exc(EXC_SLOT_FPU_DISABLED);\
245 JNE_exc(EXC_FPU_DISABLED);\
/* Alignment checks: raise a data-address-error exception if the address in
 * x86reg is not 2-, 4- or 8-byte aligned respectively. */
249 #define check_ralign16( x86reg ) \
250 TEST_imm32_r32( 0x00000001, x86reg ); \
251 JNE_exc(EXC_DATA_ADDR_READ)
253 #define check_walign16( x86reg ) \
254 TEST_imm32_r32( 0x00000001, x86reg ); \
255 JNE_exc(EXC_DATA_ADDR_WRITE);
257 #define check_ralign32( x86reg ) \
258 TEST_imm32_r32( 0x00000003, x86reg ); \
259 JNE_exc(EXC_DATA_ADDR_READ)
261 #define check_walign32( x86reg ) \
262 TEST_imm32_r32( 0x00000003, x86reg ); \
263 JNE_exc(EXC_DATA_ADDR_WRITE);
265 #define check_ralign64( x86reg ) \
266 TEST_imm32_r32( 0x00000007, x86reg ); \
267 JNE_exc(EXC_DATA_ADDR_READ)
269 #define check_walign64( x86reg ) \
270 TEST_imm32_r32( 0x00000007, x86reg ); \
271 JNE_exc(EXC_DATA_ADDR_WRITE);
/* Copy a call's EAX result into value_reg when the caller asked for a
 * different register. */
274 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
/* Memory accessors: each compiles to a call into the sh4 memory subsystem
 * (result delivered in EAX, then moved via MEM_RESULT). */
275 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
276 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
277 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
278 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
279 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
280 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
283 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
284 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
/* On MMU_VMA_ERROR the JE_exc(-1) path raises the exception already recorded
 * by the mmu layer (presumably; -1 is the "no explicit code" sentinel —
 * confirm against the backpatch consumer). */
286 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
288 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
290 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
291 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
293 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
/* Worst-case emitted sizes in bytes, for end-of-block size estimation. */
295 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
296 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
297 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
/* Raise slot-illegal, clear delay-slot state, and end the basic block
 * (return 1 from the translate function). */
299 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
301 /****** Import appropriate calling conventions ******/
302 #if SIZEOF_VOID_P == 8
303 #include "sh4/ia64abi.h"
304 #else /* 32-bit system */
306 #include "sh4/ia32mac.h"
308 #include "sh4/ia32abi.h"
/* Worst-case size (bytes) of the code emitted by the end-of-block epilogue,
 * including per-backpatch exception stubs.  NOTE(review): the constants
 * 12, 48 and 15 are emitted-stub sizes that must stay in sync with the ABI
 * headers; the else/closing lines are not visible in this listing. */
312 uint32_t sh4_translate_end_block_size()
314 if( sh4_x86.backpatch_posn <= 3 ) {
315 return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
317 return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
323 * Embed a breakpoint into the generated code
325 void sh4_translate_emit_breakpoint( sh4vma_t pc )
/* Pass the SH4 PC of the breakpoint to the handler at runtime. */
327 load_imm32( R_EAX, pc );
328 call_func1( sh4_translate_breakpoint_hit, R_EAX );
/* True if pc cannot be translated (i.e. not currently mapped in the icache).
 * Expansion parenthesized so the macro is safe inside larger expressions. */
#define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
335 * Embed a call to sh4_execute_instruction for situations that we
336 * can't translate (just page-crossing delay slots at the moment).
337 * Caller is responsible for setting new_pc before calling this function.
341 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
342 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
343 * Call sh4_execute_instruction
344 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
346 void exit_block_emu( sh4vma_t endpc )
/* sh4r.pc += (endpc - block_start_pc) */
348 load_imm32( R_ECX, endpc - sh4_x86.block_start_pc ); // 5
349 ADD_r32_sh4r( R_ECX, R_PC );
/* Charge cycles for every instruction up to and including endpc. */
351 load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
352 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
353 load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
354 store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
356 call_func0( sh4_execute_instruction );
/* Chain to the translated code for the (possibly updated) PC. */
357 load_spreg( R_EAX, R_PC );
358 if( sh4_x86.tlb_on ) {
359 call_func1(xlat_get_code_by_vma,R_EAX);
/* NOTE(review): the closing brace / else of the tlb_on branch is not
 * visible in this listing. */
361 call_func1(xlat_get_code,R_EAX);
363 AND_imm8s_rptr( 0xFC, R_EAX );
369 * Translate a single instruction. Delayed branches are handled specially
370 * by translating both branch and delayed instruction as a single unit (as
372 * The instruction MUST be in the icache (assert check)
374 * @return true if the instruction marks the end of a basic block
377 uint32_t sh4_translate_instruction( sh4vma_t pc )
380 /* Read instruction from icache */
381 assert( IS_IN_ICACHE(pc) );
382 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
384 /* PC is not in the current icache - this usually means we're running
385 * with MMU on, and we've gone past the end of the page. And since
386 * sh4_translate_block is pretty careful about this, it means we're
387 * almost certainly in a delay slot.
389 * Since we can't assume the page is present (and we can't fault it in
390 * at this point, inline a call to sh4_execute_instruction (with a few
391 * small repairs to cope with the different environment).
394 if( !sh4_x86.in_delay_slot ) {
395 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
401 load_reg( R_EAX, Rm );
402 load_reg( R_ECX, Rn );
403 ADD_r32_r32( R_EAX, R_ECX );
404 store_reg( R_ECX, Rn );
405 sh4_x86.tstate = TSTATE_NONE;
409 load_reg( R_EAX, Rn );
410 ADD_imm8s_r32( imm, R_EAX );
411 store_reg( R_EAX, Rn );
412 sh4_x86.tstate = TSTATE_NONE;
416 if( sh4_x86.tstate != TSTATE_C ) {
419 load_reg( R_EAX, Rm );
420 load_reg( R_ECX, Rn );
421 ADC_r32_r32( R_EAX, R_ECX );
422 store_reg( R_ECX, Rn );
424 sh4_x86.tstate = TSTATE_C;
428 load_reg( R_EAX, Rm );
429 load_reg( R_ECX, Rn );
430 ADD_r32_r32( R_EAX, R_ECX );
431 store_reg( R_ECX, Rn );
433 sh4_x86.tstate = TSTATE_O;
437 load_reg( R_EAX, Rm );
438 load_reg( R_ECX, Rn );
439 AND_r32_r32( R_EAX, R_ECX );
440 store_reg( R_ECX, Rn );
441 sh4_x86.tstate = TSTATE_NONE;
445 load_reg( R_EAX, 0 );
446 AND_imm32_r32(imm, R_EAX);
447 store_reg( R_EAX, 0 );
448 sh4_x86.tstate = TSTATE_NONE;
450 AND.B #imm, @(R0, GBR) {:
452 load_reg( R_EAX, 0 );
453 load_spreg( R_ECX, R_GBR );
454 ADD_r32_r32( R_ECX, R_EAX );
455 MMU_TRANSLATE_WRITE( R_EAX );
456 PUSH_realigned_r32(R_EAX);
457 MEM_READ_BYTE( R_EAX, R_EAX );
458 POP_realigned_r32(R_ECX);
459 AND_imm32_r32(imm, R_EAX );
460 MEM_WRITE_BYTE( R_ECX, R_EAX );
461 sh4_x86.tstate = TSTATE_NONE;
465 load_reg( R_EAX, Rm );
466 load_reg( R_ECX, Rn );
467 CMP_r32_r32( R_EAX, R_ECX );
469 sh4_x86.tstate = TSTATE_E;
472 COUNT_INST(I_CMPEQI);
473 load_reg( R_EAX, 0 );
474 CMP_imm8s_r32(imm, R_EAX);
476 sh4_x86.tstate = TSTATE_E;
480 load_reg( R_EAX, Rm );
481 load_reg( R_ECX, Rn );
482 CMP_r32_r32( R_EAX, R_ECX );
484 sh4_x86.tstate = TSTATE_GE;
488 load_reg( R_EAX, Rm );
489 load_reg( R_ECX, Rn );
490 CMP_r32_r32( R_EAX, R_ECX );
492 sh4_x86.tstate = TSTATE_G;
496 load_reg( R_EAX, Rm );
497 load_reg( R_ECX, Rn );
498 CMP_r32_r32( R_EAX, R_ECX );
500 sh4_x86.tstate = TSTATE_A;
504 load_reg( R_EAX, Rm );
505 load_reg( R_ECX, Rn );
506 CMP_r32_r32( R_EAX, R_ECX );
508 sh4_x86.tstate = TSTATE_AE;
512 load_reg( R_EAX, Rn );
513 CMP_imm8s_r32( 0, R_EAX );
515 sh4_x86.tstate = TSTATE_G;
519 load_reg( R_EAX, Rn );
520 CMP_imm8s_r32( 0, R_EAX );
522 sh4_x86.tstate = TSTATE_GE;
525 COUNT_INST(I_CMPSTR);
526 load_reg( R_EAX, Rm );
527 load_reg( R_ECX, Rn );
528 XOR_r32_r32( R_ECX, R_EAX );
529 TEST_r8_r8( R_AL, R_AL );
531 TEST_r8_r8( R_AH, R_AH );
533 SHR_imm8_r32( 16, R_EAX );
534 TEST_r8_r8( R_AL, R_AL );
536 TEST_r8_r8( R_AH, R_AH );
541 sh4_x86.tstate = TSTATE_E;
545 load_reg( R_EAX, Rm );
546 load_reg( R_ECX, Rn );
547 SHR_imm8_r32( 31, R_EAX );
548 SHR_imm8_r32( 31, R_ECX );
549 store_spreg( R_EAX, R_M );
550 store_spreg( R_ECX, R_Q );
551 CMP_r32_r32( R_EAX, R_ECX );
553 sh4_x86.tstate = TSTATE_NE;
557 XOR_r32_r32( R_EAX, R_EAX );
558 store_spreg( R_EAX, R_Q );
559 store_spreg( R_EAX, R_M );
560 store_spreg( R_EAX, R_T );
561 sh4_x86.tstate = TSTATE_C; // works for DIV1
565 load_spreg( R_ECX, R_M );
566 load_reg( R_EAX, Rn );
567 if( sh4_x86.tstate != TSTATE_C ) {
571 SETC_r8( R_DL ); // Q'
572 CMP_sh4r_r32( R_Q, R_ECX );
574 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
577 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
579 store_reg( R_EAX, Rn ); // Done with Rn now
580 SETC_r8(R_AL); // tmp1
581 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
582 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
583 store_spreg( R_ECX, R_Q );
584 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
585 MOVZX_r8_r32( R_AL, R_EAX );
586 store_spreg( R_EAX, R_T );
587 sh4_x86.tstate = TSTATE_NONE;
591 load_reg( R_EAX, Rm );
592 load_reg( R_ECX, Rn );
594 store_spreg( R_EDX, R_MACH );
595 store_spreg( R_EAX, R_MACL );
596 sh4_x86.tstate = TSTATE_NONE;
600 load_reg( R_EAX, Rm );
601 load_reg( R_ECX, Rn );
603 store_spreg( R_EDX, R_MACH );
604 store_spreg( R_EAX, R_MACL );
605 sh4_x86.tstate = TSTATE_NONE;
609 load_reg( R_EAX, Rn );
610 ADD_imm8s_r32( -1, R_EAX );
611 store_reg( R_EAX, Rn );
613 sh4_x86.tstate = TSTATE_E;
617 load_reg( R_EAX, Rm );
618 MOVSX_r8_r32( R_EAX, R_EAX );
619 store_reg( R_EAX, Rn );
623 load_reg( R_EAX, Rm );
624 MOVSX_r16_r32( R_EAX, R_EAX );
625 store_reg( R_EAX, Rn );
629 load_reg( R_EAX, Rm );
630 MOVZX_r8_r32( R_EAX, R_EAX );
631 store_reg( R_EAX, Rn );
635 load_reg( R_EAX, Rm );
636 MOVZX_r16_r32( R_EAX, R_EAX );
637 store_reg( R_EAX, Rn );
642 load_reg( R_EAX, Rm );
643 check_ralign32( R_EAX );
644 MMU_TRANSLATE_READ( R_EAX );
645 PUSH_realigned_r32( R_EAX );
646 load_reg( R_EAX, Rn );
647 ADD_imm8s_r32( 4, R_EAX );
648 MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
649 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
650 // Note translate twice in case of page boundaries. Maybe worth
651 // adding a page-boundary check to skip the second translation
653 load_reg( R_EAX, Rm );
654 check_ralign32( R_EAX );
655 MMU_TRANSLATE_READ( R_EAX );
656 load_reg( R_ECX, Rn );
657 check_ralign32( R_ECX );
658 PUSH_realigned_r32( R_EAX );
659 MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
660 MOV_r32_r32( R_ECX, R_EAX );
661 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
662 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
664 MEM_READ_LONG( R_EAX, R_EAX );
667 MEM_READ_LONG( R_ECX, R_EAX );
668 POP_realigned_r32( R_ECX );
671 ADD_r32_sh4r( R_EAX, R_MACL );
672 ADC_r32_sh4r( R_EDX, R_MACH );
674 load_spreg( R_ECX, R_S );
675 TEST_r32_r32(R_ECX, R_ECX);
677 call_func0( signsat48 );
679 sh4_x86.tstate = TSTATE_NONE;
684 load_reg( R_EAX, Rm );
685 check_ralign16( R_EAX );
686 MMU_TRANSLATE_READ( R_EAX );
687 PUSH_realigned_r32( R_EAX );
688 load_reg( R_EAX, Rn );
689 ADD_imm8s_r32( 2, R_EAX );
690 MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
691 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
692 // Note translate twice in case of page boundaries. Maybe worth
693 // adding a page-boundary check to skip the second translation
695 load_reg( R_EAX, Rm );
696 check_ralign16( R_EAX );
697 MMU_TRANSLATE_READ( R_EAX );
698 load_reg( R_ECX, Rn );
699 check_ralign16( R_ECX );
700 PUSH_realigned_r32( R_EAX );
701 MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
702 MOV_r32_r32( R_ECX, R_EAX );
703 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
704 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
706 MEM_READ_WORD( R_EAX, R_EAX );
709 MEM_READ_WORD( R_ECX, R_EAX );
710 POP_realigned_r32( R_ECX );
713 load_spreg( R_ECX, R_S );
714 TEST_r32_r32( R_ECX, R_ECX );
717 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
718 JNO_rel8( end ); // 2
719 load_imm32( R_EDX, 1 ); // 5
720 store_spreg( R_EDX, R_MACH ); // 6
721 JS_rel8( positive ); // 2
722 load_imm32( R_EAX, 0x80000000 );// 5
723 store_spreg( R_EAX, R_MACL ); // 6
726 JMP_TARGET(positive);
727 load_imm32( R_EAX, 0x7FFFFFFF );// 5
728 store_spreg( R_EAX, R_MACL ); // 6
732 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
733 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
737 sh4_x86.tstate = TSTATE_NONE;
741 load_spreg( R_EAX, R_T );
742 store_reg( R_EAX, Rn );
746 load_reg( R_EAX, Rm );
747 load_reg( R_ECX, Rn );
749 store_spreg( R_EAX, R_MACL );
750 sh4_x86.tstate = TSTATE_NONE;
754 load_reg16s( R_EAX, Rm );
755 load_reg16s( R_ECX, Rn );
757 store_spreg( R_EAX, R_MACL );
758 sh4_x86.tstate = TSTATE_NONE;
762 load_reg16u( R_EAX, Rm );
763 load_reg16u( R_ECX, Rn );
765 store_spreg( R_EAX, R_MACL );
766 sh4_x86.tstate = TSTATE_NONE;
770 load_reg( R_EAX, Rm );
772 store_reg( R_EAX, Rn );
773 sh4_x86.tstate = TSTATE_NONE;
777 load_reg( R_EAX, Rm );
778 XOR_r32_r32( R_ECX, R_ECX );
780 SBB_r32_r32( R_EAX, R_ECX );
781 store_reg( R_ECX, Rn );
783 sh4_x86.tstate = TSTATE_C;
787 load_reg( R_EAX, Rm );
789 store_reg( R_EAX, Rn );
790 sh4_x86.tstate = TSTATE_NONE;
794 load_reg( R_EAX, Rm );
795 load_reg( R_ECX, Rn );
796 OR_r32_r32( R_EAX, R_ECX );
797 store_reg( R_ECX, Rn );
798 sh4_x86.tstate = TSTATE_NONE;
802 load_reg( R_EAX, 0 );
803 OR_imm32_r32(imm, R_EAX);
804 store_reg( R_EAX, 0 );
805 sh4_x86.tstate = TSTATE_NONE;
807 OR.B #imm, @(R0, GBR) {:
809 load_reg( R_EAX, 0 );
810 load_spreg( R_ECX, R_GBR );
811 ADD_r32_r32( R_ECX, R_EAX );
812 MMU_TRANSLATE_WRITE( R_EAX );
813 PUSH_realigned_r32(R_EAX);
814 MEM_READ_BYTE( R_EAX, R_EAX );
815 POP_realigned_r32(R_ECX);
816 OR_imm32_r32(imm, R_EAX );
817 MEM_WRITE_BYTE( R_ECX, R_EAX );
818 sh4_x86.tstate = TSTATE_NONE;
822 load_reg( R_EAX, Rn );
823 if( sh4_x86.tstate != TSTATE_C ) {
827 store_reg( R_EAX, Rn );
829 sh4_x86.tstate = TSTATE_C;
833 load_reg( R_EAX, Rn );
834 if( sh4_x86.tstate != TSTATE_C ) {
838 store_reg( R_EAX, Rn );
840 sh4_x86.tstate = TSTATE_C;
844 load_reg( R_EAX, Rn );
846 store_reg( R_EAX, Rn );
848 sh4_x86.tstate = TSTATE_C;
852 load_reg( R_EAX, Rn );
854 store_reg( R_EAX, Rn );
856 sh4_x86.tstate = TSTATE_C;
860 /* Annoyingly enough, not directly convertible */
861 load_reg( R_EAX, Rn );
862 load_reg( R_ECX, Rm );
863 CMP_imm32_r32( 0, R_ECX );
866 NEG_r32( R_ECX ); // 2
867 AND_imm8_r8( 0x1F, R_CL ); // 3
868 JE_rel8(emptysar); // 2
869 SAR_r32_CL( R_EAX ); // 2
872 JMP_TARGET(emptysar);
873 SAR_imm8_r32(31, R_EAX ); // 3
877 AND_imm8_r8( 0x1F, R_CL ); // 3
878 SHL_r32_CL( R_EAX ); // 2
881 store_reg( R_EAX, Rn );
882 sh4_x86.tstate = TSTATE_NONE;
886 load_reg( R_EAX, Rn );
887 load_reg( R_ECX, Rm );
888 CMP_imm32_r32( 0, R_ECX );
891 NEG_r32( R_ECX ); // 2
892 AND_imm8_r8( 0x1F, R_CL ); // 3
894 SHR_r32_CL( R_EAX ); // 2
897 JMP_TARGET(emptyshr);
898 XOR_r32_r32( R_EAX, R_EAX );
902 AND_imm8_r8( 0x1F, R_CL ); // 3
903 SHL_r32_CL( R_EAX ); // 2
906 store_reg( R_EAX, Rn );
907 sh4_x86.tstate = TSTATE_NONE;
911 load_reg( R_EAX, Rn );
914 store_reg( R_EAX, Rn );
915 sh4_x86.tstate = TSTATE_C;
919 load_reg( R_EAX, Rn );
922 store_reg( R_EAX, Rn );
923 sh4_x86.tstate = TSTATE_C;
927 load_reg( R_EAX, Rn );
930 store_reg( R_EAX, Rn );
931 sh4_x86.tstate = TSTATE_C;
935 load_reg( R_EAX, Rn );
936 SHL_imm8_r32( 2, R_EAX );
937 store_reg( R_EAX, Rn );
938 sh4_x86.tstate = TSTATE_NONE;
942 load_reg( R_EAX, Rn );
943 SHL_imm8_r32( 8, R_EAX );
944 store_reg( R_EAX, Rn );
945 sh4_x86.tstate = TSTATE_NONE;
949 load_reg( R_EAX, Rn );
950 SHL_imm8_r32( 16, R_EAX );
951 store_reg( R_EAX, Rn );
952 sh4_x86.tstate = TSTATE_NONE;
956 load_reg( R_EAX, Rn );
959 store_reg( R_EAX, Rn );
960 sh4_x86.tstate = TSTATE_C;
964 load_reg( R_EAX, Rn );
965 SHR_imm8_r32( 2, R_EAX );
966 store_reg( R_EAX, Rn );
967 sh4_x86.tstate = TSTATE_NONE;
971 load_reg( R_EAX, Rn );
972 SHR_imm8_r32( 8, R_EAX );
973 store_reg( R_EAX, Rn );
974 sh4_x86.tstate = TSTATE_NONE;
978 load_reg( R_EAX, Rn );
979 SHR_imm8_r32( 16, R_EAX );
980 store_reg( R_EAX, Rn );
981 sh4_x86.tstate = TSTATE_NONE;
985 load_reg( R_EAX, Rm );
986 load_reg( R_ECX, Rn );
987 SUB_r32_r32( R_EAX, R_ECX );
988 store_reg( R_ECX, Rn );
989 sh4_x86.tstate = TSTATE_NONE;
993 load_reg( R_EAX, Rm );
994 load_reg( R_ECX, Rn );
995 if( sh4_x86.tstate != TSTATE_C ) {
998 SBB_r32_r32( R_EAX, R_ECX );
999 store_reg( R_ECX, Rn );
1001 sh4_x86.tstate = TSTATE_C;
1005 load_reg( R_EAX, Rm );
1006 load_reg( R_ECX, Rn );
1007 SUB_r32_r32( R_EAX, R_ECX );
1008 store_reg( R_ECX, Rn );
1010 sh4_x86.tstate = TSTATE_O;
1013 COUNT_INST(I_SWAPB);
1014 load_reg( R_EAX, Rm );
1015 XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
1016 store_reg( R_EAX, Rn );
1019 COUNT_INST(I_SWAPB);
1020 load_reg( R_EAX, Rm );
1021 MOV_r32_r32( R_EAX, R_ECX );
1022 SHL_imm8_r32( 16, R_ECX );
1023 SHR_imm8_r32( 16, R_EAX );
1024 OR_r32_r32( R_EAX, R_ECX );
1025 store_reg( R_ECX, Rn );
1026 sh4_x86.tstate = TSTATE_NONE;
1030 load_reg( R_EAX, Rn );
1031 MMU_TRANSLATE_WRITE( R_EAX );
1032 PUSH_realigned_r32( R_EAX );
1033 MEM_READ_BYTE( R_EAX, R_EAX );
1034 TEST_r8_r8( R_AL, R_AL );
1036 OR_imm8_r8( 0x80, R_AL );
1037 POP_realigned_r32( R_ECX );
1038 MEM_WRITE_BYTE( R_ECX, R_EAX );
1039 sh4_x86.tstate = TSTATE_NONE;
1043 load_reg( R_EAX, Rm );
1044 load_reg( R_ECX, Rn );
1045 TEST_r32_r32( R_EAX, R_ECX );
1047 sh4_x86.tstate = TSTATE_E;
1051 load_reg( R_EAX, 0 );
1052 TEST_imm32_r32( imm, R_EAX );
1054 sh4_x86.tstate = TSTATE_E;
1056 TST.B #imm, @(R0, GBR) {:
1058 load_reg( R_EAX, 0);
1059 load_reg( R_ECX, R_GBR);
1060 ADD_r32_r32( R_ECX, R_EAX );
1061 MMU_TRANSLATE_READ( R_EAX );
1062 MEM_READ_BYTE( R_EAX, R_EAX );
1063 TEST_imm8_r8( imm, R_AL );
1065 sh4_x86.tstate = TSTATE_E;
1069 load_reg( R_EAX, Rm );
1070 load_reg( R_ECX, Rn );
1071 XOR_r32_r32( R_EAX, R_ECX );
1072 store_reg( R_ECX, Rn );
1073 sh4_x86.tstate = TSTATE_NONE;
1077 load_reg( R_EAX, 0 );
1078 XOR_imm32_r32( imm, R_EAX );
1079 store_reg( R_EAX, 0 );
1080 sh4_x86.tstate = TSTATE_NONE;
1082 XOR.B #imm, @(R0, GBR) {:
1084 load_reg( R_EAX, 0 );
1085 load_spreg( R_ECX, R_GBR );
1086 ADD_r32_r32( R_ECX, R_EAX );
1087 MMU_TRANSLATE_WRITE( R_EAX );
1088 PUSH_realigned_r32(R_EAX);
1089 MEM_READ_BYTE(R_EAX, R_EAX);
1090 POP_realigned_r32(R_ECX);
1091 XOR_imm32_r32( imm, R_EAX );
1092 MEM_WRITE_BYTE( R_ECX, R_EAX );
1093 sh4_x86.tstate = TSTATE_NONE;
1096 COUNT_INST(I_XTRCT);
1097 load_reg( R_EAX, Rm );
1098 load_reg( R_ECX, Rn );
1099 SHL_imm8_r32( 16, R_EAX );
1100 SHR_imm8_r32( 16, R_ECX );
1101 OR_r32_r32( R_EAX, R_ECX );
1102 store_reg( R_ECX, Rn );
1103 sh4_x86.tstate = TSTATE_NONE;
1106 /* Data move instructions */
1109 load_reg( R_EAX, Rm );
1110 store_reg( R_EAX, Rn );
1114 load_imm32( R_EAX, imm );
1115 store_reg( R_EAX, Rn );
1119 load_reg( R_EAX, Rn );
1120 MMU_TRANSLATE_WRITE( R_EAX );
1121 load_reg( R_EDX, Rm );
1122 MEM_WRITE_BYTE( R_EAX, R_EDX );
1123 sh4_x86.tstate = TSTATE_NONE;
1127 load_reg( R_EAX, Rn );
1128 ADD_imm8s_r32( -1, R_EAX );
1129 MMU_TRANSLATE_WRITE( R_EAX );
1130 load_reg( R_EDX, Rm );
1131 ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
1132 MEM_WRITE_BYTE( R_EAX, R_EDX );
1133 sh4_x86.tstate = TSTATE_NONE;
1135 MOV.B Rm, @(R0, Rn) {:
1137 load_reg( R_EAX, 0 );
1138 load_reg( R_ECX, Rn );
1139 ADD_r32_r32( R_ECX, R_EAX );
1140 MMU_TRANSLATE_WRITE( R_EAX );
1141 load_reg( R_EDX, Rm );
1142 MEM_WRITE_BYTE( R_EAX, R_EDX );
1143 sh4_x86.tstate = TSTATE_NONE;
1145 MOV.B R0, @(disp, GBR) {:
1147 load_spreg( R_EAX, R_GBR );
1148 ADD_imm32_r32( disp, R_EAX );
1149 MMU_TRANSLATE_WRITE( R_EAX );
1150 load_reg( R_EDX, 0 );
1151 MEM_WRITE_BYTE( R_EAX, R_EDX );
1152 sh4_x86.tstate = TSTATE_NONE;
1154 MOV.B R0, @(disp, Rn) {:
1156 load_reg( R_EAX, Rn );
1157 ADD_imm32_r32( disp, R_EAX );
1158 MMU_TRANSLATE_WRITE( R_EAX );
1159 load_reg( R_EDX, 0 );
1160 MEM_WRITE_BYTE( R_EAX, R_EDX );
1161 sh4_x86.tstate = TSTATE_NONE;
1165 load_reg( R_EAX, Rm );
1166 MMU_TRANSLATE_READ( R_EAX );
1167 MEM_READ_BYTE( R_EAX, R_EAX );
1168 store_reg( R_EAX, Rn );
1169 sh4_x86.tstate = TSTATE_NONE;
1173 load_reg( R_EAX, Rm );
1174 MMU_TRANSLATE_READ( R_EAX );
1175 ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
1176 MEM_READ_BYTE( R_EAX, R_EAX );
1177 store_reg( R_EAX, Rn );
1178 sh4_x86.tstate = TSTATE_NONE;
1180 MOV.B @(R0, Rm), Rn {:
1182 load_reg( R_EAX, 0 );
1183 load_reg( R_ECX, Rm );
1184 ADD_r32_r32( R_ECX, R_EAX );
1185 MMU_TRANSLATE_READ( R_EAX )
1186 MEM_READ_BYTE( R_EAX, R_EAX );
1187 store_reg( R_EAX, Rn );
1188 sh4_x86.tstate = TSTATE_NONE;
1190 MOV.B @(disp, GBR), R0 {:
1192 load_spreg( R_EAX, R_GBR );
1193 ADD_imm32_r32( disp, R_EAX );
1194 MMU_TRANSLATE_READ( R_EAX );
1195 MEM_READ_BYTE( R_EAX, R_EAX );
1196 store_reg( R_EAX, 0 );
1197 sh4_x86.tstate = TSTATE_NONE;
1199 MOV.B @(disp, Rm), R0 {:
1201 load_reg( R_EAX, Rm );
1202 ADD_imm32_r32( disp, R_EAX );
1203 MMU_TRANSLATE_READ( R_EAX );
1204 MEM_READ_BYTE( R_EAX, R_EAX );
1205 store_reg( R_EAX, 0 );
1206 sh4_x86.tstate = TSTATE_NONE;
1210 load_reg( R_EAX, Rn );
1211 check_walign32(R_EAX);
1212 MMU_TRANSLATE_WRITE( R_EAX );
1213 load_reg( R_EDX, Rm );
1214 MEM_WRITE_LONG( R_EAX, R_EDX );
1215 sh4_x86.tstate = TSTATE_NONE;
1219 load_reg( R_EAX, Rn );
1220 ADD_imm8s_r32( -4, R_EAX );
1221 check_walign32( R_EAX );
1222 MMU_TRANSLATE_WRITE( R_EAX );
1223 load_reg( R_EDX, Rm );
1224 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
1225 MEM_WRITE_LONG( R_EAX, R_EDX );
1226 sh4_x86.tstate = TSTATE_NONE;
1228 MOV.L Rm, @(R0, Rn) {:
1230 load_reg( R_EAX, 0 );
1231 load_reg( R_ECX, Rn );
1232 ADD_r32_r32( R_ECX, R_EAX );
1233 check_walign32( R_EAX );
1234 MMU_TRANSLATE_WRITE( R_EAX );
1235 load_reg( R_EDX, Rm );
1236 MEM_WRITE_LONG( R_EAX, R_EDX );
1237 sh4_x86.tstate = TSTATE_NONE;
1239 MOV.L R0, @(disp, GBR) {:
1241 load_spreg( R_EAX, R_GBR );
1242 ADD_imm32_r32( disp, R_EAX );
1243 check_walign32( R_EAX );
1244 MMU_TRANSLATE_WRITE( R_EAX );
1245 load_reg( R_EDX, 0 );
1246 MEM_WRITE_LONG( R_EAX, R_EDX );
1247 sh4_x86.tstate = TSTATE_NONE;
1249 MOV.L Rm, @(disp, Rn) {:
1251 load_reg( R_EAX, Rn );
1252 ADD_imm32_r32( disp, R_EAX );
1253 check_walign32( R_EAX );
1254 MMU_TRANSLATE_WRITE( R_EAX );
1255 load_reg( R_EDX, Rm );
1256 MEM_WRITE_LONG( R_EAX, R_EDX );
1257 sh4_x86.tstate = TSTATE_NONE;
1261 load_reg( R_EAX, Rm );
1262 check_ralign32( R_EAX );
1263 MMU_TRANSLATE_READ( R_EAX );
1264 MEM_READ_LONG( R_EAX, R_EAX );
1265 store_reg( R_EAX, Rn );
1266 sh4_x86.tstate = TSTATE_NONE;
1270 load_reg( R_EAX, Rm );
1271 check_ralign32( R_EAX );
1272 MMU_TRANSLATE_READ( R_EAX );
1273 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1274 MEM_READ_LONG( R_EAX, R_EAX );
1275 store_reg( R_EAX, Rn );
1276 sh4_x86.tstate = TSTATE_NONE;
1278 MOV.L @(R0, Rm), Rn {:
1280 load_reg( R_EAX, 0 );
1281 load_reg( R_ECX, Rm );
1282 ADD_r32_r32( R_ECX, R_EAX );
1283 check_ralign32( R_EAX );
1284 MMU_TRANSLATE_READ( R_EAX );
1285 MEM_READ_LONG( R_EAX, R_EAX );
1286 store_reg( R_EAX, Rn );
1287 sh4_x86.tstate = TSTATE_NONE;
1289 MOV.L @(disp, GBR), R0 {:
1291 load_spreg( R_EAX, R_GBR );
1292 ADD_imm32_r32( disp, R_EAX );
1293 check_ralign32( R_EAX );
1294 MMU_TRANSLATE_READ( R_EAX );
1295 MEM_READ_LONG( R_EAX, R_EAX );
1296 store_reg( R_EAX, 0 );
1297 sh4_x86.tstate = TSTATE_NONE;
1299 MOV.L @(disp, PC), Rn {:
1300 COUNT_INST(I_MOVLPC);
1301 if( sh4_x86.in_delay_slot ) {
1304 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1305 if( IS_IN_ICACHE(target) ) {
1306 // If the target address is in the same page as the code, it's
1307 // pretty safe to just ref it directly and circumvent the whole
1308 // memory subsystem. (this is a big performance win)
1310 // FIXME: There's a corner-case that's not handled here when
1311 // the current code-page is in the ITLB but not in the UTLB.
1312 // (should generate a TLB miss although need to test SH4
1313 // behaviour to confirm) Unlikely to be anyone depending on this
1314 // behaviour though.
1315 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1316 MOV_moff32_EAX( ptr );
1318 // Note: we use sh4r.pc for the calc as we could be running at a
1319 // different virtual address than the translation was done with,
1320 // but we can safely assume that the low bits are the same.
1321 load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1322 ADD_sh4r_r32( R_PC, R_EAX );
1323 MMU_TRANSLATE_READ( R_EAX );
1324 MEM_READ_LONG( R_EAX, R_EAX );
1325 sh4_x86.tstate = TSTATE_NONE;
1327 store_reg( R_EAX, Rn );
1330 MOV.L @(disp, Rm), Rn {:
1332 load_reg( R_EAX, Rm );
1333 ADD_imm8s_r32( disp, R_EAX );
1334 check_ralign32( R_EAX );
1335 MMU_TRANSLATE_READ( R_EAX );
1336 MEM_READ_LONG( R_EAX, R_EAX );
1337 store_reg( R_EAX, Rn );
1338 sh4_x86.tstate = TSTATE_NONE;
1342 load_reg( R_EAX, Rn );
1343 check_walign16( R_EAX );
1344 MMU_TRANSLATE_WRITE( R_EAX )
1345 load_reg( R_EDX, Rm );
1346 MEM_WRITE_WORD( R_EAX, R_EDX );
1347 sh4_x86.tstate = TSTATE_NONE;
// --- 16-bit MOV.W store/load templates ---------------------------------
// NOTE(review): extraction elided interior lines (template headers,
// COUNT_INST, closing ':}') throughout; each fragment below follows the
// same shape: compute address, alignment check, MMU translate, access.
// MOV.W Rm, @-Rn fragment: pre-decrement store. The address is computed
// from the *un-decremented* Rn minus 2; Rn itself is only decremented
// (ADD_imm8s_sh4r) after MMU translation succeeds, so a TLB miss leaves
// Rn unmodified.
1351 load_reg( R_EAX, Rn );
1352 ADD_imm8s_r32( -2, R_EAX );
1353 check_walign16( R_EAX );
1354 MMU_TRANSLATE_WRITE( R_EAX );
1355 load_reg( R_EDX, Rm );
1356 ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
1357 MEM_WRITE_WORD( R_EAX, R_EDX );
1358 sh4_x86.tstate = TSTATE_NONE;
// MOV.W Rm, @(R0, Rn): indexed store, address = R0 + Rn.
1360 MOV.W Rm, @(R0, Rn) {:
1362 load_reg( R_EAX, 0 );
1363 load_reg( R_ECX, Rn );
1364 ADD_r32_r32( R_ECX, R_EAX );
1365 check_walign16( R_EAX );
1366 MMU_TRANSLATE_WRITE( R_EAX );
1367 load_reg( R_EDX, Rm );
1368 MEM_WRITE_WORD( R_EAX, R_EDX );
1369 sh4_x86.tstate = TSTATE_NONE;
// MOV.W R0, @(disp, GBR): GBR-relative store of R0.
1371 MOV.W R0, @(disp, GBR) {:
1373 load_spreg( R_EAX, R_GBR );
1374 ADD_imm32_r32( disp, R_EAX );
1375 check_walign16( R_EAX );
1376 MMU_TRANSLATE_WRITE( R_EAX );
1377 load_reg( R_EDX, 0 );
1378 MEM_WRITE_WORD( R_EAX, R_EDX );
1379 sh4_x86.tstate = TSTATE_NONE;
// MOV.W R0, @(disp, Rn): register-relative store of R0.
1381 MOV.W R0, @(disp, Rn) {:
1383 load_reg( R_EAX, Rn );
1384 ADD_imm32_r32( disp, R_EAX );
1385 check_walign16( R_EAX );
1386 MMU_TRANSLATE_WRITE( R_EAX );
1387 load_reg( R_EDX, 0 );
1388 MEM_WRITE_WORD( R_EAX, R_EDX );
1389 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @Rm, Rn fragment: sign-extending 16-bit load (MEM_READ_WORD
// presumably sign-extends into EAX — TODO confirm against the macro).
1393 load_reg( R_EAX, Rm );
1394 check_ralign16( R_EAX );
1395 MMU_TRANSLATE_READ( R_EAX );
1396 MEM_READ_WORD( R_EAX, R_EAX );
1397 store_reg( R_EAX, Rn );
1398 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @Rm+, Rn fragment: post-increment load. As with the pre-decrement
// store, Rm is only bumped after MMU translation succeeds.
1402 load_reg( R_EAX, Rm );
1403 check_ralign16( R_EAX );
1404 MMU_TRANSLATE_READ( R_EAX );
1405 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1406 MEM_READ_WORD( R_EAX, R_EAX );
1407 store_reg( R_EAX, Rn );
1408 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(R0, Rm), Rn: indexed load, address = R0 + Rm.
1410 MOV.W @(R0, Rm), Rn {:
1412 load_reg( R_EAX, 0 );
1413 load_reg( R_ECX, Rm );
1414 ADD_r32_r32( R_ECX, R_EAX );
1415 check_ralign16( R_EAX );
1416 MMU_TRANSLATE_READ( R_EAX );
1417 MEM_READ_WORD( R_EAX, R_EAX );
1418 store_reg( R_EAX, Rn );
1419 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(disp, GBR), R0: GBR-relative load into R0.
1421 MOV.W @(disp, GBR), R0 {:
1423 load_spreg( R_EAX, R_GBR );
1424 ADD_imm32_r32( disp, R_EAX );
1425 check_ralign16( R_EAX );
1426 MMU_TRANSLATE_READ( R_EAX );
1427 MEM_READ_WORD( R_EAX, R_EAX );
1428 store_reg( R_EAX, 0 );
1429 sh4_x86.tstate = TSTATE_NONE;
// MOV.W @(disp, PC), Rn: PC-relative 16-bit constant load. Illegal in a
// delay slot (the in_delay_slot branch body was elided by extraction).
// Fast path: when the target is inside the current ICACHE page, read the
// constant directly from host memory and sign-extend; otherwise fall back
// to a full runtime PC-relative address computation + MMU'd read.
1431 MOV.W @(disp, PC), Rn {:
1433 if( sh4_x86.in_delay_slot ) {
1436 // See comments for MOV.L @(disp, PC), Rn
1437 uint32_t target = pc + disp + 4;
1438 if( IS_IN_ICACHE(target) ) {
1439 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1440 MOV_moff32_EAX( ptr );
// Sign-extend the 16-bit constant to 32 bits, matching MOV.W semantics.
1441 MOVSX_r16_r32( R_EAX, R_EAX );
// Slow path (else-branch opener elided): compute address off sh4r.pc so the
// code works even when running at a different virtual address.
1443 load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1444 ADD_sh4r_r32( R_PC, R_EAX );
1445 MMU_TRANSLATE_READ( R_EAX );
1446 MEM_READ_WORD( R_EAX, R_EAX );
1447 sh4_x86.tstate = TSTATE_NONE;
1449 store_reg( R_EAX, Rn );
// MOV.W @(disp, Rm), R0: register-relative 16-bit load into R0.
1452 MOV.W @(disp, Rm), R0 {:
1454 load_reg( R_EAX, Rm );
1455 ADD_imm32_r32( disp, R_EAX );
1456 check_ralign16( R_EAX );
1457 MMU_TRANSLATE_READ( R_EAX );
1458 MEM_READ_WORD( R_EAX, R_EAX );
1459 store_reg( R_EAX, 0 );
1460 sh4_x86.tstate = TSTATE_NONE;
// MOVA @(disp, PC), R0: compute the (longword-aligned) PC-relative address
// itself — no memory access. The (pc&0x03) term masks PC to a 4-byte
// boundary as the SH4 MOVA definition requires.
1462 MOVA @(disp, PC), R0 {:
1464 if( sh4_x86.in_delay_slot ) {
1467 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1468 ADD_sh4r_r32( R_PC, R_ECX );
1469 store_reg( R_ECX, 0 );
1470 sh4_x86.tstate = TSTATE_NONE;
// MOVCA.L R0, @Rn fragment: cache-line allocating store, emitted here as a
// plain 32-bit store of R0 (cache behaviour is not modelled).
1474 COUNT_INST(I_MOVCA);
1475 load_reg( R_EAX, Rn );
1476 check_walign32( R_EAX );
1477 MMU_TRANSLATE_WRITE( R_EAX );
1478 load_reg( R_EDX, 0 );
1479 MEM_WRITE_LONG( R_EAX, R_EDX );
1480 sh4_x86.tstate = TSTATE_NONE;
1483 /* Control transfer instructions */
// NOTE(review): the templates below are heavily elided by extraction —
// mnemonic headers, SLOTILLEGAL() bodies, else-branches and ':}' closers
// are missing. The common pattern: branches are illegal in a delay slot;
// when the delay-slot instruction is translatable the branch is resolved
// statically with exit_block_rel(); otherwise NEW_PC is set and the block
// exits via the emulator (exit_block_emu).
// BF fragment: conditional branch not-taken falls through via a short
// host-side JT over the block exit.
1486 if( sh4_x86.in_delay_slot ) {
1489 sh4vma_t target = disp + pc + 4;
1490 JT_rel8( nottaken );
1491 exit_block_rel(target, pc+2 );
1492 JMP_TARGET(nottaken);
// BF/S fragment (branch with delay slot).
1498 if( sh4_x86.in_delay_slot ) {
1501 sh4_x86.in_delay_slot = DELAY_PC;
// Untranslatable delay slot: compute NEW_PC at runtime and drop to emu.
1502 if( UNTRANSLATABLE(pc+2) ) {
1503 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1505 ADD_imm32_r32( disp, R_EAX );
1506 JMP_TARGET(nottaken);
1507 ADD_sh4r_r32( R_PC, R_EAX );
1508 store_spreg( R_EAX, R_NEW_PC );
1509 exit_block_emu(pc+2);
1510 sh4_x86.branch_taken = TRUE;
// Translatable delay slot: materialise T into host flags if not cached...
1513 if( sh4_x86.tstate == TSTATE_NONE ) {
1514 CMP_imm8s_sh4r( 1, R_T );
1515 sh4_x86.tstate = TSTATE_E;
1517 sh4vma_t target = disp + pc + 4;
// ...then hand-emit a Jcc rel32 (0x0F 0x8x) whose displacement is
// back-patched after the delay-slot instruction is translated.
1518 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
1519 sh4_translate_instruction(pc+2);
1520 exit_block_rel( target, pc+4 );
// Back-patch the rel32: distance from end of the Jcc to current output.
1523 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1524 sh4_translate_instruction(pc+2);
// BRA fragment: unconditional PC-relative branch with delay slot.
1531 if( sh4_x86.in_delay_slot ) {
1534 sh4_x86.in_delay_slot = DELAY_PC;
1535 sh4_x86.branch_taken = TRUE;
1536 if( UNTRANSLATABLE(pc+2) ) {
1537 load_spreg( R_EAX, R_PC );
1538 ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
1539 store_spreg( R_EAX, R_NEW_PC );
1540 exit_block_emu(pc+2);
1543 sh4_translate_instruction( pc + 2 );
1544 exit_block_rel( disp + pc + 4, pc+4 );
// BRAF fragment: branch far, target = PC + 4 + Rn (register-relative, so
// the target is only known at runtime — always exits via NEW_PC).
1551 if( sh4_x86.in_delay_slot ) {
1554 load_spreg( R_EAX, R_PC );
1555 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1556 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1557 store_spreg( R_EAX, R_NEW_PC );
1558 sh4_x86.in_delay_slot = DELAY_PC;
1559 sh4_x86.tstate = TSTATE_NONE;
1560 sh4_x86.branch_taken = TRUE;
1561 if( UNTRANSLATABLE(pc+2) ) {
1562 exit_block_emu(pc+2);
1565 sh4_translate_instruction( pc + 2 );
1566 exit_block_newpcset(pc+2);
// BSR fragment: branch-to-subroutine; saves return address (pc+4) in PR.
1573 if( sh4_x86.in_delay_slot ) {
1576 load_spreg( R_EAX, R_PC );
1577 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1578 store_spreg( R_EAX, R_PR );
1579 sh4_x86.in_delay_slot = DELAY_PC;
1580 sh4_x86.branch_taken = TRUE;
1581 sh4_x86.tstate = TSTATE_NONE;
1582 if( UNTRANSLATABLE(pc+2) ) {
1583 ADD_imm32_r32( disp, R_EAX );
1584 store_spreg( R_EAX, R_NEW_PC );
1585 exit_block_emu(pc+2);
1588 sh4_translate_instruction( pc + 2 );
1589 exit_block_rel( disp + pc + 4, pc+4 );
// BSRF fragment: subroutine branch far; PR = pc+4, NEW_PC = PR + Rn.
1596 if( sh4_x86.in_delay_slot ) {
1599 load_spreg( R_EAX, R_PC );
1600 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1601 store_spreg( R_EAX, R_PR );
1602 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1603 store_spreg( R_EAX, R_NEW_PC );
1605 sh4_x86.in_delay_slot = DELAY_PC;
1606 sh4_x86.tstate = TSTATE_NONE;
1607 sh4_x86.branch_taken = TRUE;
1608 if( UNTRANSLATABLE(pc+2) ) {
1609 exit_block_emu(pc+2);
1612 sh4_translate_instruction( pc + 2 );
1613 exit_block_newpcset(pc+2);
// BT fragment: mirror of BF with the host-condition inverted (JF skips).
1620 if( sh4_x86.in_delay_slot ) {
1623 sh4vma_t target = disp + pc + 4;
1624 JF_rel8( nottaken );
1625 exit_block_rel(target, pc+2 );
1626 JMP_TARGET(nottaken);
// BT/S fragment: mirror of BF/S; note the condition code is XOR'd with 1
// to invert the cached tstate condition.
1632 if( sh4_x86.in_delay_slot ) {
1635 sh4_x86.in_delay_slot = DELAY_PC;
1636 if( UNTRANSLATABLE(pc+2) ) {
1637 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1639 ADD_imm32_r32( disp, R_EAX );
1640 JMP_TARGET(nottaken);
1641 ADD_sh4r_r32( R_PC, R_EAX );
1642 store_spreg( R_EAX, R_NEW_PC );
1643 exit_block_emu(pc+2);
1644 sh4_x86.branch_taken = TRUE;
1647 if( sh4_x86.tstate == TSTATE_NONE ) {
1648 CMP_imm8s_sh4r( 1, R_T );
1649 sh4_x86.tstate = TSTATE_E;
1651 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
1652 sh4_translate_instruction(pc+2);
1653 exit_block_rel( disp + pc + 4, pc+4 );
1655 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1656 sh4_translate_instruction(pc+2);
// JMP @Rn fragment: absolute register-indirect jump via NEW_PC.
1663 if( sh4_x86.in_delay_slot ) {
1666 load_reg( R_ECX, Rn );
1667 store_spreg( R_ECX, R_NEW_PC );
1668 sh4_x86.in_delay_slot = DELAY_PC;
1669 sh4_x86.branch_taken = TRUE;
1670 if( UNTRANSLATABLE(pc+2) ) {
1671 exit_block_emu(pc+2);
1674 sh4_translate_instruction(pc+2);
1675 exit_block_newpcset(pc+2);
// JSR @Rn fragment: like JMP but also saves pc+4 into PR first.
1682 if( sh4_x86.in_delay_slot ) {
1685 load_spreg( R_EAX, R_PC );
1686 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1687 store_spreg( R_EAX, R_PR );
1688 load_reg( R_ECX, Rn );
1689 store_spreg( R_ECX, R_NEW_PC );
1690 sh4_x86.in_delay_slot = DELAY_PC;
1691 sh4_x86.branch_taken = TRUE;
1692 sh4_x86.tstate = TSTATE_NONE;
1693 if( UNTRANSLATABLE(pc+2) ) {
1694 exit_block_emu(pc+2);
1697 sh4_translate_instruction(pc+2);
1698 exit_block_newpcset(pc+2);
// RTE fragment: return from exception — PC <- SPC, SR <- SSR. Writing SR
// can change CPU mode / register banks, so the priv/fpuen caches and the
// tstate cache are all invalidated.
1705 if( sh4_x86.in_delay_slot ) {
1709 load_spreg( R_ECX, R_SPC );
1710 store_spreg( R_ECX, R_NEW_PC );
1711 load_spreg( R_EAX, R_SSR );
1712 call_func1( sh4_write_sr, R_EAX );
1713 sh4_x86.in_delay_slot = DELAY_PC;
1714 sh4_x86.priv_checked = FALSE;
1715 sh4_x86.fpuen_checked = FALSE;
1716 sh4_x86.tstate = TSTATE_NONE;
1717 sh4_x86.branch_taken = TRUE;
1718 if( UNTRANSLATABLE(pc+2) ) {
1719 exit_block_emu(pc+2);
1722 sh4_translate_instruction(pc+2);
1723 exit_block_newpcset(pc+2);
// RTS fragment: return from subroutine — PC <- PR.
1730 if( sh4_x86.in_delay_slot ) {
1733 load_spreg( R_ECX, R_PR );
1734 store_spreg( R_ECX, R_NEW_PC );
1735 sh4_x86.in_delay_slot = DELAY_PC;
1736 sh4_x86.branch_taken = TRUE;
1737 if( UNTRANSLATABLE(pc+2) ) {
1738 exit_block_emu(pc+2);
1741 sh4_translate_instruction(pc+2);
1742 exit_block_newpcset(pc+2);
// TRAPA fragment: commit PC, then raise the trap through the core helper.
1748 COUNT_INST(I_TRAPA);
1749 if( sh4_x86.in_delay_slot ) {
1752 load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1753 ADD_r32_sh4r( R_ECX, R_PC );
1754 load_imm32( R_EAX, imm );
1755 call_func1( sh4_raise_trap, R_EAX );
1756 sh4_x86.tstate = TSTATE_NONE;
1757 exit_block_pcset(pc);
1758 sh4_x86.branch_taken = TRUE;
// UNDEF fragment: raise illegal-instruction exception.
1763 COUNT_INST(I_UNDEF);
1764 if( sh4_x86.in_delay_slot ) {
1767 JMP_exc(EXC_ILLEGAL);
// CLRMAC fragment: zero both halves of the MAC accumulator with a single
// XOR-cleared register.
1773 COUNT_INST(I_CLRMAC);
1774 XOR_r32_r32(R_EAX, R_EAX);
1775 store_spreg( R_EAX, R_MACL );
1776 store_spreg( R_EAX, R_MACH );
1777 sh4_x86.tstate = TSTATE_NONE;
// Residue of the CLRS/CLRT/SETS/SETT templates (bodies elided by
// extraction): each leaves a known carry-style condition cached as
// TSTATE_C so a following conditional can reuse the host flags.
1783 sh4_x86.tstate = TSTATE_C;
1789 sh4_x86.tstate = TSTATE_C;
1795 sh4_x86.tstate = TSTATE_C;
1801 sh4_x86.tstate = TSTATE_C;
1804 /* Floating point moves */
// NOTE(review): every FMOV template below tests FPSCR.SZ at runtime and
// emits both a single-precision (SZ=0) and a pair/double (SZ=1) path,
// joined by JNE/JMP_TARGET labels. Else-branch openers, JMP-to-end lines
// and ':}' closers were elided by extraction.
// FMOV FRm, FRn fragment (I_FMOV1): register-to-register move.
1806 COUNT_INST(I_FMOV1);
1808 load_spreg( R_ECX, R_FPSCR );
1809 TEST_imm32_r32( FPSCR_SZ, R_ECX );
1810 JNE_rel8(doublesize);
1811 load_fr( R_EAX, FRm ); // SZ=0 branch
1812 store_fr( R_EAX, FRn );
1814 JMP_TARGET(doublesize);
// SZ=1: move the register pair via the dr0/dr1 halves.
1815 load_dr0( R_EAX, FRm );
1816 load_dr1( R_ECX, FRm );
1817 store_dr0( R_EAX, FRn );
1818 store_dr1( R_ECX, FRn );
1820 sh4_x86.tstate = TSTATE_NONE;
// FMOV FRm, @Rn fragment (I_FMOV2): FP store to memory.
1823 COUNT_INST(I_FMOV2);
1825 load_reg( R_EAX, Rn );
1826 load_spreg( R_EDX, R_FPSCR );
1827 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1828 JNE_rel8(doublesize);
1830 check_walign32( R_EAX );
1831 MMU_TRANSLATE_WRITE( R_EAX );
1832 load_fr( R_ECX, FRm );
1833 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1836 JMP_TARGET(doublesize);
// SZ=1: 8-byte store requires 8-byte alignment.
1837 check_walign64( R_EAX );
1838 MMU_TRANSLATE_WRITE( R_EAX );
1839 load_dr0( R_ECX, FRm );
1840 load_dr1( R_EDX, FRm );
1841 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1843 sh4_x86.tstate = TSTATE_NONE;
// FMOV @Rm, FRn fragment (I_FMOV5): FP load from memory.
1846 COUNT_INST(I_FMOV5);
1848 load_reg( R_EAX, Rm );
1849 load_spreg( R_EDX, R_FPSCR );
1850 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1851 JNE_rel8(doublesize);
1853 check_ralign32( R_EAX );
1854 MMU_TRANSLATE_READ( R_EAX );
1855 MEM_READ_LONG( R_EAX, R_EAX );
1856 store_fr( R_EAX, FRn );
1859 JMP_TARGET(doublesize);
1860 check_ralign64( R_EAX );
1861 MMU_TRANSLATE_READ( R_EAX );
1862 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1863 store_dr0( R_ECX, FRn );
1864 store_dr1( R_EAX, FRn );
1866 sh4_x86.tstate = TSTATE_NONE;
// FMOV FRm, @-Rn fragment (I_FMOV3): pre-decrement FP store; Rn is only
// decremented after MMU translation succeeds (see the integer @-Rn store).
1869 COUNT_INST(I_FMOV3);
1871 load_reg( R_EAX, Rn );
1872 load_spreg( R_EDX, R_FPSCR );
1873 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1874 JNE_rel8(doublesize);
1876 check_walign32( R_EAX );
1877 ADD_imm8s_r32( -4, R_EAX );
1878 MMU_TRANSLATE_WRITE( R_EAX );
1879 load_fr( R_ECX, FRm );
1880 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1881 MEM_WRITE_LONG( R_EAX, R_ECX );
1884 JMP_TARGET(doublesize);
1885 check_walign64( R_EAX );
1886 ADD_imm8s_r32(-8,R_EAX);
1887 MMU_TRANSLATE_WRITE( R_EAX );
1888 load_dr0( R_ECX, FRm );
1889 load_dr1( R_EDX, FRm );
1890 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1891 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1894 sh4_x86.tstate = TSTATE_NONE;
// FMOV @Rm+, FRn fragment (I_FMOV6): post-increment FP load; Rm bumped by
// 4 (SZ=0) or 8 (SZ=1) after translation succeeds.
1897 COUNT_INST(I_FMOV6);
1899 load_reg( R_EAX, Rm );
1900 load_spreg( R_EDX, R_FPSCR );
1901 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1902 JNE_rel8(doublesize);
1904 check_ralign32( R_EAX );
1905 MMU_TRANSLATE_READ( R_EAX );
1906 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1907 MEM_READ_LONG( R_EAX, R_EAX );
1908 store_fr( R_EAX, FRn );
1911 JMP_TARGET(doublesize);
1912 check_ralign64( R_EAX );
1913 MMU_TRANSLATE_READ( R_EAX );
1914 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1915 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1916 store_dr0( R_ECX, FRn );
1917 store_dr1( R_EAX, FRn );
1920 sh4_x86.tstate = TSTATE_NONE;
// FMOV FRm, @(R0, Rn) (I_FMOV4): indexed FP store, address = R0 + Rn.
1922 FMOV FRm, @(R0, Rn) {:
1923 COUNT_INST(I_FMOV4);
1925 load_reg( R_EAX, Rn );
1926 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1927 load_spreg( R_EDX, R_FPSCR );
1928 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1929 JNE_rel8(doublesize);
1931 check_walign32( R_EAX );
1932 MMU_TRANSLATE_WRITE( R_EAX );
1933 load_fr( R_ECX, FRm );
1934 MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
1937 JMP_TARGET(doublesize);
1938 check_walign64( R_EAX );
1939 MMU_TRANSLATE_WRITE( R_EAX );
1940 load_dr0( R_ECX, FRm );
1941 load_dr1( R_EDX, FRm );
1942 MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
1945 sh4_x86.tstate = TSTATE_NONE;
// FMOV @(R0, Rm), FRn (I_FMOV7): indexed FP load, address = R0 + Rm.
1947 FMOV @(R0, Rm), FRn {:
1948 COUNT_INST(I_FMOV7);
1950 load_reg( R_EAX, Rm );
1951 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1952 load_spreg( R_EDX, R_FPSCR );
1953 TEST_imm32_r32( FPSCR_SZ, R_EDX );
1954 JNE_rel8(doublesize);
1956 check_ralign32( R_EAX );
1957 MMU_TRANSLATE_READ( R_EAX );
1958 MEM_READ_LONG( R_EAX, R_EAX );
1959 store_fr( R_EAX, FRn );
1962 JMP_TARGET(doublesize);
1963 check_ralign64( R_EAX );
1964 MMU_TRANSLATE_READ( R_EAX );
1965 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1966 store_dr0( R_ECX, FRn );
1967 store_dr1( R_EAX, FRn );
1970 sh4_x86.tstate = TSTATE_NONE;
// FLDI0 FRn: load +0.0 into FRn; only valid when FPSCR.PR=0 (the skip
// branch for PR=1 was elided by extraction).
1972 FLDI0 FRn {: /* IFF PR=0 */
1973 COUNT_INST(I_FLDI0);
1975 load_spreg( R_ECX, R_FPSCR );
1976 TEST_imm32_r32( FPSCR_PR, R_ECX );
1978 XOR_r32_r32( R_EAX, R_EAX );
1979 store_fr( R_EAX, FRn );
1981 sh4_x86.tstate = TSTATE_NONE;
// FLDI1 FRn: load +1.0 (IEEE-754 single 0x3F800000) into FRn; PR=0 only.
1983 FLDI1 FRn {: /* IFF PR=0 */
1984 COUNT_INST(I_FLDI1);
1986 load_spreg( R_ECX, R_FPSCR );
1987 TEST_imm32_r32( FPSCR_PR, R_ECX );
1989 load_imm32(R_EAX, 0x3F800000);
1990 store_fr( R_EAX, FRn );
1992 sh4_x86.tstate = TSTATE_NONE;
// FLOAT fragment: int->FP conversion; single/double path split on PR
// (x87 conversion lines elided by extraction).
1996 COUNT_INST(I_FLOAT);
1998 load_spreg( R_ECX, R_FPSCR );
2000 TEST_imm32_r32( FPSCR_PR, R_ECX );
2001 JNE_rel8(doubleprec);
2004 JMP_TARGET(doubleprec);
2007 sh4_x86.tstate = TSTATE_NONE;
// FTRC fragment: FP->int truncation with explicit saturation against
// max_int/min_int, done by temporarily switching the x87 control word to
// truncate mode (save_fcw/trunc_fcw) around FISTP.
2012 load_spreg( R_ECX, R_FPSCR );
2013 TEST_imm32_r32( FPSCR_PR, R_ECX );
2014 JNE_rel8(doubleprec);
2017 JMP_TARGET(doubleprec);
2020 load_ptr( R_ECX, &max_int );
2021 FILD_r32ind( R_ECX );
2024 load_ptr( R_ECX, &min_int ); // 5
2025 FILD_r32ind( R_ECX ); // 2
2027 JAE_rel8( sat2 ); // 2
2028 load_ptr( R_EAX, &save_fcw );
2029 FNSTCW_r32ind( R_EAX );
2030 load_ptr( R_EDX, &trunc_fcw );
2031 FLDCW_r32ind( R_EDX );
2032 FISTP_sh4r(R_FPUL); // 3
2033 FLDCW_r32ind( R_EAX );
// Saturated path: store the clamped integer value into FPUL.
2038 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
2039 store_spreg( R_ECX, R_FPUL );
2042 sh4_x86.tstate = TSTATE_NONE;
// FLDS fragment: FRm -> FPUL (raw 32-bit bit copy).
2047 load_fr( R_EAX, FRm );
2048 store_spreg( R_EAX, R_FPUL );
2049 sh4_x86.tstate = TSTATE_NONE;
// FSTS fragment: FPUL -> FRn (raw 32-bit bit copy).
2054 load_spreg( R_EAX, R_FPUL );
2055 store_fr( R_EAX, FRn );
2056 sh4_x86.tstate = TSTATE_NONE;
// FCNVDS fragment: double->single conversion; executes only when PR=1
// (conversion body elided by extraction).
2059 COUNT_INST(I_FCNVDS);
2061 load_spreg( R_ECX, R_FPSCR );
2062 TEST_imm32_r32( FPSCR_PR, R_ECX );
2063 JE_rel8(end); // only when PR=1
2067 sh4_x86.tstate = TSTATE_NONE;
// FCNVSD fragment: single->double conversion; PR=1 only.
2070 COUNT_INST(I_FCNVSD);
2072 load_spreg( R_ECX, R_FPSCR );
2073 TEST_imm32_r32( FPSCR_PR, R_ECX );
2074 JE_rel8(end); // only when PR=1
2078 sh4_x86.tstate = TSTATE_NONE;
2081 /* Floating point instructions */
// NOTE(review): the FP arithmetic templates (FADD/FSUB/FMUL/FDIV/FMAC/
// FABS/FNEG/FSRRA/FSQRT/FCMP...) all share one skeleton: test FPSCR.PR,
// JNE to a double-precision path, with the actual x87 arithmetic lines
// elided by extraction. Only the skeletons and a few bodies are visible.
2085 load_spreg( R_ECX, R_FPSCR );
2086 TEST_imm32_r32( FPSCR_PR, R_ECX );
2087 JNE_rel8(doubleprec);
2092 JMP_TARGET(doubleprec);
2097 sh4_x86.tstate = TSTATE_NONE;
2102 load_spreg( R_ECX, R_FPSCR );
2103 TEST_imm32_r32( FPSCR_PR, R_ECX );
2104 JNE_rel8(doubleprec);
2110 JMP_TARGET(doubleprec);
2116 sh4_x86.tstate = TSTATE_NONE;
2121 load_spreg( R_ECX, R_FPSCR );
2122 TEST_imm32_r32( FPSCR_PR, R_ECX );
2123 JNE_rel8(doubleprec);
2129 JMP_TARGET(doubleprec);
2135 sh4_x86.tstate = TSTATE_NONE;
// FMAC FR0, FRm, FRn: fused multiply-accumulate skeleton (arithmetic
// lines elided).
2137 FMAC FR0, FRm, FRn {:
2140 load_spreg( R_ECX, R_FPSCR );
2141 TEST_imm32_r32( FPSCR_PR, R_ECX );
2142 JNE_rel8(doubleprec);
2150 JMP_TARGET(doubleprec);
2158 sh4_x86.tstate = TSTATE_NONE;
2164 load_spreg( R_ECX, R_FPSCR );
2165 TEST_imm32_r32( FPSCR_PR, R_ECX );
2166 JNE_rel8(doubleprec);
2172 JMP_TARGET(doubleprec);
2178 sh4_x86.tstate = TSTATE_NONE;
2183 load_spreg( R_ECX, R_FPSCR );
2184 TEST_imm32_r32( FPSCR_PR, R_ECX );
2185 JNE_rel8(doubleprec);
2190 JMP_TARGET(doubleprec);
2195 sh4_x86.tstate = TSTATE_NONE;
// FSRRA fragment: reciprocal square-root approximation; PR=0 only.
2198 COUNT_INST(I_FSRRA);
2200 load_spreg( R_ECX, R_FPSCR );
2201 TEST_imm32_r32( FPSCR_PR, R_ECX );
2202 JNE_rel8(end); // PR=0 only
2209 sh4_x86.tstate = TSTATE_NONE;
// FSQRT fragment.
2212 COUNT_INST(I_FSQRT);
2214 load_spreg( R_ECX, R_FPSCR );
2215 TEST_imm32_r32( FPSCR_PR, R_ECX );
2216 JNE_rel8(doubleprec);
2221 JMP_TARGET(doubleprec);
2226 sh4_x86.tstate = TSTATE_NONE;
2231 load_spreg( R_ECX, R_FPSCR );
2232 TEST_imm32_r32( FPSCR_PR, R_ECX );
2233 JNE_rel8(doubleprec);
2239 JMP_TARGET(doubleprec);
2245 sh4_x86.tstate = TSTATE_NONE;
// FCMP/EQ fragment: sets T from the FP comparison (comparison lines elided).
2249 COUNT_INST(I_FCMPEQ);
2251 load_spreg( R_ECX, R_FPSCR );
2252 TEST_imm32_r32( FPSCR_PR, R_ECX );
2253 JNE_rel8(doubleprec);
2257 JMP_TARGET(doubleprec);
2264 sh4_x86.tstate = TSTATE_NONE;
// FCMP/GT fragment.
2267 COUNT_INST(I_FCMPGT);
2269 load_spreg( R_ECX, R_FPSCR );
2270 TEST_imm32_r32( FPSCR_PR, R_ECX );
2271 JNE_rel8(doubleprec);
2275 JMP_TARGET(doubleprec);
2282 sh4_x86.tstate = TSTATE_NONE;
// FSCA fragment: sine/cosine approximation — delegates to the C helper
// sh4_fsca with FPUL (angle) and a pointer to the even-aligned FR pair
// (FRn&0x0E). PR=0 only.
2288 load_spreg( R_ECX, R_FPSCR );
2289 TEST_imm32_r32( FPSCR_PR, R_ECX );
2290 JNE_rel8(doubleprec );
2291 LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
2292 load_spreg( R_EDX, R_FPUL );
2293 call_func2( sh4_fsca, R_EDX, R_ECX );
2294 JMP_TARGET(doubleprec);
2295 sh4_x86.tstate = TSTATE_NONE;
// FIPR fragment: 4-element inner product, computed on the x87 stack by
// pushing corresponding FVm/FVn elements (element-0 lines elided).
2300 load_spreg( R_ECX, R_FPSCR );
2301 TEST_imm32_r32( FPSCR_PR, R_ECX );
2302 JNE_rel8( doubleprec);
2307 push_fr( (FVm<<2)+1);
2308 push_fr( (FVn<<2)+1);
2311 push_fr( (FVm<<2)+2);
2312 push_fr( (FVn<<2)+2);
2315 push_fr( (FVm<<2)+3);
2316 push_fr( (FVn<<2)+3);
2319 pop_fr( (FVn<<2)+3);
2320 JMP_TARGET(doubleprec);
2321 sh4_x86.tstate = TSTATE_NONE;
// FTRV fragment: matrix-vector transform — delegates to the C helper
// sh4_ftrv with a pointer to the FV vector. PR=0 only.
2326 load_spreg( R_ECX, R_FPSCR );
2327 TEST_imm32_r32( FPSCR_PR, R_ECX );
2328 JNE_rel8( doubleprec );
2329 LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
2330 call_func1( sh4_ftrv, R_EDX ); // 12
2331 JMP_TARGET(doubleprec);
2332 sh4_x86.tstate = TSTATE_NONE;
// FRCHG fragment: toggle FPSCR.FR and swap the FP register banks via the
// core helper.
2336 COUNT_INST(I_FRCHG);
2338 load_spreg( R_ECX, R_FPSCR );
2339 XOR_imm32_r32( FPSCR_FR, R_ECX );
2340 store_spreg( R_ECX, R_FPSCR );
2341 call_func0( sh4_switch_fr_banks );
2342 sh4_x86.tstate = TSTATE_NONE;
// FSCHG fragment: toggle FPSCR.SZ (single/pair transfer size).
2345 COUNT_INST(I_FSCHG);
2347 load_spreg( R_ECX, R_FPSCR );
2348 XOR_imm32_r32( FPSCR_SZ, R_ECX );
2349 store_spreg( R_ECX, R_FPSCR );
2350 sh4_x86.tstate = TSTATE_NONE;
2353 /* Processor control instructions */
// NOTE(review): LDC/LDS move a general register (or a post-incremented
// memory word) into a control/system register. The SR variants go through
// sh4_write_sr() because writing SR can switch privilege mode and register
// banks, and therefore also invalidate the translator's priv/fpuen caches.
// Privilege-check lines and ':}' closers were elided by extraction.
// LDC Rm, SR — illegal in a delay slot.
2355 COUNT_INST(I_LDCSR);
2356 if( sh4_x86.in_delay_slot ) {
2360 load_reg( R_EAX, Rm );
2361 call_func1( sh4_write_sr, R_EAX );
2362 sh4_x86.priv_checked = FALSE;
2363 sh4_x86.fpuen_checked = FALSE;
2364 sh4_x86.tstate = TSTATE_NONE;
// LDC Rm, GBR — GBR is not privileged, no tstate reset needed here.
2369 load_reg( R_EAX, Rm );
2370 store_spreg( R_EAX, R_GBR );
// LDC Rm, VBR / SSR / SGR / SPC / DBR / Rn_BANK fragments.
2375 load_reg( R_EAX, Rm );
2376 store_spreg( R_EAX, R_VBR );
2377 sh4_x86.tstate = TSTATE_NONE;
2382 load_reg( R_EAX, Rm );
2383 store_spreg( R_EAX, R_SSR );
2384 sh4_x86.tstate = TSTATE_NONE;
2389 load_reg( R_EAX, Rm );
2390 store_spreg( R_EAX, R_SGR );
2391 sh4_x86.tstate = TSTATE_NONE;
2396 load_reg( R_EAX, Rm );
2397 store_spreg( R_EAX, R_SPC );
2398 sh4_x86.tstate = TSTATE_NONE;
2403 load_reg( R_EAX, Rm );
2404 store_spreg( R_EAX, R_DBR );
2405 sh4_x86.tstate = TSTATE_NONE;
2410 load_reg( R_EAX, Rm );
2411 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2412 sh4_x86.tstate = TSTATE_NONE;
// LDC.L @Rm+, GBR: post-increment memory load into GBR. Pattern shared by
// all LDC.L/LDS.L variants below: translate, bump Rm by 4 only after
// translation succeeds, read, store to the target register.
2416 load_reg( R_EAX, Rm );
2417 check_ralign32( R_EAX );
2418 MMU_TRANSLATE_READ( R_EAX );
2419 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2420 MEM_READ_LONG( R_EAX, R_EAX );
2421 store_spreg( R_EAX, R_GBR );
2422 sh4_x86.tstate = TSTATE_NONE;
// LDC.L @Rm+, SR — illegal in a delay slot; goes through sh4_write_sr.
2425 COUNT_INST(I_LDCSRM);
2426 if( sh4_x86.in_delay_slot ) {
2430 load_reg( R_EAX, Rm );
2431 check_ralign32( R_EAX );
2432 MMU_TRANSLATE_READ( R_EAX );
2433 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2434 MEM_READ_LONG( R_EAX, R_EAX );
2435 call_func1( sh4_write_sr, R_EAX );
2436 sh4_x86.priv_checked = FALSE;
2437 sh4_x86.fpuen_checked = FALSE;
2438 sh4_x86.tstate = TSTATE_NONE;
// LDC.L @Rm+, VBR / SSR / SGR / SPC / DBR / Rn_BANK fragments.
2444 load_reg( R_EAX, Rm );
2445 check_ralign32( R_EAX );
2446 MMU_TRANSLATE_READ( R_EAX );
2447 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2448 MEM_READ_LONG( R_EAX, R_EAX );
2449 store_spreg( R_EAX, R_VBR );
2450 sh4_x86.tstate = TSTATE_NONE;
2455 load_reg( R_EAX, Rm );
2456 check_ralign32( R_EAX );
2457 MMU_TRANSLATE_READ( R_EAX );
2458 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2459 MEM_READ_LONG( R_EAX, R_EAX );
2460 store_spreg( R_EAX, R_SSR );
2461 sh4_x86.tstate = TSTATE_NONE;
2466 load_reg( R_EAX, Rm );
2467 check_ralign32( R_EAX );
2468 MMU_TRANSLATE_READ( R_EAX );
2469 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2470 MEM_READ_LONG( R_EAX, R_EAX );
2471 store_spreg( R_EAX, R_SGR );
2472 sh4_x86.tstate = TSTATE_NONE;
2477 load_reg( R_EAX, Rm );
2478 check_ralign32( R_EAX );
2479 MMU_TRANSLATE_READ( R_EAX );
2480 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2481 MEM_READ_LONG( R_EAX, R_EAX );
2482 store_spreg( R_EAX, R_SPC );
2483 sh4_x86.tstate = TSTATE_NONE;
2488 load_reg( R_EAX, Rm );
2489 check_ralign32( R_EAX );
2490 MMU_TRANSLATE_READ( R_EAX );
2491 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2492 MEM_READ_LONG( R_EAX, R_EAX );
2493 store_spreg( R_EAX, R_DBR );
2494 sh4_x86.tstate = TSTATE_NONE;
2496 LDC.L @Rm+, Rn_BANK {:
2499 load_reg( R_EAX, Rm );
2500 check_ralign32( R_EAX );
2501 MMU_TRANSLATE_READ( R_EAX );
2502 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2503 MEM_READ_LONG( R_EAX, R_EAX );
2504 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2505 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, FPSCR: goes through sh4_write_fpscr (FPSCR writes can flip FP
// banks / transfer size, which the helper handles).
2508 COUNT_INST(I_LDSFPSCR);
2510 load_reg( R_EAX, Rm );
2511 call_func1( sh4_write_fpscr, R_EAX );
2512 sh4_x86.tstate = TSTATE_NONE;
2514 LDS.L @Rm+, FPSCR {:
2515 COUNT_INST(I_LDSFPSCRM);
2517 load_reg( R_EAX, Rm );
2518 check_ralign32( R_EAX );
2519 MMU_TRANSLATE_READ( R_EAX );
2520 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2521 MEM_READ_LONG( R_EAX, R_EAX );
2522 call_func1( sh4_write_fpscr, R_EAX );
2523 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, FPUL / LDS.L @Rm+, FPUL fragments.
2528 load_reg( R_EAX, Rm );
2529 store_spreg( R_EAX, R_FPUL );
2534 load_reg( R_EAX, Rm );
2535 check_ralign32( R_EAX );
2536 MMU_TRANSLATE_READ( R_EAX );
2537 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2538 MEM_READ_LONG( R_EAX, R_EAX );
2539 store_spreg( R_EAX, R_FPUL );
2540 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, MACH / LDS.L @Rm+, MACH fragments.
2544 load_reg( R_EAX, Rm );
2545 store_spreg( R_EAX, R_MACH );
2549 load_reg( R_EAX, Rm );
2550 check_ralign32( R_EAX );
2551 MMU_TRANSLATE_READ( R_EAX );
2552 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2553 MEM_READ_LONG( R_EAX, R_EAX );
2554 store_spreg( R_EAX, R_MACH );
2555 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, MACL / LDS.L @Rm+, MACL fragments.
2559 load_reg( R_EAX, Rm );
2560 store_spreg( R_EAX, R_MACL );
2564 load_reg( R_EAX, Rm );
2565 check_ralign32( R_EAX );
2566 MMU_TRANSLATE_READ( R_EAX );
2567 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2568 MEM_READ_LONG( R_EAX, R_EAX );
2569 store_spreg( R_EAX, R_MACL );
2570 sh4_x86.tstate = TSTATE_NONE;
// LDS Rm, PR / LDS.L @Rm+, PR fragments.
2574 load_reg( R_EAX, Rm );
2575 store_spreg( R_EAX, R_PR );
2579 load_reg( R_EAX, Rm );
2580 check_ralign32( R_EAX );
2581 MMU_TRANSLATE_READ( R_EAX );
2582 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2583 MEM_READ_LONG( R_EAX, R_EAX );
2584 store_spreg( R_EAX, R_PR );
2585 sh4_x86.tstate = TSTATE_NONE;
// LDTLB fragment: delegate TLB load to the MMU module.
2588 COUNT_INST(I_LDTLB);
2589 call_func0( MMU_ldtlb );
// OCBWB / cache-op residue (bodies elided — these are no-ops here).
2598 COUNT_INST(I_OCBWB);
// PREF fragment: only addresses in the store-queue region (0xE0000000,
// masked with 0xFC000000) do real work — they trigger a store-queue flush
// via sh4_flush_store_queue; the branch/exit lines are elided.
2602 load_reg( R_EAX, Rn );
2603 MOV_r32_r32( R_EAX, R_ECX );
2604 AND_imm32_r32( 0xFC000000, R_EAX );
2605 CMP_imm32_r32( 0xE0000000, R_EAX );
2607 call_func1( sh4_flush_store_queue, R_ECX );
2608 TEST_r32_r32( R_EAX, R_EAX );
2611 sh4_x86.tstate = TSTATE_NONE;
// SLEEP fragment: halt the core via the C helper; clears delay-slot state.
2614 COUNT_INST(I_SLEEP);
2616 call_func0( sh4_sleep );
2617 sh4_x86.tstate = TSTATE_NONE;
2618 sh4_x86.in_delay_slot = DELAY_NONE;
// STC SR, Rn: SR must be assembled by sh4_read_sr (it is stored split
// across fields), unlike the other control registers below.
2622 COUNT_INST(I_STCSR);
2624 call_func0(sh4_read_sr);
2625 store_reg( R_EAX, Rn );
2626 sh4_x86.tstate = TSTATE_NONE;
// STC GBR / VBR / SSR / SPC / SGR / DBR / Rm_BANK, Rn fragments.
2630 load_spreg( R_EAX, R_GBR );
2631 store_reg( R_EAX, Rn );
2636 load_spreg( R_EAX, R_VBR );
2637 store_reg( R_EAX, Rn );
2638 sh4_x86.tstate = TSTATE_NONE;
2643 load_spreg( R_EAX, R_SSR );
2644 store_reg( R_EAX, Rn );
2645 sh4_x86.tstate = TSTATE_NONE;
2650 load_spreg( R_EAX, R_SPC );
2651 store_reg( R_EAX, Rn );
2652 sh4_x86.tstate = TSTATE_NONE;
2657 load_spreg( R_EAX, R_SGR );
2658 store_reg( R_EAX, Rn );
2659 sh4_x86.tstate = TSTATE_NONE;
2664 load_spreg( R_EAX, R_DBR );
2665 store_reg( R_EAX, Rn );
2666 sh4_x86.tstate = TSTATE_NONE;
2671 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2672 store_reg( R_EAX, Rn );
2673 sh4_x86.tstate = TSTATE_NONE;
// STC.L SR, @-Rn: pre-decrement store of the assembled SR. EAX must be
// preserved across the sh4_read_sr call, hence the PUSH/POP around it;
// Rn is only decremented after MMU translation succeeds.
2676 COUNT_INST(I_STCSRM);
2678 load_reg( R_EAX, Rn );
2679 check_walign32( R_EAX );
2680 ADD_imm8s_r32( -4, R_EAX );
2681 MMU_TRANSLATE_WRITE( R_EAX );
2682 PUSH_realigned_r32( R_EAX );
2683 call_func0( sh4_read_sr );
2684 POP_realigned_r32( R_ECX );
2685 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2686 MEM_WRITE_LONG( R_ECX, R_EAX );
2687 sh4_x86.tstate = TSTATE_NONE;
// STC.L VBR / SSR / SPC / SGR / DBR / Rm_BANK / GBR, @-Rn fragments:
// identical pre-decrement store pattern for each control register.
2692 load_reg( R_EAX, Rn );
2693 check_walign32( R_EAX );
2694 ADD_imm8s_r32( -4, R_EAX );
2695 MMU_TRANSLATE_WRITE( R_EAX );
2696 load_spreg( R_EDX, R_VBR );
2697 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2698 MEM_WRITE_LONG( R_EAX, R_EDX );
2699 sh4_x86.tstate = TSTATE_NONE;
2704 load_reg( R_EAX, Rn );
2705 check_walign32( R_EAX );
2706 ADD_imm8s_r32( -4, R_EAX );
2707 MMU_TRANSLATE_WRITE( R_EAX );
2708 load_spreg( R_EDX, R_SSR );
2709 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2710 MEM_WRITE_LONG( R_EAX, R_EDX );
2711 sh4_x86.tstate = TSTATE_NONE;
2716 load_reg( R_EAX, Rn );
2717 check_walign32( R_EAX );
2718 ADD_imm8s_r32( -4, R_EAX );
2719 MMU_TRANSLATE_WRITE( R_EAX );
2720 load_spreg( R_EDX, R_SPC );
2721 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2722 MEM_WRITE_LONG( R_EAX, R_EDX );
2723 sh4_x86.tstate = TSTATE_NONE;
2728 load_reg( R_EAX, Rn );
2729 check_walign32( R_EAX );
2730 ADD_imm8s_r32( -4, R_EAX );
2731 MMU_TRANSLATE_WRITE( R_EAX );
2732 load_spreg( R_EDX, R_SGR );
2733 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2734 MEM_WRITE_LONG( R_EAX, R_EDX );
2735 sh4_x86.tstate = TSTATE_NONE;
2740 load_reg( R_EAX, Rn );
2741 check_walign32( R_EAX );
2742 ADD_imm8s_r32( -4, R_EAX );
2743 MMU_TRANSLATE_WRITE( R_EAX );
2744 load_spreg( R_EDX, R_DBR );
2745 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2746 MEM_WRITE_LONG( R_EAX, R_EDX );
2747 sh4_x86.tstate = TSTATE_NONE;
2749 STC.L Rm_BANK, @-Rn {:
2752 load_reg( R_EAX, Rn );
2753 check_walign32( R_EAX );
2754 ADD_imm8s_r32( -4, R_EAX );
2755 MMU_TRANSLATE_WRITE( R_EAX );
2756 load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
2757 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2758 MEM_WRITE_LONG( R_EAX, R_EDX );
2759 sh4_x86.tstate = TSTATE_NONE;
2763 load_reg( R_EAX, Rn );
2764 check_walign32( R_EAX );
2765 ADD_imm8s_r32( -4, R_EAX );
2766 MMU_TRANSLATE_WRITE( R_EAX );
2767 load_spreg( R_EDX, R_GBR );
2768 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2769 MEM_WRITE_LONG( R_EAX, R_EDX );
2770 sh4_x86.tstate = TSTATE_NONE;
// STS FPSCR, Rn / STS.L FPSCR, @-Rn.
2773 COUNT_INST(I_STSFPSCR);
2775 load_spreg( R_EAX, R_FPSCR );
2776 store_reg( R_EAX, Rn );
2778 STS.L FPSCR, @-Rn {:
2779 COUNT_INST(I_STSFPSCRM);
2781 load_reg( R_EAX, Rn );
2782 check_walign32( R_EAX );
2783 ADD_imm8s_r32( -4, R_EAX );
2784 MMU_TRANSLATE_WRITE( R_EAX );
2785 load_spreg( R_EDX, R_FPSCR );
2786 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2787 MEM_WRITE_LONG( R_EAX, R_EDX );
2788 sh4_x86.tstate = TSTATE_NONE;
// STS FPUL, Rn / STS.L FPUL, @-Rn fragments.
2793 load_spreg( R_EAX, R_FPUL );
2794 store_reg( R_EAX, Rn );
2799 load_reg( R_EAX, Rn );
2800 check_walign32( R_EAX );
2801 ADD_imm8s_r32( -4, R_EAX );
2802 MMU_TRANSLATE_WRITE( R_EAX );
2803 load_spreg( R_EDX, R_FPUL );
2804 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2805 MEM_WRITE_LONG( R_EAX, R_EDX );
2806 sh4_x86.tstate = TSTATE_NONE;
// STS MACH, Rn / STS.L MACH, @-Rn fragments.
2810 load_spreg( R_EAX, R_MACH );
2811 store_reg( R_EAX, Rn );
2815 load_reg( R_EAX, Rn );
2816 check_walign32( R_EAX );
2817 ADD_imm8s_r32( -4, R_EAX );
2818 MMU_TRANSLATE_WRITE( R_EAX );
2819 load_spreg( R_EDX, R_MACH );
2820 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2821 MEM_WRITE_LONG( R_EAX, R_EDX );
2822 sh4_x86.tstate = TSTATE_NONE;
// STS MACL, Rn / STS.L MACL, @-Rn fragments.
2826 load_spreg( R_EAX, R_MACL );
2827 store_reg( R_EAX, Rn );
2831 load_reg( R_EAX, Rn );
2832 check_walign32( R_EAX );
2833 ADD_imm8s_r32( -4, R_EAX );
2834 MMU_TRANSLATE_WRITE( R_EAX );
2835 load_spreg( R_EDX, R_MACL );
2836 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2837 MEM_WRITE_LONG( R_EAX, R_EDX );
2838 sh4_x86.tstate = TSTATE_NONE;
// STS PR, Rn / STS.L PR, @-Rn fragments.
2842 load_spreg( R_EAX, R_PR );
2843 store_reg( R_EAX, Rn );
2847 load_reg( R_EAX, Rn );
2848 check_walign32( R_EAX );
2849 ADD_imm8s_r32( -4, R_EAX );
2850 MMU_TRANSLATE_WRITE( R_EAX );
2851 load_spreg( R_EDX, R_PR );
2852 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2853 MEM_WRITE_LONG( R_EAX, R_EDX );
2854 sh4_x86.tstate = TSTATE_NONE;
// NOP fragment: emits nothing; clears delay-slot state.
2859 /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
2862 sh4_x86.in_delay_slot = DELAY_NONE;
.