/**
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#define DEFAULT_BACKPATCH_SIZE 4096

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};
/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;     /* DELAY_NONE, DELAY_PC or DELAY_SLOT */
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec;  /* true if FPU is in double-precision mode */
    gboolean double_size;  /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;            /* x86 condition code tracking the last T update, or TSTATE_NONE */
    gboolean tlb_on;       /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};
/* tstate values are x86 condition-code numbers, so a conditional jump on the
 * cached T state can be emitted directly as OP(0x70+tstate). */
#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_AE   3
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_A    7
#define TSTATE_GE   0xD
#define TSTATE_G    0xF
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
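/* How the tstate cache pays off (illustrative, not emitted verbatim): a
 * CMP/EQ Rm, Rn followed by BT compiles to roughly
 *
 *     cmp  eax, ecx          ; CMP/EQ leaves ZF live, tstate = TSTATE_E
 *     sete [ebp+R_T]
 *     jz   taken             ; JT_rel8 emits OP(0x70+TSTATE_E) = jz
 *
 * so the branch reuses the live EFLAGS instead of re-testing sh4r.t from
 * memory. JF_rel8 flips the low bit of the condition code (jz -> jnz). */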
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE; /* SSE3 = CPUID.1:ECX bit 0 */
}
void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.sse3_enabled = is_sse3_supported();
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2; /* attribute delay-slot exceptions to the branch */
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
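/* Sketch of the backpatch flow (assuming sh4_translate_end_block() walks this
 * list, as the record fields suggest): each JE_exc()/JNE_exc() site emits a
 * jump with a placeholder displacement and records it here. At the end of the
 * block one exception stub is emitted per record - raise exc_code (or, for
 * exc_code == -1, re-raise the exception already pending from an MMU lookup)
 * and rewind sh4r.pc by fixup_icount instructions - and the jump at
 * fixup_offset is patched to land on that stub. */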
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F); OP(0xBF); /* movsx r32, r/m16 */
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F); OP(0xB7); /* movzx r32, r/m16 */
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}
/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
#define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
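/* Why the ^1 / |0x01 / &0x0E indexing (inferred from the macros above): the
 * fr[][] banks appear to store each pair of singles word-swapped, so the two
 * words of a DR register sit in little-endian order and can be accessed as a
 * host double directly. Reading the single FRn therefore flips the low index
 * bit (frm^1), while dr0/dr1 pick out the frm|0x01 and frm&0x0E words of the
 * containing pair. */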
#define push_fpul()   FLDF_sh4r(R_FPUL)
#define pop_fpul()    FSTPF_sh4r(R_FPUL)
#define push_fr(frm)  FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)   FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm)  FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)   FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm)  FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)   FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )

/* Exception checks - Note that all exception checks will clobber EAX */
#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
        sh4_x86.priv_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_MD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JE_exc( EXC_SLOT_ILLEGAL );\
        } else {\
            JE_exc( EXC_ILLEGAL );\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_FD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
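/* All memory traffic funnels through the C memory core (sh4_read_byte and
 * friends), with argument/return shuffling supplied by the ABI header
 * included below; MEM_RESULT just moves the return value out of EAX when the
 * caller wants it elsewhere. The idiom used throughout the translator is,
 * e.g. for a long load through R2 into R1:
 *     load_reg( R_EAX, 2 ); MEM_READ_LONG( R_EAX, R_EAX ); store_reg( R_EAX, 1 );
 */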
/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }

/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
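/* Protocol sketch (inferred from the macros): mmu_vma_to_phys_read/write
 * return the translated address in EAX, or MMU_VMA_ERROR after having
 * already raised the TLB miss / address error themselves - which is why
 * JE_exc is passed -1 ("no new exception, unwind to the one already
 * pending") rather than a real EXC_* code. The _EXC variant exists so that
 * two-operand instructions like MAC.L/MAC.W can record a different recovery
 * point for their second translation. */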
#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
/****** Import appropriate calling conventions ******/
#if SIZEOF_VOID_P == 8
#include "sh4/ia64abi.h"
#else /* 32-bit system */
#ifdef APPLE_BUILD
#include "sh4/ia32mac.h"
#else
#include "sh4/ia32abi.h"
#endif
#endif
void sh4_translate_begin_block( sh4addr_t pc )
{
    enter_block();
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.priv_checked = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_MMU_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
}
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}
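/* Sizing sketch: this is the worst-case size of the code emitted by the
 * end-of-block routine - EPILOGUE_SIZE plus a per-record allowance for the
 * exception stubs (12 bytes each for the first three, 15 thereafter,
 * presumably once rel8 jumps no longer reach the epilogue). The caller can
 * use it to ensure the output buffer always has room for the block tail. */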
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    AND_imm8s_rptr( 0xFC, R_EAX );
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * this keeps things considerably simpler).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or jump)
 */
410 /* Read instruction from icache */
411 assert( IS_IN_ICACHE(pc) );
412 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
414 if( !sh4_x86.in_delay_slot ) {
415 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
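/* Recovery records (sketch, based on the call above): one record per
 * translated instruction maps the current output position to the
 * instruction's index within the block ((pc - block_start_pc)>>1). When an
 * exception or interrupt lands inside the generated code, the runtime can
 * walk these records to reconstruct the precise SH4 PC. Delay slots don't
 * get their own record - they recover through the owning branch. */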
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_sh4r( R_T );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_sh4r( R_T );
    sh4_x86.tstate = TSTATE_O;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
AND.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EDX );
    POP_realigned_r32(R_EAX);
    AND_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_E;

    COUNT_INST(I_CMPEQI);
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_E;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_GE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_sh4r( R_T );
    sh4_x86.tstate = TSTATE_G;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_sh4r( R_T );
    sh4_x86.tstate = TSTATE_A;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_AE;

    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_sh4r( R_T );
    sh4_x86.tstate = TSTATE_G;

    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_GE;

    COUNT_INST(I_CMPSTR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target1);
    TEST_r8_r8( R_AH, R_AH );
    JE_rel8(target2);
    SHR_imm8_r32( 16, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target3);
    TEST_r8_r8( R_AH, R_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_E;
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_NE;

    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
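/* DIV1 above is the SH4's one-bit non-restoring division step. Reading of
 * the emitted sequence: rotate the next dividend bit into Rn through the
 * carry (T), add or subtract the divisor Rm depending on whether the Q and
 * M sign flags currently agree, then fold the resulting carry back into Q
 * and T exactly as the SH4 programming manual specifies - so that, after
 * DIV0S/DIV0U has seeded Q, M and T, repeating DIV1 32 times leaves the
 * quotient bits in Rn. */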
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_E;

    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 4, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign32( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
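/* MAC.L accumulates a signed 64-bit product into MACH:MACL. When the S flag
 * is set the result must instead saturate to the 48-bit signed range; that
 * is delegated to the C helper signsat48() rather than being open-coded in
 * the emitted stream. */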
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 2, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign16( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( end );                // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( positive );            // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end2);                 // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end3);                 // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
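/* MAC.W with S set saturates to 32 bits: an overflowing MACL add stores 1
 * into MACH as the SH4's overflow marker and clamps MACL to 0x80000000 or
 * 0x7FFFFFFF according to the sign of the overflowed result (the JNO/JS
 * pair above). With S clear it is a plain 64-bit accumulate into
 * MACH:MACL. */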
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_sh4r( R_T );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
OR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EDX );
    POP_realigned_r32(R_EAX);
    OR_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_sh4r( R_T );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_sh4r( R_T );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_sh4r( R_T );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_sh4r( R_T );
    sh4_x86.tstate = TSTATE_C;
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptysar);         // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);             // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptyshr);
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);             // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
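/* SHAD/SHLD background for the gymnastics above: the SH4 shifts left by
 * Rm&0x1F when Rm >= 0, and otherwise shifts right by (-Rm)&0x1F, where a
 * right-shift amount of zero means a full 32-bit shift (arithmetic: fill
 * with the sign bit via SAR 31; logical: result 0). x86 SHL/SHR/SAR mask
 * their count to 5 bits and would treat 32 as 0, hence the emptysar /
 * emptyshr special cases. */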
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_sh4r( R_T );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_sh4r( R_T );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_sh4r( R_T );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_sh4r( R_T );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_sh4r( R_T );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_sh4r( R_T );
    sh4_x86.tstate = TSTATE_O;

    COUNT_INST(I_SWAPB);
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
    store_reg( R_EAX, Rn );

    COUNT_INST(I_SWAPB);
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EDX );
    TEST_r8_r8( R_DL, R_DL );
    SETE_sh4r( R_T );
    OR_imm8_r8( 0x80, R_DL );
    POP_realigned_r32( R_EAX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_E;

    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_E;

TST.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0);
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_sh4r( R_T );
    sh4_x86.tstate = TSTATE_E;
:}

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

XOR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE(R_EAX, R_EDX);
    POP_realigned_r32(R_EAX);
    XOR_imm32_r32( imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}

    COUNT_INST(I_XTRCT);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
/* Data move instructions */

    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );

    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

MOV.B Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

MOV.B @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

MOV.L Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

MOV.L @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    COUNT_INST(I_MOVLPC);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_LONG( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -2, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

MOV.W Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

MOV.W @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
            MOVSX_r16_r32( R_EAX, R_EAX );
        } else {
            load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_WORD( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_ECX );
        store_reg( R_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
    COUNT_INST(I_MOVCA);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

/* Control transfer instructions */
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JT_rel8( nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
            JT_rel8(nottaken);
            ADD_imm32_r32( disp, R_EAX );
            JMP_TARGET(nottaken);
            ADD_sh4r_r32( R_PC, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMP_imm8s_sh4r( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            sh4vma_t target = disp + pc + 4;
            OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
            int save_tstate = sh4_x86.tstate;
            sh4_translate_instruction(pc+2);
            exit_block_rel( target, pc+4 );

            // not taken
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_x86.tstate = save_tstate;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
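/* Conditional delayed branches (BF/S above, BT/S below) are the awkward
 * case: the delay slot executes on both paths, so it is translated twice -
 * once before the taken-path exit_block_rel(), and once more after the
 * patched jcc rel32 lands on the fall-through path. The OP32(0) placeholder
 * is back-patched with the real displacement once the taken path's length
 * is known. */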
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            load_spreg( R_EAX, R_PC );
            ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            ADD_imm32_r32( disp, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );

        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JF_rel8( nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
            JF_rel8(nottaken);
            ADD_imm32_r32( disp, R_EAX );
            JMP_TARGET(nottaken);
            ADD_sh4r_r32( R_PC, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMP_imm8s_sh4r( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
            int save_tstate = sh4_x86.tstate;
            sh4_translate_instruction(pc+2);
            exit_block_rel( disp + pc + 4, pc+4 );

            // not taken
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_x86.tstate = save_tstate;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_spreg( R_ECX, R_SPC );
        store_spreg( R_ECX, R_NEW_PC );
        load_spreg( R_EAX, R_SSR );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_ECX, R_PR );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
    COUNT_INST(I_TRAPA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
        ADD_r32_sh4r( R_ECX, R_PC );
        load_imm32( R_EAX, imm );
        call_func1( sh4_raise_trap, R_EAX );
        sh4_x86.tstate = TSTATE_NONE;
        exit_block_pcset(pc);
        sh4_x86.branch_taken = TRUE;
        return 2;
    }
    COUNT_INST(I_UNDEF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        JMP_exc(EXC_ILLEGAL);
        return 2;
    }
    COUNT_INST(I_CLRMAC);
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;

    /* CLRS */
    CLC();
    SETC_sh4r( R_S );
    sh4_x86.tstate = TSTATE_NONE;

    /* CLRT */
    CLC();
    SETC_sh4r( R_T );
    sh4_x86.tstate = TSTATE_C;

    /* SETS */
    STC();
    SETC_sh4r( R_S );
    sh4_x86.tstate = TSTATE_NONE;

    /* SETT */
    STC();
    SETC_sh4r( R_T );
    sh4_x86.tstate = TSTATE_C;
/* Floating point moves */
    COUNT_INST(I_FMOV1);
    check_fpuen();
    if( sh4_x86.double_size ) {
        load_dr0( R_EAX, FRm );
        load_dr1( R_ECX, FRm );
        store_dr0( R_EAX, FRn );
        store_dr1( R_ECX, FRn );
    } else {
        load_fr( R_EAX, FRm ); // SZ=0 branch
        store_fr( R_EAX, FRn );
    }
    COUNT_INST(I_FMOV2);
    check_fpuen();
    load_reg( R_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_dr0( R_EDX, FRm );
        load_dr1( R_ECX, FRm );
        MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
    } else {
        check_walign32( R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_fr( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_FMOV5);
    check_fpuen();
    load_reg( R_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
        store_dr0( R_EDX, FRn );
        store_dr1( R_EAX, FRn );
    } else {
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_FMOV3);
    check_fpuen();
    load_reg( R_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( R_EAX );
        ADD_imm8s_r32(-8,R_EAX);
        MMU_TRANSLATE_WRITE( R_EAX );
        load_dr0( R_EDX, FRm );
        load_dr1( R_ECX, FRm );
        ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
        MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
    } else {
        check_walign32( R_EAX );
        ADD_imm8s_r32( -4, R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_fr( R_EDX, FRm );
        ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
        MEM_WRITE_LONG( R_EAX, R_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_FMOV6);
    check_fpuen();
    load_reg( R_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
        MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
        store_dr0( R_EDX, FRn );
        store_dr1( R_EAX, FRn );
    } else {
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
FMOV FRm, @(R0, Rn) {:
    COUNT_INST(I_FMOV4);
    check_fpuen();
    load_reg( R_EAX, Rn );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    if( sh4_x86.double_size ) {
        check_walign64( R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_dr0( R_EDX, FRm );
        load_dr1( R_ECX, FRm );
        MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
    } else {
        check_walign32( R_EAX );
        MMU_TRANSLATE_WRITE( R_EAX );
        load_fr( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @(R0, Rm), FRn {:
    COUNT_INST(I_FMOV7);
    check_fpuen();
    load_reg( R_EAX, Rm );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    if( sh4_x86.double_size ) {
        check_ralign64( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
        store_dr0( R_ECX, FRn );
        store_dr1( R_EAX, FRn );
    } else {
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI0 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI0);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        XOR_r32_r32( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI1 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI1);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        load_imm32(R_EAX, 0x3F800000); /* 1.0f */
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
    COUNT_INST(I_FLOAT);
    check_fpuen();
    FILD_sh4r(R_FPUL);
    if( sh4_x86.double_prec ) {
        pop_dr( FRn );
    } else {
        pop_fr( FRn );
    }

    COUNT_INST(I_FTRC);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
    } else {
        push_fr( FRm );
    }
    load_ptr( R_ECX, &max_int );
    FILD_r32ind( R_ECX );
    FCOMIP_st(1);
    JNA_rel8( sat );
    load_ptr( R_ECX, &min_int );    // 5
    FILD_r32ind( R_ECX );           // 2
    FCOMIP_st(1);
    JAE_rel8( sat2 );               // 2
    load_ptr( R_EAX, &save_fcw );
    FNSTCW_r32ind( R_EAX );
    load_ptr( R_EDX, &trunc_fcw );
    FLDCW_r32ind( R_EDX );
    FISTP_sh4r(R_FPUL);             // 3
    FLDCW_r32ind( R_EAX );
    JMP_rel8(end);

    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    MOV_r32ind_r32( R_ECX, R_ECX ); // 2
    store_spreg( R_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
    load_fr( R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );

    load_spreg( R_EAX, R_FPUL );
    store_fr( R_EAX, FRn );
    COUNT_INST(I_FCNVDS);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
        pop_fpul();
    }

    COUNT_INST(I_FCNVSD);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_fpul();
        pop_dr( FRn );
    }
/* Floating point instructions */

    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

FMAC FR0, FRm, FRn {:
    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

    COUNT_INST(I_FSRRA);
    if( sh4_x86.double_prec == 0 ) {

    COUNT_INST(I_FSQRT);
    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

    COUNT_INST(I_FCMPEQ);
    if( sh4_x86.double_prec ) {

    sh4_x86.tstate = TSTATE_E;

    COUNT_INST(I_FCMPGT);
    if( sh4_x86.double_prec ) {

    sh4_x86.tstate = TSTATE_A;
    if( sh4_x86.double_prec == 0 ) {
        LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
        load_spreg( R_EAX, R_FPUL );
        call_func2( sh4_fsca, R_EAX, R_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;
    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled ) {
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
            MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
            HADDPS_xmm_xmm( 4, 4 );
            HADDPS_xmm_xmm( 4, 4 );
            MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
        } else {
            push_fr( FVm<<2 );
            push_fr( FVn<<2 );
            FMULP_st(1);
            push_fr( (FVm<<2)+1);
            push_fr( (FVn<<2)+1);
            FMULP_st(1);
            FADDP_st(1);
            push_fr( (FVm<<2)+2);
            push_fr( (FVn<<2)+2);
            FMULP_st(1);
            FADDP_st(1);
            push_fr( (FVm<<2)+3);
            push_fr( (FVn<<2)+3);
            FMULP_st(1);
            FADDP_st(1);
            pop_fr( (FVn<<2)+3);
        }
    }
    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled ) {
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 );  // M1 M0 M3 M2
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 );  // M5 M4 M7 M6
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 );  // M9 M8 M11 M10
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 ); // M13 M12 M15 M14

            MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
            MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
            MOVAPS_xmm_xmm( 4, 6 );
            MOVAPS_xmm_xmm( 5, 7 );
            MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
            MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
            MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
            MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
            MULPS_xmm_xmm( 0, 4 );
            MULPS_xmm_xmm( 1, 5 );
            MULPS_xmm_xmm( 2, 6 );
            MULPS_xmm_xmm( 3, 7 );
            ADDPS_xmm_xmm( 5, 4 );
            ADDPS_xmm_xmm( 7, 6 );
            ADDPS_xmm_xmm( 6, 4 );
            MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
        } else {
            LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
            call_func1( sh4_ftrv, R_EAX );
        }
    }
    sh4_x86.tstate = TSTATE_NONE;
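/* FTRV transforms vector FVn by the XMTRX matrix held in the XF bank
 * (fr[1]). The SSE3 path loads the matrix rows into xmm0-3, broadcasts each
 * element of FVn across a register via the MOVSLDUP/MOVSHDUP +
 * MOVLHPS/MOVHLPS shuffles (lane layouts noted line by line above), then
 * multiplies and sums the four partial products with MULPS/ADDPS - the
 * whole 4x4 transform without any x87 traffic. Without SSE3 it falls back
 * to the C helper sh4_ftrv(). */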
    COUNT_INST(I_FRCHG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_FR, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    call_func0( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_FSCHG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_SZ, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_size = !sh4_x86.double_size;
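/* Note how FSCHG flips sh4_x86.double_size at translate time as well as
 * FPSCR.SZ at run time: the translator decides statically, per FMOV,
 * whether to emit a 32- or 64-bit transfer. This presumably relies on
 * blocks being entered in the same FPSCR mode they were translated under -
 * begin_block() seeds double_size/double_prec from the live sh4r.fpscr. */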
/* Processor control instructions */
    COUNT_INST(I_LDCSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_reg( R_EAX, Rm );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
    }
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );

    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_LDCSRM);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
        MEM_READ_LONG( R_EAX, R_EAX );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
    }
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;

LDC.L @Rm+, Rn_BANK {:
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    load_reg( R_EAX, Rm );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;

LDS.L @Rm+, FPSCR {:
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
    check_fpuen();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );

    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_LDTLB);
    call_func0( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_OCBWB);

    load_reg( R_EAX, Rn );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_ECX );
    CMP_imm32_r32( 0xE0000000, R_ECX );
    JNE_rel8(end);
    if( sh4_x86.tlb_on ) {
        call_func1( sh4_flush_store_queue_mmu, R_EAX );
        TEST_r32_r32( R_EAX, R_EAX );
        JE_exc(-1);
    } else {
        call_func1( sh4_flush_store_queue, R_EAX );
    }
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
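/* PREF only has an architectural side effect for store-queue addresses: the
 * masking above checks for the 0xE0000000-0xE3FFFFFF SQ region and, on a
 * match, flushes the queue to memory (through the MMU-aware variant when
 * the TLB is on, where a failed lookup re-raises the pending exception via
 * JE_exc(-1)). For any other address it is treated as a no-op hint. */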
    COUNT_INST(I_SLEEP);
    check_priv();
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
    COUNT_INST(I_STCSR);
    check_priv();
    call_func0(sh4_read_sr);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );

    check_priv();
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_STCSRM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    call_func0( sh4_read_sr );
    POP_realigned_r32( R_ECX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_VBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SSR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SPC );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SGR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_DBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

STC.L Rm_BANK, @-Rn {:
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_GBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );

STS.L FPSCR, @-Rn {:
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
    check_fpuen();
    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );

    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPUL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACH );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_PR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */

    sh4_x86.in_delay_slot = DELAY_NONE;
    return 0;
}