4 * SH4 => x86 translation. This version does no real optimization; it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
8 * Copyright (c) 2007 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
29 #include "sh4/xltcache.h"
30 #include "sh4/sh4core.h"
31 #include "sh4/sh4trans.h"
32 #include "sh4/sh4stat.h"
33 #include "sh4/sh4mmio.h"
34 #include "sh4/x86op.h"
38 #define DEFAULT_BACKPATCH_SIZE 4096
40 struct backpatch_record {
41 uint32_t fixup_offset;
42 uint32_t fixup_icount;
51 * Struct to manage internal translation state. This state is not saved -
52 * it is only valid between calls to sh4_translate_begin_block() and
53 * sh4_translate_end_block()
55 struct sh4_x86_state {
57 gboolean fpuen_checked; /* true if we've already checked that the FPU is enabled. */
58 gboolean branch_taken; /* true if we branched unconditionally */
59 gboolean double_prec; /* true if FPU is in double-precision mode */
60 gboolean double_size; /* true if FPU is in double-size mode */
61 gboolean sse3_enabled; /* true if host supports SSE3 instructions */
62 uint32_t block_start_pc;
63 uint32_t stack_posn; /* Trace stack height for alignment purposes */
67 gboolean tlb_on; /* True if tlb translation is active */
69 /* Allocated memory for the (block-wide) back-patch list */
70 struct backpatch_record *backpatch_list;
71 uint32_t backpatch_posn;
72 uint32_t backpatch_size;
75 #define TSTATE_NONE -1
85 #ifdef ENABLE_SH4STATS
86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
88 #define COUNT_INST(id)
91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
93 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
94 OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
98 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
99 OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
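/* Usage sketch: if a preceding instruction (e.g. CMP/EQ) has already left the host
 * flags meaningful and set sh4_x86.tstate, no CMP against sh4r.t is emitted at all -
 * assuming the TSTATE_* constants are the x86 condition codes, OP(0x70+tstate) is the
 * bare Jcc rel8 (TSTATE_E -> 0x74 = JE), and JF_rel8 flips the low bit of the
 * condition code (4^1 = 5 -> 0x75 = JNE) to invert it. */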
101 static struct sh4_x86_state sh4_x86;
103 static uint32_t max_int = 0x7FFFFFFF;
104 static uint32_t min_int = 0x80000000;
105 static uint32_t save_fcw; /* saved value of the FPU control word */
106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
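/* trunc_fcw (0x0F7F) sets the x87 rounding-control field (FCW bits 11:10) to 11b,
 * i.e. round-toward-zero as FTRC requires; save_fcw holds the caller's control word
 * so it can be restored afterwards (see the FTRC handler below). */

/* SSE3 availability is reported by CPUID leaf 1, ECX bit 0. */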
108 gboolean is_sse3_supported()
112 __asm__ __volatile__(
113 "mov $0x01, %%eax\n\t"
114 "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
115 return (features & 1) ? TRUE : FALSE;
118 void sh4_translate_init(void)
120 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
121 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
122 sh4_x86.sse3_enabled = is_sse3_supported();
126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
128 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
129 sh4_x86.backpatch_size <<= 1;
130 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
131 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
132 assert( sh4_x86.backpatch_list != NULL );
134 if( sh4_x86.in_delay_slot ) {
137 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
138 ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
139 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
140 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
141 sh4_x86.backpatch_posn++;
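    /* Each record stores the byte offset of the 32-bit field to patch within the
     * block's code, the SH4 instruction index ((fixup_pc - block_start_pc) >> 1) used
     * for recovery, and the exception code to raise; sh4_translate_end_block_size()
     * below budgets epilogue space for each record. */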
145 * Emit an instruction to load an SH4 reg into a real register
147 static inline void load_reg( int x86reg, int sh4reg )
149 /* mov [bp+n], reg */
151 OP(0x45 + (x86reg<<3));
152 OP(REG_OFFSET(r[sh4reg]));
155 static inline void load_reg16s( int x86reg, int sh4reg )
159 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
162 static inline void load_reg16u( int x86reg, int sh4reg )
166 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
173 * Emit an instruction to load an immediate value into a register
175 static inline void load_imm32( int x86reg, uint32_t value ) {
176 /* mov #value, reg */
183 * Load an immediate 64-bit quantity (note: x86-64 only)
185 static inline void load_imm64( int x86reg, uint64_t value ) {
186 /* mov #value, reg */
193 * Emit an instruction to store an SH4 reg (RN)
195 static inline void store_reg( int x86reg, int sh4reg ) {
196 /* mov reg, [bp+n] */
198 OP(0x45 + (x86reg<<3));
199 OP(REG_OFFSET(r[sh4reg]));
203 * Load an FR register (single-precision floating point) into an integer x86
204 * register (e.g. for register-to-register moves)
206 #define load_fr(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
207 #define load_xf(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
210 * Load the low half of a DR register (DR or XD) into an integer x86 register
212 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
213 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
216 * Store an FR register (single-precision floating point) from an integer x86
217 * register (e.g. for register-to-register moves)
219 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
220 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
222 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
223 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
226 #define push_fpul() FLDF_sh4r(R_FPUL)
227 #define pop_fpul() FSTPF_sh4r(R_FPUL)
228 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
229 #define pop_fr(frm) FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
230 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
231 #define pop_xf(frm) FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
232 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
233 #define pop_dr(frm) FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
234 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
235 #define pop_xdr(frm) FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
239 /* Exception checks - Note that all exception checks will clobber EAX */
241 #define check_priv( ) \
242 if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
243 if( sh4_x86.in_delay_slot ) { \
244 JMP_exc(EXC_SLOT_ILLEGAL); \
246 JMP_exc(EXC_ILLEGAL ); \
248 sh4_x86.in_delay_slot = DELAY_NONE; \
252 #define check_fpuen( ) \
253 if( !sh4_x86.fpuen_checked ) {\
254 sh4_x86.fpuen_checked = TRUE;\
255 load_spreg( R_EAX, R_SR );\
256 AND_imm32_r32( SR_FD, R_EAX );\
257 if( sh4_x86.in_delay_slot ) {\
258 JNE_exc(EXC_SLOT_FPU_DISABLED);\
260 JNE_exc(EXC_FPU_DISABLED);\
262 sh4_x86.tstate = TSTATE_NONE; \
265 #define check_ralign16( x86reg ) \
266 TEST_imm32_r32( 0x00000001, x86reg ); \
267 JNE_exc(EXC_DATA_ADDR_READ)
269 #define check_walign16( x86reg ) \
270 TEST_imm32_r32( 0x00000001, x86reg ); \
271 JNE_exc(EXC_DATA_ADDR_WRITE);
273 #define check_ralign32( x86reg ) \
274 TEST_imm32_r32( 0x00000003, x86reg ); \
275 JNE_exc(EXC_DATA_ADDR_READ)
277 #define check_walign32( x86reg ) \
278 TEST_imm32_r32( 0x00000003, x86reg ); \
279 JNE_exc(EXC_DATA_ADDR_WRITE);
281 #define check_ralign64( x86reg ) \
282 TEST_imm32_r32( 0x00000007, x86reg ); \
283 JNE_exc(EXC_DATA_ADDR_READ)
285 #define check_walign64( x86reg ) \
286 TEST_imm32_r32( 0x00000007, x86reg ); \
287 JNE_exc(EXC_DATA_ADDR_WRITE);
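/* Typical usage sketch (as in the MOV.L handlers below): check the address before the
 * access, e.g.
 *     load_reg( R_EAX, Rm );
 *     check_ralign32( R_EAX );        // raises EXC_DATA_ADDR_READ if either low bit is set
 *     MEM_READ_LONG( R_EAX, R_EAX );
 * so an address such as 0x8C000002 passes the 16-bit checks but fails the 32-bit ones. */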
290 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
291 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
292 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
293 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
296 #ifdef HAVE_FRAME_ADDRESS
297 #define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
298 call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
299 call_func1_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, pc); }
300 #define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
301 call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
302 call_func2_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
304 #define _CALL_READ(addr_reg, fn) call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg)
305 #define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
308 #define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
309 #define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
310 #define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
311 #define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
312 #define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
313 #define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)
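/* Rough expansion of MEM_READ_LONG(addr, dst): decode_address() is assumed to leave the
 * mem_region_fn pointer for the address in R_ECX, since both call helpers dispatch
 * through it; _CALL_READ then makes an indirect call via
 * [R_ECX + offsetof(struct mem_region_fn, read_long)], using the _exc variant (which
 * saves the exception return information) whenever the TLB is on or we are in user
 * mode, per the note above; MEM_RESULT finally copies the result from R_EAX into dst
 * when dst is a different register. */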
315 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
317 /****** Import appropriate calling conventions ******/
318 #if SIZEOF_VOID_P == 8
319 #include "sh4/ia64abi.h"
320 #else /* 32-bit system */
321 #include "sh4/ia32abi.h"
324 void sh4_translate_begin_block( sh4addr_t pc )
327 sh4_x86.in_delay_slot = FALSE;
328 sh4_x86.fpuen_checked = FALSE;
329 sh4_x86.branch_taken = FALSE;
330 sh4_x86.backpatch_posn = 0;
331 sh4_x86.block_start_pc = pc;
332 sh4_x86.tlb_on = IS_TLB_ENABLED();
333 sh4_x86.tstate = TSTATE_NONE;
334 sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
335 sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
339 uint32_t sh4_translate_end_block_size()
341 if( sh4_x86.backpatch_posn <= 3 ) {
342 return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
344 return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
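    /* e.g. 2 backpatch records -> EPILOGUE_SIZE + 24 bytes; 5 records -> EPILOGUE_SIZE + 48 + 30. */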
350 * Embed a breakpoint into the generated code
352 void sh4_translate_emit_breakpoint( sh4vma_t pc )
354 load_imm32( R_EAX, pc );
355 call_func1( sh4_translate_breakpoint_hit, R_EAX );
356 sh4_x86.tstate = TSTATE_NONE;
360 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
363 * Embed a call to sh4_execute_instruction for situations that we
364 * can't translate (just page-crossing delay slots at the moment).
365 * Caller is responsible for setting new_pc before calling this function.
369 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
370 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
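 *   (e.g. when endpc == block_start_pc + 4, ((4>>1)+1) = 3 instruction periods are added)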
371 * Call sh4_execute_instruction
372 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
374 void exit_block_emu( sh4vma_t endpc )
376 load_imm32( R_ECX, endpc - sh4_x86.block_start_pc ); // 5
377 ADD_r32_sh4r( R_ECX, R_PC );
379 load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
380 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
381 load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
382 store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
384 call_func0( sh4_execute_instruction );
385 load_spreg( R_EAX, R_PC );
386 if( sh4_x86.tlb_on ) {
387 call_func1(xlat_get_code_by_vma,R_EAX);
389 call_func1(xlat_get_code,R_EAX);
395 * Translate a single instruction. Delayed branches are handled specially
396 * by translating both the branch and the delayed instruction as a single unit.
398 * The instruction MUST be in the icache (assert check)
400 * @return true if the instruction marks the end of a basic block
403 uint32_t sh4_translate_instruction( sh4vma_t pc )
406 /* Read instruction from icache */
407 assert( IS_IN_ICACHE(pc) );
408 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
410 if( !sh4_x86.in_delay_slot ) {
411 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
417 load_reg( R_EAX, Rm );
418 load_reg( R_ECX, Rn );
419 ADD_r32_r32( R_EAX, R_ECX );
420 store_reg( R_ECX, Rn );
421 sh4_x86.tstate = TSTATE_NONE;
425 ADD_imm8s_sh4r( imm, REG_OFFSET(r[Rn]) );
426 sh4_x86.tstate = TSTATE_NONE;
430 if( sh4_x86.tstate != TSTATE_C ) {
433 load_reg( R_EAX, Rm );
434 load_reg( R_ECX, Rn );
435 ADC_r32_r32( R_EAX, R_ECX );
436 store_reg( R_ECX, Rn );
438 sh4_x86.tstate = TSTATE_C;
442 load_reg( R_EAX, Rm );
443 load_reg( R_ECX, Rn );
444 ADD_r32_r32( R_EAX, R_ECX );
445 store_reg( R_ECX, Rn );
447 sh4_x86.tstate = TSTATE_O;
451 load_reg( R_EAX, Rm );
452 load_reg( R_ECX, Rn );
453 AND_r32_r32( R_EAX, R_ECX );
454 store_reg( R_ECX, Rn );
455 sh4_x86.tstate = TSTATE_NONE;
459 load_reg( R_EAX, 0 );
460 AND_imm32_r32(imm, R_EAX);
461 store_reg( R_EAX, 0 );
462 sh4_x86.tstate = TSTATE_NONE;
464 AND.B #imm, @(R0, GBR) {:
466 load_reg( R_EAX, 0 );
467 ADD_sh4r_r32( R_GBR, R_EAX );
468 MOV_r32_esp8(R_EAX, 0);
469 MEM_READ_BYTE( R_EAX, R_EDX );
470 MOV_esp8_r32(0, R_EAX);
471 AND_imm32_r32(imm, R_EDX );
472 MEM_WRITE_BYTE( R_EAX, R_EDX );
473 sh4_x86.tstate = TSTATE_NONE;
477 load_reg( R_EAX, Rm );
478 load_reg( R_ECX, Rn );
479 CMP_r32_r32( R_EAX, R_ECX );
481 sh4_x86.tstate = TSTATE_E;
484 COUNT_INST(I_CMPEQI);
485 load_reg( R_EAX, 0 );
486 CMP_imm8s_r32(imm, R_EAX);
488 sh4_x86.tstate = TSTATE_E;
492 load_reg( R_EAX, Rm );
493 load_reg( R_ECX, Rn );
494 CMP_r32_r32( R_EAX, R_ECX );
496 sh4_x86.tstate = TSTATE_GE;
500 load_reg( R_EAX, Rm );
501 load_reg( R_ECX, Rn );
502 CMP_r32_r32( R_EAX, R_ECX );
504 sh4_x86.tstate = TSTATE_G;
508 load_reg( R_EAX, Rm );
509 load_reg( R_ECX, Rn );
510 CMP_r32_r32( R_EAX, R_ECX );
512 sh4_x86.tstate = TSTATE_A;
516 load_reg( R_EAX, Rm );
517 load_reg( R_ECX, Rn );
518 CMP_r32_r32( R_EAX, R_ECX );
520 sh4_x86.tstate = TSTATE_AE;
524 load_reg( R_EAX, Rn );
525 CMP_imm8s_r32( 0, R_EAX );
527 sh4_x86.tstate = TSTATE_G;
531 load_reg( R_EAX, Rn );
532 CMP_imm8s_r32( 0, R_EAX );
534 sh4_x86.tstate = TSTATE_GE;
537 COUNT_INST(I_CMPSTR);
538 load_reg( R_EAX, Rm );
539 load_reg( R_ECX, Rn );
540 XOR_r32_r32( R_ECX, R_EAX );
541 TEST_r8_r8( R_AL, R_AL );
543 TEST_r8_r8( R_AH, R_AH );
545 SHR_imm8_r32( 16, R_EAX );
546 TEST_r8_r8( R_AL, R_AL );
548 TEST_r8_r8( R_AH, R_AH );
553 sh4_x86.tstate = TSTATE_E;
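    /* CMP/STR sets T when any of the four corresponding bytes of Rm and Rn are equal:
     * the XOR above leaves a zero byte exactly where they matched, so each byte of the
     * result is tested in turn. */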
557 load_reg( R_EAX, Rm );
558 load_reg( R_ECX, Rn );
559 SHR_imm8_r32( 31, R_EAX );
560 SHR_imm8_r32( 31, R_ECX );
561 store_spreg( R_EAX, R_M );
562 store_spreg( R_ECX, R_Q );
563 CMP_r32_r32( R_EAX, R_ECX );
565 sh4_x86.tstate = TSTATE_NE;
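    /* DIV0S seeds the division-step flags: M = Rm[31], Q = Rn[31], T = M ^ Q
     * (hence TSTATE_NE: T is set when the sign bits differ). */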
569 XOR_r32_r32( R_EAX, R_EAX );
570 store_spreg( R_EAX, R_Q );
571 store_spreg( R_EAX, R_M );
572 store_spreg( R_EAX, R_T );
573 sh4_x86.tstate = TSTATE_C; // works for DIV1
577 load_spreg( R_ECX, R_M );
578 load_reg( R_EAX, Rn );
579 if( sh4_x86.tstate != TSTATE_C ) {
583 SETC_r8( R_DL ); // Q'
584 CMP_sh4r_r32( R_Q, R_ECX );
586 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
589 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
591 store_reg( R_EAX, Rn ); // Done with Rn now
592 SETC_r8(R_AL); // tmp1
593 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
594 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
595 store_spreg( R_ECX, R_Q );
596 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
597 MOVZX_r8_r32( R_AL, R_EAX );
598 store_spreg( R_EAX, R_T );
599 sh4_x86.tstate = TSTATE_NONE;
603 load_reg( R_EAX, Rm );
604 load_reg( R_ECX, Rn );
606 store_spreg( R_EDX, R_MACH );
607 store_spreg( R_EAX, R_MACL );
608 sh4_x86.tstate = TSTATE_NONE;
612 load_reg( R_EAX, Rm );
613 load_reg( R_ECX, Rn );
615 store_spreg( R_EDX, R_MACH );
616 store_spreg( R_EAX, R_MACL );
617 sh4_x86.tstate = TSTATE_NONE;
621 load_reg( R_EAX, Rn );
622 ADD_imm8s_r32( -1, R_EAX );
623 store_reg( R_EAX, Rn );
625 sh4_x86.tstate = TSTATE_E;
629 load_reg( R_EAX, Rm );
630 MOVSX_r8_r32( R_EAX, R_EAX );
631 store_reg( R_EAX, Rn );
635 load_reg( R_EAX, Rm );
636 MOVSX_r16_r32( R_EAX, R_EAX );
637 store_reg( R_EAX, Rn );
641 load_reg( R_EAX, Rm );
642 MOVZX_r8_r32( R_EAX, R_EAX );
643 store_reg( R_EAX, Rn );
647 load_reg( R_EAX, Rm );
648 MOVZX_r16_r32( R_EAX, R_EAX );
649 store_reg( R_EAX, Rn );
654 load_reg( R_EAX, Rm );
655 check_ralign32( R_EAX );
656 MEM_READ_LONG( R_EAX, R_EAX );
657 MOV_r32_esp8(R_EAX, 0);
658 load_reg( R_EAX, Rm );
659 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
660 MEM_READ_LONG( R_EAX, R_EAX );
661 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
663 load_reg( R_EAX, Rm );
664 check_ralign32( R_EAX );
665 MEM_READ_LONG( R_EAX, R_EAX );
666 MOV_r32_esp8( R_EAX, 0 );
667 load_reg( R_EAX, Rn );
668 check_ralign32( R_EAX );
669 MEM_READ_LONG( R_EAX, R_EAX );
670 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
671 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
675 ADD_r32_sh4r( R_EAX, R_MACL );
676 ADC_r32_sh4r( R_EDX, R_MACH );
678 load_spreg( R_ECX, R_S );
679 TEST_r32_r32(R_ECX, R_ECX);
681 call_func0( signsat48 );
683 sh4_x86.tstate = TSTATE_NONE;
688 load_reg( R_EAX, Rm );
689 check_ralign16( R_EAX );
690 MEM_READ_WORD( R_EAX, R_EAX );
691 MOV_r32_esp8( R_EAX, 0 );
692 load_reg( R_EAX, Rm );
693 LEA_r32disp8_r32( R_EAX, 2, R_EAX );
694 MEM_READ_WORD( R_EAX, R_EAX );
695 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
696 // Note: we translate the address twice in case it crosses a page boundary. It may be
697 // worth adding a page-boundary check to skip the second translation.
699 load_reg( R_EAX, Rm );
700 check_ralign16( R_EAX );
701 MEM_READ_WORD( R_EAX, R_EAX );
702 MOV_r32_esp8( R_EAX, 0 );
703 load_reg( R_EAX, Rn );
704 check_ralign16( R_EAX );
705 MEM_READ_WORD( R_EAX, R_EAX );
706 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
707 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
710 load_spreg( R_ECX, R_S );
711 TEST_r32_r32( R_ECX, R_ECX );
714 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
715 JNO_rel8( end ); // 2
716 load_imm32( R_EDX, 1 ); // 5
717 store_spreg( R_EDX, R_MACH ); // 6
718 JS_rel8( positive ); // 2
719 load_imm32( R_EAX, 0x80000000 );// 5
720 store_spreg( R_EAX, R_MACL ); // 6
723 JMP_TARGET(positive);
724 load_imm32( R_EAX, 0x7FFFFFFF );// 5
725 store_spreg( R_EAX, R_MACL ); // 6
729 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
730 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
734 sh4_x86.tstate = TSTATE_NONE;
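    /* With S=1, MAC.W clamps MACL to 0x7FFFFFFF / 0x80000000 on overflow and records the
     * overflow by setting MACH to 1 (the saturating path above); with S=0 it performs the
     * full 64-bit ADD/ADC accumulate. MAC.L above uses the signsat48 helper for the
     * analogous 48-bit saturation. */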
738 load_spreg( R_EAX, R_T );
739 store_reg( R_EAX, Rn );
743 load_reg( R_EAX, Rm );
744 load_reg( R_ECX, Rn );
746 store_spreg( R_EAX, R_MACL );
747 sh4_x86.tstate = TSTATE_NONE;
751 load_reg16s( R_EAX, Rm );
752 load_reg16s( R_ECX, Rn );
754 store_spreg( R_EAX, R_MACL );
755 sh4_x86.tstate = TSTATE_NONE;
759 load_reg16u( R_EAX, Rm );
760 load_reg16u( R_ECX, Rn );
762 store_spreg( R_EAX, R_MACL );
763 sh4_x86.tstate = TSTATE_NONE;
767 load_reg( R_EAX, Rm );
769 store_reg( R_EAX, Rn );
770 sh4_x86.tstate = TSTATE_NONE;
774 load_reg( R_EAX, Rm );
775 XOR_r32_r32( R_ECX, R_ECX );
777 SBB_r32_r32( R_EAX, R_ECX );
778 store_reg( R_ECX, Rn );
780 sh4_x86.tstate = TSTATE_C;
784 load_reg( R_EAX, Rm );
786 store_reg( R_EAX, Rn );
787 sh4_x86.tstate = TSTATE_NONE;
791 load_reg( R_EAX, Rm );
792 load_reg( R_ECX, Rn );
793 OR_r32_r32( R_EAX, R_ECX );
794 store_reg( R_ECX, Rn );
795 sh4_x86.tstate = TSTATE_NONE;
799 load_reg( R_EAX, 0 );
800 OR_imm32_r32(imm, R_EAX);
801 store_reg( R_EAX, 0 );
802 sh4_x86.tstate = TSTATE_NONE;
804 OR.B #imm, @(R0, GBR) {:
806 load_reg( R_EAX, 0 );
807 ADD_sh4r_r32( R_GBR, R_EAX );
808 MOV_r32_esp8( R_EAX, 0 );
809 MEM_READ_BYTE( R_EAX, R_EDX );
810 MOV_esp8_r32( 0, R_EAX );
811 OR_imm32_r32(imm, R_EDX );
812 MEM_WRITE_BYTE( R_EAX, R_EDX );
813 sh4_x86.tstate = TSTATE_NONE;
817 load_reg( R_EAX, Rn );
818 if( sh4_x86.tstate != TSTATE_C ) {
822 store_reg( R_EAX, Rn );
824 sh4_x86.tstate = TSTATE_C;
828 load_reg( R_EAX, Rn );
829 if( sh4_x86.tstate != TSTATE_C ) {
833 store_reg( R_EAX, Rn );
835 sh4_x86.tstate = TSTATE_C;
839 load_reg( R_EAX, Rn );
841 store_reg( R_EAX, Rn );
843 sh4_x86.tstate = TSTATE_C;
847 load_reg( R_EAX, Rn );
849 store_reg( R_EAX, Rn );
851 sh4_x86.tstate = TSTATE_C;
855 /* Annoyingly enough, not directly convertible */
856 load_reg( R_EAX, Rn );
857 load_reg( R_ECX, Rm );
858 CMP_imm32_r32( 0, R_ECX );
861 NEG_r32( R_ECX ); // 2
862 AND_imm8_r8( 0x1F, R_CL ); // 3
863 JE_rel8(emptysar); // 2
864 SAR_r32_CL( R_EAX ); // 2
867 JMP_TARGET(emptysar);
868 SAR_imm8_r32(31, R_EAX ); // 3
872 AND_imm8_r8( 0x1F, R_CL ); // 3
873 SHL_r32_CL( R_EAX ); // 2
876 store_reg( R_EAX, Rn );
877 sh4_x86.tstate = TSTATE_NONE;
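    /* SHAD shifts left for Rm >= 0 and arithmetically right for Rm < 0, using only the low
     * five bits of the negated count: e.g. Rm = -8 shifts Rn right by 8, while Rm = -32
     * leaves a zero count and takes the emptysar path, filling Rn with its sign bit
     * (the effect of an arithmetic shift right by 32). */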
881 load_reg( R_EAX, Rn );
882 load_reg( R_ECX, Rm );
883 CMP_imm32_r32( 0, R_ECX );
886 NEG_r32( R_ECX ); // 2
887 AND_imm8_r8( 0x1F, R_CL ); // 3
889 SHR_r32_CL( R_EAX ); // 2
892 JMP_TARGET(emptyshr);
893 XOR_r32_r32( R_EAX, R_EAX );
897 AND_imm8_r8( 0x1F, R_CL ); // 3
898 SHL_r32_CL( R_EAX ); // 2
901 store_reg( R_EAX, Rn );
902 sh4_x86.tstate = TSTATE_NONE;
906 load_reg( R_EAX, Rn );
909 store_reg( R_EAX, Rn );
910 sh4_x86.tstate = TSTATE_C;
914 load_reg( R_EAX, Rn );
917 store_reg( R_EAX, Rn );
918 sh4_x86.tstate = TSTATE_C;
922 load_reg( R_EAX, Rn );
925 store_reg( R_EAX, Rn );
926 sh4_x86.tstate = TSTATE_C;
930 load_reg( R_EAX, Rn );
931 SHL_imm8_r32( 2, R_EAX );
932 store_reg( R_EAX, Rn );
933 sh4_x86.tstate = TSTATE_NONE;
937 load_reg( R_EAX, Rn );
938 SHL_imm8_r32( 8, R_EAX );
939 store_reg( R_EAX, Rn );
940 sh4_x86.tstate = TSTATE_NONE;
944 load_reg( R_EAX, Rn );
945 SHL_imm8_r32( 16, R_EAX );
946 store_reg( R_EAX, Rn );
947 sh4_x86.tstate = TSTATE_NONE;
951 load_reg( R_EAX, Rn );
954 store_reg( R_EAX, Rn );
955 sh4_x86.tstate = TSTATE_C;
959 load_reg( R_EAX, Rn );
960 SHR_imm8_r32( 2, R_EAX );
961 store_reg( R_EAX, Rn );
962 sh4_x86.tstate = TSTATE_NONE;
966 load_reg( R_EAX, Rn );
967 SHR_imm8_r32( 8, R_EAX );
968 store_reg( R_EAX, Rn );
969 sh4_x86.tstate = TSTATE_NONE;
973 load_reg( R_EAX, Rn );
974 SHR_imm8_r32( 16, R_EAX );
975 store_reg( R_EAX, Rn );
976 sh4_x86.tstate = TSTATE_NONE;
980 load_reg( R_EAX, Rm );
981 load_reg( R_ECX, Rn );
982 SUB_r32_r32( R_EAX, R_ECX );
983 store_reg( R_ECX, Rn );
984 sh4_x86.tstate = TSTATE_NONE;
988 load_reg( R_EAX, Rm );
989 load_reg( R_ECX, Rn );
990 if( sh4_x86.tstate != TSTATE_C ) {
993 SBB_r32_r32( R_EAX, R_ECX );
994 store_reg( R_ECX, Rn );
996 sh4_x86.tstate = TSTATE_C;
1000 load_reg( R_EAX, Rm );
1001 load_reg( R_ECX, Rn );
1002 SUB_r32_r32( R_EAX, R_ECX );
1003 store_reg( R_ECX, Rn );
1005 sh4_x86.tstate = TSTATE_O;
1008 COUNT_INST(I_SWAPB);
1009 load_reg( R_EAX, Rm );
1010 XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
1011 store_reg( R_EAX, Rn );
1014 COUNT_INST(I_SWAPB);
1015 load_reg( R_EAX, Rm );
1016 MOV_r32_r32( R_EAX, R_ECX );
1017 SHL_imm8_r32( 16, R_ECX );
1018 SHR_imm8_r32( 16, R_EAX );
1019 OR_r32_r32( R_EAX, R_ECX );
1020 store_reg( R_ECX, Rn );
1021 sh4_x86.tstate = TSTATE_NONE;
1025 load_reg( R_EAX, Rn );
1026 MOV_r32_esp8( R_EAX, 0 );
1027 MEM_READ_BYTE( R_EAX, R_EDX );
1028 TEST_r8_r8( R_DL, R_DL );
1030 OR_imm8_r8( 0x80, R_DL );
1031 MOV_esp8_r32( 0, R_EAX );
1032 MEM_WRITE_BYTE( R_EAX, R_EDX );
1033 sh4_x86.tstate = TSTATE_NONE;
1037 load_reg( R_EAX, Rm );
1038 load_reg( R_ECX, Rn );
1039 TEST_r32_r32( R_EAX, R_ECX );
1041 sh4_x86.tstate = TSTATE_E;
1045 load_reg( R_EAX, 0 );
1046 TEST_imm32_r32( imm, R_EAX );
1048 sh4_x86.tstate = TSTATE_E;
1050 TST.B #imm, @(R0, GBR) {:
1052 load_reg( R_EAX, 0);
1053 ADD_sh4r_r32( R_GBR, R_EAX );
1054 MEM_READ_BYTE( R_EAX, R_EAX );
1055 TEST_imm8_r8( imm, R_AL );
1057 sh4_x86.tstate = TSTATE_E;
1061 load_reg( R_EAX, Rm );
1062 load_reg( R_ECX, Rn );
1063 XOR_r32_r32( R_EAX, R_ECX );
1064 store_reg( R_ECX, Rn );
1065 sh4_x86.tstate = TSTATE_NONE;
1069 load_reg( R_EAX, 0 );
1070 XOR_imm32_r32( imm, R_EAX );
1071 store_reg( R_EAX, 0 );
1072 sh4_x86.tstate = TSTATE_NONE;
1074 XOR.B #imm, @(R0, GBR) {:
1076 load_reg( R_EAX, 0 );
1077 ADD_sh4r_r32( R_GBR, R_EAX );
1078 MOV_r32_esp8( R_EAX, 0 );
1079 MEM_READ_BYTE(R_EAX, R_EDX);
1080 MOV_esp8_r32( 0, R_EAX );
1081 XOR_imm32_r32( imm, R_EDX );
1082 MEM_WRITE_BYTE( R_EAX, R_EDX );
1083 sh4_x86.tstate = TSTATE_NONE;
1086 COUNT_INST(I_XTRCT);
1087 load_reg( R_EAX, Rm );
1088 load_reg( R_ECX, Rn );
1089 SHL_imm8_r32( 16, R_EAX );
1090 SHR_imm8_r32( 16, R_ECX );
1091 OR_r32_r32( R_EAX, R_ECX );
1092 store_reg( R_ECX, Rn );
1093 sh4_x86.tstate = TSTATE_NONE;
1096 /* Data move instructions */
1099 load_reg( R_EAX, Rm );
1100 store_reg( R_EAX, Rn );
1104 load_imm32( R_EAX, imm );
1105 store_reg( R_EAX, Rn );
1109 load_reg( R_EAX, Rn );
1110 load_reg( R_EDX, Rm );
1111 MEM_WRITE_BYTE( R_EAX, R_EDX );
1112 sh4_x86.tstate = TSTATE_NONE;
1116 load_reg( R_EAX, Rn );
1117 LEA_r32disp8_r32( R_EAX, -1, R_EAX );
1118 load_reg( R_EDX, Rm );
1119 MEM_WRITE_BYTE( R_EAX, R_EDX );
1120 ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
1121 sh4_x86.tstate = TSTATE_NONE;
1123 MOV.B Rm, @(R0, Rn) {:
1125 load_reg( R_EAX, 0 );
1126 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1127 load_reg( R_EDX, Rm );
1128 MEM_WRITE_BYTE( R_EAX, R_EDX );
1129 sh4_x86.tstate = TSTATE_NONE;
1131 MOV.B R0, @(disp, GBR) {:
1133 load_spreg( R_EAX, R_GBR );
1134 ADD_imm32_r32( disp, R_EAX );
1135 load_reg( R_EDX, 0 );
1136 MEM_WRITE_BYTE( R_EAX, R_EDX );
1137 sh4_x86.tstate = TSTATE_NONE;
1139 MOV.B R0, @(disp, Rn) {:
1141 load_reg( R_EAX, Rn );
1142 ADD_imm32_r32( disp, R_EAX );
1143 load_reg( R_EDX, 0 );
1144 MEM_WRITE_BYTE( R_EAX, R_EDX );
1145 sh4_x86.tstate = TSTATE_NONE;
1149 load_reg( R_EAX, Rm );
1150 MEM_READ_BYTE( R_EAX, R_EAX );
1151 store_reg( R_EAX, Rn );
1152 sh4_x86.tstate = TSTATE_NONE;
1156 load_reg( R_EAX, Rm );
1157 MEM_READ_BYTE( R_EAX, R_EAX );
1159 ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
1161 store_reg( R_EAX, Rn );
1162 sh4_x86.tstate = TSTATE_NONE;
1164 MOV.B @(R0, Rm), Rn {:
1166 load_reg( R_EAX, 0 );
1167 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1168 MEM_READ_BYTE( R_EAX, R_EAX );
1169 store_reg( R_EAX, Rn );
1170 sh4_x86.tstate = TSTATE_NONE;
1172 MOV.B @(disp, GBR), R0 {:
1174 load_spreg( R_EAX, R_GBR );
1175 ADD_imm32_r32( disp, R_EAX );
1176 MEM_READ_BYTE( R_EAX, R_EAX );
1177 store_reg( R_EAX, 0 );
1178 sh4_x86.tstate = TSTATE_NONE;
1180 MOV.B @(disp, Rm), R0 {:
1182 load_reg( R_EAX, Rm );
1183 ADD_imm32_r32( disp, R_EAX );
1184 MEM_READ_BYTE( R_EAX, R_EAX );
1185 store_reg( R_EAX, 0 );
1186 sh4_x86.tstate = TSTATE_NONE;
1190 load_reg( R_EAX, Rn );
1191 check_walign32(R_EAX);
1192 MOV_r32_r32( R_EAX, R_ECX );
1193 AND_imm32_r32( 0xFC000000, R_ECX );
1194 CMP_imm32_r32( 0xE0000000, R_ECX );
1196 AND_imm8s_r32( 0x3C, R_EAX );
1197 load_reg( R_EDX, Rm );
1198 MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
1201 load_reg( R_EDX, Rm );
1202 MEM_WRITE_LONG( R_EAX, R_EDX );
1204 sh4_x86.tstate = TSTATE_NONE;
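    /* Writes whose top bits match 0xE0000000 (the 0xE0000000-0xE3FFFFFF store-queue area)
     * bypass the memory subsystem: AND 0x3C selects the longword slot within the 64-byte
     * sh4r.store_queue buffer and the value is written there directly. The same fast path
     * appears in MOV.L Rm, @(disp, Rn) below. */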
1208 load_reg( R_EAX, Rn );
1209 ADD_imm8s_r32( -4, R_EAX );
1210 check_walign32( R_EAX );
1211 load_reg( R_EDX, Rm );
1212 MEM_WRITE_LONG( R_EAX, R_EDX );
1213 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
1214 sh4_x86.tstate = TSTATE_NONE;
1216 MOV.L Rm, @(R0, Rn) {:
1218 load_reg( R_EAX, 0 );
1219 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1220 check_walign32( R_EAX );
1221 load_reg( R_EDX, Rm );
1222 MEM_WRITE_LONG( R_EAX, R_EDX );
1223 sh4_x86.tstate = TSTATE_NONE;
1225 MOV.L R0, @(disp, GBR) {:
1227 load_spreg( R_EAX, R_GBR );
1228 ADD_imm32_r32( disp, R_EAX );
1229 check_walign32( R_EAX );
1230 load_reg( R_EDX, 0 );
1231 MEM_WRITE_LONG( R_EAX, R_EDX );
1232 sh4_x86.tstate = TSTATE_NONE;
1234 MOV.L Rm, @(disp, Rn) {:
1236 load_reg( R_EAX, Rn );
1237 ADD_imm32_r32( disp, R_EAX );
1238 check_walign32( R_EAX );
1239 MOV_r32_r32( R_EAX, R_ECX );
1240 AND_imm32_r32( 0xFC000000, R_ECX );
1241 CMP_imm32_r32( 0xE0000000, R_ECX );
1243 AND_imm8s_r32( 0x3C, R_EAX );
1244 load_reg( R_EDX, Rm );
1245 MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
1248 load_reg( R_EDX, Rm );
1249 MEM_WRITE_LONG( R_EAX, R_EDX );
1251 sh4_x86.tstate = TSTATE_NONE;
1255 load_reg( R_EAX, Rm );
1256 check_ralign32( R_EAX );
1257 MEM_READ_LONG( R_EAX, R_EAX );
1258 store_reg( R_EAX, Rn );
1259 sh4_x86.tstate = TSTATE_NONE;
1263 load_reg( R_EAX, Rm );
1264 check_ralign32( R_EAX );
1265 MEM_READ_LONG( R_EAX, R_EAX );
1267 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1269 store_reg( R_EAX, Rn );
1270 sh4_x86.tstate = TSTATE_NONE;
1272 MOV.L @(R0, Rm), Rn {:
1274 load_reg( R_EAX, 0 );
1275 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1276 check_ralign32( R_EAX );
1277 MEM_READ_LONG( R_EAX, R_EAX );
1278 store_reg( R_EAX, Rn );
1279 sh4_x86.tstate = TSTATE_NONE;
1281 MOV.L @(disp, GBR), R0 {:
1283 load_spreg( R_EAX, R_GBR );
1284 ADD_imm32_r32( disp, R_EAX );
1285 check_ralign32( R_EAX );
1286 MEM_READ_LONG( R_EAX, R_EAX );
1287 store_reg( R_EAX, 0 );
1288 sh4_x86.tstate = TSTATE_NONE;
1290 MOV.L @(disp, PC), Rn {:
1291 COUNT_INST(I_MOVLPC);
1292 if( sh4_x86.in_delay_slot ) {
1295 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1296 if( IS_IN_ICACHE(target) ) {
1297 // If the target address is in the same page as the code, it's
1298 // pretty safe to just ref it directly and circumvent the whole
1299 // memory subsystem. (this is a big performance win)
1301 // FIXME: There's a corner-case that's not handled here when
1302 // the current code-page is in the ITLB but not in the UTLB.
1303 // (this should generate a TLB miss, although SH4 behaviour would need to be
1304 // tested to confirm.) It's unlikely that anyone depends on this
1305 // behaviour, though.
1306 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1307 MOV_moff32_EAX( ptr );
1309 // Note: we use sh4r.pc for the calc as we could be running at a
1310 // different virtual address than the translation was done with,
1311 // but we can safely assume that the low bits are the same.
1312 load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1313 ADD_sh4r_r32( R_PC, R_EAX );
1314 MEM_READ_LONG( R_EAX, R_EAX );
1315 sh4_x86.tstate = TSTATE_NONE;
1317 store_reg( R_EAX, Rn );
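    /* The literal address is longword-aligned and PC-relative: target = (pc & ~3) + disp + 4,
     * e.g. pc = 0x8C001006, disp = 8  =>  target = 0x8C001004 + 12 = 0x8C001010. */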
1320 MOV.L @(disp, Rm), Rn {:
1322 load_reg( R_EAX, Rm );
1323 ADD_imm8s_r32( disp, R_EAX );
1324 check_ralign32( R_EAX );
1325 MEM_READ_LONG( R_EAX, R_EAX );
1326 store_reg( R_EAX, Rn );
1327 sh4_x86.tstate = TSTATE_NONE;
1331 load_reg( R_EAX, Rn );
1332 check_walign16( R_EAX );
1333 load_reg( R_EDX, Rm );
1334 MEM_WRITE_WORD( R_EAX, R_EDX );
1335 sh4_x86.tstate = TSTATE_NONE;
1339 load_reg( R_EAX, Rn );
1340 check_walign16( R_EAX );
1341 LEA_r32disp8_r32( R_EAX, -2, R_EAX );
1342 load_reg( R_EDX, Rm );
1343 MEM_WRITE_WORD( R_EAX, R_EDX );
1344 ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
1345 sh4_x86.tstate = TSTATE_NONE;
1347 MOV.W Rm, @(R0, Rn) {:
1349 load_reg( R_EAX, 0 );
1350 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1351 check_walign16( R_EAX );
1352 load_reg( R_EDX, Rm );
1353 MEM_WRITE_WORD( R_EAX, R_EDX );
1354 sh4_x86.tstate = TSTATE_NONE;
1356 MOV.W R0, @(disp, GBR) {:
1358 load_spreg( R_EAX, R_GBR );
1359 ADD_imm32_r32( disp, R_EAX );
1360 check_walign16( R_EAX );
1361 load_reg( R_EDX, 0 );
1362 MEM_WRITE_WORD( R_EAX, R_EDX );
1363 sh4_x86.tstate = TSTATE_NONE;
1365 MOV.W R0, @(disp, Rn) {:
1367 load_reg( R_EAX, Rn );
1368 ADD_imm32_r32( disp, R_EAX );
1369 check_walign16( R_EAX );
1370 load_reg( R_EDX, 0 );
1371 MEM_WRITE_WORD( R_EAX, R_EDX );
1372 sh4_x86.tstate = TSTATE_NONE;
1376 load_reg( R_EAX, Rm );
1377 check_ralign16( R_EAX );
1378 MEM_READ_WORD( R_EAX, R_EAX );
1379 store_reg( R_EAX, Rn );
1380 sh4_x86.tstate = TSTATE_NONE;
1384 load_reg( R_EAX, Rm );
1385 check_ralign16( R_EAX );
1386 MEM_READ_WORD( R_EAX, R_EAX );
1388 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1390 store_reg( R_EAX, Rn );
1391 sh4_x86.tstate = TSTATE_NONE;
1393 MOV.W @(R0, Rm), Rn {:
1395 load_reg( R_EAX, 0 );
1396 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1397 check_ralign16( R_EAX );
1398 MEM_READ_WORD( R_EAX, R_EAX );
1399 store_reg( R_EAX, Rn );
1400 sh4_x86.tstate = TSTATE_NONE;
1402 MOV.W @(disp, GBR), R0 {:
1404 load_spreg( R_EAX, R_GBR );
1405 ADD_imm32_r32( disp, R_EAX );
1406 check_ralign16( R_EAX );
1407 MEM_READ_WORD( R_EAX, R_EAX );
1408 store_reg( R_EAX, 0 );
1409 sh4_x86.tstate = TSTATE_NONE;
1411 MOV.W @(disp, PC), Rn {:
1413 if( sh4_x86.in_delay_slot ) {
1416 // See comments for MOV.L @(disp, PC), Rn
1417 uint32_t target = pc + disp + 4;
1418 if( IS_IN_ICACHE(target) ) {
1419 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1420 MOV_moff32_EAX( ptr );
1421 MOVSX_r16_r32( R_EAX, R_EAX );
1423 load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1424 ADD_sh4r_r32( R_PC, R_EAX );
1425 MEM_READ_WORD( R_EAX, R_EAX );
1426 sh4_x86.tstate = TSTATE_NONE;
1428 store_reg( R_EAX, Rn );
1431 MOV.W @(disp, Rm), R0 {:
1433 load_reg( R_EAX, Rm );
1434 ADD_imm32_r32( disp, R_EAX );
1435 check_ralign16( R_EAX );
1436 MEM_READ_WORD( R_EAX, R_EAX );
1437 store_reg( R_EAX, 0 );
1438 sh4_x86.tstate = TSTATE_NONE;
1440 MOVA @(disp, PC), R0 {:
1442 if( sh4_x86.in_delay_slot ) {
1445 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1446 ADD_sh4r_r32( R_PC, R_ECX );
1447 store_reg( R_ECX, 0 );
1448 sh4_x86.tstate = TSTATE_NONE;
1452 COUNT_INST(I_MOVCA);
1453 load_reg( R_EAX, Rn );
1454 check_walign32( R_EAX );
1455 load_reg( R_EDX, 0 );
1456 MEM_WRITE_LONG( R_EAX, R_EDX );
1457 sh4_x86.tstate = TSTATE_NONE;
1460 /* Control transfer instructions */
1463 if( sh4_x86.in_delay_slot ) {
1466 sh4vma_t target = disp + pc + 4;
1467 JT_rel8( nottaken );
1468 exit_block_rel(target, pc+2 );
1469 JMP_TARGET(nottaken);
1475 if( sh4_x86.in_delay_slot ) {
1478 sh4_x86.in_delay_slot = DELAY_PC;
1479 if( UNTRANSLATABLE(pc+2) ) {
1480 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1482 ADD_imm32_r32( disp, R_EAX );
1483 JMP_TARGET(nottaken);
1484 ADD_sh4r_r32( R_PC, R_EAX );
1485 store_spreg( R_EAX, R_NEW_PC );
1486 exit_block_emu(pc+2);
1487 sh4_x86.branch_taken = TRUE;
1490 if( sh4_x86.tstate == TSTATE_NONE ) {
1491 CMP_imm8s_sh4r( 1, R_T );
1492 sh4_x86.tstate = TSTATE_E;
1494 sh4vma_t target = disp + pc + 4;
1495 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
1496 int save_tstate = sh4_x86.tstate;
1497 sh4_translate_instruction(pc+2);
1498 exit_block_rel( target, pc+4 );
1501 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1502 sh4_x86.tstate = save_tstate;
1503 sh4_translate_instruction(pc+2);
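    /* The rel32 displacement written through *patch is relative to the end of the 4-byte
     * field (i.e. the next host instruction), hence the "- 4" in the fixup above; the other
     * conditional delay-slot branch below uses the same patching scheme. */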
1510 if( sh4_x86.in_delay_slot ) {
1513 sh4_x86.in_delay_slot = DELAY_PC;
1514 sh4_x86.branch_taken = TRUE;
1515 if( UNTRANSLATABLE(pc+2) ) {
1516 load_spreg( R_EAX, R_PC );
1517 ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
1518 store_spreg( R_EAX, R_NEW_PC );
1519 exit_block_emu(pc+2);
1522 sh4_translate_instruction( pc + 2 );
1523 exit_block_rel( disp + pc + 4, pc+4 );
1530 if( sh4_x86.in_delay_slot ) {
1533 load_spreg( R_EAX, R_PC );
1534 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1535 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1536 store_spreg( R_EAX, R_NEW_PC );
1537 sh4_x86.in_delay_slot = DELAY_PC;
1538 sh4_x86.tstate = TSTATE_NONE;
1539 sh4_x86.branch_taken = TRUE;
1540 if( UNTRANSLATABLE(pc+2) ) {
1541 exit_block_emu(pc+2);
1544 sh4_translate_instruction( pc + 2 );
1545 exit_block_newpcset(pc+2);
1552 if( sh4_x86.in_delay_slot ) {
1555 load_spreg( R_EAX, R_PC );
1556 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1557 store_spreg( R_EAX, R_PR );
1558 sh4_x86.in_delay_slot = DELAY_PC;
1559 sh4_x86.branch_taken = TRUE;
1560 sh4_x86.tstate = TSTATE_NONE;
1561 if( UNTRANSLATABLE(pc+2) ) {
1562 ADD_imm32_r32( disp, R_EAX );
1563 store_spreg( R_EAX, R_NEW_PC );
1564 exit_block_emu(pc+2);
1567 sh4_translate_instruction( pc + 2 );
1568 exit_block_rel( disp + pc + 4, pc+4 );
1575 if( sh4_x86.in_delay_slot ) {
1578 load_spreg( R_EAX, R_PC );
1579 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1580 store_spreg( R_EAX, R_PR );
1581 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1582 store_spreg( R_EAX, R_NEW_PC );
1584 sh4_x86.in_delay_slot = DELAY_PC;
1585 sh4_x86.tstate = TSTATE_NONE;
1586 sh4_x86.branch_taken = TRUE;
1587 if( UNTRANSLATABLE(pc+2) ) {
1588 exit_block_emu(pc+2);
1591 sh4_translate_instruction( pc + 2 );
1592 exit_block_newpcset(pc+2);
1599 if( sh4_x86.in_delay_slot ) {
1602 sh4vma_t target = disp + pc + 4;
1603 JF_rel8( nottaken );
1604 exit_block_rel(target, pc+2 );
1605 JMP_TARGET(nottaken);
1611 if( sh4_x86.in_delay_slot ) {
1614 sh4_x86.in_delay_slot = DELAY_PC;
1615 if( UNTRANSLATABLE(pc+2) ) {
1616 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1618 ADD_imm32_r32( disp, R_EAX );
1619 JMP_TARGET(nottaken);
1620 ADD_sh4r_r32( R_PC, R_EAX );
1621 store_spreg( R_EAX, R_NEW_PC );
1622 exit_block_emu(pc+2);
1623 sh4_x86.branch_taken = TRUE;
1626 if( sh4_x86.tstate == TSTATE_NONE ) {
1627 CMP_imm8s_sh4r( 1, R_T );
1628 sh4_x86.tstate = TSTATE_E;
1630 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
1631 int save_tstate = sh4_x86.tstate;
1632 sh4_translate_instruction(pc+2);
1633 exit_block_rel( disp + pc + 4, pc+4 );
1635 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1636 sh4_x86.tstate = save_tstate;
1637 sh4_translate_instruction(pc+2);
1644 if( sh4_x86.in_delay_slot ) {
1647 load_reg( R_ECX, Rn );
1648 store_spreg( R_ECX, R_NEW_PC );
1649 sh4_x86.in_delay_slot = DELAY_PC;
1650 sh4_x86.branch_taken = TRUE;
1651 if( UNTRANSLATABLE(pc+2) ) {
1652 exit_block_emu(pc+2);
1655 sh4_translate_instruction(pc+2);
1656 exit_block_newpcset(pc+2);
1663 if( sh4_x86.in_delay_slot ) {
1666 load_spreg( R_EAX, R_PC );
1667 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1668 store_spreg( R_EAX, R_PR );
1669 load_reg( R_ECX, Rn );
1670 store_spreg( R_ECX, R_NEW_PC );
1671 sh4_x86.in_delay_slot = DELAY_PC;
1672 sh4_x86.branch_taken = TRUE;
1673 sh4_x86.tstate = TSTATE_NONE;
1674 if( UNTRANSLATABLE(pc+2) ) {
1675 exit_block_emu(pc+2);
1678 sh4_translate_instruction(pc+2);
1679 exit_block_newpcset(pc+2);
1686 if( sh4_x86.in_delay_slot ) {
1690 load_spreg( R_ECX, R_SPC );
1691 store_spreg( R_ECX, R_NEW_PC );
1692 load_spreg( R_EAX, R_SSR );
1693 call_func1( sh4_write_sr, R_EAX );
1694 sh4_x86.in_delay_slot = DELAY_PC;
1695 sh4_x86.fpuen_checked = FALSE;
1696 sh4_x86.tstate = TSTATE_NONE;
1697 sh4_x86.branch_taken = TRUE;
1698 if( UNTRANSLATABLE(pc+2) ) {
1699 exit_block_emu(pc+2);
1702 sh4_translate_instruction(pc+2);
1703 exit_block_newpcset(pc+2);
1710 if( sh4_x86.in_delay_slot ) {
1713 load_spreg( R_ECX, R_PR );
1714 store_spreg( R_ECX, R_NEW_PC );
1715 sh4_x86.in_delay_slot = DELAY_PC;
1716 sh4_x86.branch_taken = TRUE;
1717 if( UNTRANSLATABLE(pc+2) ) {
1718 exit_block_emu(pc+2);
1721 sh4_translate_instruction(pc+2);
1722 exit_block_newpcset(pc+2);
1728 COUNT_INST(I_TRAPA);
1729 if( sh4_x86.in_delay_slot ) {
1732 load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1733 ADD_r32_sh4r( R_ECX, R_PC );
1734 load_imm32( R_EAX, imm );
1735 call_func1( sh4_raise_trap, R_EAX );
1736 sh4_x86.tstate = TSTATE_NONE;
1737 exit_block_pcset(pc);
1738 sh4_x86.branch_taken = TRUE;
1743 COUNT_INST(I_UNDEF);
1744 if( sh4_x86.in_delay_slot ) {
1747 JMP_exc(EXC_ILLEGAL);
1753 COUNT_INST(I_CLRMAC);
1754 XOR_r32_r32(R_EAX, R_EAX);
1755 store_spreg( R_EAX, R_MACL );
1756 store_spreg( R_EAX, R_MACH );
1757 sh4_x86.tstate = TSTATE_NONE;
1763 sh4_x86.tstate = TSTATE_NONE;
1769 sh4_x86.tstate = TSTATE_C;
1775 sh4_x86.tstate = TSTATE_NONE;
1781 sh4_x86.tstate = TSTATE_C;
1784 /* Floating point moves */
1786 COUNT_INST(I_FMOV1);
1788 if( sh4_x86.double_size ) {
1789 load_dr0( R_EAX, FRm );
1790 load_dr1( R_ECX, FRm );
1791 store_dr0( R_EAX, FRn );
1792 store_dr1( R_ECX, FRn );
1794 load_fr( R_EAX, FRm ); // SZ=0 branch
1795 store_fr( R_EAX, FRn );
1799 COUNT_INST(I_FMOV2);
1801 load_reg( R_EAX, Rn );
1802 if( sh4_x86.double_size ) {
1803 check_walign64( R_EAX );
1804 load_dr0( R_EDX, FRm );
1805 MEM_WRITE_LONG( R_EAX, R_EDX );
1806 load_reg( R_EAX, Rn );
1807 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1808 load_dr1( R_EDX, FRm );
1809 MEM_WRITE_LONG( R_EAX, R_EDX );
1811 check_walign32( R_EAX );
1812 load_fr( R_EDX, FRm );
1813 MEM_WRITE_LONG( R_EAX, R_EDX );
1815 sh4_x86.tstate = TSTATE_NONE;
1818 COUNT_INST(I_FMOV5);
1820 load_reg( R_EAX, Rm );
1821 if( sh4_x86.double_size ) {
1822 check_ralign64( R_EAX );
1823 MEM_READ_LONG( R_EAX, R_EAX );
1824 store_dr0( R_EAX, FRn );
1825 load_reg( R_EAX, Rm );
1826 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1827 MEM_READ_LONG( R_EAX, R_EAX );
1828 store_dr1( R_EAX, FRn );
1830 check_ralign32( R_EAX );
1831 MEM_READ_LONG( R_EAX, R_EAX );
1832 store_fr( R_EAX, FRn );
1834 sh4_x86.tstate = TSTATE_NONE;
1837 COUNT_INST(I_FMOV3);
1839 load_reg( R_EAX, Rn );
1840 if( sh4_x86.double_size ) {
1841 check_walign64( R_EAX );
1842 LEA_r32disp8_r32( R_EAX, -8, R_EAX );
1843 load_dr0( R_EDX, FRm );
1844 MEM_WRITE_LONG( R_EAX, R_EDX );
1845 load_reg( R_EAX, Rn );
1846 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
1847 load_dr1( R_EDX, FRm );
1848 MEM_WRITE_LONG( R_EAX, R_EDX );
1849 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1851 check_walign32( R_EAX );
1852 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
1853 load_fr( R_EDX, FRm );
1854 MEM_WRITE_LONG( R_EAX, R_EDX );
1855 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1857 sh4_x86.tstate = TSTATE_NONE;
1860 COUNT_INST(I_FMOV6);
1862 load_reg( R_EAX, Rm );
1863 if( sh4_x86.double_size ) {
1864 check_ralign64( R_EAX );
1865 MEM_READ_LONG( R_EAX, R_EAX );
1866 store_dr0( R_EAX, FRn );
1867 load_reg( R_EAX, Rm );
1868 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1869 MEM_READ_LONG( R_EAX, R_EAX );
1870 store_dr1( R_EAX, FRn );
1871 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1873 check_ralign32( R_EAX );
1874 MEM_READ_LONG( R_EAX, R_EAX );
1875 store_fr( R_EAX, FRn );
1876 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1878 sh4_x86.tstate = TSTATE_NONE;
1880 FMOV FRm, @(R0, Rn) {:
1881 COUNT_INST(I_FMOV4);
1883 load_reg( R_EAX, Rn );
1884 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1885 if( sh4_x86.double_size ) {
1886 check_walign64( R_EAX );
1887 load_dr0( R_EDX, FRm );
1888 MEM_WRITE_LONG( R_EAX, R_EDX );
1889 load_reg( R_EAX, Rn );
1890 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1891 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1892 load_dr1( R_EDX, FRm );
1893 MEM_WRITE_LONG( R_EAX, R_EDX );
1895 check_walign32( R_EAX );
1896 load_fr( R_EDX, FRm );
1897 MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
1899 sh4_x86.tstate = TSTATE_NONE;
1901 FMOV @(R0, Rm), FRn {:
1902 COUNT_INST(I_FMOV7);
1904 load_reg( R_EAX, Rm );
1905 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1906 if( sh4_x86.double_size ) {
1907 check_ralign64( R_EAX );
1908 MEM_READ_LONG( R_EAX, R_EAX );
1909 store_dr0( R_EAX, FRn );
1910 load_reg( R_EAX, Rm );
1911 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1912 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1913 MEM_READ_LONG( R_EAX, R_EAX );
1914 store_dr1( R_EAX, FRn );
1916 check_ralign32( R_EAX );
1917 MEM_READ_LONG( R_EAX, R_EAX );
1918 store_fr( R_EAX, FRn );
1920 sh4_x86.tstate = TSTATE_NONE;
1922 FLDI0 FRn {: /* IFF PR=0 */
1923 COUNT_INST(I_FLDI0);
1925 if( sh4_x86.double_prec == 0 ) {
1926 XOR_r32_r32( R_EAX, R_EAX );
1927 store_fr( R_EAX, FRn );
1929 sh4_x86.tstate = TSTATE_NONE;
1931 FLDI1 FRn {: /* IFF PR=0 */
1932 COUNT_INST(I_FLDI1);
1934 if( sh4_x86.double_prec == 0 ) {
1935 load_imm32(R_EAX, 0x3F800000);
1936 store_fr( R_EAX, FRn );
1941 COUNT_INST(I_FLOAT);
1944 if( sh4_x86.double_prec ) {
1953 if( sh4_x86.double_prec ) {
1958 load_ptr( R_ECX, &max_int );
1959 FILD_r32ind( R_ECX );
1962 load_ptr( R_ECX, &min_int ); // 5
1963 FILD_r32ind( R_ECX ); // 2
1965 JAE_rel8( sat2 ); // 2
1966 load_ptr( R_EAX, &save_fcw );
1967 FNSTCW_r32ind( R_EAX );
1968 load_ptr( R_EDX, &trunc_fcw );
1969 FLDCW_r32ind( R_EDX );
1970 FISTP_sh4r(R_FPUL); // 3
1971 FLDCW_r32ind( R_EAX );
1976 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
1977 store_spreg( R_ECX, R_FPUL );
1980 sh4_x86.tstate = TSTATE_NONE;
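    /* FTRC must truncate regardless of the current x87 rounding mode, so the control word
     * is temporarily switched to trunc_fcw around the FISTP and then restored from save_fcw;
     * values outside the max_int/min_int range are clamped first. */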
1985 load_fr( R_EAX, FRm );
1986 store_spreg( R_EAX, R_FPUL );
1991 load_spreg( R_EAX, R_FPUL );
1992 store_fr( R_EAX, FRn );
1995 COUNT_INST(I_FCNVDS);
1997 if( sh4_x86.double_prec ) {
2003 COUNT_INST(I_FCNVSD);
2005 if( sh4_x86.double_prec ) {
2011 /* Floating point instructions */
2015 if( sh4_x86.double_prec ) {
2028 if( sh4_x86.double_prec ) {
2043 if( sh4_x86.double_prec ) {
2055 FMAC FR0, FRm, FRn {:
2058 if( sh4_x86.double_prec ) {
2078 if( sh4_x86.double_prec ) {
2093 if( sh4_x86.double_prec ) {
2104 COUNT_INST(I_FSRRA);
2106 if( sh4_x86.double_prec == 0 ) {
2115 COUNT_INST(I_FSQRT);
2117 if( sh4_x86.double_prec ) {
2130 if( sh4_x86.double_prec ) {
2144 COUNT_INST(I_FCMPEQ);
2146 if( sh4_x86.double_prec ) {
2156 sh4_x86.tstate = TSTATE_E;
2159 COUNT_INST(I_FCMPGT);
2161 if( sh4_x86.double_prec ) {
2171 sh4_x86.tstate = TSTATE_A;
2177 if( sh4_x86.double_prec == 0 ) {
2178 LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
2179 load_spreg( R_EAX, R_FPUL );
2180 call_func2( sh4_fsca, R_EAX, R_EDX );
2182 sh4_x86.tstate = TSTATE_NONE;
2187 if( sh4_x86.double_prec == 0 ) {
2188 if( sh4_x86.sse3_enabled ) {
2189 MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
2190 MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
2191 HADDPS_xmm_xmm( 4, 4 );
2192 HADDPS_xmm_xmm( 4, 4 );
2193 MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
2198 push_fr( (FVm<<2)+1);
2199 push_fr( (FVn<<2)+1);
2202 push_fr( (FVm<<2)+2);
2203 push_fr( (FVn<<2)+2);
2206 push_fr( (FVm<<2)+3);
2207 push_fr( (FVn<<2)+3);
2210 pop_fr( (FVn<<2)+3);
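    /* FIPR accumulates the four products of FVm and FVn into the last element of FVn.
     * Since the fr[][] array stores each register pair word-swapped (note the ^1 in
     * load_fr/pop_fr), architectural FR[(FVn<<2)+3] lives at host index (FVn<<2)+2,
     * which is where both the SSE3 MOVSS above and the x87 pop_fr write the result. */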
2217 if( sh4_x86.double_prec == 0 ) {
2218 if( sh4_x86.sse3_enabled ) {
2219 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1 M0 M3 M2
2220 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5 M4 M7 M6
2221 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9 M8 M11 M10
2222 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
2224 MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
2225 MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
2226 MOVAPS_xmm_xmm( 4, 6 );
2227 MOVAPS_xmm_xmm( 5, 7 );
2228 MOVLHPS_xmm_xmm( 4, 4 ); // V1 V1 V1 V1
2229 MOVHLPS_xmm_xmm( 6, 6 ); // V3 V3 V3 V3
2230 MOVLHPS_xmm_xmm( 5, 5 ); // V0 V0 V0 V0
2231 MOVHLPS_xmm_xmm( 7, 7 ); // V2 V2 V2 V2
2232 MULPS_xmm_xmm( 0, 4 );
2233 MULPS_xmm_xmm( 1, 5 );
2234 MULPS_xmm_xmm( 2, 6 );
2235 MULPS_xmm_xmm( 3, 7 );
2236 ADDPS_xmm_xmm( 5, 4 );
2237 ADDPS_xmm_xmm( 7, 6 );
2238 ADDPS_xmm_xmm( 6, 4 );
2239 MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
2241 LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
2242 call_func1( sh4_ftrv, R_EAX );
2245 sh4_x86.tstate = TSTATE_NONE;
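    /* FTRV multiplies FVn by the 4x4 matrix held in the other bank (XMTRX, fr[1][]): the
     * SSE3 path loads the four matrix columns, broadcasts each element of FVn with the
     * MOVSLDUP/MOVSHDUP + MOVLHPS/MOVHLPS shuffles, and sums the column products back
     * into fr[0][FVn<<2]; otherwise it falls back to the sh4_ftrv() helper. */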
2249 COUNT_INST(I_FRCHG);
2251 XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
2252 call_func0( sh4_switch_fr_banks );
2253 sh4_x86.tstate = TSTATE_NONE;
2256 COUNT_INST(I_FSCHG);
2258 XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR);
2259 XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
2260 sh4_x86.tstate = TSTATE_NONE;
2261 sh4_x86.double_size = !sh4_x86.double_size;
2264 /* Processor control instructions */
2266 COUNT_INST(I_LDCSR);
2267 if( sh4_x86.in_delay_slot ) {
2271 load_reg( R_EAX, Rm );
2272 call_func1( sh4_write_sr, R_EAX );
2273 sh4_x86.fpuen_checked = FALSE;
2274 sh4_x86.tstate = TSTATE_NONE;
2280 load_reg( R_EAX, Rm );
2281 store_spreg( R_EAX, R_GBR );
2286 load_reg( R_EAX, Rm );
2287 store_spreg( R_EAX, R_VBR );
2288 sh4_x86.tstate = TSTATE_NONE;
2293 load_reg( R_EAX, Rm );
2294 store_spreg( R_EAX, R_SSR );
2295 sh4_x86.tstate = TSTATE_NONE;
2300 load_reg( R_EAX, Rm );
2301 store_spreg( R_EAX, R_SGR );
2302 sh4_x86.tstate = TSTATE_NONE;
2307 load_reg( R_EAX, Rm );
2308 store_spreg( R_EAX, R_SPC );
2309 sh4_x86.tstate = TSTATE_NONE;
2314 load_reg( R_EAX, Rm );
2315 store_spreg( R_EAX, R_DBR );
2316 sh4_x86.tstate = TSTATE_NONE;
2321 load_reg( R_EAX, Rm );
2322 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2323 sh4_x86.tstate = TSTATE_NONE;
2327 load_reg( R_EAX, Rm );
2328 check_ralign32( R_EAX );
2329 MEM_READ_LONG( R_EAX, R_EAX );
2330 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2331 store_spreg( R_EAX, R_GBR );
2332 sh4_x86.tstate = TSTATE_NONE;
2335 COUNT_INST(I_LDCSRM);
2336 if( sh4_x86.in_delay_slot ) {
2340 load_reg( R_EAX, Rm );
2341 check_ralign32( R_EAX );
2342 MEM_READ_LONG( R_EAX, R_EAX );
2343 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2344 call_func1( sh4_write_sr, R_EAX );
2345 sh4_x86.fpuen_checked = FALSE;
2346 sh4_x86.tstate = TSTATE_NONE;
2353 load_reg( R_EAX, Rm );
2354 check_ralign32( R_EAX );
2355 MEM_READ_LONG( R_EAX, R_EAX );
2356 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2357 store_spreg( R_EAX, R_VBR );
2358 sh4_x86.tstate = TSTATE_NONE;
2363 load_reg( R_EAX, Rm );
2364 check_ralign32( R_EAX );
2365 MEM_READ_LONG( R_EAX, R_EAX );
2366 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2367 store_spreg( R_EAX, R_SSR );
2368 sh4_x86.tstate = TSTATE_NONE;
2373 load_reg( R_EAX, Rm );
2374 check_ralign32( R_EAX );
2375 MEM_READ_LONG( R_EAX, R_EAX );
2376 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2377 store_spreg( R_EAX, R_SGR );
2378 sh4_x86.tstate = TSTATE_NONE;
2383 load_reg( R_EAX, Rm );
2384 check_ralign32( R_EAX );
2385 MEM_READ_LONG( R_EAX, R_EAX );
2386 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2387 store_spreg( R_EAX, R_SPC );
2388 sh4_x86.tstate = TSTATE_NONE;
2393 load_reg( R_EAX, Rm );
2394 check_ralign32( R_EAX );
2395 MEM_READ_LONG( R_EAX, R_EAX );
2396 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2397 store_spreg( R_EAX, R_DBR );
2398 sh4_x86.tstate = TSTATE_NONE;
2400 LDC.L @Rm+, Rn_BANK {:
2403 load_reg( R_EAX, Rm );
2404 check_ralign32( R_EAX );
2405 MEM_READ_LONG( R_EAX, R_EAX );
2406 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2407 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2408 sh4_x86.tstate = TSTATE_NONE;
2411 COUNT_INST(I_LDSFPSCR);
2413 load_reg( R_EAX, Rm );
2414 call_func1( sh4_write_fpscr, R_EAX );
2415 sh4_x86.tstate = TSTATE_NONE;
2418 LDS.L @Rm+, FPSCR {:
2419 COUNT_INST(I_LDSFPSCRM);
2421 load_reg( R_EAX, Rm );
2422 check_ralign32( R_EAX );
2423 MEM_READ_LONG( R_EAX, R_EAX );
2424 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2425 call_func1( sh4_write_fpscr, R_EAX );
2426 sh4_x86.tstate = TSTATE_NONE;
2432 load_reg( R_EAX, Rm );
2433 store_spreg( R_EAX, R_FPUL );
2438 load_reg( R_EAX, Rm );
2439 check_ralign32( R_EAX );
2440 MEM_READ_LONG( R_EAX, R_EAX );
2441 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2442 store_spreg( R_EAX, R_FPUL );
2443 sh4_x86.tstate = TSTATE_NONE;
2447 load_reg( R_EAX, Rm );
2448 store_spreg( R_EAX, R_MACH );
2452 load_reg( R_EAX, Rm );
2453 check_ralign32( R_EAX );
2454 MEM_READ_LONG( R_EAX, R_EAX );
2455 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2456 store_spreg( R_EAX, R_MACH );
2457 sh4_x86.tstate = TSTATE_NONE;
2461 load_reg( R_EAX, Rm );
2462 store_spreg( R_EAX, R_MACL );
2466 load_reg( R_EAX, Rm );
2467 check_ralign32( R_EAX );
2468 MEM_READ_LONG( R_EAX, R_EAX );
2469 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2470 store_spreg( R_EAX, R_MACL );
2471 sh4_x86.tstate = TSTATE_NONE;
2475 load_reg( R_EAX, Rm );
2476 store_spreg( R_EAX, R_PR );
2480 load_reg( R_EAX, Rm );
2481 check_ralign32( R_EAX );
2482 MEM_READ_LONG( R_EAX, R_EAX );
2483 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2484 store_spreg( R_EAX, R_PR );
2485 sh4_x86.tstate = TSTATE_NONE;
2488 COUNT_INST(I_LDTLB);
2489 call_func0( MMU_ldtlb );
2490 sh4_x86.tstate = TSTATE_NONE;
2499 COUNT_INST(I_OCBWB);
2503 load_reg( R_EAX, Rn );
2504 MOV_r32_r32( R_EAX, R_ECX );
2505 AND_imm32_r32( 0xFC000000, R_ECX );
2506 CMP_imm32_r32( 0xE0000000, R_ECX );
2508 if( sh4_x86.tlb_on ) {
2509 call_func1( sh4_flush_store_queue_mmu, R_EAX );
2510 TEST_r32_r32( R_EAX, R_EAX );
2513 call_func1( sh4_flush_store_queue, R_EAX );
2516 sh4_x86.tstate = TSTATE_NONE;
2519 COUNT_INST(I_SLEEP);
2521 call_func0( sh4_sleep );
2522 sh4_x86.tstate = TSTATE_NONE;
2523 sh4_x86.in_delay_slot = DELAY_NONE;
2527 COUNT_INST(I_STCSR);
2529 call_func0(sh4_read_sr);
2530 store_reg( R_EAX, Rn );
2531 sh4_x86.tstate = TSTATE_NONE;
2535 load_spreg( R_EAX, R_GBR );
2536 store_reg( R_EAX, Rn );
2541 load_spreg( R_EAX, R_VBR );
2542 store_reg( R_EAX, Rn );
2543 sh4_x86.tstate = TSTATE_NONE;
2548 load_spreg( R_EAX, R_SSR );
2549 store_reg( R_EAX, Rn );
2550 sh4_x86.tstate = TSTATE_NONE;
2555 load_spreg( R_EAX, R_SPC );
2556 store_reg( R_EAX, Rn );
2557 sh4_x86.tstate = TSTATE_NONE;
2562 load_spreg( R_EAX, R_SGR );
2563 store_reg( R_EAX, Rn );
2564 sh4_x86.tstate = TSTATE_NONE;
2569 load_spreg( R_EAX, R_DBR );
2570 store_reg( R_EAX, Rn );
2571 sh4_x86.tstate = TSTATE_NONE;
2576 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2577 store_reg( R_EAX, Rn );
2578 sh4_x86.tstate = TSTATE_NONE;
2581 COUNT_INST(I_STCSRM);
2583 call_func0( sh4_read_sr );
2584 MOV_r32_r32( R_EAX, R_EDX );
2585 load_reg( R_EAX, Rn );
2586 check_walign32( R_EAX );
2587 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
2588 MEM_WRITE_LONG( R_EAX, R_EDX );
2589 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2590 sh4_x86.tstate = TSTATE_NONE;
2595 load_reg( R_EAX, Rn );
2596 check_walign32( R_EAX );
2597 ADD_imm8s_r32( -4, R_EAX );
2598 load_spreg( R_EDX, R_VBR );
2599 MEM_WRITE_LONG( R_EAX, R_EDX );
2600 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2601 sh4_x86.tstate = TSTATE_NONE;
2606 load_reg( R_EAX, Rn );
2607 check_walign32( R_EAX );
2608 ADD_imm8s_r32( -4, R_EAX );
2609 load_spreg( R_EDX, R_SSR );
2610 MEM_WRITE_LONG( R_EAX, R_EDX );
2611 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2612 sh4_x86.tstate = TSTATE_NONE;
2617 load_reg( R_EAX, Rn );
2618 check_walign32( R_EAX );
2619 ADD_imm8s_r32( -4, R_EAX );
2620 load_spreg( R_EDX, R_SPC );
2621 MEM_WRITE_LONG( R_EAX, R_EDX );
2622 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2623 sh4_x86.tstate = TSTATE_NONE;
2628 load_reg( R_EAX, Rn );
2629 check_walign32( R_EAX );
2630 ADD_imm8s_r32( -4, R_EAX );
2631 load_spreg( R_EDX, R_SGR );
2632 MEM_WRITE_LONG( R_EAX, R_EDX );
2633 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2634 sh4_x86.tstate = TSTATE_NONE;
2639 load_reg( R_EAX, Rn );
2640 check_walign32( R_EAX );
2641 ADD_imm8s_r32( -4, R_EAX );
2642 load_spreg( R_EDX, R_DBR );
2643 MEM_WRITE_LONG( R_EAX, R_EDX );
2644 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2645 sh4_x86.tstate = TSTATE_NONE;
2647 STC.L Rm_BANK, @-Rn {:
2650 load_reg( R_EAX, Rn );
2651 check_walign32( R_EAX );
2652 ADD_imm8s_r32( -4, R_EAX );
2653 load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
2654 MEM_WRITE_LONG( R_EAX, R_EDX );
2655 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2656 sh4_x86.tstate = TSTATE_NONE;
2660 load_reg( R_EAX, Rn );
2661 check_walign32( R_EAX );
2662 ADD_imm8s_r32( -4, R_EAX );
2663 load_spreg( R_EDX, R_GBR );
2664 MEM_WRITE_LONG( R_EAX, R_EDX );
2665 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2666 sh4_x86.tstate = TSTATE_NONE;
2669 COUNT_INST(I_STSFPSCR);
2671 load_spreg( R_EAX, R_FPSCR );
2672 store_reg( R_EAX, Rn );
2674 STS.L FPSCR, @-Rn {:
2675 COUNT_INST(I_STSFPSCRM);
2677 load_reg( R_EAX, Rn );
2678 check_walign32( R_EAX );
2679 ADD_imm8s_r32( -4, R_EAX );
2680 load_spreg( R_EDX, R_FPSCR );
2681 MEM_WRITE_LONG( R_EAX, R_EDX );
2682 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2683 sh4_x86.tstate = TSTATE_NONE;
2688 load_spreg( R_EAX, R_FPUL );
2689 store_reg( R_EAX, Rn );
2694 load_reg( R_EAX, Rn );
2695 check_walign32( R_EAX );
2696 ADD_imm8s_r32( -4, R_EAX );
2697 load_spreg( R_EDX, R_FPUL );
2698 MEM_WRITE_LONG( R_EAX, R_EDX );
2699 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2700 sh4_x86.tstate = TSTATE_NONE;
2704 load_spreg( R_EAX, R_MACH );
2705 store_reg( R_EAX, Rn );
2709 load_reg( R_EAX, Rn );
2710 check_walign32( R_EAX );
2711 ADD_imm8s_r32( -4, R_EAX );
2712 load_spreg( R_EDX, R_MACH );
2713 MEM_WRITE_LONG( R_EAX, R_EDX );
2714 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2715 sh4_x86.tstate = TSTATE_NONE;
2719 load_spreg( R_EAX, R_MACL );
2720 store_reg( R_EAX, Rn );
2724 load_reg( R_EAX, Rn );
2725 check_walign32( R_EAX );
2726 ADD_imm8s_r32( -4, R_EAX );
2727 load_spreg( R_EDX, R_MACL );
2728 MEM_WRITE_LONG( R_EAX, R_EDX );
2729 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2730 sh4_x86.tstate = TSTATE_NONE;
2734 load_spreg( R_EAX, R_PR );
2735 store_reg( R_EAX, Rn );
2739 load_reg( R_EAX, Rn );
2740 check_walign32( R_EAX );
2741 ADD_imm8s_r32( -4, R_EAX );
2742 load_spreg( R_EDX, R_PR );
2743 MEM_WRITE_LONG( R_EAX, R_EDX );
2744 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2745 sh4_x86.tstate = TSTATE_NONE;
2750 /* Do nothing. Well, we could emit a NOP (0x90), but what would really be the point? */
2753 sh4_x86.in_delay_slot = DELAY_NONE;