/**
 * SH4 => x86 translation. This version does no real optimization; it just
 * outputs straight-line x86 code. It mainly exists to provide a baseline
 * against which to test the optimizing versions.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"

#define DEFAULT_BACKPATCH_SIZE 4096

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    uint32_t exc_code;
};
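/* Illustrative sketch (not in the original source): a record with
 * fixup_offset == 0x40 and fixup_icount == 3 says "the 32-bit operand at
 * code+0x40 belongs to the 4th SH4 instruction of the block". When the
 * block is finalized, an exception-exit stub is emitted for each record
 * and the recorded operand is patched to reach the stub for its exc_code.
 */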
/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    gboolean fpuen_checked; /* true if we've already emitted the FPU-enabled check */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn; /* Trace stack height for alignment purposes */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

#define TSTATE_NONE -1
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
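/* Worked example (assuming the TSTATE_* values mirror the x86 condition-code
 * encodings, e.g. TSTATE_E == 0x4): with tstate == TSTATE_E, JT_rel8 emits
 * 0x70+0x4 == 0x74 (JE rel8), and JF_rel8 emits 0x70+(0x4^1) == 0x75
 * (JNE rel8) - flipping bit 0 of an x86 condition code inverts it, which is
 * why the T-clear case is just tstate^1.
 */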
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
gboolean is_sse3_supported()
{
    uint32_t features;
    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
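/* CPUID leaf 1 reports SSE3 ("PNI") in bit 0 of ECX, hence the (features & 1)
 * test above. A minimal sketch of an equivalent check, assuming a GCC/Clang
 * toolchain, would be:
 *
 *     __builtin_cpu_init();
 *     return __builtin_cpu_supports("sse3") ? TRUE : FALSE;
 */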
void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.sse3_enabled = is_sse3_supported();
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;  /* attribute the exception to the branch owning the slot */
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16s( int x86reg, int sh4reg )
{
    /* movsx [bp+n], reg */
    OP(0x0F); OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    /* movzx [bp+n], reg */
    OP(0x0F); OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}

/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();             /* assumed emitter for the REX.W prefix (0x48) */
    OP(0xB8 + x86reg);
    OP64(value);
}

/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (e.g. for register-to-register moves)
 */
#define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
#define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (e.g. for register-to-register moves)
 */
#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
#define push_fpul()  FLDF_sh4r(R_FPUL)
#define pop_fpul()   FSTPF_sh4r(R_FPUL)
#define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
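/* Usage sketch (illustrative, not from the original source): a
 * single-precision FADD FRm, FRn could be emitted with this family roughly as
 *
 *     push_fr(FRm);   // FLD  fr[0][FRm^1]  -> x87 stack: FRm
 *     push_fr(FRn);   // FLD  fr[0][FRn^1]  -> x87 stack: FRn, FRm
 *     FADDP_st(1);    // assumed x87 emitter: ST(1) += ST(0), pop
 *     pop_fr(FRn);    // FSTP the result back to fr[0][FRn^1]
 *
 * The ^1 in the single-precision macros accounts for the host storing each
 * FR pair word-swapped; the &0x0E in the double-precision macros selects the
 * even register of the pair.
 */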
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
    }
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_FD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
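/* Worked example: check_walign32 tests the low two bits of the effective
 * address, so a long store to 0x8C000002 (0x8C000002 & 3 == 2) takes the
 * JNE_exc path and raises EXC_DATA_ADDR_WRITE, while 0x8C000004 falls
 * through. Word accesses use mask 1 (2-byte alignment) and 64-bit FP
 * accesses use mask 7 (8-byte alignment).
 */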
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }

/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
#define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
        call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
        call_func1_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, pc); }
#define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
        call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
        call_func2_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
#else
#define _CALL_READ(addr_reg, fn) call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg)
#define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
#endif
#define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
#define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
#define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)
#define MEM_PREFETCH( addr_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, prefetch)
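/* Expansion sketch (illustrative): MEM_READ_LONG( R_EAX, R_ECX ) becomes
 *
 *     decode_address(R_EAX);         // leaves the mem_region_fn pointer in ECX
 *     _CALL_READ(R_EAX, read_long);  // indirect call through fn->read_long
 *     MEM_RESULT(R_ECX);             // MOV EAX -> ECX, since EAX holds the result
 *
 * i.e. every emulated access costs one region-table lookup plus one indirect
 * call, with the _exc variants additionally recording a backpatch point.
 */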
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/****** Import appropriate calling conventions ******/
#if SIZEOF_VOID_P == 8
#include "sh4/ia64abi.h"
#else /* 32-bit system */
#include "sh4/ia32abi.h"
#endif
void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
}
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}
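/* Worked example: with 2 backpatch records the reserved size is
 * EPILOGUE_SIZE + 24 bytes; with 5 records it is
 * EPILOGUE_SIZE + 48 + 2*15 = EPILOGUE_SIZE + 78 bytes. The constants are
 * worst-case byte counts for the exception-exit stubs emitted after the
 * epilogue.
 */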
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}
#define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 * Call sh4_execute_instruction
 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
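    /* Worked arithmetic (illustrative): SH4 instructions are 2 bytes, so
     * (endpc - block_start_pc)>>1 is the number of instructions already
     * completed in this block; the +1 covers the instruction about to be
     * emulated (single-stepping doesn't update slice_cycle itself), and the
     * sum is scaled by sh4_cpu_period to get elapsed time. */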
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * the pair effectively executes as one instruction).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    ADD_imm8s_sh4r( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    if( sh4_x86.tstate != TSTATE_C ) {
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_O;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

AND.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( R_GBR, R_EAX );
    MOV_r32_esp8(R_EAX, 0);
    MEM_READ_BYTE( R_EAX, R_EDX );
    MOV_esp8_r32(0, R_EAX);
    AND_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    sh4_x86.tstate = TSTATE_E;

    COUNT_INST(I_CMPEQI);
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    sh4_x86.tstate = TSTATE_E;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    sh4_x86.tstate = TSTATE_GE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    sh4_x86.tstate = TSTATE_G;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    sh4_x86.tstate = TSTATE_A;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    sh4_x86.tstate = TSTATE_AE;

    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    sh4_x86.tstate = TSTATE_G;

    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    sh4_x86.tstate = TSTATE_GE;

    COUNT_INST(I_CMPSTR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    TEST_r8_r8( R_AH, R_AH );
    SHR_imm8_r32( 16, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    TEST_r8_r8( R_AH, R_AH );
    sh4_x86.tstate = TSTATE_E;
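    /* Worked example (illustrative): CMP/STR sets T if any of the four bytes
     * of Rm and Rn are equal. With Rm=0x11223344 and Rn=0x55663374, the XOR
     * above leaves 0x44440030; byte 1 (0x33^0x33) is zero, so one of the
     * byte tests fires and T=1. */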
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    sh4_x86.tstate = TSTATE_NE;

    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1

    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL ); // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
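    /* Note (illustrative): DIV1 is one step of a non-restoring division.
     * Q' above is the bit shifted out of Rn, the CMP of Q and M selects
     * between the ADD and SUB forms, and the new Q and T values are then
     * derived from the resulting carry, per the SH4 programming manual. */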
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_E;

    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    MOV_r32_esp8(R_EAX, 0);
    load_reg( R_EAX, Rm );
    LEA_r32disp8_r32( R_EAX, 4, R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    load_reg( R_EAX, Rn );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );

    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );
    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    call_func0( signsat48 );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    load_reg( R_EAX, Rm );
    LEA_r32disp8_r32( R_EAX, 2, R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
    // Note: we translate the address twice in case the pair of reads crosses
    // a page boundary. It may be worth adding a page-boundary check to skip
    // the second translation.
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    load_reg( R_EAX, Rn );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( end );                // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( positive );            // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    sh4_x86.tstate = TSTATE_NONE;
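    /* Note: with S==1 the MAC.W accumulate saturates MACL to the range
     * [0x80000000, 0x7FFFFFFF] and flags the overflow by writing 1 to MACH
     * (the path above); with S==0 it is a full 64-bit accumulate into
     * MACH:MACL via the ADD/ADC pair. */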
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

OR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( R_GBR, R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE( R_EAX, R_EDX );
    MOV_esp8_r32( 0, R_EAX );
    OR_imm32_r32(imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptysar);         // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHR_r32_CL( R_EAX );       // 2
    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_C;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_O;

    COUNT_INST(I_SWAPB);
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
    store_reg( R_EAX, Rn );

    COUNT_INST(I_SWAPB);
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE( R_EAX, R_EDX );
    TEST_r8_r8( R_DL, R_DL );
    OR_imm8_r8( 0x80, R_DL );
    MOV_esp8_r32( 0, R_EAX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
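    /* Note: on real hardware TAS.B uses a locked read-modify-write bus
     * cycle; the read/OR 0x80/write sequence above is not atomic on the
     * host, which is assumed to be harmless for a single-threaded SH4. */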
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    sh4_x86.tstate = TSTATE_E;

    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    sh4_x86.tstate = TSTATE_E;

TST.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0);
    ADD_sh4r_r32( R_GBR, R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    sh4_x86.tstate = TSTATE_E;

    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

XOR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( R_GBR, R_EAX );
    MOV_r32_esp8( R_EAX, 0 );
    MEM_READ_BYTE(R_EAX, R_EDX);
    MOV_esp8_r32( 0, R_EAX );
    XOR_imm32_r32( imm, R_EDX );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_XTRCT);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

/* Data move instructions */
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );

    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rn );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    LEA_r32disp8_r32( R_EAX, -1, R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

MOV.B Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

MOV.B R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

MOV.B R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    MEM_READ_BYTE( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

MOV.B @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

MOV.B @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

MOV.B @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_ECX );
    CMP_imm32_r32( 0xE0000000, R_ECX );
    AND_imm8s_r32( 0x3C, R_EAX );
    load_reg( R_EDX, Rm );
    MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
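    /* Worked example: the AND/CMP pair above detects the store-queue area.
     * 0xE0000004 & 0xFC000000 == 0xE0000000, so that write lands directly in
     * the store_queue buffer at offset (addr & 0x3C); any other address,
     * e.g. 0x8C000004, takes the normal MEM_WRITE_LONG path. */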
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

MOV.L Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
    check_walign32( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

MOV.L R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

MOV.L Rm, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_ECX );
    CMP_imm32_r32( 0xE0000000, R_ECX );
    AND_imm8s_r32( 0x3C, R_EAX );
    load_reg( R_EDX, Rm );
    MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

MOV.L @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

MOV.L @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

MOV.L @(disp, PC), Rn {:
    COUNT_INST(I_MOVLPC);
    if( sh4_x86.in_delay_slot ) {
    uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
    if( IS_IN_ICACHE(target) ) {
        // If the target address is in the same page as the code, it's
        // pretty safe to just ref it directly and circumvent the whole
        // memory subsystem. (This is a big performance win.)

        // FIXME: There's a corner-case that's not handled here when
        // the current code-page is in the ITLB but not in the UTLB -
        // this should generate a TLB miss, although the exact SH4
        // behaviour needs testing to confirm. It's unlikely that
        // anything depends on this behaviour, though.
        sh4ptr_t ptr = GET_ICACHE_PTR(target);
        MOV_moff32_EAX( ptr );
    } else {
        // Note: we use sh4r.pc for the calculation, as we could be running
        // at a different virtual address than the one the translation was
        // done with, but we can safely assume that the low bits are the same.
        load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        sh4_x86.tstate = TSTATE_NONE;
    }
    store_reg( R_EAX, Rn );
MOV.L @(disp, Rm), Rn {:
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    LEA_r32disp8_r32( R_EAX, -2, R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

MOV.W Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
    check_walign16( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

MOV.W R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

MOV.W R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

MOV.W @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

MOV.W @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

MOV.W @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
    // See comments for MOV.L @(disp, PC), Rn
    uint32_t target = pc + disp + 4;
    if( IS_IN_ICACHE(target) ) {
        sh4ptr_t ptr = GET_ICACHE_PTR(target);
        MOV_moff32_EAX( ptr );
        MOVSX_r16_r32( R_EAX, R_EAX );
    } else {
        load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
        ADD_sh4r_r32( R_PC, R_EAX );
        MEM_READ_WORD( R_EAX, R_EAX );
        sh4_x86.tstate = TSTATE_NONE;
    }
    store_reg( R_EAX, Rn );
MOV.W @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

MOVA @(disp, PC), R0 {:
    if( sh4_x86.in_delay_slot ) {
    load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
    ADD_sh4r_r32( R_PC, R_ECX );
    store_reg( R_ECX, 0 );
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_MOVCA);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;

/* Control transfer instructions */
    if( sh4_x86.in_delay_slot ) {
    sh4vma_t target = disp + pc + 4;
    JT_rel8( nottaken );
    exit_block_rel(target, pc+2 );
    JMP_TARGET(nottaken);

    if( sh4_x86.in_delay_slot ) {
    sh4_x86.in_delay_slot = DELAY_PC;
    if( UNTRANSLATABLE(pc+2) ) {
        load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
        ADD_imm32_r32( disp, R_EAX );
        JMP_TARGET(nottaken);
        ADD_sh4r_r32( R_PC, R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        exit_block_emu(pc+2);
        sh4_x86.branch_taken = TRUE;
    } else {
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        sh4vma_t target = disp + pc + 4;
        OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
        int save_tstate = sh4_x86.tstate;
        sh4_translate_instruction(pc+2);
        exit_block_rel( target, pc+4 );
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_x86.tstate = save_tstate;
        sh4_translate_instruction(pc+2);
    }
    if( sh4_x86.in_delay_slot ) {
    sh4_x86.in_delay_slot = DELAY_PC;
    sh4_x86.branch_taken = TRUE;
    if( UNTRANSLATABLE(pc+2) ) {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        exit_block_emu(pc+2);
    } else {
        sh4_translate_instruction( pc + 2 );
        exit_block_rel( disp + pc + 4, pc+4 );
    }

    if( sh4_x86.in_delay_slot ) {
    load_spreg( R_EAX, R_PC );
    ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
    ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
    store_spreg( R_EAX, R_NEW_PC );
    sh4_x86.in_delay_slot = DELAY_PC;
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.branch_taken = TRUE;
    if( UNTRANSLATABLE(pc+2) ) {
        exit_block_emu(pc+2);
    } else {
        sh4_translate_instruction( pc + 2 );
        exit_block_newpcset(pc+2);
    }

    if( sh4_x86.in_delay_slot ) {
    load_spreg( R_EAX, R_PC );
    ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
    store_spreg( R_EAX, R_PR );
    sh4_x86.in_delay_slot = DELAY_PC;
    sh4_x86.branch_taken = TRUE;
    sh4_x86.tstate = TSTATE_NONE;
    if( UNTRANSLATABLE(pc+2) ) {
        ADD_imm32_r32( disp, R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        exit_block_emu(pc+2);
    } else {
        sh4_translate_instruction( pc + 2 );
        exit_block_rel( disp + pc + 4, pc+4 );
    }

    if( sh4_x86.in_delay_slot ) {
    load_spreg( R_EAX, R_PC );
    ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
    store_spreg( R_EAX, R_PR );
    ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
    store_spreg( R_EAX, R_NEW_PC );
    sh4_x86.in_delay_slot = DELAY_PC;
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.branch_taken = TRUE;
    if( UNTRANSLATABLE(pc+2) ) {
        exit_block_emu(pc+2);
    } else {
        sh4_translate_instruction( pc + 2 );
        exit_block_newpcset(pc+2);
    }

    if( sh4_x86.in_delay_slot ) {
    sh4vma_t target = disp + pc + 4;
    JF_rel8( nottaken );
    exit_block_rel(target, pc+2 );
    JMP_TARGET(nottaken);

    if( sh4_x86.in_delay_slot ) {
    sh4_x86.in_delay_slot = DELAY_PC;
    if( UNTRANSLATABLE(pc+2) ) {
        load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
        ADD_imm32_r32( disp, R_EAX );
        JMP_TARGET(nottaken);
        ADD_sh4r_r32( R_PC, R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        exit_block_emu(pc+2);
        sh4_x86.branch_taken = TRUE;
    } else {
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
        int save_tstate = sh4_x86.tstate;
        sh4_translate_instruction(pc+2);
        exit_block_rel( disp + pc + 4, pc+4 );
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_x86.tstate = save_tstate;
        sh4_translate_instruction(pc+2);
    }

    if( sh4_x86.in_delay_slot ) {
    load_reg( R_ECX, Rn );
    store_spreg( R_ECX, R_NEW_PC );
    sh4_x86.in_delay_slot = DELAY_PC;
    sh4_x86.branch_taken = TRUE;
    if( UNTRANSLATABLE(pc+2) ) {
        exit_block_emu(pc+2);
    } else {
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
    }

    if( sh4_x86.in_delay_slot ) {
    load_spreg( R_EAX, R_PC );
    ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
    store_spreg( R_EAX, R_PR );
    load_reg( R_ECX, Rn );
    store_spreg( R_ECX, R_NEW_PC );
    sh4_x86.in_delay_slot = DELAY_PC;
    sh4_x86.branch_taken = TRUE;
    sh4_x86.tstate = TSTATE_NONE;
    if( UNTRANSLATABLE(pc+2) ) {
        exit_block_emu(pc+2);
    } else {
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
    }

    if( sh4_x86.in_delay_slot ) {
    load_spreg( R_ECX, R_SPC );
    store_spreg( R_ECX, R_NEW_PC );
    load_spreg( R_EAX, R_SSR );
    call_func1( sh4_write_sr, R_EAX );
    sh4_x86.in_delay_slot = DELAY_PC;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.branch_taken = TRUE;
    if( UNTRANSLATABLE(pc+2) ) {
        exit_block_emu(pc+2);
    } else {
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
    }

    if( sh4_x86.in_delay_slot ) {
    load_spreg( R_ECX, R_PR );
    store_spreg( R_ECX, R_NEW_PC );
    sh4_x86.in_delay_slot = DELAY_PC;
    sh4_x86.branch_taken = TRUE;
    if( UNTRANSLATABLE(pc+2) ) {
        exit_block_emu(pc+2);
    } else {
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
    }
    COUNT_INST(I_TRAPA);
    if( sh4_x86.in_delay_slot ) {
    load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );
    load_imm32( R_EAX, imm );
    call_func1( sh4_raise_trap, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    exit_block_pcset(pc);
    sh4_x86.branch_taken = TRUE;

    COUNT_INST(I_UNDEF);
    if( sh4_x86.in_delay_slot ) {
        exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);
    } else {
        exit_block_exc(EXC_ILLEGAL, pc);
    }

    COUNT_INST(I_CLRMAC);
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;

    sh4_x86.tstate = TSTATE_NONE;

    sh4_x86.tstate = TSTATE_C;

    sh4_x86.tstate = TSTATE_NONE;

    sh4_x86.tstate = TSTATE_C;

/* Floating point moves */
    COUNT_INST(I_FMOV1);
    if( sh4_x86.double_size ) {
        load_dr0( R_EAX, FRm );
        load_dr1( R_ECX, FRm );
        store_dr0( R_EAX, FRn );
        store_dr1( R_ECX, FRn );
    } else {
        load_fr( R_EAX, FRm ); // SZ=0 branch
        store_fr( R_EAX, FRn );
    }

    COUNT_INST(I_FMOV2);
    load_reg( R_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( R_EAX );
        load_dr0( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX );
        load_reg( R_EAX, Rn );
        LEA_r32disp8_r32( R_EAX, 4, R_EAX );
        load_dr1( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX );
    } else {
        check_walign32( R_EAX );
        load_fr( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_FMOV5);
    load_reg( R_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_dr0( R_EAX, FRn );
        load_reg( R_EAX, Rm );
        LEA_r32disp8_r32( R_EAX, 4, R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_dr1( R_EAX, FRn );
    } else {
        check_ralign32( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_FMOV3);
    load_reg( R_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( R_EAX );
        LEA_r32disp8_r32( R_EAX, -8, R_EAX );
        load_dr0( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX );
        load_reg( R_EAX, Rn );
        LEA_r32disp8_r32( R_EAX, -4, R_EAX );
        load_dr1( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX );
        ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
    } else {
        check_walign32( R_EAX );
        LEA_r32disp8_r32( R_EAX, -4, R_EAX );
        load_fr( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX );
        ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
    }
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_FMOV6);
    load_reg( R_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_dr0( R_EAX, FRn );
        load_reg( R_EAX, Rm );
        LEA_r32disp8_r32( R_EAX, 4, R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_dr1( R_EAX, FRn );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
    } else {
        check_ralign32( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    sh4_x86.tstate = TSTATE_NONE;
FMOV FRm, @(R0, Rn) {:
    COUNT_INST(I_FMOV4);
    load_reg( R_EAX, Rn );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    if( sh4_x86.double_size ) {
        check_walign64( R_EAX );
        load_dr0( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX );
        load_reg( R_EAX, Rn );
        ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
        LEA_r32disp8_r32( R_EAX, 4, R_EAX );
        load_dr1( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX );
    } else {
        check_walign32( R_EAX );
        load_fr( R_EDX, FRm );
        MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
    }
    sh4_x86.tstate = TSTATE_NONE;

FMOV @(R0, Rm), FRn {:
    COUNT_INST(I_FMOV7);
    load_reg( R_EAX, Rm );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    if( sh4_x86.double_size ) {
        check_ralign64( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_dr0( R_EAX, FRn );
        load_reg( R_EAX, Rm );
        ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
        LEA_r32disp8_r32( R_EAX, 4, R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_dr1( R_EAX, FRn );
    } else {
        check_ralign32( R_EAX );
        MEM_READ_LONG( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;

FLDI0 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI0);
    if( sh4_x86.double_prec == 0 ) {
        XOR_r32_r32( R_EAX, R_EAX );
        store_fr( R_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;

FLDI1 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI1);
    if( sh4_x86.double_prec == 0 ) {
        load_imm32(R_EAX, 0x3F800000);
        store_fr( R_EAX, FRn );
    }

    COUNT_INST(I_FLOAT);
    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {
    load_ptr( R_ECX, &max_int );
    FILD_r32ind( R_ECX );
    load_ptr( R_ECX, &min_int );    // 5
    FILD_r32ind( R_ECX );           // 2
    JAE_rel8( sat2 );               // 2
    load_ptr( R_EAX, &save_fcw );
    FNSTCW_r32ind( R_EAX );
    load_ptr( R_EDX, &trunc_fcw );
    FLDCW_r32ind( R_EDX );
    FISTP_sh4r(R_FPUL);             // 3
    FLDCW_r32ind( R_EAX );
    MOV_r32ind_r32( R_ECX, R_ECX ); // 2
    store_spreg( R_ECX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
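    /* Note: the FCW dance above exists because FISTP rounds according to the
     * current x87 control word, while FTRC must truncate toward zero: the
     * original FCW is saved to save_fcw, trunc_fcw (0x0F7F, RC=11) is loaded,
     * the value is stored with FISTP, and the saved FCW is restored. The
     * max_int/min_int comparisons clamp out-of-range values first. */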
    load_fr( R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );

    load_spreg( R_EAX, R_FPUL );
    store_fr( R_EAX, FRn );

    COUNT_INST(I_FCNVDS);
    if( sh4_x86.double_prec ) {

    COUNT_INST(I_FCNVSD);
    if( sh4_x86.double_prec ) {

/* Floating point instructions */
    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

FMAC FR0, FRm, FRn {:
    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

    COUNT_INST(I_FSRRA);
    if( sh4_x86.double_prec == 0 ) {

    COUNT_INST(I_FSQRT);
    if( sh4_x86.double_prec ) {

    if( sh4_x86.double_prec ) {

    COUNT_INST(I_FCMPEQ);
    if( sh4_x86.double_prec ) {
    sh4_x86.tstate = TSTATE_E;

    COUNT_INST(I_FCMPGT);
    if( sh4_x86.double_prec ) {
    sh4_x86.tstate = TSTATE_A;

    if( sh4_x86.double_prec == 0 ) {
        LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
        load_spreg( R_EAX, R_FPUL );
        call_func2( sh4_fsca, R_EAX, R_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;

    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled ) {
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
            MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
            HADDPS_xmm_xmm( 4, 4 );
            HADDPS_xmm_xmm( 4, 4 );
            MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
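            /* Illustrative: after MULPS, xmm4 holds the four products
             * {p0,p1,p2,p3}; the first HADDPS gives {p0+p1, p2+p3, ...} and
             * the second reduces every lane to the full dot product, which
             * the MOVSS above stores to FR[(FVn<<2)+3] (host slot
             * (FVn<<2)+2 because of the word-swapped pair layout). */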
            push_fr( (FVm<<2)+1);
            push_fr( (FVn<<2)+1);
            push_fr( (FVm<<2)+2);
            push_fr( (FVn<<2)+2);
            push_fr( (FVm<<2)+3);
            push_fr( (FVn<<2)+3);
            pop_fr( (FVn<<2)+3);

    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled ) {
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 );  // M1 M0 M3 M2
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 );  // M5 M4 M7 M6
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 );  // M9 M8 M11 M10
            MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 ); // M13 M12 M15 M14
            MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
            MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
            MOVAPS_xmm_xmm( 4, 6 );
            MOVAPS_xmm_xmm( 5, 7 );
            MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
            MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
            MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
            MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
            MULPS_xmm_xmm( 0, 4 );
            MULPS_xmm_xmm( 1, 5 );
            MULPS_xmm_xmm( 2, 6 );
            MULPS_xmm_xmm( 3, 7 );
            ADDPS_xmm_xmm( 5, 4 );
            ADDPS_xmm_xmm( 7, 6 );
            ADDPS_xmm_xmm( 6, 4 );
            MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
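            /* Sketch of the math (illustrative): xmm0-xmm3 hold the XMTRX
             * columns and xmm4-xmm7 each hold one vector element broadcast
             * across all four lanes, so the MULPS/ADDPS tree computes
             * result = V0*col0 + V1*col1 + V2*col2 + V3*col3, i.e. the 4x4
             * matrix-vector product, written back by the MOVAPS above. */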
        } else {
            LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
            call_func1( sh4_ftrv, R_EAX );
        }
    }
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_FRCHG);
    XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
    call_func0( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_FSCHG);
    XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR);
    XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_size = !sh4_x86.double_size;
/* Processor control instructions */
    COUNT_INST(I_LDCSR);
    if( sh4_x86.in_delay_slot ) {
    load_reg( R_EAX, Rm );
    call_func1( sh4_write_sr, R_EAX );
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_LDCSRM);
    if( sh4_x86.in_delay_slot ) {
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    call_func1( sh4_write_sr, R_EAX );
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;

LDC.L @Rm+, Rn_BANK {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_LDSFPSCR);
    load_reg( R_EAX, Rm );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;

LDS.L @Rm+, FPSCR {:
    COUNT_INST(I_LDSFPSCRM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );

    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_LDTLB);
    call_func0( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_OCBWB);

    load_reg( R_EAX, Rn );
    MEM_PREFETCH( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;

    COUNT_INST(I_SLEEP);
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;

    COUNT_INST(I_STCSR);
    call_func0(sh4_read_sr);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );

    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_STCSRM);
    call_func0( sh4_read_sr );
    MOV_r32_r32( R_EAX, R_EDX );
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    LEA_r32disp8_r32( R_EAX, -4, R_EAX );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_VBR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_SSR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_SPC );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_SGR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_DBR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

STC.L Rm_BANK, @-Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_GBR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_STSFPSCR);
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );

STS.L FPSCR, @-Rn {:
    COUNT_INST(I_STSFPSCRM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_FPUL );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_MACH );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_MACL );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;

    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );

    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    load_spreg( R_EDX, R_PR );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
    /* Do nothing. Well, we could emit a NOP (0x90), but what would really be the point? */
    sh4_x86.in_delay_slot = DELAY_NONE;