4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
8 * Copyright (c) 2007 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
29 #include "sh4/xltcache.h"
30 #include "sh4/sh4core.h"
31 #include "sh4/sh4trans.h"
32 #include "sh4/sh4stat.h"
33 #include "sh4/sh4mmio.h"
34 #include "sh4/x86op.h"
/* Initial allocation size, in bytes, for the per-block backpatch list. */
38 #define DEFAULT_BACKPATCH_SIZE 4096
/* One pending code fixup, recorded during translation so the block epilogue
 * can emit the matching exception-exit stub.
 * NOTE(review): the exc_code member (written in sh4_x86_add_backpatch below)
 * and the struct's closing brace are not visible in this excerpt. */
40 struct backpatch_record {
41 uint32_t fixup_offset; /* byte offset of the fixup site within the emitted code block */
42 uint32_t fixup_icount; /* SH4 instruction count from the start of the block (pc delta >> 1) */
51 * Struct to manage internal translation state. This state is not saved -
52 * it is only valid between calls to sh4_translate_begin_block() and
53 * sh4_translate_end_block()
/* NOTE(review): additional members referenced elsewhere in this file
 * (e.g. tstate, in_delay_slot) are not visible in this excerpt. */
55 struct sh4_x86_state {
57 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
58 gboolean branch_taken; /* true if we branched unconditionally */
59 gboolean double_prec; /* true if FPU is in double-precision mode */
60 gboolean double_size; /* true if FPU is in double-size mode */
61 gboolean sse3_enabled; /* true if host supports SSE3 instructions */
62 uint32_t block_start_pc; /* SH4 address of the first instruction in the current block */
63 uint32_t stack_posn; /* Trace stack height for alignment purposes */
67 gboolean tlb_on; /* True if tlb translation is active */
69 /* Allocated memory for the (block-wide) back-patch list */
70 struct backpatch_record *backpatch_list;
71 uint32_t backpatch_posn; /* number of entries currently used */
72 uint32_t backpatch_size; /* capacity of backpatch_list, in entries */
/* Sentinel meaning "no SH4 condition result is currently cached in the host
 * EFLAGS" - see JT_rel8/JF_rel8 below, which test against this value. */
75 #define TSTATE_NONE -1
/* Instruction-frequency statistics hook. The counting variant clobbers EAX
 * (and therefore invalidates any cached condition state); the second variant
 * compiles to nothing.
 * NOTE(review): the #else/#endif lines fall between the visible lines of
 * this excerpt. */
85 #ifdef ENABLE_SH4STATS
86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
88 #define COUNT_INST(id)
91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
/* If no condition is cached in the host flags (tstate == TSTATE_NONE), emit
 * a compare of sh4r.t against 1 and cache TSTATE_E (equal); then emit a
 * Jcc rel8 (opcode 0x70 + condition code) whose 8-bit displacement is
 * resolved later via MARK_JMP8 - the OP(-1) is the placeholder byte. */
92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
93 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
94 OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
/* Same as JT_rel8 but with the x86 condition code inverted (cc ^ 1). */
97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
98 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
99 OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
/* Translator state - only valid between begin_block/end_block (see the
 * struct comment above). */
101 static struct sh4_x86_state sh4_x86;
/* Saturation bounds - presumably used to clamp float->int conversions
 * (e.g. FTRC); TODO confirm against the FP translation code. */
103 static uint32_t max_int = 0x7FFFFFFF;
104 static uint32_t min_int = 0x80000000;
105 static uint32_t save_fcw; /* save value for fpu control word */
106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
/**
 * Detect host SSE3 support: execute CPUID leaf 1 and test ECX bit 0
 * (the SSE3 feature flag). EAX/EBX/EDX are declared clobbered.
 * NOTE(review): the declaration of `features` and the function braces are
 * not visible in this excerpt.
 */
108 gboolean is_sse3_supported()
112 __asm__ __volatile__(
113 "mov $0x01, %%eax\n\t"
114 "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
115 return (features & 1) ? TRUE : FALSE;
/**
 * One-time translator initialization: allocate the initial backpatch list
 * (backpatch_size counts entries, not bytes) and probe for SSE3.
 * NOTE(review): the malloc result is not checked here; an OOM would fault
 * on first use of backpatch_list.
 */
118 void sh4_translate_init(void)
120 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
121 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
122 sh4_x86.sse3_enabled = is_sse3_supported();
/**
 * Record a pending fixup at fixup_addr (a location in the emitted code) so
 * the block epilogue can patch in an exception exit raising exc_code at the
 * SH4 instruction corresponding to fixup_pc.
 * Grows the list by doubling when full.
 * NOTE(review): the realloc result overwrites the pointer directly - safe
 * only because the assert aborts on failure. The body of the
 * in_delay_slot branch is not visible in this excerpt.
 */
126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
128 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
129 sh4_x86.backpatch_size <<= 1;
130 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
131 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
132 assert( sh4_x86.backpatch_list != NULL );
134 if( sh4_x86.in_delay_slot ) {
/* fixup_offset is relative to the start of the current translation block */
137 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
138 ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
139 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
140 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
141 sh4_x86.backpatch_posn++;
145 * Emit an instruction to load an SH4 reg into a real register
147 static inline void load_reg( int x86reg, int sh4reg )
149 /* mov [bp+n], reg */
/* Emits the ModR/M byte 0x45 | (reg << 3), i.e. [ebp + disp8], followed by
 * the disp8 offset of r[sh4reg] in the sh4r struct.
 * NOTE(review): the opcode byte emission (presumably OP(0x8B), mov r32,r/m32)
 * is not visible in this excerpt. */
151 OP(0x45 + (x86reg<<3));
152 OP(REG_OFFSET(r[sh4reg]));
/* Load an SH4 reg into an x86 register with 16-bit sign extension.
 * NOTE(review): the opcode emission lines (movsx r32, r/m16 encoding) are
 * not visible in this excerpt - only the ModR/M emission remains. */
155 static inline void load_reg16s( int x86reg, int sh4reg )
159 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
/* Load an SH4 reg into an x86 register with 16-bit zero extension.
 * NOTE(review): opcode emission lines not visible, as above. */
162 static inline void load_reg16u( int x86reg, int sh4reg )
166 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
/* Load/store an arbitrary sh4r field (by byte offset) to/from a register. */
170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
173 * Emit an instruction to load an immediate value into a register
/* NOTE(review): the bodies of load_imm32/load_imm64 are not visible in this
 * excerpt beyond their comments. */
175 static inline void load_imm32( int x86reg, uint32_t value ) {
176 /* mov #value, reg */
183 * Load an immediate 64-bit quantity (note: x86-64 only)
185 static inline void load_imm64( int x86reg, uint64_t value ) {
186 /* mov #value, reg */
193 * Emit an instruction to store an SH4 reg (RN)
/* NOTE(review): `void static inline` is legal but the conventional ordering
 * is `static inline void`. The opcode byte emission (mov r/m32, r32) is not
 * visible in this excerpt. */
195 void static inline store_reg( int x86reg, int sh4reg ) {
196 /* mov reg, [bp+n] */
198 OP(0x45 + (x86reg<<3));
199 OP(REG_OFFSET(r[sh4reg]));
203 * Load an FR register (single-precision floating point) into an integer x86
204 * register (eg for register-to-register moves)
/* The (frm)^1 / frm|0x01 / frm&0x0E index twiddling presumably compensates
 * for the host word layout of the 64-bit FP register pairs - TODO confirm
 * against the definition of sh4r.fr. fr[0] is the main bank, fr[1] the XF
 * bank. 0x8B = mov r32, r/m32. */
206 #define load_fr(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
207 #define load_xf(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
210 * Load the low half of a DR register (DR or XD) into an integer x86 register
212 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
213 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
216 * Store an FR register (single-precision floating point) from an integer x86+
217 * register (eg for register-to-register moves)
/* 0x89 = mov r/m32, r32 - the store counterparts of the loads above. */
219 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
220 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
222 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
223 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
/* x87 stack push/pop helpers: FLD*/FSTP* single (F) or double (D) precision
 * values directly from/to the sh4r FP register file. */
226 #define push_fpul() FLDF_sh4r(R_FPUL)
227 #define pop_fpul() FSTPF_sh4r(R_FPUL)
228 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
229 #define pop_fr(frm) FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
230 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
231 #define pop_xf(frm) FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
232 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
233 #define pop_dr(frm) FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
234 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
235 #define pop_xdr(frm) FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
239 /* Exception checks - Note that all exception checks will clobber EAX */
/* check_priv: in user mode (SR.MD clear at translation time), emit an exit
 * raising EXC_ILLEGAL - or EXC_SLOT_ILLEGAL at pc-2 when in a delay slot -
 * and mark the block as unconditionally branched.
 * NOTE(review): several continuation lines of this macro (else branch,
 * closing braces) are not visible in this excerpt. */
241 #define check_priv( ) \
242 if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
243 if( sh4_x86.in_delay_slot ) { \
244 exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
246 exit_block_exc(EXC_ILLEGAL, pc); \
248 sh4_x86.branch_taken = TRUE; \
249 sh4_x86.in_delay_slot = DELAY_NONE; \
253 #define check_fpuen( ) \
254 if( !sh4_x86.fpuen_checked ) {\
255 sh4_x86.fpuen_checked = TRUE;\
256 load_spreg( R_EAX, R_SR );\
257 AND_imm32_r32( SR_FD, R_EAX );\
258 if( sh4_x86.in_delay_slot ) {\
259 JNE_exc(EXC_SLOT_FPU_DISABLED);\
261 JNE_exc(EXC_FPU_DISABLED);\
263 sh4_x86.tstate = TSTATE_NONE; \
/* Alignment checks: test the relevant low address bits and raise a data
 * address error (read or write variant) when any are set. All clobber the
 * host flags via TEST. */
266 #define check_ralign16( x86reg ) \
267 TEST_imm32_r32( 0x00000001, x86reg ); \
268 JNE_exc(EXC_DATA_ADDR_READ)
270 #define check_walign16( x86reg ) \
271 TEST_imm32_r32( 0x00000001, x86reg ); \
272 JNE_exc(EXC_DATA_ADDR_WRITE);
274 #define check_ralign32( x86reg ) \
275 TEST_imm32_r32( 0x00000003, x86reg ); \
276 JNE_exc(EXC_DATA_ADDR_READ)
278 #define check_walign32( x86reg ) \
279 TEST_imm32_r32( 0x00000003, x86reg ); \
280 JNE_exc(EXC_DATA_ADDR_WRITE);
282 #define check_ralign64( x86reg ) \
283 TEST_imm32_r32( 0x00000007, x86reg ); \
284 JNE_exc(EXC_DATA_ADDR_READ)
286 #define check_walign64( x86reg ) \
287 TEST_imm32_r32( 0x00000007, x86reg ); \
288 JNE_exc(EXC_DATA_ADDR_WRITE);
/* Offset of a handler pointer within the mem_region_fn dispatch table. */
291 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
/* Move the read result out of EAX into the caller's destination register. */
292 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
293 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
294 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
/* With frame-address support we can choose at translation time between the
 * cheap call (privileged, no TLB -> no exception possible) and the
 * exception-aware call that records pc for backpatching. Without it, only
 * the plain call is available. NOTE(review): the #else/#endif directives of
 * this conditional are not visible in this excerpt. */
297 #ifdef HAVE_FRAME_ADDRESS
298 #define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
299 call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
300 call_func1_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, pc); }
301 #define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
302 call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
303 call_func2_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
305 #define _CALL_READ(addr_reg, fn) call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg)
306 #define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
/* Emitted memory accesses: decode the address (region dispatch into ECX),
 * call the region handler, and (for reads) move the result into value_reg. */
309 #define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
310 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte_for_write); MEM_RESULT(value_reg)
311 #define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
312 #define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
313 #define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
314 #define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
315 #define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)
316 #define MEM_PREFETCH( addr_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, prefetch)
/* Raise a slot-illegal exception for an instruction that is invalid in a
 * delay slot: exit the block at the branch (pc-2), clear delay-slot state,
 * and return 2 (end of basic block) from the translate function. */
318 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
320 /****** Import appropriate calling conventions ******/
321 #if SIZEOF_VOID_P == 8
322 #include "sh4/ia64abi.h"
323 #else /* 32-bit system */
324 #include "sh4/ia32abi.h"
/**
 * Reset per-block translator state at the start of a new translation block:
 * clear delay-slot/fpu-check/branch flags, rewind the backpatch list, record
 * the block's starting SH4 pc, and snapshot the TLB and FPSCR (PR/SZ) modes
 * the block will be translated under.
 * NOTE(review): the opening brace and any emitted prologue are not visible
 * in this excerpt.
 */
327 void sh4_translate_begin_block( sh4addr_t pc )
330 sh4_x86.in_delay_slot = FALSE;
331 sh4_x86.fpuen_checked = FALSE;
332 sh4_x86.branch_taken = FALSE;
333 sh4_x86.backpatch_posn = 0;
334 sh4_x86.block_start_pc = pc;
335 sh4_x86.tlb_on = IS_TLB_ENABLED();
336 sh4_x86.tstate = TSTATE_NONE;
337 sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
338 sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
/**
 * Upper bound on the code bytes the block epilogue will need: EPILOGUE_SIZE
 * plus 12 bytes per backpatched exception stub for the first three, and 15
 * bytes each beyond that (48 covers the first three plus presumably a
 * shared stub - TODO confirm against the epilogue emitter).
 */
342 uint32_t sh4_translate_end_block_size()
344 if( sh4_x86.backpatch_posn <= 3 ) {
345 return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
347 return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
353 * Embed a breakpoint into the generated code
/* Emits a call to sh4_translate_breakpoint_hit(pc); the call clobbers EAX,
 * so any cached condition state is invalidated. */
355 void sh4_translate_emit_breakpoint( sh4vma_t pc )
357 load_imm32( R_EAX, pc );
358 call_func1( sh4_translate_breakpoint_hit, R_EAX );
359 sh4_x86.tstate = TSTATE_NONE;
/* An instruction is untranslatable when it cannot be fetched through the
 * icache mapping (e.g. a delay slot crossing a page boundary). */
363 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
366 * Embed a call to sh4_execute_instruction for situations that we
367 * can't translate (just page-crossing delay slots at the moment).
368 * Caller is responsible for setting new_pc before calling this function.
372 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
373 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
374 * Call sh4_execute_instruction
375 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
377 void exit_block_emu( sh4vma_t endpc )
/* Advance sh4r.pc by the block-relative offset of endpc... */
379 load_imm32( R_ECX, endpc - sh4_x86.block_start_pc ); // 5
380 ADD_r32_sh4r( R_ECX, R_PC );
/* ...and charge slice_cycle for every instruction up to and including the
 * one the interpreter is about to execute. */
382 load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
383 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
/* Propagate translator delay-slot state into the interpreter's view. */
384 load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
385 store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
387 call_func0( sh4_execute_instruction );
/* Normal exit path: look up the next code block from the updated pc, via
 * the VMA lookup when the TLB is active, the direct lookup otherwise. */
388 load_spreg( R_EAX, R_PC );
389 if( sh4_x86.tlb_on ) {
390 call_func1(xlat_get_code_by_vma,R_EAX);
392 call_func1(xlat_get_code,R_EAX);
398 * Translate a single instruction. Delayed branches are handled specially
399 * by translating both branch and delayed instruction as a single unit (as
401 * The instruction MUST be in the icache (assert check)
403 * @return true if the instruction marks the end of a basic block
406 uint32_t sh4_translate_instruction( sh4vma_t pc )
409 /* Read instruction from icache */
410 assert( IS_IN_ICACHE(pc) );
411 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
413 if( !sh4_x86.in_delay_slot ) {
414 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
420 load_reg( R_EAX, Rm );
421 load_reg( R_ECX, Rn );
422 ADD_r32_r32( R_EAX, R_ECX );
423 store_reg( R_ECX, Rn );
424 sh4_x86.tstate = TSTATE_NONE;
428 ADD_imm8s_sh4r( imm, REG_OFFSET(r[Rn]) );
429 sh4_x86.tstate = TSTATE_NONE;
433 if( sh4_x86.tstate != TSTATE_C ) {
436 load_reg( R_EAX, Rm );
437 load_reg( R_ECX, Rn );
438 ADC_r32_r32( R_EAX, R_ECX );
439 store_reg( R_ECX, Rn );
441 sh4_x86.tstate = TSTATE_C;
445 load_reg( R_EAX, Rm );
446 load_reg( R_ECX, Rn );
447 ADD_r32_r32( R_EAX, R_ECX );
448 store_reg( R_ECX, Rn );
450 sh4_x86.tstate = TSTATE_O;
454 load_reg( R_EAX, Rm );
455 load_reg( R_ECX, Rn );
456 AND_r32_r32( R_EAX, R_ECX );
457 store_reg( R_ECX, Rn );
458 sh4_x86.tstate = TSTATE_NONE;
462 load_reg( R_EAX, 0 );
463 AND_imm32_r32(imm, R_EAX);
464 store_reg( R_EAX, 0 );
465 sh4_x86.tstate = TSTATE_NONE;
467 AND.B #imm, @(R0, GBR) {:
469 load_reg( R_EAX, 0 );
470 ADD_sh4r_r32( R_GBR, R_EAX );
471 MOV_r32_esp8(R_EAX, 0);
472 MEM_READ_BYTE_FOR_WRITE( R_EAX, R_EDX );
473 MOV_esp8_r32(0, R_EAX);
474 AND_imm32_r32(imm, R_EDX );
475 MEM_WRITE_BYTE( R_EAX, R_EDX );
476 sh4_x86.tstate = TSTATE_NONE;
480 load_reg( R_EAX, Rm );
481 load_reg( R_ECX, Rn );
482 CMP_r32_r32( R_EAX, R_ECX );
484 sh4_x86.tstate = TSTATE_E;
487 COUNT_INST(I_CMPEQI);
488 load_reg( R_EAX, 0 );
489 CMP_imm8s_r32(imm, R_EAX);
491 sh4_x86.tstate = TSTATE_E;
495 load_reg( R_EAX, Rm );
496 load_reg( R_ECX, Rn );
497 CMP_r32_r32( R_EAX, R_ECX );
499 sh4_x86.tstate = TSTATE_GE;
503 load_reg( R_EAX, Rm );
504 load_reg( R_ECX, Rn );
505 CMP_r32_r32( R_EAX, R_ECX );
507 sh4_x86.tstate = TSTATE_G;
511 load_reg( R_EAX, Rm );
512 load_reg( R_ECX, Rn );
513 CMP_r32_r32( R_EAX, R_ECX );
515 sh4_x86.tstate = TSTATE_A;
519 load_reg( R_EAX, Rm );
520 load_reg( R_ECX, Rn );
521 CMP_r32_r32( R_EAX, R_ECX );
523 sh4_x86.tstate = TSTATE_AE;
527 load_reg( R_EAX, Rn );
528 CMP_imm8s_r32( 0, R_EAX );
530 sh4_x86.tstate = TSTATE_G;
534 load_reg( R_EAX, Rn );
535 CMP_imm8s_r32( 0, R_EAX );
537 sh4_x86.tstate = TSTATE_GE;
540 COUNT_INST(I_CMPSTR);
541 load_reg( R_EAX, Rm );
542 load_reg( R_ECX, Rn );
543 XOR_r32_r32( R_ECX, R_EAX );
544 TEST_r8_r8( R_AL, R_AL );
546 TEST_r8_r8( R_AH, R_AH );
548 SHR_imm8_r32( 16, R_EAX );
549 TEST_r8_r8( R_AL, R_AL );
551 TEST_r8_r8( R_AH, R_AH );
556 sh4_x86.tstate = TSTATE_E;
560 load_reg( R_EAX, Rm );
561 load_reg( R_ECX, Rn );
562 SHR_imm8_r32( 31, R_EAX );
563 SHR_imm8_r32( 31, R_ECX );
564 store_spreg( R_EAX, R_M );
565 store_spreg( R_ECX, R_Q );
566 CMP_r32_r32( R_EAX, R_ECX );
568 sh4_x86.tstate = TSTATE_NE;
572 XOR_r32_r32( R_EAX, R_EAX );
573 store_spreg( R_EAX, R_Q );
574 store_spreg( R_EAX, R_M );
575 store_spreg( R_EAX, R_T );
576 sh4_x86.tstate = TSTATE_C; // works for DIV1
580 load_spreg( R_ECX, R_M );
581 load_reg( R_EAX, Rn );
582 if( sh4_x86.tstate != TSTATE_C ) {
586 SETC_r8( R_DL ); // Q'
587 CMP_sh4r_r32( R_Q, R_ECX );
589 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
592 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
594 store_reg( R_EAX, Rn ); // Done with Rn now
595 SETC_r8(R_AL); // tmp1
596 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
597 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
598 store_spreg( R_ECX, R_Q );
599 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
600 MOVZX_r8_r32( R_AL, R_EAX );
601 store_spreg( R_EAX, R_T );
602 sh4_x86.tstate = TSTATE_NONE;
606 load_reg( R_EAX, Rm );
607 load_reg( R_ECX, Rn );
609 store_spreg( R_EDX, R_MACH );
610 store_spreg( R_EAX, R_MACL );
611 sh4_x86.tstate = TSTATE_NONE;
615 load_reg( R_EAX, Rm );
616 load_reg( R_ECX, Rn );
618 store_spreg( R_EDX, R_MACH );
619 store_spreg( R_EAX, R_MACL );
620 sh4_x86.tstate = TSTATE_NONE;
624 load_reg( R_EAX, Rn );
625 ADD_imm8s_r32( -1, R_EAX );
626 store_reg( R_EAX, Rn );
628 sh4_x86.tstate = TSTATE_E;
632 load_reg( R_EAX, Rm );
633 MOVSX_r8_r32( R_EAX, R_EAX );
634 store_reg( R_EAX, Rn );
638 load_reg( R_EAX, Rm );
639 MOVSX_r16_r32( R_EAX, R_EAX );
640 store_reg( R_EAX, Rn );
644 load_reg( R_EAX, Rm );
645 MOVZX_r8_r32( R_EAX, R_EAX );
646 store_reg( R_EAX, Rn );
650 load_reg( R_EAX, Rm );
651 MOVZX_r16_r32( R_EAX, R_EAX );
652 store_reg( R_EAX, Rn );
657 load_reg( R_EAX, Rm );
658 check_ralign32( R_EAX );
659 MEM_READ_LONG( R_EAX, R_EAX );
660 MOV_r32_esp8(R_EAX, 0);
661 load_reg( R_EAX, Rm );
662 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
663 MEM_READ_LONG( R_EAX, R_EAX );
664 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
666 load_reg( R_EAX, Rm );
667 check_ralign32( R_EAX );
668 MEM_READ_LONG( R_EAX, R_EAX );
669 MOV_r32_esp8( R_EAX, 0 );
670 load_reg( R_EAX, Rn );
671 check_ralign32( R_EAX );
672 MEM_READ_LONG( R_EAX, R_EAX );
673 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
674 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
678 ADD_r32_sh4r( R_EAX, R_MACL );
679 ADC_r32_sh4r( R_EDX, R_MACH );
681 load_spreg( R_ECX, R_S );
682 TEST_r32_r32(R_ECX, R_ECX);
684 call_func0( signsat48 );
686 sh4_x86.tstate = TSTATE_NONE;
691 load_reg( R_EAX, Rm );
692 check_ralign16( R_EAX );
693 MEM_READ_WORD( R_EAX, R_EAX );
694 MOV_r32_esp8( R_EAX, 0 );
695 load_reg( R_EAX, Rm );
696 LEA_r32disp8_r32( R_EAX, 2, R_EAX );
697 MEM_READ_WORD( R_EAX, R_EAX );
698 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
699 // Note translate twice in case of page boundaries. Maybe worth
700 // adding a page-boundary check to skip the second translation
702 load_reg( R_EAX, Rm );
703 check_ralign16( R_EAX );
704 MEM_READ_WORD( R_EAX, R_EAX );
705 MOV_r32_esp8( R_EAX, 0 );
706 load_reg( R_EAX, Rn );
707 check_ralign16( R_EAX );
708 MEM_READ_WORD( R_EAX, R_EAX );
709 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
710 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
713 load_spreg( R_ECX, R_S );
714 TEST_r32_r32( R_ECX, R_ECX );
717 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
718 JNO_rel8( end ); // 2
719 load_imm32( R_EDX, 1 ); // 5
720 store_spreg( R_EDX, R_MACH ); // 6
721 JS_rel8( positive ); // 2
722 load_imm32( R_EAX, 0x80000000 );// 5
723 store_spreg( R_EAX, R_MACL ); // 6
726 JMP_TARGET(positive);
727 load_imm32( R_EAX, 0x7FFFFFFF );// 5
728 store_spreg( R_EAX, R_MACL ); // 6
732 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
733 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
737 sh4_x86.tstate = TSTATE_NONE;
741 load_spreg( R_EAX, R_T );
742 store_reg( R_EAX, Rn );
746 load_reg( R_EAX, Rm );
747 load_reg( R_ECX, Rn );
749 store_spreg( R_EAX, R_MACL );
750 sh4_x86.tstate = TSTATE_NONE;
754 load_reg16s( R_EAX, Rm );
755 load_reg16s( R_ECX, Rn );
757 store_spreg( R_EAX, R_MACL );
758 sh4_x86.tstate = TSTATE_NONE;
762 load_reg16u( R_EAX, Rm );
763 load_reg16u( R_ECX, Rn );
765 store_spreg( R_EAX, R_MACL );
766 sh4_x86.tstate = TSTATE_NONE;
770 load_reg( R_EAX, Rm );
772 store_reg( R_EAX, Rn );
773 sh4_x86.tstate = TSTATE_NONE;
777 load_reg( R_EAX, Rm );
778 XOR_r32_r32( R_ECX, R_ECX );
780 SBB_r32_r32( R_EAX, R_ECX );
781 store_reg( R_ECX, Rn );
783 sh4_x86.tstate = TSTATE_C;
787 load_reg( R_EAX, Rm );
789 store_reg( R_EAX, Rn );
790 sh4_x86.tstate = TSTATE_NONE;
794 load_reg( R_EAX, Rm );
795 load_reg( R_ECX, Rn );
796 OR_r32_r32( R_EAX, R_ECX );
797 store_reg( R_ECX, Rn );
798 sh4_x86.tstate = TSTATE_NONE;
802 load_reg( R_EAX, 0 );
803 OR_imm32_r32(imm, R_EAX);
804 store_reg( R_EAX, 0 );
805 sh4_x86.tstate = TSTATE_NONE;
807 OR.B #imm, @(R0, GBR) {:
809 load_reg( R_EAX, 0 );
810 ADD_sh4r_r32( R_GBR, R_EAX );
811 MOV_r32_esp8( R_EAX, 0 );
812 MEM_READ_BYTE_FOR_WRITE( R_EAX, R_EDX );
813 MOV_esp8_r32( 0, R_EAX );
814 OR_imm32_r32(imm, R_EDX );
815 MEM_WRITE_BYTE( R_EAX, R_EDX );
816 sh4_x86.tstate = TSTATE_NONE;
820 load_reg( R_EAX, Rn );
821 if( sh4_x86.tstate != TSTATE_C ) {
825 store_reg( R_EAX, Rn );
827 sh4_x86.tstate = TSTATE_C;
831 load_reg( R_EAX, Rn );
832 if( sh4_x86.tstate != TSTATE_C ) {
836 store_reg( R_EAX, Rn );
838 sh4_x86.tstate = TSTATE_C;
842 load_reg( R_EAX, Rn );
844 store_reg( R_EAX, Rn );
846 sh4_x86.tstate = TSTATE_C;
850 load_reg( R_EAX, Rn );
852 store_reg( R_EAX, Rn );
854 sh4_x86.tstate = TSTATE_C;
858 /* Annoyingly enough, not directly convertible */
859 load_reg( R_EAX, Rn );
860 load_reg( R_ECX, Rm );
861 CMP_imm32_r32( 0, R_ECX );
864 NEG_r32( R_ECX ); // 2
865 AND_imm8_r8( 0x1F, R_CL ); // 3
866 JE_rel8(emptysar); // 2
867 SAR_r32_CL( R_EAX ); // 2
870 JMP_TARGET(emptysar);
871 SAR_imm8_r32(31, R_EAX ); // 3
875 AND_imm8_r8( 0x1F, R_CL ); // 3
876 SHL_r32_CL( R_EAX ); // 2
879 store_reg( R_EAX, Rn );
880 sh4_x86.tstate = TSTATE_NONE;
884 load_reg( R_EAX, Rn );
885 load_reg( R_ECX, Rm );
886 CMP_imm32_r32( 0, R_ECX );
889 NEG_r32( R_ECX ); // 2
890 AND_imm8_r8( 0x1F, R_CL ); // 3
892 SHR_r32_CL( R_EAX ); // 2
895 JMP_TARGET(emptyshr);
896 XOR_r32_r32( R_EAX, R_EAX );
900 AND_imm8_r8( 0x1F, R_CL ); // 3
901 SHL_r32_CL( R_EAX ); // 2
904 store_reg( R_EAX, Rn );
905 sh4_x86.tstate = TSTATE_NONE;
909 load_reg( R_EAX, Rn );
912 store_reg( R_EAX, Rn );
913 sh4_x86.tstate = TSTATE_C;
917 load_reg( R_EAX, Rn );
920 store_reg( R_EAX, Rn );
921 sh4_x86.tstate = TSTATE_C;
925 load_reg( R_EAX, Rn );
928 store_reg( R_EAX, Rn );
929 sh4_x86.tstate = TSTATE_C;
933 load_reg( R_EAX, Rn );
934 SHL_imm8_r32( 2, R_EAX );
935 store_reg( R_EAX, Rn );
936 sh4_x86.tstate = TSTATE_NONE;
940 load_reg( R_EAX, Rn );
941 SHL_imm8_r32( 8, R_EAX );
942 store_reg( R_EAX, Rn );
943 sh4_x86.tstate = TSTATE_NONE;
947 load_reg( R_EAX, Rn );
948 SHL_imm8_r32( 16, R_EAX );
949 store_reg( R_EAX, Rn );
950 sh4_x86.tstate = TSTATE_NONE;
954 load_reg( R_EAX, Rn );
957 store_reg( R_EAX, Rn );
958 sh4_x86.tstate = TSTATE_C;
962 load_reg( R_EAX, Rn );
963 SHR_imm8_r32( 2, R_EAX );
964 store_reg( R_EAX, Rn );
965 sh4_x86.tstate = TSTATE_NONE;
969 load_reg( R_EAX, Rn );
970 SHR_imm8_r32( 8, R_EAX );
971 store_reg( R_EAX, Rn );
972 sh4_x86.tstate = TSTATE_NONE;
976 load_reg( R_EAX, Rn );
977 SHR_imm8_r32( 16, R_EAX );
978 store_reg( R_EAX, Rn );
979 sh4_x86.tstate = TSTATE_NONE;
983 load_reg( R_EAX, Rm );
984 load_reg( R_ECX, Rn );
985 SUB_r32_r32( R_EAX, R_ECX );
986 store_reg( R_ECX, Rn );
987 sh4_x86.tstate = TSTATE_NONE;
991 load_reg( R_EAX, Rm );
992 load_reg( R_ECX, Rn );
993 if( sh4_x86.tstate != TSTATE_C ) {
996 SBB_r32_r32( R_EAX, R_ECX );
997 store_reg( R_ECX, Rn );
999 sh4_x86.tstate = TSTATE_C;
1003 load_reg( R_EAX, Rm );
1004 load_reg( R_ECX, Rn );
1005 SUB_r32_r32( R_EAX, R_ECX );
1006 store_reg( R_ECX, Rn );
1008 sh4_x86.tstate = TSTATE_O;
1011 COUNT_INST(I_SWAPB);
1012 load_reg( R_EAX, Rm );
1013 XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
1014 store_reg( R_EAX, Rn );
1017 COUNT_INST(I_SWAPB);
1018 load_reg( R_EAX, Rm );
1019 MOV_r32_r32( R_EAX, R_ECX );
1020 SHL_imm8_r32( 16, R_ECX );
1021 SHR_imm8_r32( 16, R_EAX );
1022 OR_r32_r32( R_EAX, R_ECX );
1023 store_reg( R_ECX, Rn );
1024 sh4_x86.tstate = TSTATE_NONE;
1028 load_reg( R_EAX, Rn );
1029 MOV_r32_esp8( R_EAX, 0 );
1030 MEM_READ_BYTE_FOR_WRITE( R_EAX, R_EDX );
1031 TEST_r8_r8( R_DL, R_DL );
1033 OR_imm8_r8( 0x80, R_DL );
1034 MOV_esp8_r32( 0, R_EAX );
1035 MEM_WRITE_BYTE( R_EAX, R_EDX );
1036 sh4_x86.tstate = TSTATE_NONE;
1040 load_reg( R_EAX, Rm );
1041 load_reg( R_ECX, Rn );
1042 TEST_r32_r32( R_EAX, R_ECX );
1044 sh4_x86.tstate = TSTATE_E;
1048 load_reg( R_EAX, 0 );
1049 TEST_imm32_r32( imm, R_EAX );
1051 sh4_x86.tstate = TSTATE_E;
1053 TST.B #imm, @(R0, GBR) {:
1055 load_reg( R_EAX, 0);
1056 ADD_sh4r_r32( R_GBR, R_EAX );
1057 MEM_READ_BYTE( R_EAX, R_EAX );
1058 TEST_imm8_r8( imm, R_AL );
1060 sh4_x86.tstate = TSTATE_E;
1064 load_reg( R_EAX, Rm );
1065 load_reg( R_ECX, Rn );
1066 XOR_r32_r32( R_EAX, R_ECX );
1067 store_reg( R_ECX, Rn );
1068 sh4_x86.tstate = TSTATE_NONE;
1072 load_reg( R_EAX, 0 );
1073 XOR_imm32_r32( imm, R_EAX );
1074 store_reg( R_EAX, 0 );
1075 sh4_x86.tstate = TSTATE_NONE;
1077 XOR.B #imm, @(R0, GBR) {:
1079 load_reg( R_EAX, 0 );
1080 ADD_sh4r_r32( R_GBR, R_EAX );
1081 MOV_r32_esp8( R_EAX, 0 );
1082 MEM_READ_BYTE_FOR_WRITE(R_EAX, R_EDX);
1083 MOV_esp8_r32( 0, R_EAX );
1084 XOR_imm32_r32( imm, R_EDX );
1085 MEM_WRITE_BYTE( R_EAX, R_EDX );
1086 sh4_x86.tstate = TSTATE_NONE;
1089 COUNT_INST(I_XTRCT);
1090 load_reg( R_EAX, Rm );
1091 load_reg( R_ECX, Rn );
1092 SHL_imm8_r32( 16, R_EAX );
1093 SHR_imm8_r32( 16, R_ECX );
1094 OR_r32_r32( R_EAX, R_ECX );
1095 store_reg( R_ECX, Rn );
1096 sh4_x86.tstate = TSTATE_NONE;
1099 /* Data move instructions */
1102 load_reg( R_EAX, Rm );
1103 store_reg( R_EAX, Rn );
1107 load_imm32( R_EAX, imm );
1108 store_reg( R_EAX, Rn );
1112 load_reg( R_EAX, Rn );
1113 load_reg( R_EDX, Rm );
1114 MEM_WRITE_BYTE( R_EAX, R_EDX );
1115 sh4_x86.tstate = TSTATE_NONE;
1119 load_reg( R_EAX, Rn );
1120 LEA_r32disp8_r32( R_EAX, -1, R_EAX );
1121 load_reg( R_EDX, Rm );
1122 MEM_WRITE_BYTE( R_EAX, R_EDX );
1123 ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
1124 sh4_x86.tstate = TSTATE_NONE;
1126 MOV.B Rm, @(R0, Rn) {:
1128 load_reg( R_EAX, 0 );
1129 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1130 load_reg( R_EDX, Rm );
1131 MEM_WRITE_BYTE( R_EAX, R_EDX );
1132 sh4_x86.tstate = TSTATE_NONE;
1134 MOV.B R0, @(disp, GBR) {:
1136 load_spreg( R_EAX, R_GBR );
1137 ADD_imm32_r32( disp, R_EAX );
1138 load_reg( R_EDX, 0 );
1139 MEM_WRITE_BYTE( R_EAX, R_EDX );
1140 sh4_x86.tstate = TSTATE_NONE;
1142 MOV.B R0, @(disp, Rn) {:
1144 load_reg( R_EAX, Rn );
1145 ADD_imm32_r32( disp, R_EAX );
1146 load_reg( R_EDX, 0 );
1147 MEM_WRITE_BYTE( R_EAX, R_EDX );
1148 sh4_x86.tstate = TSTATE_NONE;
1152 load_reg( R_EAX, Rm );
1153 MEM_READ_BYTE( R_EAX, R_EAX );
1154 store_reg( R_EAX, Rn );
1155 sh4_x86.tstate = TSTATE_NONE;
1159 load_reg( R_EAX, Rm );
1160 MEM_READ_BYTE( R_EAX, R_EAX );
1162 ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
1164 store_reg( R_EAX, Rn );
1165 sh4_x86.tstate = TSTATE_NONE;
1167 MOV.B @(R0, Rm), Rn {:
1169 load_reg( R_EAX, 0 );
1170 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1171 MEM_READ_BYTE( R_EAX, R_EAX );
1172 store_reg( R_EAX, Rn );
1173 sh4_x86.tstate = TSTATE_NONE;
1175 MOV.B @(disp, GBR), R0 {:
1177 load_spreg( R_EAX, R_GBR );
1178 ADD_imm32_r32( disp, R_EAX );
1179 MEM_READ_BYTE( R_EAX, R_EAX );
1180 store_reg( R_EAX, 0 );
1181 sh4_x86.tstate = TSTATE_NONE;
1183 MOV.B @(disp, Rm), R0 {:
1185 load_reg( R_EAX, Rm );
1186 ADD_imm32_r32( disp, R_EAX );
1187 MEM_READ_BYTE( R_EAX, R_EAX );
1188 store_reg( R_EAX, 0 );
1189 sh4_x86.tstate = TSTATE_NONE;
1193 load_reg( R_EAX, Rn );
1194 check_walign32(R_EAX);
1195 MOV_r32_r32( R_EAX, R_ECX );
1196 AND_imm32_r32( 0xFC000000, R_ECX );
1197 CMP_imm32_r32( 0xE0000000, R_ECX );
1199 AND_imm8s_r32( 0x3C, R_EAX );
1200 load_reg( R_EDX, Rm );
1201 MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
1204 load_reg( R_EDX, Rm );
1205 MEM_WRITE_LONG( R_EAX, R_EDX );
1207 sh4_x86.tstate = TSTATE_NONE;
1211 load_reg( R_EAX, Rn );
1212 ADD_imm8s_r32( -4, R_EAX );
1213 check_walign32( R_EAX );
1214 load_reg( R_EDX, Rm );
1215 MEM_WRITE_LONG( R_EAX, R_EDX );
1216 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
1217 sh4_x86.tstate = TSTATE_NONE;
1219 MOV.L Rm, @(R0, Rn) {:
1221 load_reg( R_EAX, 0 );
1222 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1223 check_walign32( R_EAX );
1224 load_reg( R_EDX, Rm );
1225 MEM_WRITE_LONG( R_EAX, R_EDX );
1226 sh4_x86.tstate = TSTATE_NONE;
1228 MOV.L R0, @(disp, GBR) {:
1230 load_spreg( R_EAX, R_GBR );
1231 ADD_imm32_r32( disp, R_EAX );
1232 check_walign32( R_EAX );
1233 load_reg( R_EDX, 0 );
1234 MEM_WRITE_LONG( R_EAX, R_EDX );
1235 sh4_x86.tstate = TSTATE_NONE;
1237 MOV.L Rm, @(disp, Rn) {:
1239 load_reg( R_EAX, Rn );
1240 ADD_imm32_r32( disp, R_EAX );
1241 check_walign32( R_EAX );
1242 MOV_r32_r32( R_EAX, R_ECX );
1243 AND_imm32_r32( 0xFC000000, R_ECX );
1244 CMP_imm32_r32( 0xE0000000, R_ECX );
1246 AND_imm8s_r32( 0x3C, R_EAX );
1247 load_reg( R_EDX, Rm );
1248 MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
1251 load_reg( R_EDX, Rm );
1252 MEM_WRITE_LONG( R_EAX, R_EDX );
1254 sh4_x86.tstate = TSTATE_NONE;
1258 load_reg( R_EAX, Rm );
1259 check_ralign32( R_EAX );
1260 MEM_READ_LONG( R_EAX, R_EAX );
1261 store_reg( R_EAX, Rn );
1262 sh4_x86.tstate = TSTATE_NONE;
1266 load_reg( R_EAX, Rm );
1267 check_ralign32( R_EAX );
1268 MEM_READ_LONG( R_EAX, R_EAX );
1270 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1272 store_reg( R_EAX, Rn );
1273 sh4_x86.tstate = TSTATE_NONE;
1275 MOV.L @(R0, Rm), Rn {:
1277 load_reg( R_EAX, 0 );
1278 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1279 check_ralign32( R_EAX );
1280 MEM_READ_LONG( R_EAX, R_EAX );
1281 store_reg( R_EAX, Rn );
1282 sh4_x86.tstate = TSTATE_NONE;
1284 MOV.L @(disp, GBR), R0 {:
1286 load_spreg( R_EAX, R_GBR );
1287 ADD_imm32_r32( disp, R_EAX );
1288 check_ralign32( R_EAX );
1289 MEM_READ_LONG( R_EAX, R_EAX );
1290 store_reg( R_EAX, 0 );
1291 sh4_x86.tstate = TSTATE_NONE;
1293 MOV.L @(disp, PC), Rn {:
1294 COUNT_INST(I_MOVLPC);
1295 if( sh4_x86.in_delay_slot ) {
1298 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1299 if( IS_IN_ICACHE(target) ) {
1300 // If the target address is in the same page as the code, it's
1301 // pretty safe to just ref it directly and circumvent the whole
1302 // memory subsystem. (this is a big performance win)
1304 // FIXME: There's a corner-case that's not handled here when
1305 // the current code-page is in the ITLB but not in the UTLB.
1306 // (should generate a TLB miss although need to test SH4
1307 // behaviour to confirm) Unlikely to be anyone depending on this
1308 // behaviour though.
1309 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1310 MOV_moff32_EAX( ptr );
1312 // Note: we use sh4r.pc for the calc as we could be running at a
1313 // different virtual address than the translation was done with,
1314 // but we can safely assume that the low bits are the same.
1315 load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1316 ADD_sh4r_r32( R_PC, R_EAX );
1317 MEM_READ_LONG( R_EAX, R_EAX );
1318 sh4_x86.tstate = TSTATE_NONE;
1320 store_reg( R_EAX, Rn );
1323 MOV.L @(disp, Rm), Rn {:
1325 load_reg( R_EAX, Rm );
1326 ADD_imm8s_r32( disp, R_EAX );
1327 check_ralign32( R_EAX );
1328 MEM_READ_LONG( R_EAX, R_EAX );
1329 store_reg( R_EAX, Rn );
1330 sh4_x86.tstate = TSTATE_NONE;
1334 load_reg( R_EAX, Rn );
1335 check_walign16( R_EAX );
1336 load_reg( R_EDX, Rm );
1337 MEM_WRITE_WORD( R_EAX, R_EDX );
1338 sh4_x86.tstate = TSTATE_NONE;
1342 load_reg( R_EAX, Rn );
1343 check_walign16( R_EAX );
1344 LEA_r32disp8_r32( R_EAX, -2, R_EAX );
1345 load_reg( R_EDX, Rm );
1346 MEM_WRITE_WORD( R_EAX, R_EDX );
1347 ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
1348 sh4_x86.tstate = TSTATE_NONE;
1350 MOV.W Rm, @(R0, Rn) {:
1352 load_reg( R_EAX, 0 );
1353 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1354 check_walign16( R_EAX );
1355 load_reg( R_EDX, Rm );
1356 MEM_WRITE_WORD( R_EAX, R_EDX );
1357 sh4_x86.tstate = TSTATE_NONE;
1359 MOV.W R0, @(disp, GBR) {:
1361 load_spreg( R_EAX, R_GBR );
1362 ADD_imm32_r32( disp, R_EAX );
1363 check_walign16( R_EAX );
1364 load_reg( R_EDX, 0 );
1365 MEM_WRITE_WORD( R_EAX, R_EDX );
1366 sh4_x86.tstate = TSTATE_NONE;
1368 MOV.W R0, @(disp, Rn) {:
1370 load_reg( R_EAX, Rn );
1371 ADD_imm32_r32( disp, R_EAX );
1372 check_walign16( R_EAX );
1373 load_reg( R_EDX, 0 );
1374 MEM_WRITE_WORD( R_EAX, R_EDX );
1375 sh4_x86.tstate = TSTATE_NONE;
1379 load_reg( R_EAX, Rm );
1380 check_ralign16( R_EAX );
1381 MEM_READ_WORD( R_EAX, R_EAX );
1382 store_reg( R_EAX, Rn );
1383 sh4_x86.tstate = TSTATE_NONE;
1387 load_reg( R_EAX, Rm );
1388 check_ralign16( R_EAX );
1389 MEM_READ_WORD( R_EAX, R_EAX );
1391 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1393 store_reg( R_EAX, Rn );
1394 sh4_x86.tstate = TSTATE_NONE;
1396 MOV.W @(R0, Rm), Rn {:
// Load a sign-extended 16-bit word from [R0 + Rm] into Rn.
// NOTE(review): sign extension is presumably performed inside MEM_READ_WORD
// (cf. the explicit MOVSX in the @(disp,PC) form) — confirm.
1398 load_reg( R_EAX, 0 );
1399 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1400 check_ralign16( R_EAX );
1401 MEM_READ_WORD( R_EAX, R_EAX );
1402 store_reg( R_EAX, Rn );
1403 sh4_x86.tstate = TSTATE_NONE;
1405 MOV.W @(disp, GBR), R0 {:
// Load a 16-bit word from [GBR + disp] into R0.
1407 load_spreg( R_EAX, R_GBR );
1408 ADD_imm32_r32( disp, R_EAX );
// Address must be 2-byte aligned.
1409 check_ralign16( R_EAX );
1410 MEM_READ_WORD( R_EAX, R_EAX );
1411 store_reg( R_EAX, 0 );
1412 sh4_x86.tstate = TSTATE_NONE;
1414 MOV.W @(disp, PC), Rn {:
1416 if( sh4_x86.in_delay_slot ) {
1419 // See comments for MOV.L @(disp, PC), Rn
1420 uint32_t target = pc + disp + 4;
1421 if( IS_IN_ICACHE(target) ) {
1422 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1423 MOV_moff32_EAX( ptr );
1424 MOVSX_r16_r32( R_EAX, R_EAX );
1426 load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1427 ADD_sh4r_r32( R_PC, R_EAX );
1428 MEM_READ_WORD( R_EAX, R_EAX );
1429 sh4_x86.tstate = TSTATE_NONE;
1431 store_reg( R_EAX, Rn );
1434 MOV.W @(disp, Rm), R0 {:
// Load a 16-bit word from [Rm + disp] into R0 (R0 is the only legal
// destination for this addressing mode on SH4).
1436 load_reg( R_EAX, Rm );
1437 ADD_imm32_r32( disp, R_EAX );
1438 check_ralign16( R_EAX );
1439 MEM_READ_WORD( R_EAX, R_EAX );
1440 store_reg( R_EAX, 0 );
1441 sh4_x86.tstate = TSTATE_NONE;
1443 MOVA @(disp, PC), R0 {:
1445 if( sh4_x86.in_delay_slot ) {
// MOVA in a delay slot is a slot-illegal instruction; the error path is
// elided from this listing (NOTE(review): presumably SLOTILLEGAL()).
// Normal path: compute (PC & ~3) + disp + 4 at runtime. sh4r.pc is used as
// the base because the block may execute at a different virtual address
// than it was translated at; only the block-relative offset is an
// immediate. The PC-masking term (pc & 0x03) is folded into the constant.
1448 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1449 ADD_sh4r_r32( R_PC, R_ECX );
1450 store_reg( R_ECX, 0 );
1451 sh4_x86.tstate = TSTATE_NONE;
1455 COUNT_INST(I_MOVCA);
1456 load_reg( R_EAX, Rn );
1457 check_walign32( R_EAX );
1458 load_reg( R_EDX, 0 );
1459 MEM_WRITE_LONG( R_EAX, R_EDX );
1460 sh4_x86.tstate = TSTATE_NONE;
1463 /* Control transfer instructions */
1466 if( sh4_x86.in_delay_slot ) {
1469 sh4vma_t target = disp + pc + 4;
1470 JT_rel8( nottaken );
1471 exit_block_rel(target, pc+2 );
1472 JMP_TARGET(nottaken);
1478 if( sh4_x86.in_delay_slot ) {
1481 sh4_x86.in_delay_slot = DELAY_PC;
1482 if( UNTRANSLATABLE(pc+2) ) {
1483 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1485 ADD_imm32_r32( disp, R_EAX );
1486 JMP_TARGET(nottaken);
1487 ADD_sh4r_r32( R_PC, R_EAX );
1488 store_spreg( R_EAX, R_NEW_PC );
1489 exit_block_emu(pc+2);
1490 sh4_x86.branch_taken = TRUE;
1493 if( sh4_x86.tstate == TSTATE_NONE ) {
1494 CMP_imm8s_sh4r( 1, R_T );
1495 sh4_x86.tstate = TSTATE_E;
1497 sh4vma_t target = disp + pc + 4;
1498 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
1499 int save_tstate = sh4_x86.tstate;
1500 sh4_translate_instruction(pc+2);
1501 exit_block_rel( target, pc+4 );
1504 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1505 sh4_x86.tstate = save_tstate;
1506 sh4_translate_instruction(pc+2);
1513 if( sh4_x86.in_delay_slot ) {
1516 sh4_x86.in_delay_slot = DELAY_PC;
1517 sh4_x86.branch_taken = TRUE;
1518 if( UNTRANSLATABLE(pc+2) ) {
1519 load_spreg( R_EAX, R_PC );
1520 ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
1521 store_spreg( R_EAX, R_NEW_PC );
1522 exit_block_emu(pc+2);
1525 sh4_translate_instruction( pc + 2 );
1526 exit_block_rel( disp + pc + 4, pc+4 );
1533 if( sh4_x86.in_delay_slot ) {
1536 load_spreg( R_EAX, R_PC );
1537 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1538 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1539 store_spreg( R_EAX, R_NEW_PC );
1540 sh4_x86.in_delay_slot = DELAY_PC;
1541 sh4_x86.tstate = TSTATE_NONE;
1542 sh4_x86.branch_taken = TRUE;
1543 if( UNTRANSLATABLE(pc+2) ) {
1544 exit_block_emu(pc+2);
1547 sh4_translate_instruction( pc + 2 );
1548 exit_block_newpcset(pc+4);
1555 if( sh4_x86.in_delay_slot ) {
1558 load_spreg( R_EAX, R_PC );
1559 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1560 store_spreg( R_EAX, R_PR );
1561 sh4_x86.in_delay_slot = DELAY_PC;
1562 sh4_x86.branch_taken = TRUE;
1563 sh4_x86.tstate = TSTATE_NONE;
1564 if( UNTRANSLATABLE(pc+2) ) {
1565 ADD_imm32_r32( disp, R_EAX );
1566 store_spreg( R_EAX, R_NEW_PC );
1567 exit_block_emu(pc+2);
1570 sh4_translate_instruction( pc + 2 );
1571 exit_block_rel( disp + pc + 4, pc+4 );
1578 if( sh4_x86.in_delay_slot ) {
1581 load_spreg( R_EAX, R_PC );
1582 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1583 store_spreg( R_EAX, R_PR );
1584 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1585 store_spreg( R_EAX, R_NEW_PC );
1587 sh4_x86.in_delay_slot = DELAY_PC;
1588 sh4_x86.tstate = TSTATE_NONE;
1589 sh4_x86.branch_taken = TRUE;
1590 if( UNTRANSLATABLE(pc+2) ) {
1591 exit_block_emu(pc+2);
1594 sh4_translate_instruction( pc + 2 );
1595 exit_block_newpcset(pc+4);
1602 if( sh4_x86.in_delay_slot ) {
1605 sh4vma_t target = disp + pc + 4;
1606 JF_rel8( nottaken );
1607 exit_block_rel(target, pc+2 );
1608 JMP_TARGET(nottaken);
1614 if( sh4_x86.in_delay_slot ) {
1617 sh4_x86.in_delay_slot = DELAY_PC;
1618 if( UNTRANSLATABLE(pc+2) ) {
1619 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1621 ADD_imm32_r32( disp, R_EAX );
1622 JMP_TARGET(nottaken);
1623 ADD_sh4r_r32( R_PC, R_EAX );
1624 store_spreg( R_EAX, R_NEW_PC );
1625 exit_block_emu(pc+2);
1626 sh4_x86.branch_taken = TRUE;
1629 if( sh4_x86.tstate == TSTATE_NONE ) {
1630 CMP_imm8s_sh4r( 1, R_T );
1631 sh4_x86.tstate = TSTATE_E;
1633 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
1634 int save_tstate = sh4_x86.tstate;
1635 sh4_translate_instruction(pc+2);
1636 exit_block_rel( disp + pc + 4, pc+4 );
1638 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1639 sh4_x86.tstate = save_tstate;
1640 sh4_translate_instruction(pc+2);
1647 if( sh4_x86.in_delay_slot ) {
1650 load_reg( R_ECX, Rn );
1651 store_spreg( R_ECX, R_NEW_PC );
1652 sh4_x86.in_delay_slot = DELAY_PC;
1653 sh4_x86.branch_taken = TRUE;
1654 if( UNTRANSLATABLE(pc+2) ) {
1655 exit_block_emu(pc+2);
1658 sh4_translate_instruction(pc+2);
1659 exit_block_newpcset(pc+4);
1666 if( sh4_x86.in_delay_slot ) {
1669 load_spreg( R_EAX, R_PC );
1670 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1671 store_spreg( R_EAX, R_PR );
1672 load_reg( R_ECX, Rn );
1673 store_spreg( R_ECX, R_NEW_PC );
1674 sh4_x86.in_delay_slot = DELAY_PC;
1675 sh4_x86.branch_taken = TRUE;
1676 sh4_x86.tstate = TSTATE_NONE;
1677 if( UNTRANSLATABLE(pc+2) ) {
1678 exit_block_emu(pc+2);
1681 sh4_translate_instruction(pc+2);
1682 exit_block_newpcset(pc+4);
1689 if( sh4_x86.in_delay_slot ) {
1693 load_spreg( R_ECX, R_SPC );
1694 store_spreg( R_ECX, R_NEW_PC );
1695 load_spreg( R_EAX, R_SSR );
1696 call_func1( sh4_write_sr, R_EAX );
1697 sh4_x86.in_delay_slot = DELAY_PC;
1698 sh4_x86.fpuen_checked = FALSE;
1699 sh4_x86.tstate = TSTATE_NONE;
1700 sh4_x86.branch_taken = TRUE;
1701 if( UNTRANSLATABLE(pc+2) ) {
1702 exit_block_emu(pc+2);
1705 sh4_translate_instruction(pc+2);
1706 exit_block_newpcset(pc+4);
1713 if( sh4_x86.in_delay_slot ) {
1716 load_spreg( R_ECX, R_PR );
1717 store_spreg( R_ECX, R_NEW_PC );
1718 sh4_x86.in_delay_slot = DELAY_PC;
1719 sh4_x86.branch_taken = TRUE;
1720 if( UNTRANSLATABLE(pc+2) ) {
1721 exit_block_emu(pc+2);
1724 sh4_translate_instruction(pc+2);
1725 exit_block_newpcset(pc+4);
1731 COUNT_INST(I_TRAPA);
1732 if( sh4_x86.in_delay_slot ) {
1735 load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1736 ADD_r32_sh4r( R_ECX, R_PC );
1737 load_imm32( R_EAX, imm );
1738 call_func1( sh4_raise_trap, R_EAX );
1739 sh4_x86.tstate = TSTATE_NONE;
1740 exit_block_pcset(pc+2);
1741 sh4_x86.branch_taken = TRUE;
1746 COUNT_INST(I_UNDEF);
1747 if( sh4_x86.in_delay_slot ) {
1748 exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);
1750 exit_block_exc(EXC_ILLEGAL, pc);
1756 COUNT_INST(I_CLRMAC);
1757 XOR_r32_r32(R_EAX, R_EAX);
1758 store_spreg( R_EAX, R_MACL );
1759 store_spreg( R_EAX, R_MACH );
1760 sh4_x86.tstate = TSTATE_NONE;
1766 sh4_x86.tstate = TSTATE_NONE;
1772 sh4_x86.tstate = TSTATE_C;
1778 sh4_x86.tstate = TSTATE_NONE;
1784 sh4_x86.tstate = TSTATE_C;
1787 /* Floating point moves */
1789 COUNT_INST(I_FMOV1);
1791 if( sh4_x86.double_size ) {
1792 load_dr0( R_EAX, FRm );
1793 load_dr1( R_ECX, FRm );
1794 store_dr0( R_EAX, FRn );
1795 store_dr1( R_ECX, FRn );
1797 load_fr( R_EAX, FRm ); // SZ=0 branch
1798 store_fr( R_EAX, FRn );
1802 COUNT_INST(I_FMOV2);
1804 load_reg( R_EAX, Rn );
1805 if( sh4_x86.double_size ) {
1806 check_walign64( R_EAX );
1807 load_dr0( R_EDX, FRm );
1808 MEM_WRITE_LONG( R_EAX, R_EDX );
1809 load_reg( R_EAX, Rn );
1810 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1811 load_dr1( R_EDX, FRm );
1812 MEM_WRITE_LONG( R_EAX, R_EDX );
1814 check_walign32( R_EAX );
1815 load_fr( R_EDX, FRm );
1816 MEM_WRITE_LONG( R_EAX, R_EDX );
1818 sh4_x86.tstate = TSTATE_NONE;
1821 COUNT_INST(I_FMOV5);
1823 load_reg( R_EAX, Rm );
1824 if( sh4_x86.double_size ) {
1825 check_ralign64( R_EAX );
1826 MEM_READ_LONG( R_EAX, R_EAX );
1827 store_dr0( R_EAX, FRn );
1828 load_reg( R_EAX, Rm );
1829 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1830 MEM_READ_LONG( R_EAX, R_EAX );
1831 store_dr1( R_EAX, FRn );
1833 check_ralign32( R_EAX );
1834 MEM_READ_LONG( R_EAX, R_EAX );
1835 store_fr( R_EAX, FRn );
1837 sh4_x86.tstate = TSTATE_NONE;
1840 COUNT_INST(I_FMOV3);
1842 load_reg( R_EAX, Rn );
1843 if( sh4_x86.double_size ) {
1844 check_walign64( R_EAX );
1845 LEA_r32disp8_r32( R_EAX, -8, R_EAX );
1846 load_dr0( R_EDX, FRm );
1847 MEM_WRITE_LONG( R_EAX, R_EDX );
1848 load_reg( R_EAX, Rn );
1849 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
1850 load_dr1( R_EDX, FRm );
1851 MEM_WRITE_LONG( R_EAX, R_EDX );
1852 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1854 check_walign32( R_EAX );
1855 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
1856 load_fr( R_EDX, FRm );
1857 MEM_WRITE_LONG( R_EAX, R_EDX );
1858 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1860 sh4_x86.tstate = TSTATE_NONE;
1863 COUNT_INST(I_FMOV6);
1865 load_reg( R_EAX, Rm );
1866 if( sh4_x86.double_size ) {
1867 check_ralign64( R_EAX );
1868 MEM_READ_LONG( R_EAX, R_EAX );
1869 store_dr0( R_EAX, FRn );
1870 load_reg( R_EAX, Rm );
1871 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1872 MEM_READ_LONG( R_EAX, R_EAX );
1873 store_dr1( R_EAX, FRn );
1874 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1876 check_ralign32( R_EAX );
1877 MEM_READ_LONG( R_EAX, R_EAX );
1878 store_fr( R_EAX, FRn );
1879 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1881 sh4_x86.tstate = TSTATE_NONE;
1883 FMOV FRm, @(R0, Rn) {:
1884 COUNT_INST(I_FMOV4);
// Store FRm (or the DRm pair when FPSCR.SZ=1) to [R0 + Rn].
// NOTE(review): an elided line here is presumably check_fpuen() — confirm.
1886 load_reg( R_EAX, Rn );
1887 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1888 if( sh4_x86.double_size ) {
// SZ=1: 64-bit move, emitted as two 32-bit writes. The effective address
// is recomputed before the second write because MEM_WRITE_LONG does not
// preserve EAX across the call.
1889 check_walign64( R_EAX );
1890 load_dr0( R_EDX, FRm );
1891 MEM_WRITE_LONG( R_EAX, R_EDX );
1892 load_reg( R_EAX, Rn );
1893 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1894 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1895 load_dr1( R_EDX, FRm );
1896 MEM_WRITE_LONG( R_EAX, R_EDX );
// SZ=0: single 32-bit store (the `} else {` line is elided in this listing).
1898 check_walign32( R_EAX );
1899 load_fr( R_EDX, FRm );
1900 MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
1902 sh4_x86.tstate = TSTATE_NONE;
1904 FMOV @(R0, Rm), FRn {:
1905 COUNT_INST(I_FMOV7);
// Load FRn (or the DRn pair when FPSCR.SZ=1) from [R0 + Rm].
// NOTE(review): an elided line here is presumably check_fpuen() — confirm.
1907 load_reg( R_EAX, Rm );
1908 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1909 if( sh4_x86.double_size ) {
// SZ=1: two 32-bit reads; address recomputed for the +4 half because
// MEM_READ_LONG returns its result in EAX, destroying the address.
1910 check_ralign64( R_EAX );
1911 MEM_READ_LONG( R_EAX, R_EAX );
1912 store_dr0( R_EAX, FRn );
1913 load_reg( R_EAX, Rm );
1914 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1915 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1916 MEM_READ_LONG( R_EAX, R_EAX );
1917 store_dr1( R_EAX, FRn );
// SZ=0: single 32-bit load (the `} else {` line is elided in this listing).
1919 check_ralign32( R_EAX );
1920 MEM_READ_LONG( R_EAX, R_EAX );
1921 store_fr( R_EAX, FRn );
1923 sh4_x86.tstate = TSTATE_NONE;
1925 FLDI0 FRn {: /* IFF PR=0 */
1926 COUNT_INST(I_FLDI0);
// Load +0.0f into FRn. Only defined for single-precision mode (PR=0);
// in double-precision mode the instruction is emitted as a no-op here.
1928 if( sh4_x86.double_prec == 0 ) {
// 0x00000000 is the IEEE-754 single-precision encoding of +0.0.
1929 XOR_r32_r32( R_EAX, R_EAX );
1930 store_fr( R_EAX, FRn );
1932 sh4_x86.tstate = TSTATE_NONE;
1934 FLDI1 FRn {: /* IFF PR=0 */
1935 COUNT_INST(I_FLDI1);
// Load +1.0f into FRn; 0x3F800000 is the IEEE-754 single-precision
// bit pattern for 1.0. As with FLDI0, a no-op when PR=1.
1937 if( sh4_x86.double_prec == 0 ) {
1938 load_imm32(R_EAX, 0x3F800000);
1939 store_fr( R_EAX, FRn );
1944 COUNT_INST(I_FLOAT);
1947 if( sh4_x86.double_prec ) {
1956 if( sh4_x86.double_prec ) {
1961 load_ptr( R_ECX, &max_int );
1962 FILD_r32ind( R_ECX );
1965 load_ptr( R_ECX, &min_int ); // 5
1966 FILD_r32ind( R_ECX ); // 2
1968 JAE_rel8( sat2 ); // 2
1969 load_ptr( R_EAX, &save_fcw );
1970 FNSTCW_r32ind( R_EAX );
1971 load_ptr( R_EDX, &trunc_fcw );
1972 FLDCW_r32ind( R_EDX );
1973 FISTP_sh4r(R_FPUL); // 3
1974 FLDCW_r32ind( R_EAX );
1979 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
1980 store_spreg( R_ECX, R_FPUL );
1983 sh4_x86.tstate = TSTATE_NONE;
1988 load_fr( R_EAX, FRm );
1989 store_spreg( R_EAX, R_FPUL );
1994 load_spreg( R_EAX, R_FPUL );
1995 store_fr( R_EAX, FRn );
1998 COUNT_INST(I_FCNVDS);
2000 if( sh4_x86.double_prec ) {
2006 COUNT_INST(I_FCNVSD);
2008 if( sh4_x86.double_prec ) {
2014 /* Floating point instructions */
2018 if( sh4_x86.double_prec ) {
2031 if( sh4_x86.double_prec ) {
2046 if( sh4_x86.double_prec ) {
2058 FMAC FR0, FRm, FRn {:
2061 if( sh4_x86.double_prec ) {
2081 if( sh4_x86.double_prec ) {
2096 if( sh4_x86.double_prec ) {
2107 COUNT_INST(I_FSRRA);
2109 if( sh4_x86.double_prec == 0 ) {
2118 COUNT_INST(I_FSQRT);
2120 if( sh4_x86.double_prec ) {
2133 if( sh4_x86.double_prec ) {
2147 COUNT_INST(I_FCMPEQ);
2149 if( sh4_x86.double_prec ) {
2159 sh4_x86.tstate = TSTATE_E;
2162 COUNT_INST(I_FCMPGT);
2164 if( sh4_x86.double_prec ) {
2174 sh4_x86.tstate = TSTATE_A;
2180 if( sh4_x86.double_prec == 0 ) {
2181 LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
2182 load_spreg( R_EAX, R_FPUL );
2183 call_func2( sh4_fsca, R_EAX, R_EDX );
2185 sh4_x86.tstate = TSTATE_NONE;
2190 if( sh4_x86.double_prec == 0 ) {
2191 if( sh4_x86.sse3_enabled ) {
2192 MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
2193 MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
2194 HADDPS_xmm_xmm( 4, 4 );
2195 HADDPS_xmm_xmm( 4, 4 );
2196 MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
2201 push_fr( (FVm<<2)+1);
2202 push_fr( (FVn<<2)+1);
2205 push_fr( (FVm<<2)+2);
2206 push_fr( (FVn<<2)+2);
2209 push_fr( (FVm<<2)+3);
2210 push_fr( (FVn<<2)+3);
2213 pop_fr( (FVn<<2)+3);
2220 if( sh4_x86.double_prec == 0 ) {
2221 if( sh4_x86.sse3_enabled ) {
2222 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1 M0 M3 M2
2223 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5 M4 M7 M6
2224 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9 M8 M11 M10
2225 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
2227 MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
2228 MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
2229 MOVAPS_xmm_xmm( 4, 6 );
2230 MOVAPS_xmm_xmm( 5, 7 );
2231 MOVLHPS_xmm_xmm( 4, 4 ); // V1 V1 V1 V1
2232 MOVHLPS_xmm_xmm( 6, 6 ); // V3 V3 V3 V3
2233 MOVLHPS_xmm_xmm( 5, 5 ); // V0 V0 V0 V0
2234 MOVHLPS_xmm_xmm( 7, 7 ); // V2 V2 V2 V2
2235 MULPS_xmm_xmm( 0, 4 );
2236 MULPS_xmm_xmm( 1, 5 );
2237 MULPS_xmm_xmm( 2, 6 );
2238 MULPS_xmm_xmm( 3, 7 );
2239 ADDPS_xmm_xmm( 5, 4 );
2240 ADDPS_xmm_xmm( 7, 6 );
2241 ADDPS_xmm_xmm( 6, 4 );
2242 MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
2244 LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
2245 call_func1( sh4_ftrv, R_EAX );
2248 sh4_x86.tstate = TSTATE_NONE;
2252 COUNT_INST(I_FRCHG);
2254 XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
2255 call_func0( sh4_switch_fr_banks );
2256 sh4_x86.tstate = TSTATE_NONE;
2259 COUNT_INST(I_FSCHG);
2261 XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR);
2262 XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
2263 sh4_x86.tstate = TSTATE_NONE;
2264 sh4_x86.double_size = !sh4_x86.double_size;
2267 /* Processor control instructions */
2269 COUNT_INST(I_LDCSR);
2270 if( sh4_x86.in_delay_slot ) {
2274 load_reg( R_EAX, Rm );
2275 call_func1( sh4_write_sr, R_EAX );
2276 sh4_x86.fpuen_checked = FALSE;
2277 sh4_x86.tstate = TSTATE_NONE;
2283 load_reg( R_EAX, Rm );
2284 store_spreg( R_EAX, R_GBR );
2289 load_reg( R_EAX, Rm );
2290 store_spreg( R_EAX, R_VBR );
2291 sh4_x86.tstate = TSTATE_NONE;
2296 load_reg( R_EAX, Rm );
2297 store_spreg( R_EAX, R_SSR );
2298 sh4_x86.tstate = TSTATE_NONE;
2303 load_reg( R_EAX, Rm );
2304 store_spreg( R_EAX, R_SGR );
2305 sh4_x86.tstate = TSTATE_NONE;
2310 load_reg( R_EAX, Rm );
2311 store_spreg( R_EAX, R_SPC );
2312 sh4_x86.tstate = TSTATE_NONE;
2317 load_reg( R_EAX, Rm );
2318 store_spreg( R_EAX, R_DBR );
2319 sh4_x86.tstate = TSTATE_NONE;
2324 load_reg( R_EAX, Rm );
2325 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2326 sh4_x86.tstate = TSTATE_NONE;
2330 load_reg( R_EAX, Rm );
2331 check_ralign32( R_EAX );
2332 MEM_READ_LONG( R_EAX, R_EAX );
2333 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2334 store_spreg( R_EAX, R_GBR );
2335 sh4_x86.tstate = TSTATE_NONE;
2338 COUNT_INST(I_LDCSRM);
2339 if( sh4_x86.in_delay_slot ) {
2343 load_reg( R_EAX, Rm );
2344 check_ralign32( R_EAX );
2345 MEM_READ_LONG( R_EAX, R_EAX );
2346 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2347 call_func1( sh4_write_sr, R_EAX );
2348 sh4_x86.fpuen_checked = FALSE;
2349 sh4_x86.tstate = TSTATE_NONE;
2356 load_reg( R_EAX, Rm );
2357 check_ralign32( R_EAX );
2358 MEM_READ_LONG( R_EAX, R_EAX );
2359 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2360 store_spreg( R_EAX, R_VBR );
2361 sh4_x86.tstate = TSTATE_NONE;
2366 load_reg( R_EAX, Rm );
2367 check_ralign32( R_EAX );
2368 MEM_READ_LONG( R_EAX, R_EAX );
2369 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2370 store_spreg( R_EAX, R_SSR );
2371 sh4_x86.tstate = TSTATE_NONE;
2376 load_reg( R_EAX, Rm );
2377 check_ralign32( R_EAX );
2378 MEM_READ_LONG( R_EAX, R_EAX );
2379 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2380 store_spreg( R_EAX, R_SGR );
2381 sh4_x86.tstate = TSTATE_NONE;
2386 load_reg( R_EAX, Rm );
2387 check_ralign32( R_EAX );
2388 MEM_READ_LONG( R_EAX, R_EAX );
2389 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2390 store_spreg( R_EAX, R_SPC );
2391 sh4_x86.tstate = TSTATE_NONE;
2396 load_reg( R_EAX, Rm );
2397 check_ralign32( R_EAX );
2398 MEM_READ_LONG( R_EAX, R_EAX );
2399 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2400 store_spreg( R_EAX, R_DBR );
2401 sh4_x86.tstate = TSTATE_NONE;
2403 LDC.L @Rm+, Rn_BANK {:
// Pop a 32-bit value from @Rm (post-increment) into banked register Rn_BANK.
// NOTE(review): the elided lines here presumably include check_priv() —
// LDC to a banked register is privileged on SH4; confirm.
2406 load_reg( R_EAX, Rm );
2407 check_ralign32( R_EAX );
2408 MEM_READ_LONG( R_EAX, R_EAX );
// Post-increment is applied directly to the in-memory sh4r copy of Rm.
2409 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2410 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2411 sh4_x86.tstate = TSTATE_NONE;
2414 COUNT_INST(I_LDSFPSCR);
2416 load_reg( R_EAX, Rm );
2417 call_func1( sh4_write_fpscr, R_EAX );
2418 sh4_x86.tstate = TSTATE_NONE;
2421 LDS.L @Rm+, FPSCR {:
2422 COUNT_INST(I_LDSFPSCRM);
// Pop a 32-bit value from @Rm (post-increment) into FPSCR. Goes through
// sh4_write_fpscr() rather than a raw store because FPSCR writes have side
// effects (bank switch, PR/SZ mode changes) the translator must track.
// NOTE(review): an elided line here is presumably check_fpuen() — confirm.
2424 load_reg( R_EAX, Rm );
2425 check_ralign32( R_EAX );
2426 MEM_READ_LONG( R_EAX, R_EAX );
2427 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2428 call_func1( sh4_write_fpscr, R_EAX );
2429 sh4_x86.tstate = TSTATE_NONE;
2435 load_reg( R_EAX, Rm );
2436 store_spreg( R_EAX, R_FPUL );
2441 load_reg( R_EAX, Rm );
2442 check_ralign32( R_EAX );
2443 MEM_READ_LONG( R_EAX, R_EAX );
2444 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2445 store_spreg( R_EAX, R_FPUL );
2446 sh4_x86.tstate = TSTATE_NONE;
2450 load_reg( R_EAX, Rm );
2451 store_spreg( R_EAX, R_MACH );
2455 load_reg( R_EAX, Rm );
2456 check_ralign32( R_EAX );
2457 MEM_READ_LONG( R_EAX, R_EAX );
2458 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2459 store_spreg( R_EAX, R_MACH );
2460 sh4_x86.tstate = TSTATE_NONE;
2464 load_reg( R_EAX, Rm );
2465 store_spreg( R_EAX, R_MACL );
2469 load_reg( R_EAX, Rm );
2470 check_ralign32( R_EAX );
2471 MEM_READ_LONG( R_EAX, R_EAX );
2472 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2473 store_spreg( R_EAX, R_MACL );
2474 sh4_x86.tstate = TSTATE_NONE;
2478 load_reg( R_EAX, Rm );
2479 store_spreg( R_EAX, R_PR );
2483 load_reg( R_EAX, Rm );
2484 check_ralign32( R_EAX );
2485 MEM_READ_LONG( R_EAX, R_EAX );
2486 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2487 store_spreg( R_EAX, R_PR );
2488 sh4_x86.tstate = TSTATE_NONE;
2491 COUNT_INST(I_LDTLB);
2492 call_func0( MMU_ldtlb );
2493 sh4_x86.tstate = TSTATE_NONE;
2502 COUNT_INST(I_OCBWB);
2506 load_reg( R_EAX, Rn );
2507 MEM_PREFETCH( R_EAX );
2508 sh4_x86.tstate = TSTATE_NONE;
2511 COUNT_INST(I_SLEEP);
2513 call_func0( sh4_sleep );
2514 sh4_x86.tstate = TSTATE_NONE;
2515 sh4_x86.in_delay_slot = DELAY_NONE;
2519 COUNT_INST(I_STCSR);
2521 call_func0(sh4_read_sr);
2522 store_reg( R_EAX, Rn );
2523 sh4_x86.tstate = TSTATE_NONE;
2527 load_spreg( R_EAX, R_GBR );
2528 store_reg( R_EAX, Rn );
2533 load_spreg( R_EAX, R_VBR );
2534 store_reg( R_EAX, Rn );
2535 sh4_x86.tstate = TSTATE_NONE;
2540 load_spreg( R_EAX, R_SSR );
2541 store_reg( R_EAX, Rn );
2542 sh4_x86.tstate = TSTATE_NONE;
2547 load_spreg( R_EAX, R_SPC );
2548 store_reg( R_EAX, Rn );
2549 sh4_x86.tstate = TSTATE_NONE;
2554 load_spreg( R_EAX, R_SGR );
2555 store_reg( R_EAX, Rn );
2556 sh4_x86.tstate = TSTATE_NONE;
2561 load_spreg( R_EAX, R_DBR );
2562 store_reg( R_EAX, Rn );
2563 sh4_x86.tstate = TSTATE_NONE;
2568 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2569 store_reg( R_EAX, Rn );
2570 sh4_x86.tstate = TSTATE_NONE;
2573 COUNT_INST(I_STCSRM);
2575 call_func0( sh4_read_sr );
2576 MOV_r32_r32( R_EAX, R_EDX );
2577 load_reg( R_EAX, Rn );
2578 check_walign32( R_EAX );
2579 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
2580 MEM_WRITE_LONG( R_EAX, R_EDX );
2581 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2582 sh4_x86.tstate = TSTATE_NONE;
2587 load_reg( R_EAX, Rn );
2588 check_walign32( R_EAX );
2589 ADD_imm8s_r32( -4, R_EAX );
2590 load_spreg( R_EDX, R_VBR );
2591 MEM_WRITE_LONG( R_EAX, R_EDX );
2592 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2593 sh4_x86.tstate = TSTATE_NONE;
2598 load_reg( R_EAX, Rn );
2599 check_walign32( R_EAX );
2600 ADD_imm8s_r32( -4, R_EAX );
2601 load_spreg( R_EDX, R_SSR );
2602 MEM_WRITE_LONG( R_EAX, R_EDX );
2603 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2604 sh4_x86.tstate = TSTATE_NONE;
2609 load_reg( R_EAX, Rn );
2610 check_walign32( R_EAX );
2611 ADD_imm8s_r32( -4, R_EAX );
2612 load_spreg( R_EDX, R_SPC );
2613 MEM_WRITE_LONG( R_EAX, R_EDX );
2614 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2615 sh4_x86.tstate = TSTATE_NONE;
2620 load_reg( R_EAX, Rn );
2621 check_walign32( R_EAX );
2622 ADD_imm8s_r32( -4, R_EAX );
2623 load_spreg( R_EDX, R_SGR );
2624 MEM_WRITE_LONG( R_EAX, R_EDX );
2625 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2626 sh4_x86.tstate = TSTATE_NONE;
2631 load_reg( R_EAX, Rn );
2632 check_walign32( R_EAX );
2633 ADD_imm8s_r32( -4, R_EAX );
2634 load_spreg( R_EDX, R_DBR );
2635 MEM_WRITE_LONG( R_EAX, R_EDX );
2636 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2637 sh4_x86.tstate = TSTATE_NONE;
2639 STC.L Rm_BANK, @-Rn {:
// Push banked register Rm_BANK to @-Rn (pre-decrement).
// NOTE(review): the elided lines here presumably include check_priv() —
// STC of a banked register is privileged; confirm.
2642 load_reg( R_EAX, Rn );
2643 check_walign32( R_EAX );
// Compute the decremented address in EAX but only commit the decrement to
// sh4r.r[Rn] after the write succeeds, so a write fault leaves Rn intact.
2644 ADD_imm8s_r32( -4, R_EAX );
2645 load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
2646 MEM_WRITE_LONG( R_EAX, R_EDX );
2647 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2648 sh4_x86.tstate = TSTATE_NONE;
2652 load_reg( R_EAX, Rn );
2653 check_walign32( R_EAX );
2654 ADD_imm8s_r32( -4, R_EAX );
2655 load_spreg( R_EDX, R_GBR );
2656 MEM_WRITE_LONG( R_EAX, R_EDX );
2657 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2658 sh4_x86.tstate = TSTATE_NONE;
2661 COUNT_INST(I_STSFPSCR);
2663 load_spreg( R_EAX, R_FPSCR );
2664 store_reg( R_EAX, Rn );
2666 STS.L FPSCR, @-Rn {:
2667 COUNT_INST(I_STSFPSCRM);
// Push FPSCR to @-Rn (pre-decrement).
// NOTE(review): an elided line here is presumably check_fpuen() — confirm.
2669 load_reg( R_EAX, Rn );
2670 check_walign32( R_EAX );
2671 ADD_imm8s_r32( -4, R_EAX );
2672 load_spreg( R_EDX, R_FPSCR );
2673 MEM_WRITE_LONG( R_EAX, R_EDX );
// Rn is only decremented after the write, matching the other @-Rn forms.
2674 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2675 sh4_x86.tstate = TSTATE_NONE;
2680 load_spreg( R_EAX, R_FPUL );
2681 store_reg( R_EAX, Rn );
2686 load_reg( R_EAX, Rn );
2687 check_walign32( R_EAX );
2688 ADD_imm8s_r32( -4, R_EAX );
2689 load_spreg( R_EDX, R_FPUL );
2690 MEM_WRITE_LONG( R_EAX, R_EDX );
2691 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2692 sh4_x86.tstate = TSTATE_NONE;
2696 load_spreg( R_EAX, R_MACH );
2697 store_reg( R_EAX, Rn );
2701 load_reg( R_EAX, Rn );
2702 check_walign32( R_EAX );
2703 ADD_imm8s_r32( -4, R_EAX );
2704 load_spreg( R_EDX, R_MACH );
2705 MEM_WRITE_LONG( R_EAX, R_EDX );
2706 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2707 sh4_x86.tstate = TSTATE_NONE;
2711 load_spreg( R_EAX, R_MACL );
2712 store_reg( R_EAX, Rn );
2716 load_reg( R_EAX, Rn );
2717 check_walign32( R_EAX );
2718 ADD_imm8s_r32( -4, R_EAX );
2719 load_spreg( R_EDX, R_MACL );
2720 MEM_WRITE_LONG( R_EAX, R_EDX );
2721 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2722 sh4_x86.tstate = TSTATE_NONE;
2726 load_spreg( R_EAX, R_PR );
2727 store_reg( R_EAX, Rn );
2731 load_reg( R_EAX, Rn );
2732 check_walign32( R_EAX );
2733 ADD_imm8s_r32( -4, R_EAX );
2734 load_spreg( R_EDX, R_PR );
2735 MEM_WRITE_LONG( R_EAX, R_EDX );
2736 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2737 sh4_x86.tstate = TSTATE_NONE;
2742 /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
2745 sh4_x86.in_delay_slot = DELAY_NONE;
.