4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
8 * Copyright (c) 2007 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
29 #include "sh4/sh4core.h"
30 #include "sh4/sh4dasm.h"
31 #include "sh4/sh4trans.h"
32 #include "sh4/sh4stat.h"
33 #include "sh4/sh4mmio.h"
35 #include "xlat/xltcache.h"
36 #include "xlat/x86/x86op.h"
37 #include "xlat/xlatdasm.h"
40 #define DEFAULT_BACKPATCH_SIZE 4096
42 /* Offset of a reg relative to the sh4r structure */
43 #define REG_OFFSET(reg) (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
45 #define R_T REG_OFFSET(t)
46 #define R_Q REG_OFFSET(q)
47 #define R_S REG_OFFSET(s)
48 #define R_M REG_OFFSET(m)
49 #define R_SR REG_OFFSET(sr)
50 #define R_GBR REG_OFFSET(gbr)
51 #define R_SSR REG_OFFSET(ssr)
52 #define R_SPC REG_OFFSET(spc)
53 #define R_VBR REG_OFFSET(vbr)
54 #define R_MACH REG_OFFSET(mac)+4
55 #define R_MACL REG_OFFSET(mac)
56 #define R_PC REG_OFFSET(pc)
57 #define R_NEW_PC REG_OFFSET(new_pc)
58 #define R_PR REG_OFFSET(pr)
59 #define R_SGR REG_OFFSET(sgr)
60 #define R_FPUL REG_OFFSET(fpul)
61 #define R_FPSCR REG_OFFSET(fpscr)
62 #define R_DBR REG_OFFSET(dbr)
63 #define R_R(rn) REG_OFFSET(r[rn])
64 #define R_FR(f) REG_OFFSET(fr[0][(f)^1])
65 #define R_XF(f) REG_OFFSET(fr[1][(f)^1])
66 #define R_DR(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
74 #define SH4_MODE_UNKNOWN -1
/* One pending fixup in the generated code for the current block.
 * NOTE(review): listing is elided here — at least the exc_code field
 * (written in sh4_x86_add_backpatch) and the closing brace are not shown. */
76 struct backpatch_record {
77     uint32_t fixup_offset;    /* byte offset of the fixup site relative to xlat_current_block->code */
78     uint32_t fixup_icount;    /* instruction count from block start ( (pc - block_start_pc) >> 1 ) */
83 * Struct to manage internal translation state. This state is not saved -
84 * it is only valid between calls to sh4_translate_begin_block() and
85 * sh4_translate_end_block()
/* Per-block translation state; valid only between sh4_translate_begin_block()
 * and sh4_translate_end_block(). NOTE(review): listing is elided — fields such
 * as code, tstate, in_delay_slot and fastmem are referenced elsewhere in this
 * file but not visible in this fragment. */
87 struct sh4_x86_state {
90     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
91     gboolean branch_taken; /* true if we branched unconditionally */
92     gboolean double_prec; /* true if FPU is in double-precision mode */
93     gboolean double_size; /* true if FPU is in double-size mode */
94     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
95     uint32_t block_start_pc;
96     uint32_t stack_posn; /* Trace stack height for alignment purposes */
97     uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
101     gboolean tlb_on; /* True if tlb translation is active */
102     struct mem_region_fn **priv_address_space;  /* dispatch table used when SR.MD is set */
103     struct mem_region_fn **user_address_space;  /* dispatch table used in user mode */
105     /* Instrumentation */
106     xlat_block_begin_callback_t begin_callback;
107     xlat_block_end_callback_t end_callback;
110     /* Allocated memory for the (block-wide) back-patch list */
111     struct backpatch_record *backpatch_list;
112     uint32_t backpatch_posn;   /* number of entries currently used */
113     uint32_t backpatch_size;   /* capacity of backpatch_list, in records */
116 static struct sh4_x86_state sh4_x86;
118 static uint8_t sh4_entry_stub[128];
119 typedef FASTCALL void (*entry_point_t)(void *);
120 entry_point_t sh4_translate_enter;
122 static uint32_t max_int = 0x7FFFFFFF;
123 static uint32_t min_int = 0x80000000;
124 static uint32_t save_fcw; /* save value for fpu control word */
125 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
127 static void sh4_x86_translate_unlink_block( void *use_list );
129 static struct xlat_target_fns x86_target_fns = {
130 sh4_x86_translate_unlink_block
/**
 * Test whether the host CPU supports SSE3, by executing CPUID leaf 1 and
 * checking bit 0 of ECX (the SSE3/PNI feature flag).
 * @return TRUE if SSE3 is available, otherwise FALSE.
 * NOTE(review): the declaration of 'features' is elided from this listing.
 */
134 gboolean is_sse3_supported()
138     __asm__ __volatile__(
139         "mov $0x01, %%eax\n\t"
140         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
141     return (features & 1) ? TRUE : FALSE;
/**
 * Install the privileged- and user-mode memory dispatch tables that generated
 * code will index through (see address_space() / decode_address()).
 * @param priv table used when SR.MD == 1
 * @param user table used when SR.MD == 0
 */
144 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
146     sh4_x86.priv_address_space = priv;
147     sh4_x86.user_address_space = user;
/**
 * Emit the native entry trampoline into sh4_entry_stub and point
 * sh4_translate_enter at it. The stub loads REG_EBP with &sh4r + 128 — the
 * same bias subtracted in REG_OFFSET() — so all register accesses in
 * generated code are short rbp-relative displacements.
 * NOTE(review): listing elided — prologue/epilogue instructions not shown.
 */
150 void sh4_translate_write_entry_stub(void)
152     mem_unprotect(sh4_entry_stub, sizeof(sh4_entry_stub));
153     xlat_output = sh4_entry_stub;
155     MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
159 #if SIZEOF_VOID_P == 8
162     CALL_r32( REG_ARG1 );
166     SUBL_imms_r32( 8, REG_ESP );  /* 32-bit: keep stack 16-byte aligned around the call */
167     CALL_r32( REG_ARG1 );
168     ADDL_imms_r32( 8, REG_ESP );
175     sh4_translate_enter = (entry_point_t)sh4_entry_stub;
/**
 * One-time translator initialisation: allocate the backpatch list, probe for
 * SSE3, register the unlink callback with the xlat cache, install the default
 * address spaces, and write the native entry stub.
 * NOTE(review): malloc result is not checked before use — confirm whether an
 * OOM-abort policy applies here.
 */
178 void sh4_translate_init(void)
180     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
181     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
182     sh4_x86.begin_callback = NULL;
183     sh4_x86.end_callback = NULL;
184     sh4_x86.fastmem = TRUE;
185     sh4_x86.sse3_enabled = is_sse3_supported();
186     xlat_set_target_fns(&x86_target_fns);
187     sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
188     sh4_translate_write_entry_stub();
/**
 * Register instrumentation callbacks invoked at the start and end of each
 * translated block (NULL disables the corresponding hook).
 */
191 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
193     sh4_x86.begin_callback = begin;
194     sh4_x86.end_callback = end;
/** Enable or disable the fast-memory translation path. */
197 void sh4_translate_set_fastmem( gboolean flag )
199     sh4_x86.fastmem = flag;
/**
 * Record a fixup to be resolved when the block trailer is written
 * (sh4_translate_end_block).
 * @param fixup_addr output position just AFTER the field to patch; the stored
 *        offset subtracts reloc_size to point back at the field itself.
 * @param fixup_pc   SH4 PC of the faulting instruction (used to recover icount)
 * @param exc_code   exception code, or -2 for a pointer-sized address reloc
 * NOTE(review): listing elided — the default reloc_size (for the 32-bit case)
 * and part of the delay-slot handling are not visible.
 */
202 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
206     if( exc_code == -2 ) {
207         reloc_size = sizeof(void *);
210     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
211         sh4_x86.backpatch_size <<= 1;   /* grow by doubling */
212         sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
213                 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
214         assert( sh4_x86.backpatch_list != NULL );
216     if( sh4_x86.in_delay_slot ) {
220     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
221         (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
222     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
223     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
224     sh4_x86.backpatch_posn++;
227 #define TSTATE_NONE -1
228 #define TSTATE_O X86_COND_O
229 #define TSTATE_C X86_COND_C
230 #define TSTATE_E X86_COND_E
231 #define TSTATE_NE X86_COND_NE
232 #define TSTATE_G X86_COND_G
233 #define TSTATE_GE X86_COND_GE
234 #define TSTATE_A X86_COND_A
235 #define TSTATE_AE X86_COND_AE
237 #define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
238 #define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
240 /* Convenience instructions */
241 #define LDC_t() CMPB_imms_rbpdisp(1,R_T); CMC()
242 #define SETE_t() SETCCB_cc_rbpdisp(X86_COND_E,R_T)
243 #define SETA_t() SETCCB_cc_rbpdisp(X86_COND_A,R_T)
244 #define SETAE_t() SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
245 #define SETG_t() SETCCB_cc_rbpdisp(X86_COND_G,R_T)
246 #define SETGE_t() SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
247 #define SETC_t() SETCCB_cc_rbpdisp(X86_COND_C,R_T)
248 #define SETO_t() SETCCB_cc_rbpdisp(X86_COND_O,R_T)
249 #define SETNE_t() SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
250 #define SETC_r8(r1) SETCCB_cc_r8(X86_COND_C, r1)
251 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
252 #define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
253 #define JE_label(label) JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
254 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
255 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
256 #define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
257 #define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
258 #define JP_label(label) JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
259 #define JS_label(label) JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
260 #define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
261 #define JNE_exc(exc) JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
263 #define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
264 CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }
266 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
267 #define JT_label(label) LOAD_t() \
268 JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
270 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
271 #define JF_label(label) LOAD_t() \
272 JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
275 #define load_reg(x86reg,sh4reg) MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
276 #define store_reg(x86reg,sh4reg) MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
279 * Load an FR register (single-precision floating point) into an integer x86
280 * register (eg for register-to-register moves)
282 #define load_fr(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
283 #define load_xf(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
286 * Load the low half of a DR register (DR or XD) into an integer x86 register
288 #define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
289 #define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
292 * Store an FR register (single-precision floating point) from an integer x86+
293 * register (eg for register-to-register moves)
295 #define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
296 #define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
298 #define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
299 #define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
302 #define push_fpul() FLDF_rbpdisp(R_FPUL)
303 #define pop_fpul() FSTPF_rbpdisp(R_FPUL)
304 #define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
305 #define pop_fr(frm) FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
306 #define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
307 #define pop_xf(frm) FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
308 #define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
309 #define pop_dr(frm) FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
310 #define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
311 #define pop_xdr(frm) FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
313 #ifdef ENABLE_SH4STATS
314 #define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
316 #define COUNT_INST(id)
320 /* Exception checks - Note that all exception checks will clobber EAX */
322 #define check_priv( ) \
323 if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
324 if( sh4_x86.in_delay_slot ) { \
325 exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
327 exit_block_exc(EXC_ILLEGAL, pc, 2); \
329 sh4_x86.branch_taken = TRUE; \
330 sh4_x86.in_delay_slot = DELAY_NONE; \
334 #define check_fpuen( ) \
335 if( !sh4_x86.fpuen_checked ) {\
336 sh4_x86.fpuen_checked = TRUE;\
337 MOVL_rbpdisp_r32( R_SR, REG_EAX );\
338 ANDL_imms_r32( SR_FD, REG_EAX );\
339 if( sh4_x86.in_delay_slot ) {\
340 JNE_exc(EXC_SLOT_FPU_DISABLED);\
342 JNE_exc(EXC_FPU_DISABLED);\
344 sh4_x86.tstate = TSTATE_NONE; \
347 #define check_ralign16( x86reg ) \
348 TESTL_imms_r32( 0x00000001, x86reg ); \
349 JNE_exc(EXC_DATA_ADDR_READ)
351 #define check_walign16( x86reg ) \
352 TESTL_imms_r32( 0x00000001, x86reg ); \
353 JNE_exc(EXC_DATA_ADDR_WRITE);
355 #define check_ralign32( x86reg ) \
356 TESTL_imms_r32( 0x00000003, x86reg ); \
357 JNE_exc(EXC_DATA_ADDR_READ)
359 #define check_walign32( x86reg ) \
360 TESTL_imms_r32( 0x00000003, x86reg ); \
361 JNE_exc(EXC_DATA_ADDR_WRITE);
363 #define check_ralign64( x86reg ) \
364 TESTL_imms_r32( 0x00000007, x86reg ); \
365 JNE_exc(EXC_DATA_ADDR_READ)
367 #define check_walign64( x86reg ) \
368 TESTL_imms_r32( 0x00000007, x86reg ); \
369 JNE_exc(EXC_DATA_ADDR_WRITE);
371 #define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)
374 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
375 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
377 #ifdef HAVE_FRAME_ADDRESS
/**
 * Emit a call through the memory-region dispatch table to read from memory
 * (HAVE_FRAME_ADDRESS variant: passes an exception-return address so the
 * handler can unwind directly).
 * Fast path: privileged mode with MMU off cannot fault, so no exception
 * pointer is needed. Otherwise ARG2 carries a placeholder address that is
 * backpatched (exc_code -2) to the block's exception epilogue.
 * NOTE(review): closing braces / else lines are elided from this listing.
 */
378 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
380     decode_address(address_space(), addr_reg, REG_CALLPTR);
381     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
382         CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
384         if( addr_reg != REG_ARG1 ) {
385             MOVL_r32_r32( addr_reg, REG_ARG1 );
387         MOVP_immptr_rptr( 0, REG_ARG2 );   /* placeholder, patched by backpatch entry below */
388         sh4_x86_add_backpatch( xlat_output, pc, -2 );
389         CALL2_r32disp_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2);
391     if( value_reg != REG_RESULT1 ) {
392         MOVL_r32_r32( REG_RESULT1, value_reg );
/**
 * Emit a call through the memory-region dispatch table to write to memory
 * (HAVE_FRAME_ADDRESS variant). Mirrors call_read_func: a fault-free fast
 * path when privileged with MMU off, otherwise a third argument carrying the
 * (backpatched) exception-return address — in a register on x86-64, or in a
 * stack slot on i386.
 * NOTE(review): the #if separating the register/stack cases is elided here.
 */
396 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
398     decode_address(address_space(), addr_reg, REG_CALLPTR);
399     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
400         CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
402         if( value_reg != REG_ARG2 ) {
403             MOVL_r32_r32( value_reg, REG_ARG2 );
405         if( addr_reg != REG_ARG1 ) {
406             MOVL_r32_r32( addr_reg, REG_ARG1 );
409         MOVP_immptr_rptr( 0, REG_ARG3 );   /* placeholder return address (register ABI) */
410         sh4_x86_add_backpatch( xlat_output, pc, -2 );
411         CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, REG_ARG3);
413         MOVL_imm32_rspdisp( 0, 0 );        /* placeholder return address (stack-slot ABI) */
414         sh4_x86_add_backpatch( xlat_output, pc, -2 );
415         CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, 0);
/**
 * Fallback read-call emitter (no HAVE_FRAME_ADDRESS): plain indirect call
 * through the dispatch table, no exception return-address plumbing.
 */
420 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
422     decode_address(address_space(), addr_reg, REG_CALLPTR);
423     CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
424     if( value_reg != REG_RESULT1 ) {
425         MOVL_r32_r32( REG_RESULT1, value_reg );
/**
 * Fallback write-call emitter (no HAVE_FRAME_ADDRESS): plain indirect call
 * with address and value arguments.
 */
429 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
431     decode_address(address_space(), addr_reg, REG_CALLPTR);
432     CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
436 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
437 #define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
438 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
439 #define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
440 #define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
441 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
442 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
443 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
444 #define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
446 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
448 /** Offset of xlat_sh4_mode field relative to the code pointer */
449 #define XLAT_SH4_MODE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
450 #define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
451 #define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
/**
 * Reset per-block translation state before emitting a new block starting at
 * the given SH4 PC, snapshotting the current FPU/MMU/CPU mode from sh4r.
 * Optionally emits the instrumentation begin-callback and a block-profiling
 * counter increment.
 */
453 void sh4_translate_begin_block( sh4addr_t pc )
455     sh4_x86.code = xlat_output;
456     sh4_x86.in_delay_slot = FALSE;
457     sh4_x86.fpuen_checked = FALSE;
458     sh4_x86.branch_taken = FALSE;
459     sh4_x86.backpatch_posn = 0;
460     sh4_x86.block_start_pc = pc;
461     sh4_x86.tlb_on = IS_TLB_ENABLED();
462     sh4_x86.tstate = TSTATE_NONE;          /* T flag not mirrored in host cflags yet */
463     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
464     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
465     sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
466     if( sh4_x86.begin_callback ) {
467         CALL_ptr( sh4_x86.begin_callback );
469     if( sh4_profile_blocks ) {
470         MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
471         ADDL_imms_r32disp( 1, REG_EAX, 0 );   /* increment the block's 'active' counter */
/**
 * Upper-bound estimate, in bytes, of the epilogue that sh4_translate_end_block
 * will emit: base epilogue, optional end-callback, plus per-backpatch
 * exception stubs (the first three are cheaper because they can reuse the
 * shared cleanup tail with a short branch).
 */
476 uint32_t sh4_translate_end_block_size()
478     uint32_t epilogue_size = EPILOGUE_SIZE;
479     if( sh4_x86.end_callback ) {
480         epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
482     if( sh4_x86.backpatch_posn <= 3 ) {
483         epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
485         epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
487     return epilogue_size;
492 * Embed a breakpoint into the generated code
494 void sh4_translate_emit_breakpoint( sh4vma_t pc )
496     MOVL_imm32_r32( pc, REG_EAX );
497     CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
498     sh4_x86.tstate = TSTATE_NONE;   /* call clobbers host flags, so T is no longer mirrored */
502 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
505 * Test if the loaded target code pointer in %eax is valid, and if so jump
506 * directly into it, bypassing the normal exit.
/**
 * Emit code that tests the candidate target-code pointer in EAX and, if its
 * recorded xlat_sh4_mode matches the (possibly runtime-reloaded) current mode,
 * jumps straight into it. On mode mismatch it follows the block's chain
 * pointer and loops back to the test.
 * NOTE(review): several lines (null-check branch, the actual jump, loop
 * re-entry) are elided from this listing.
 */
508 static void jump_next_block()
510     uint8_t *ptr = xlat_output;
511     TESTP_rptr_rptr(REG_EAX, REG_EAX);
513     if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
514         /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
515         MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
516         CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
518         CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
520     JNE_label(wrongmode);
521     if( sh4_x86.end_callback ) {
523         MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
528     JMP_TARGET(wrongmode);
529     MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
530     int rel = ptr - xlat_output;   /* backwards displacement to re-test the chained block */
/**
 * Runtime link helper called from a translated block's stub: find (or
 * translate) the target block for pc whose mode matches, then patch the
 * calling site — the CALL that got us here — into a direct JMP to the target,
 * and thread the site onto the target block's use-list so it can be unlinked
 * later. Finally the saved return address on the stack is redirected so we
 * "return" into the freshly linked target.
 * NOTE(review): the opcode-byte patch and the retptr rewrite lines are
 * elided from this listing.
 */
538 void FASTCALL sh4_translate_link_block( uint32_t pc )
540     uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
541     while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
542         target = XLAT_BLOCK_CHAIN(target);
544     if( target == NULL ) {
545         target = sh4_translate_basic_block( pc );
547     uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
549     *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)-5; /* rel32 of the new JMP */
550     *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
551     XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;
553     uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
554     assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
/**
 * Emit the call-to-linker stub (sh4_translate_link_block) that a block uses
 * for a direct branch target before it has been linked. Space is reserved
 * after the call for the use-list pointer that linking will install.
 * NOTE(review): the 64-bit padding body is elided from this listing.
 */
558 static void emit_translate_and_backpatch()
560     /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
561     CALL1_ptr_r32(sh4_translate_link_block, REG_ARG1);
563     /* When patched, the jmp instruction will be 5 bytes (either platform) -
564      * we need to reserve sizeof(void*) bytes for the use-list
567     if( sizeof(void*) == 8 ) {
575 * If we're jumping to a fixed address (or at least fixed relative to the
576 * current PC, then we can do a direct branch. REG_ARG1 should contain
577 * the PC at this point.
/**
 * Emit the dispatch for a branch to a compile-time-known PC. If the target is
 * in the ICache with a fixed mode and no end-callback, emit the patchable
 * link stub; otherwise look the code pointer up at runtime (LUT for physical,
 * or xlat_get_code(_by_vma) call) and fall into jump_next_block().
 * NOTE(review): else lines and the trailing jump_next_block() call are
 * elided from this listing.
 */
579 static void jump_next_block_fixed_pc( sh4addr_t pc )
581     if( IS_IN_ICACHE(pc) ) {
582         if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
583             /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
584              * fetch-and-backpatch routine, which will replace the call with a branch */
585             emit_translate_and_backpatch();
588         MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
589         ANDP_imms_rptr( -4, REG_EAX );   /* mask LUT flag bits out of the code pointer */
591     } else if( sh4_x86.tlb_on ) {
592         CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
594         CALL1_ptr_r32(xlat_get_code, REG_ARG1);
/**
 * Walk a block's use-list and rewrite every linked call-site back into the
 * translate-and-backpatch stub (used when the target block is invalidated).
 * Temporarily redirects xlat_output to each site; the saved pointer is
 * presumably restored after the loop (restore line elided from this listing).
 */
601 static void sh4_x86_translate_unlink_block( void *use_list )
603     uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
604     void *next = use_list;
605     while( next != NULL ) {
606         xlat_output = (uint8_t *)next;
607         next = *(void **)(xlat_output+5);   /* use-list link stored after the 5-byte jmp */
608         emit_translate_and_backpatch();
/** Emit the common block exit: optional end-callback, then the epilogue
 *  (return path elided from this listing). */
615 static void exit_block()
617     if( sh4_x86.end_callback ) {
618         CALL_ptr(sh4_x86.end_callback);
624 * Exit the block with sh4r.pc already written
/**
 * Exit the block when sh4r.pc has already been written by the generated code:
 * account the elapsed cycles into slice_cycle, bail out to the emulator if an
 * event is pending, otherwise look up the next block from R_PC and dispatch.
 * @param pc end-of-block SH4 address, used only for the cycle count.
 */
626 void exit_block_pcset( sh4addr_t pc )
628     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
629     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
630     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
631     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
633     MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
634     if( sh4_x86.tlb_on ) {
635         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
637         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
641     JMP_TARGET(exitloop);
646 * Exit the block with sh4r.new_pc written with the target pc
/**
 * Exit the block with sh4r.new_pc holding the target: commit new_pc into
 * sh4r.pc, account cycles, check for pending events, then look up and
 * dispatch the next block.
 * @param pc end-of-block SH4 address, used only for the cycle count.
 */
648 void exit_block_newpcset( sh4addr_t pc )
650     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
651     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
652     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
653     MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
654     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
655     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
657     if( sh4_x86.tlb_on ) {
658         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
660         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
664     JMP_TARGET(exitloop);
670 * Exit the block to an absolute PC
/**
 * Exit the block to a fixed absolute PC: store it to sh4r.pc, account the
 * cycles for the instructions executed, check events, then emit the
 * fixed-target dispatch (which may use the patchable direct-branch path).
 * @param pc     target SH4 address
 * @param endpc  address just past the last translated instruction (cycle count)
 */
672 void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
674     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
675     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
676     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
678     MOVL_imm32_r32( pc, REG_ARG1 );
679     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
680     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
682     jump_next_block_fixed_pc(pc);
683     JMP_TARGET(exitloop);
688 * Exit the block to a relative PC
/**
 * Exit the block to a PC expressed relative to the block start. Tight loops
 * that branch back to their own start (with an unchanged mode) get a special
 * fast path: just check event_pending and branch straight back to the top of
 * this block. Otherwise update R_PC and dispatch via the fixed-PC path.
 * @param pc     target SH4 address
 * @param endpc  address just past the last translated instruction (cycle count)
 */
690 void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
692     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
693     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
694     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
696     if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
697         /* Special case for tight loops - the PC doesn't change, and
698          * we already know the target address. Just check events pending before
701         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
702         uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output));
703         JCC_cc_prerel(X86_COND_A, backdisp);   /* branch back to block start */
705         MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
706         ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
707         MOVL_r32_rbpdisp( REG_ARG1, R_PC );
708         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
709         JBE_label(exitloop2);
711         jump_next_block_fixed_pc(pc);
712         JMP_TARGET(exitloop2);
718 * Exit unconditionally with a general exception
/**
 * Exit the block unconditionally by raising a general SH4 exception: advance
 * R_PC to the faulting instruction, account cycles (inst_adjust covers the
 * extra cost of the aborting/slot instruction), then call sh4_raise_exception.
 * NOTE(review): the trailing exit/dispatch lines are elided from this listing.
 */
720 void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
722     MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
723     ADDL_r32_rbpdisp( REG_ECX, R_PC );
724     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
725     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
726     MOVL_imm32_r32( code, REG_ARG1 );
727     CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
732 * Embed a call to sh4_execute_instruction for situations that we
733 * can't translate (just page-crossing delay slots at the moment).
734 * Caller is responsible for setting new_pc before calling this function.
738 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
739 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
740 * Call sh4_execute_instruction
741 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
/**
 * Exit the block by falling back to the interpreter for one instruction
 * (used for untranslatable cases such as page-crossing delay slots).
 * Caller must have set sh4r.new_pc. Advances R_PC, charges cycles for
 * endpc+2 (single-stepping doesn't update slice_cycle itself), propagates
 * the delay-slot flag, then calls sh4_execute_instruction.
 * NOTE(review): the post-call dispatch lines are elided from this listing.
 */
743 void exit_block_emu( sh4vma_t endpc )
745     MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
746     ADDL_r32_rbpdisp( REG_ECX, R_PC );
748     MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
749     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
750     MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
751     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
753     CALL_ptr( sh4_execute_instruction );
758 * Write the block trailer (exception handling block)
/**
 * Write the block trailer: the normal termination (if the body didn't already
 * branch away) followed by the shared exception-cleanup tail and one stub per
 * backpatch record. Negative exc_codes are address relocs: -2 patches an
 * absolute pointer to the stub; otherwise the rel32 at the fixup site is
 * adjusted. Positive codes additionally emit a sh4_raise_exception call.
 * Each stub loads the instruction count into EDX and jumps to the shared
 * tail, which rewinds SPC and charges (icount+1) cycles.
 * NOTE(review): several lines (loop header, MUL, branch emission) are elided
 * from this listing.
 */
760 void sh4_translate_end_block( sh4addr_t pc ) {
761     if( sh4_x86.branch_taken == FALSE ) {
762         // Didn't exit unconditionally already, so write the termination here
763         exit_block_rel( pc, pc );
765     if( sh4_x86.backpatch_posn != 0 ) {
767         // Exception raised - cleanup and exit
768         uint8_t *end_ptr = xlat_output;
769         MOVL_r32_r32( REG_EDX, REG_ECX );
770         ADDL_r32_r32( REG_EDX, REG_ECX );    /* ECX = icount * 2 = byte offset into the block */
771         ADDL_r32_rbpdisp( REG_ECX, R_SPC );
772         MOVL_moffptr_eax( &sh4_cpu_period );
773         INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */
775         ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
778         for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
779             uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
780             if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
781                 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
782                     *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
784                     *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
786                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
787                 int rel = end_ptr - xlat_output;
790                 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
791                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
792                 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
793                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
794                 int rel = end_ptr - xlat_output;
802 * Translate a single instruction. Delayed branches are handled specially
803 * by translating both branch and delayed instruction as a single unit (as
805 * The instruction MUST be in the icache (assert check)
807 * @return true if the instruction marks the end of a basic block
810 uint32_t sh4_translate_instruction( sh4vma_t pc )
813 /* Read instruction from icache */
814 assert( IS_IN_ICACHE(pc) );
815 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
817 if( !sh4_x86.in_delay_slot ) {
818 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
821 /* check for breakpoints at this pc */
822 for( int i=0; i<sh4_breakpoint_count; i++ ) {
823 if( sh4_breakpoints[i].address == pc ) {
824 sh4_translate_emit_breakpoint(pc);
832 load_reg( REG_EAX, Rm );
833 load_reg( REG_ECX, Rn );
834 ADDL_r32_r32( REG_EAX, REG_ECX );
835 store_reg( REG_ECX, Rn );
836 sh4_x86.tstate = TSTATE_NONE;
840 ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
841 sh4_x86.tstate = TSTATE_NONE;
845 if( sh4_x86.tstate != TSTATE_C ) {
848 load_reg( REG_EAX, Rm );
849 load_reg( REG_ECX, Rn );
850 ADCL_r32_r32( REG_EAX, REG_ECX );
851 store_reg( REG_ECX, Rn );
853 sh4_x86.tstate = TSTATE_C;
857 load_reg( REG_EAX, Rm );
858 load_reg( REG_ECX, Rn );
859 ADDL_r32_r32( REG_EAX, REG_ECX );
860 store_reg( REG_ECX, Rn );
862 sh4_x86.tstate = TSTATE_O;
866 load_reg( REG_EAX, Rm );
867 load_reg( REG_ECX, Rn );
868 ANDL_r32_r32( REG_EAX, REG_ECX );
869 store_reg( REG_ECX, Rn );
870 sh4_x86.tstate = TSTATE_NONE;
874 load_reg( REG_EAX, 0 );
875 ANDL_imms_r32(imm, REG_EAX);
876 store_reg( REG_EAX, 0 );
877 sh4_x86.tstate = TSTATE_NONE;
879 AND.B #imm, @(R0, GBR) {:
881 load_reg( REG_EAX, 0 );
882 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
883 MOVL_r32_r32(REG_EAX, REG_SAVE1);
884 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
885 MOVL_r32_r32(REG_SAVE1, REG_EAX);
886 ANDL_imms_r32(imm, REG_EDX );
887 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
888 sh4_x86.tstate = TSTATE_NONE;
892 load_reg( REG_EAX, Rm );
893 load_reg( REG_ECX, Rn );
894 CMPL_r32_r32( REG_EAX, REG_ECX );
896 sh4_x86.tstate = TSTATE_E;
899 COUNT_INST(I_CMPEQI);
900 load_reg( REG_EAX, 0 );
901 CMPL_imms_r32(imm, REG_EAX);
903 sh4_x86.tstate = TSTATE_E;
907 load_reg( REG_EAX, Rm );
908 load_reg( REG_ECX, Rn );
909 CMPL_r32_r32( REG_EAX, REG_ECX );
911 sh4_x86.tstate = TSTATE_GE;
915 load_reg( REG_EAX, Rm );
916 load_reg( REG_ECX, Rn );
917 CMPL_r32_r32( REG_EAX, REG_ECX );
919 sh4_x86.tstate = TSTATE_G;
923 load_reg( REG_EAX, Rm );
924 load_reg( REG_ECX, Rn );
925 CMPL_r32_r32( REG_EAX, REG_ECX );
927 sh4_x86.tstate = TSTATE_A;
931 load_reg( REG_EAX, Rm );
932 load_reg( REG_ECX, Rn );
933 CMPL_r32_r32( REG_EAX, REG_ECX );
935 sh4_x86.tstate = TSTATE_AE;
939 load_reg( REG_EAX, Rn );
940 CMPL_imms_r32( 0, REG_EAX );
942 sh4_x86.tstate = TSTATE_G;
946 load_reg( REG_EAX, Rn );
947 CMPL_imms_r32( 0, REG_EAX );
949 sh4_x86.tstate = TSTATE_GE;
952 COUNT_INST(I_CMPSTR);
953 load_reg( REG_EAX, Rm );
954 load_reg( REG_ECX, Rn );
955 XORL_r32_r32( REG_ECX, REG_EAX );
956 TESTB_r8_r8( REG_AL, REG_AL );
958 TESTB_r8_r8( REG_AH, REG_AH );
960 SHRL_imm_r32( 16, REG_EAX );
961 TESTB_r8_r8( REG_AL, REG_AL );
963 TESTB_r8_r8( REG_AH, REG_AH );
968 sh4_x86.tstate = TSTATE_E;
972 load_reg( REG_EAX, Rm );
973 load_reg( REG_ECX, Rn );
974 SHRL_imm_r32( 31, REG_EAX );
975 SHRL_imm_r32( 31, REG_ECX );
976 MOVL_r32_rbpdisp( REG_EAX, R_M );
977 MOVL_r32_rbpdisp( REG_ECX, R_Q );
978 CMPL_r32_r32( REG_EAX, REG_ECX );
980 sh4_x86.tstate = TSTATE_NE;
984 XORL_r32_r32( REG_EAX, REG_EAX );
985 MOVL_r32_rbpdisp( REG_EAX, R_Q );
986 MOVL_r32_rbpdisp( REG_EAX, R_M );
987 MOVL_r32_rbpdisp( REG_EAX, R_T );
988 sh4_x86.tstate = TSTATE_C; // works for DIV1
992 MOVL_rbpdisp_r32( R_M, REG_ECX );
993 load_reg( REG_EAX, Rn );
994 if( sh4_x86.tstate != TSTATE_C ) {
997 RCLL_imm_r32( 1, REG_EAX );
998 SETC_r8( REG_DL ); // Q'
999 CMPL_rbpdisp_r32( R_Q, REG_ECX );
1001 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1003 JMP_TARGET(mqequal);
1004 SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1006 store_reg( REG_EAX, Rn ); // Done with Rn now
1007 SETC_r8(REG_AL); // tmp1
1008 XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
1009 XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
1010 MOVL_r32_rbpdisp( REG_ECX, R_Q );
1011 XORL_imms_r32( 1, REG_AL ); // T = !Q'
1012 MOVZXL_r8_r32( REG_AL, REG_EAX );
1013 MOVL_r32_rbpdisp( REG_EAX, R_T );
1014 sh4_x86.tstate = TSTATE_NONE;
1017 COUNT_INST(I_DMULS);
1018 load_reg( REG_EAX, Rm );
1019 load_reg( REG_ECX, Rn );
1021 MOVL_r32_rbpdisp( REG_EDX, R_MACH );
1022 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1023 sh4_x86.tstate = TSTATE_NONE;
1026 COUNT_INST(I_DMULU);
1027 load_reg( REG_EAX, Rm );
1028 load_reg( REG_ECX, Rn );
1030 MOVL_r32_rbpdisp( REG_EDX, R_MACH );
1031 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1032 sh4_x86.tstate = TSTATE_NONE;
1036 load_reg( REG_EAX, Rn );
1037 ADDL_imms_r32( -1, REG_EAX );
1038 store_reg( REG_EAX, Rn );
1040 sh4_x86.tstate = TSTATE_E;
1043 COUNT_INST(I_EXTSB);
1044 load_reg( REG_EAX, Rm );
1045 MOVSXL_r8_r32( REG_EAX, REG_EAX );
1046 store_reg( REG_EAX, Rn );
1049 COUNT_INST(I_EXTSW);
1050 load_reg( REG_EAX, Rm );
1051 MOVSXL_r16_r32( REG_EAX, REG_EAX );
1052 store_reg( REG_EAX, Rn );
1055 COUNT_INST(I_EXTUB);
1056 load_reg( REG_EAX, Rm );
1057 MOVZXL_r8_r32( REG_EAX, REG_EAX );
1058 store_reg( REG_EAX, Rn );
1061 COUNT_INST(I_EXTUW);
1062 load_reg( REG_EAX, Rm );
1063 MOVZXL_r16_r32( REG_EAX, REG_EAX );
1064 store_reg( REG_EAX, Rn );
1069 load_reg( REG_EAX, Rm );
1070 check_ralign32( REG_EAX );
1071 MEM_READ_LONG( REG_EAX, REG_EAX );
1072 MOVL_r32_r32(REG_EAX, REG_SAVE1);
1073 load_reg( REG_EAX, Rm );
1074 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
1075 MEM_READ_LONG( REG_EAX, REG_EAX );
1076 ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
1078 load_reg( REG_EAX, Rm );
1079 check_ralign32( REG_EAX );
1080 MEM_READ_LONG( REG_EAX, REG_EAX );
1081 MOVL_r32_r32(REG_EAX, REG_SAVE1);
1082 load_reg( REG_EAX, Rn );
1083 check_ralign32( REG_EAX );
1084 MEM_READ_LONG( REG_EAX, REG_EAX );
1085 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
1086 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
1089 IMULL_r32( REG_SAVE1 );
1090 ADDL_r32_rbpdisp( REG_EAX, R_MACL );
1091 ADCL_r32_rbpdisp( REG_EDX, R_MACH );
1093 MOVL_rbpdisp_r32( R_S, REG_ECX );
1094 TESTL_r32_r32(REG_ECX, REG_ECX);
1096 CALL_ptr( signsat48 );
1097 JMP_TARGET( nosat );
1098 sh4_x86.tstate = TSTATE_NONE;
1103 load_reg( REG_EAX, Rm );
1104 check_ralign16( REG_EAX );
1105 MEM_READ_WORD( REG_EAX, REG_EAX );
1106 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1107 load_reg( REG_EAX, Rm );
1108 LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
1109 MEM_READ_WORD( REG_EAX, REG_EAX );
1110 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
1111 // Note translate twice in case of page boundaries. Maybe worth
1112 // adding a page-boundary check to skip the second translation
1114 load_reg( REG_EAX, Rn );
1115 check_ralign16( REG_EAX );
1116 MEM_READ_WORD( REG_EAX, REG_EAX );
1117 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1118 load_reg( REG_EAX, Rm );
1119 check_ralign16( REG_EAX );
1120 MEM_READ_WORD( REG_EAX, REG_EAX );
1121 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
1122 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
1124 IMULL_r32( REG_SAVE1 );
1125 MOVL_rbpdisp_r32( R_S, REG_ECX );
1126 TESTL_r32_r32( REG_ECX, REG_ECX );
1129 ADDL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1130 JNO_label( end ); // 2
1131 MOVL_imm32_r32( 1, REG_EDX ); // 5
1132 MOVL_r32_rbpdisp( REG_EDX, R_MACH ); // 6
1133 JS_label( positive ); // 2
1134 MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
1135 MOVL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1136 JMP_label(end2); // 2
1138 JMP_TARGET(positive);
1139 MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
1140 MOVL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1141 JMP_label(end3); // 2
1144 ADDL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1145 ADCL_r32_rbpdisp( REG_EDX, R_MACH ); // 6
1149 sh4_x86.tstate = TSTATE_NONE;
1153 MOVL_rbpdisp_r32( R_T, REG_EAX );
1154 store_reg( REG_EAX, Rn );
1158 load_reg( REG_EAX, Rm );
1159 load_reg( REG_ECX, Rn );
1160 MULL_r32( REG_ECX );
1161 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1162 sh4_x86.tstate = TSTATE_NONE;
1165 COUNT_INST(I_MULSW);
1166 MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
1167 MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
1168 MULL_r32( REG_ECX );
1169 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1170 sh4_x86.tstate = TSTATE_NONE;
1173 COUNT_INST(I_MULUW);
1174 MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
1175 MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
1176 MULL_r32( REG_ECX );
1177 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1178 sh4_x86.tstate = TSTATE_NONE;
1182 load_reg( REG_EAX, Rm );
1183 NEGL_r32( REG_EAX );
1184 store_reg( REG_EAX, Rn );
1185 sh4_x86.tstate = TSTATE_NONE;
1189 load_reg( REG_EAX, Rm );
1190 XORL_r32_r32( REG_ECX, REG_ECX );
1192 SBBL_r32_r32( REG_EAX, REG_ECX );
1193 store_reg( REG_ECX, Rn );
1195 sh4_x86.tstate = TSTATE_C;
1199 load_reg( REG_EAX, Rm );
1200 NOTL_r32( REG_EAX );
1201 store_reg( REG_EAX, Rn );
1202 sh4_x86.tstate = TSTATE_NONE;
1206 load_reg( REG_EAX, Rm );
1207 load_reg( REG_ECX, Rn );
1208 ORL_r32_r32( REG_EAX, REG_ECX );
1209 store_reg( REG_ECX, Rn );
1210 sh4_x86.tstate = TSTATE_NONE;
1214 load_reg( REG_EAX, 0 );
1215 ORL_imms_r32(imm, REG_EAX);
1216 store_reg( REG_EAX, 0 );
1217 sh4_x86.tstate = TSTATE_NONE;
1219 OR.B #imm, @(R0, GBR) {:
1221 load_reg( REG_EAX, 0 );
1222 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1223 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1224 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
1225 MOVL_r32_r32( REG_SAVE1, REG_EAX );
1226 ORL_imms_r32(imm, REG_EDX );
1227 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1228 sh4_x86.tstate = TSTATE_NONE;
1231 COUNT_INST(I_ROTCL);
1232 load_reg( REG_EAX, Rn );
1233 if( sh4_x86.tstate != TSTATE_C ) {
1236 RCLL_imm_r32( 1, REG_EAX );
1237 store_reg( REG_EAX, Rn );
1239 sh4_x86.tstate = TSTATE_C;
1242 COUNT_INST(I_ROTCR);
1243 load_reg( REG_EAX, Rn );
1244 if( sh4_x86.tstate != TSTATE_C ) {
1247 RCRL_imm_r32( 1, REG_EAX );
1248 store_reg( REG_EAX, Rn );
1250 sh4_x86.tstate = TSTATE_C;
1254 load_reg( REG_EAX, Rn );
1255 ROLL_imm_r32( 1, REG_EAX );
1256 store_reg( REG_EAX, Rn );
1258 sh4_x86.tstate = TSTATE_C;
1262 load_reg( REG_EAX, Rn );
1263 RORL_imm_r32( 1, REG_EAX );
1264 store_reg( REG_EAX, Rn );
1266 sh4_x86.tstate = TSTATE_C;
1270 /* Annoyingly enough, not directly convertible */
1271 load_reg( REG_EAX, Rn );
1272 load_reg( REG_ECX, Rm );
1273 CMPL_imms_r32( 0, REG_ECX );
1276 NEGL_r32( REG_ECX ); // 2
1277 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1278 JE_label(emptysar); // 2
1279 SARL_cl_r32( REG_EAX ); // 2
1280 JMP_label(end); // 2
1282 JMP_TARGET(emptysar);
1283 SARL_imm_r32(31, REG_EAX ); // 3
1287 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1288 SHLL_cl_r32( REG_EAX ); // 2
1291 store_reg( REG_EAX, Rn );
1292 sh4_x86.tstate = TSTATE_NONE;
1296 load_reg( REG_EAX, Rn );
1297 load_reg( REG_ECX, Rm );
1298 CMPL_imms_r32( 0, REG_ECX );
1301 NEGL_r32( REG_ECX ); // 2
1302 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1303 JE_label(emptyshr );
1304 SHRL_cl_r32( REG_EAX ); // 2
1305 JMP_label(end); // 2
1307 JMP_TARGET(emptyshr);
1308 XORL_r32_r32( REG_EAX, REG_EAX );
1312 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1313 SHLL_cl_r32( REG_EAX ); // 2
1316 store_reg( REG_EAX, Rn );
1317 sh4_x86.tstate = TSTATE_NONE;
1321 load_reg( REG_EAX, Rn );
1322 SHLL_imm_r32( 1, REG_EAX );
1324 store_reg( REG_EAX, Rn );
1325 sh4_x86.tstate = TSTATE_C;
1329 load_reg( REG_EAX, Rn );
1330 SARL_imm_r32( 1, REG_EAX );
1332 store_reg( REG_EAX, Rn );
1333 sh4_x86.tstate = TSTATE_C;
1337 load_reg( REG_EAX, Rn );
1338 SHLL_imm_r32( 1, REG_EAX );
1340 store_reg( REG_EAX, Rn );
1341 sh4_x86.tstate = TSTATE_C;
1345 load_reg( REG_EAX, Rn );
1346 SHLL_imm_r32( 2, REG_EAX );
1347 store_reg( REG_EAX, Rn );
1348 sh4_x86.tstate = TSTATE_NONE;
1352 load_reg( REG_EAX, Rn );
1353 SHLL_imm_r32( 8, REG_EAX );
1354 store_reg( REG_EAX, Rn );
1355 sh4_x86.tstate = TSTATE_NONE;
1359 load_reg( REG_EAX, Rn );
1360 SHLL_imm_r32( 16, REG_EAX );
1361 store_reg( REG_EAX, Rn );
1362 sh4_x86.tstate = TSTATE_NONE;
1366 load_reg( REG_EAX, Rn );
1367 SHRL_imm_r32( 1, REG_EAX );
1369 store_reg( REG_EAX, Rn );
1370 sh4_x86.tstate = TSTATE_C;
1374 load_reg( REG_EAX, Rn );
1375 SHRL_imm_r32( 2, REG_EAX );
1376 store_reg( REG_EAX, Rn );
1377 sh4_x86.tstate = TSTATE_NONE;
1381 load_reg( REG_EAX, Rn );
1382 SHRL_imm_r32( 8, REG_EAX );
1383 store_reg( REG_EAX, Rn );
1384 sh4_x86.tstate = TSTATE_NONE;
1388 load_reg( REG_EAX, Rn );
1389 SHRL_imm_r32( 16, REG_EAX );
1390 store_reg( REG_EAX, Rn );
1391 sh4_x86.tstate = TSTATE_NONE;
1395 load_reg( REG_EAX, Rm );
1396 load_reg( REG_ECX, Rn );
1397 SUBL_r32_r32( REG_EAX, REG_ECX );
1398 store_reg( REG_ECX, Rn );
1399 sh4_x86.tstate = TSTATE_NONE;
1403 load_reg( REG_EAX, Rm );
1404 load_reg( REG_ECX, Rn );
1405 if( sh4_x86.tstate != TSTATE_C ) {
1408 SBBL_r32_r32( REG_EAX, REG_ECX );
1409 store_reg( REG_ECX, Rn );
1411 sh4_x86.tstate = TSTATE_C;
1415 load_reg( REG_EAX, Rm );
1416 load_reg( REG_ECX, Rn );
1417 SUBL_r32_r32( REG_EAX, REG_ECX );
1418 store_reg( REG_ECX, Rn );
1420 sh4_x86.tstate = TSTATE_O;
1423 COUNT_INST(I_SWAPB);
1424 load_reg( REG_EAX, Rm );
1425 XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
1426 store_reg( REG_EAX, Rn );
1429 COUNT_INST(I_SWAPB);
1430 load_reg( REG_EAX, Rm );
1431 MOVL_r32_r32( REG_EAX, REG_ECX );
1432 SHLL_imm_r32( 16, REG_ECX );
1433 SHRL_imm_r32( 16, REG_EAX );
1434 ORL_r32_r32( REG_EAX, REG_ECX );
1435 store_reg( REG_ECX, Rn );
1436 sh4_x86.tstate = TSTATE_NONE;
1440 load_reg( REG_EAX, Rn );
1441 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1442 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
1443 TESTB_r8_r8( REG_DL, REG_DL );
1445 ORB_imms_r8( 0x80, REG_DL );
1446 MOVL_r32_r32( REG_SAVE1, REG_EAX );
1447 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1448 sh4_x86.tstate = TSTATE_NONE;
1452 load_reg( REG_EAX, Rm );
1453 load_reg( REG_ECX, Rn );
1454 TESTL_r32_r32( REG_EAX, REG_ECX );
1456 sh4_x86.tstate = TSTATE_E;
1460 load_reg( REG_EAX, 0 );
1461 TESTL_imms_r32( imm, REG_EAX );
1463 sh4_x86.tstate = TSTATE_E;
1465 TST.B #imm, @(R0, GBR) {:
1467 load_reg( REG_EAX, 0);
1468 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1469 MEM_READ_BYTE( REG_EAX, REG_EAX );
1470 TESTB_imms_r8( imm, REG_AL );
1472 sh4_x86.tstate = TSTATE_E;
1476 load_reg( REG_EAX, Rm );
1477 load_reg( REG_ECX, Rn );
1478 XORL_r32_r32( REG_EAX, REG_ECX );
1479 store_reg( REG_ECX, Rn );
1480 sh4_x86.tstate = TSTATE_NONE;
1484 load_reg( REG_EAX, 0 );
1485 XORL_imms_r32( imm, REG_EAX );
1486 store_reg( REG_EAX, 0 );
1487 sh4_x86.tstate = TSTATE_NONE;
1489 XOR.B #imm, @(R0, GBR) {:
1491 load_reg( REG_EAX, 0 );
1492 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1493 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1494 MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
1495 MOVL_r32_r32( REG_SAVE1, REG_EAX );
1496 XORL_imms_r32( imm, REG_EDX );
1497 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1498 sh4_x86.tstate = TSTATE_NONE;
1501 COUNT_INST(I_XTRCT);
1502 load_reg( REG_EAX, Rm );
1503 load_reg( REG_ECX, Rn );
1504 SHLL_imm_r32( 16, REG_EAX );
1505 SHRL_imm_r32( 16, REG_ECX );
1506 ORL_r32_r32( REG_EAX, REG_ECX );
1507 store_reg( REG_ECX, Rn );
1508 sh4_x86.tstate = TSTATE_NONE;
1511 /* Data move instructions */
1514 load_reg( REG_EAX, Rm );
1515 store_reg( REG_EAX, Rn );
1519 MOVL_imm32_r32( imm, REG_EAX );
1520 store_reg( REG_EAX, Rn );
1524 load_reg( REG_EAX, Rn );
1525 load_reg( REG_EDX, Rm );
1526 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1527 sh4_x86.tstate = TSTATE_NONE;
1531 load_reg( REG_EAX, Rn );
1532 LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
1533 load_reg( REG_EDX, Rm );
1534 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1535 ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
1536 sh4_x86.tstate = TSTATE_NONE;
1538 MOV.B Rm, @(R0, Rn) {:
1540 load_reg( REG_EAX, 0 );
1541 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1542 load_reg( REG_EDX, Rm );
1543 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1544 sh4_x86.tstate = TSTATE_NONE;
1546 MOV.B R0, @(disp, GBR) {:
1548 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1549 ADDL_imms_r32( disp, REG_EAX );
1550 load_reg( REG_EDX, 0 );
1551 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1552 sh4_x86.tstate = TSTATE_NONE;
1554 MOV.B R0, @(disp, Rn) {:
1556 load_reg( REG_EAX, Rn );
1557 ADDL_imms_r32( disp, REG_EAX );
1558 load_reg( REG_EDX, 0 );
1559 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1560 sh4_x86.tstate = TSTATE_NONE;
1564 load_reg( REG_EAX, Rm );
1565 MEM_READ_BYTE( REG_EAX, REG_EAX );
1566 store_reg( REG_EAX, Rn );
1567 sh4_x86.tstate = TSTATE_NONE;
1571 load_reg( REG_EAX, Rm );
1572 MEM_READ_BYTE( REG_EAX, REG_EAX );
1574 ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
1576 store_reg( REG_EAX, Rn );
1577 sh4_x86.tstate = TSTATE_NONE;
1579 MOV.B @(R0, Rm), Rn {:
1581 load_reg( REG_EAX, 0 );
1582 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1583 MEM_READ_BYTE( REG_EAX, REG_EAX );
1584 store_reg( REG_EAX, Rn );
1585 sh4_x86.tstate = TSTATE_NONE;
1587 MOV.B @(disp, GBR), R0 {:
1589 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1590 ADDL_imms_r32( disp, REG_EAX );
1591 MEM_READ_BYTE( REG_EAX, REG_EAX );
1592 store_reg( REG_EAX, 0 );
1593 sh4_x86.tstate = TSTATE_NONE;
1595 MOV.B @(disp, Rm), R0 {:
1597 load_reg( REG_EAX, Rm );
1598 ADDL_imms_r32( disp, REG_EAX );
1599 MEM_READ_BYTE( REG_EAX, REG_EAX );
1600 store_reg( REG_EAX, 0 );
1601 sh4_x86.tstate = TSTATE_NONE;
1605 load_reg( REG_EAX, Rn );
1606 check_walign32(REG_EAX);
1607 MOVL_r32_r32( REG_EAX, REG_ECX );
1608 ANDL_imms_r32( 0xFC000000, REG_ECX );
1609 CMPL_imms_r32( 0xE0000000, REG_ECX );
1611 ANDL_imms_r32( 0x3C, REG_EAX );
1612 load_reg( REG_EDX, Rm );
1613 MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
1616 load_reg( REG_EDX, Rm );
1617 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1619 sh4_x86.tstate = TSTATE_NONE;
1623 load_reg( REG_EAX, Rn );
1624 ADDL_imms_r32( -4, REG_EAX );
1625 check_walign32( REG_EAX );
1626 load_reg( REG_EDX, Rm );
1627 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1628 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
1629 sh4_x86.tstate = TSTATE_NONE;
1631 MOV.L Rm, @(R0, Rn) {:
1633 load_reg( REG_EAX, 0 );
1634 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1635 check_walign32( REG_EAX );
1636 load_reg( REG_EDX, Rm );
1637 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1638 sh4_x86.tstate = TSTATE_NONE;
1640 MOV.L R0, @(disp, GBR) {:
1642 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1643 ADDL_imms_r32( disp, REG_EAX );
1644 check_walign32( REG_EAX );
1645 load_reg( REG_EDX, 0 );
1646 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1647 sh4_x86.tstate = TSTATE_NONE;
1649 MOV.L Rm, @(disp, Rn) {:
1651 load_reg( REG_EAX, Rn );
1652 ADDL_imms_r32( disp, REG_EAX );
1653 check_walign32( REG_EAX );
1654 MOVL_r32_r32( REG_EAX, REG_ECX );
1655 ANDL_imms_r32( 0xFC000000, REG_ECX );
1656 CMPL_imms_r32( 0xE0000000, REG_ECX );
1658 ANDL_imms_r32( 0x3C, REG_EAX );
1659 load_reg( REG_EDX, Rm );
1660 MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
1663 load_reg( REG_EDX, Rm );
1664 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1666 sh4_x86.tstate = TSTATE_NONE;
1670 load_reg( REG_EAX, Rm );
1671 check_ralign32( REG_EAX );
1672 MEM_READ_LONG( REG_EAX, REG_EAX );
1673 store_reg( REG_EAX, Rn );
1674 sh4_x86.tstate = TSTATE_NONE;
1678 load_reg( REG_EAX, Rm );
1679 check_ralign32( REG_EAX );
1680 MEM_READ_LONG( REG_EAX, REG_EAX );
1682 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
1684 store_reg( REG_EAX, Rn );
1685 sh4_x86.tstate = TSTATE_NONE;
1687 MOV.L @(R0, Rm), Rn {:
1689 load_reg( REG_EAX, 0 );
1690 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1691 check_ralign32( REG_EAX );
1692 MEM_READ_LONG( REG_EAX, REG_EAX );
1693 store_reg( REG_EAX, Rn );
1694 sh4_x86.tstate = TSTATE_NONE;
1696 MOV.L @(disp, GBR), R0 {:
1698 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1699 ADDL_imms_r32( disp, REG_EAX );
1700 check_ralign32( REG_EAX );
1701 MEM_READ_LONG( REG_EAX, REG_EAX );
1702 store_reg( REG_EAX, 0 );
1703 sh4_x86.tstate = TSTATE_NONE;
1705 MOV.L @(disp, PC), Rn {:
1706 COUNT_INST(I_MOVLPC);
1707 if( sh4_x86.in_delay_slot ) {
1710 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1711 if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
1712 // If the target address is in the same page as the code, it's
1713 // pretty safe to just ref it directly and circumvent the whole
1714 // memory subsystem. (this is a big performance win)
1716 // FIXME: There's a corner-case that's not handled here when
1717 // the current code-page is in the ITLB but not in the UTLB.
1718 // (should generate a TLB miss although need to test SH4
1719 // behaviour to confirm) Unlikely to be anyone depending on this
1720 // behaviour though.
1721 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1722 MOVL_moffptr_eax( ptr );
1724 // Note: we use sh4r.pc for the calc as we could be running at a
1725 // different virtual address than the translation was done with,
1726 // but we can safely assume that the low bits are the same.
1727 MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
1728 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1729 MEM_READ_LONG( REG_EAX, REG_EAX );
1730 sh4_x86.tstate = TSTATE_NONE;
1732 store_reg( REG_EAX, Rn );
1735 MOV.L @(disp, Rm), Rn {:
1737 load_reg( REG_EAX, Rm );
1738 ADDL_imms_r32( disp, REG_EAX );
1739 check_ralign32( REG_EAX );
1740 MEM_READ_LONG( REG_EAX, REG_EAX );
1741 store_reg( REG_EAX, Rn );
1742 sh4_x86.tstate = TSTATE_NONE;
1746 load_reg( REG_EAX, Rn );
1747 check_walign16( REG_EAX );
1748 load_reg( REG_EDX, Rm );
1749 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1750 sh4_x86.tstate = TSTATE_NONE;
1754 load_reg( REG_EAX, Rn );
1755 check_walign16( REG_EAX );
1756 LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
1757 load_reg( REG_EDX, Rm );
1758 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1759 ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
1760 sh4_x86.tstate = TSTATE_NONE;
1762 MOV.W Rm, @(R0, Rn) {:
1764 load_reg( REG_EAX, 0 );
1765 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1766 check_walign16( REG_EAX );
1767 load_reg( REG_EDX, Rm );
1768 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1769 sh4_x86.tstate = TSTATE_NONE;
1771 MOV.W R0, @(disp, GBR) {:
1773 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1774 ADDL_imms_r32( disp, REG_EAX );
1775 check_walign16( REG_EAX );
1776 load_reg( REG_EDX, 0 );
1777 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1778 sh4_x86.tstate = TSTATE_NONE;
1780 MOV.W R0, @(disp, Rn) {:
1782 load_reg( REG_EAX, Rn );
1783 ADDL_imms_r32( disp, REG_EAX );
1784 check_walign16( REG_EAX );
1785 load_reg( REG_EDX, 0 );
1786 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1787 sh4_x86.tstate = TSTATE_NONE;
1791 load_reg( REG_EAX, Rm );
1792 check_ralign16( REG_EAX );
1793 MEM_READ_WORD( REG_EAX, REG_EAX );
1794 store_reg( REG_EAX, Rn );
1795 sh4_x86.tstate = TSTATE_NONE;
1799 load_reg( REG_EAX, Rm );
1800 check_ralign16( REG_EAX );
1801 MEM_READ_WORD( REG_EAX, REG_EAX );
1803 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
1805 store_reg( REG_EAX, Rn );
1806 sh4_x86.tstate = TSTATE_NONE;
1808 MOV.W @(R0, Rm), Rn {:
1810 load_reg( REG_EAX, 0 );
1811 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1812 check_ralign16( REG_EAX );
1813 MEM_READ_WORD( REG_EAX, REG_EAX );
1814 store_reg( REG_EAX, Rn );
1815 sh4_x86.tstate = TSTATE_NONE;
1817 MOV.W @(disp, GBR), R0 {:
1819 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1820 ADDL_imms_r32( disp, REG_EAX );
1821 check_ralign16( REG_EAX );
1822 MEM_READ_WORD( REG_EAX, REG_EAX );
1823 store_reg( REG_EAX, 0 );
1824 sh4_x86.tstate = TSTATE_NONE;
1826 MOV.W @(disp, PC), Rn {:
1828 if( sh4_x86.in_delay_slot ) {
1831 // See comments for MOV.L @(disp, PC), Rn
1832 uint32_t target = pc + disp + 4;
1833 if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
1834 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1835 MOVL_moffptr_eax( ptr );
1836 MOVSXL_r16_r32( REG_EAX, REG_EAX );
1838 MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
1839 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1840 MEM_READ_WORD( REG_EAX, REG_EAX );
1841 sh4_x86.tstate = TSTATE_NONE;
1843 store_reg( REG_EAX, Rn );
1846 MOV.W @(disp, Rm), R0 {:
1848 load_reg( REG_EAX, Rm );
1849 ADDL_imms_r32( disp, REG_EAX );
1850 check_ralign16( REG_EAX );
1851 MEM_READ_WORD( REG_EAX, REG_EAX );
1852 store_reg( REG_EAX, 0 );
1853 sh4_x86.tstate = TSTATE_NONE;
1855 MOVA @(disp, PC), R0 {:
1857 if( sh4_x86.in_delay_slot ) {
1860 MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
1861 ADDL_rbpdisp_r32( R_PC, REG_ECX );
1862 store_reg( REG_ECX, 0 );
1863 sh4_x86.tstate = TSTATE_NONE;
1867 COUNT_INST(I_MOVCA);
1868 load_reg( REG_EAX, Rn );
1869 check_walign32( REG_EAX );
1870 load_reg( REG_EDX, 0 );
1871 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1872 sh4_x86.tstate = TSTATE_NONE;
1875 /* Control transfer instructions */
1878 if( sh4_x86.in_delay_slot ) {
1881 sh4vma_t target = disp + pc + 4;
1882 JT_label( nottaken );
1883 exit_block_rel(target, pc+2 );
1884 JMP_TARGET(nottaken);
1890 if( sh4_x86.in_delay_slot ) {
1893 sh4_x86.in_delay_slot = DELAY_PC;
1894 if( UNTRANSLATABLE(pc+2) ) {
1895 MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1897 ADDL_imms_r32( disp, REG_EAX );
1898 JMP_TARGET(nottaken);
1899 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1900 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1901 exit_block_emu(pc+2);
1902 sh4_x86.branch_taken = TRUE;
1906 sh4vma_t target = disp + pc + 4;
1907 JCC_cc_rel32(sh4_x86.tstate,0);
1908 uint32_t *patch = ((uint32_t *)xlat_output)-1;
1909 int save_tstate = sh4_x86.tstate;
1910 sh4_translate_instruction(pc+2);
1911 sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
1912 exit_block_rel( target, pc+4 );
1915 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1916 sh4_x86.tstate = save_tstate;
1917 sh4_translate_instruction(pc+2);
1924 if( sh4_x86.in_delay_slot ) {
1927 sh4_x86.in_delay_slot = DELAY_PC;
1928 sh4_x86.branch_taken = TRUE;
1929 if( UNTRANSLATABLE(pc+2) ) {
1930 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1931 ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
1932 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1933 exit_block_emu(pc+2);
1936 sh4_translate_instruction( pc + 2 );
1937 exit_block_rel( disp + pc + 4, pc+4 );
1944 if( sh4_x86.in_delay_slot ) {
1947 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1948 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1949 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1950 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1951 sh4_x86.in_delay_slot = DELAY_PC;
1952 sh4_x86.tstate = TSTATE_NONE;
1953 sh4_x86.branch_taken = TRUE;
1954 if( UNTRANSLATABLE(pc+2) ) {
1955 exit_block_emu(pc+2);
1958 sh4_translate_instruction( pc + 2 );
1959 exit_block_newpcset(pc+4);
1966 if( sh4_x86.in_delay_slot ) {
1969 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1970 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1971 MOVL_r32_rbpdisp( REG_EAX, R_PR );
1972 sh4_x86.in_delay_slot = DELAY_PC;
1973 sh4_x86.branch_taken = TRUE;
1974 sh4_x86.tstate = TSTATE_NONE;
1975 if( UNTRANSLATABLE(pc+2) ) {
1976 ADDL_imms_r32( disp, REG_EAX );
1977 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1978 exit_block_emu(pc+2);
1981 sh4_translate_instruction( pc + 2 );
1982 exit_block_rel( disp + pc + 4, pc+4 );
1989 if( sh4_x86.in_delay_slot ) {
1992 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1993 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1994 MOVL_r32_rbpdisp( REG_EAX, R_PR );
1995 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1996 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1998 sh4_x86.in_delay_slot = DELAY_PC;
1999 sh4_x86.tstate = TSTATE_NONE;
2000 sh4_x86.branch_taken = TRUE;
2001 if( UNTRANSLATABLE(pc+2) ) {
2002 exit_block_emu(pc+2);
2005 sh4_translate_instruction( pc + 2 );
2006 exit_block_newpcset(pc+4);
2013 if( sh4_x86.in_delay_slot ) {
2016 sh4vma_t target = disp + pc + 4;
2017 JF_label( nottaken );
2018 exit_block_rel(target, pc+2 );
2019 JMP_TARGET(nottaken);
2025 if( sh4_x86.in_delay_slot ) {
2028 sh4_x86.in_delay_slot = DELAY_PC;
2029 if( UNTRANSLATABLE(pc+2) ) {
2030 MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
2032 ADDL_imms_r32( disp, REG_EAX );
2033 JMP_TARGET(nottaken);
2034 ADDL_rbpdisp_r32( R_PC, REG_EAX );
2035 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
2036 exit_block_emu(pc+2);
2037 sh4_x86.branch_taken = TRUE;
2041 JCC_cc_rel32(sh4_x86.tstate^1,0);
2042 uint32_t *patch = ((uint32_t *)xlat_output)-1;
2044 int save_tstate = sh4_x86.tstate;
2045 sh4_translate_instruction(pc+2);
2046 sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
2047 exit_block_rel( disp + pc + 4, pc+4 );
2049 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
2050 sh4_x86.tstate = save_tstate;
2051 sh4_translate_instruction(pc+2);
2058 if( sh4_x86.in_delay_slot ) {
2061 load_reg( REG_ECX, Rn );
2062 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2063 sh4_x86.in_delay_slot = DELAY_PC;
2064 sh4_x86.branch_taken = TRUE;
2065 if( UNTRANSLATABLE(pc+2) ) {
2066 exit_block_emu(pc+2);
2069 sh4_translate_instruction(pc+2);
2070 exit_block_newpcset(pc+4);
2077 if( sh4_x86.in_delay_slot ) {
2080 MOVL_rbpdisp_r32( R_PC, REG_EAX );
2081 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
2082 MOVL_r32_rbpdisp( REG_EAX, R_PR );
2083 load_reg( REG_ECX, Rn );
2084 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2085 sh4_x86.in_delay_slot = DELAY_PC;
2086 sh4_x86.branch_taken = TRUE;
2087 sh4_x86.tstate = TSTATE_NONE;
2088 if( UNTRANSLATABLE(pc+2) ) {
2089 exit_block_emu(pc+2);
2092 sh4_translate_instruction(pc+2);
2093 exit_block_newpcset(pc+4);
2100 if( sh4_x86.in_delay_slot ) {
2104 MOVL_rbpdisp_r32( R_SPC, REG_ECX );
2105 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2106 MOVL_rbpdisp_r32( R_SSR, REG_EAX );
2107 CALL1_ptr_r32( sh4_write_sr, REG_EAX );
2108 sh4_x86.in_delay_slot = DELAY_PC;
2109 sh4_x86.fpuen_checked = FALSE;
2110 sh4_x86.tstate = TSTATE_NONE;
2111 sh4_x86.branch_taken = TRUE;
2112 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
2113 if( UNTRANSLATABLE(pc+2) ) {
2114 exit_block_emu(pc+2);
2117 sh4_translate_instruction(pc+2);
2118 exit_block_newpcset(pc+4);
2125 if( sh4_x86.in_delay_slot ) {
2128 MOVL_rbpdisp_r32( R_PR, REG_ECX );
2129 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2130 sh4_x86.in_delay_slot = DELAY_PC;
2131 sh4_x86.branch_taken = TRUE;
2132 if( UNTRANSLATABLE(pc+2) ) {
2133 exit_block_emu(pc+2);
2136 sh4_translate_instruction(pc+2);
2137 exit_block_newpcset(pc+4);
2143 COUNT_INST(I_TRAPA);
2144 if( sh4_x86.in_delay_slot ) {
2147 MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX ); // 5
2148 ADDL_r32_rbpdisp( REG_ECX, R_PC );
2149 MOVL_imm32_r32( imm, REG_EAX );
2150 CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
2151 sh4_x86.tstate = TSTATE_NONE;
2152 exit_block_pcset(pc+2);
2153 sh4_x86.branch_taken = TRUE;
2158 COUNT_INST(I_UNDEF);
2159 if( sh4_x86.in_delay_slot ) {
2160 exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);
2162 exit_block_exc(EXC_ILLEGAL, pc, 2);
2168 COUNT_INST(I_CLRMAC);
2169 XORL_r32_r32(REG_EAX, REG_EAX);
2170 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
2171 MOVL_r32_rbpdisp( REG_EAX, R_MACH );
2172 sh4_x86.tstate = TSTATE_NONE;
2177 SETCCB_cc_rbpdisp(X86_COND_C, R_S);
2178 sh4_x86.tstate = TSTATE_NONE;
2184 sh4_x86.tstate = TSTATE_C;
2189 SETCCB_cc_rbpdisp(X86_COND_C, R_S);
2190 sh4_x86.tstate = TSTATE_NONE;
2196 sh4_x86.tstate = TSTATE_C;
2199 /* Floating point moves */
2201 COUNT_INST(I_FMOV1);
2203 if( sh4_x86.double_size ) {
2204 load_dr0( REG_EAX, FRm );
2205 load_dr1( REG_ECX, FRm );
2206 store_dr0( REG_EAX, FRn );
2207 store_dr1( REG_ECX, FRn );
2209 load_fr( REG_EAX, FRm ); // SZ=0 branch
2210 store_fr( REG_EAX, FRn );
2214 COUNT_INST(I_FMOV2);
2216 load_reg( REG_EAX, Rn );
2217 if( sh4_x86.double_size ) {
2218 check_walign64( REG_EAX );
2219 load_dr0( REG_EDX, FRm );
2220 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2221 load_reg( REG_EAX, Rn );
2222 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2223 load_dr1( REG_EDX, FRm );
2224 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2226 check_walign32( REG_EAX );
2227 load_fr( REG_EDX, FRm );
2228 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2230 sh4_x86.tstate = TSTATE_NONE;
2233 COUNT_INST(I_FMOV5);
2235 load_reg( REG_EAX, Rm );
2236 if( sh4_x86.double_size ) {
2237 check_ralign64( REG_EAX );
2238 MEM_READ_LONG( REG_EAX, REG_EAX );
2239 store_dr0( REG_EAX, FRn );
2240 load_reg( REG_EAX, Rm );
2241 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2242 MEM_READ_LONG( REG_EAX, REG_EAX );
2243 store_dr1( REG_EAX, FRn );
2245 check_ralign32( REG_EAX );
2246 MEM_READ_LONG( REG_EAX, REG_EAX );
2247 store_fr( REG_EAX, FRn );
2249 sh4_x86.tstate = TSTATE_NONE;
2252 COUNT_INST(I_FMOV3);
2254 load_reg( REG_EAX, Rn );
2255 if( sh4_x86.double_size ) {
2256 check_walign64( REG_EAX );
2257 LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
2258 load_dr0( REG_EDX, FRm );
2259 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2260 load_reg( REG_EAX, Rn );
2261 LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
2262 load_dr1( REG_EDX, FRm );
2263 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2264 ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
2266 check_walign32( REG_EAX );
2267 LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
2268 load_fr( REG_EDX, FRm );
2269 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2270 ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
2272 sh4_x86.tstate = TSTATE_NONE;
2275 COUNT_INST(I_FMOV6);
2277 load_reg( REG_EAX, Rm );
2278 if( sh4_x86.double_size ) {
2279 check_ralign64( REG_EAX );
2280 MEM_READ_LONG( REG_EAX, REG_EAX );
2281 store_dr0( REG_EAX, FRn );
2282 load_reg( REG_EAX, Rm );
2283 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2284 MEM_READ_LONG( REG_EAX, REG_EAX );
2285 store_dr1( REG_EAX, FRn );
2286 ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
2288 check_ralign32( REG_EAX );
2289 MEM_READ_LONG( REG_EAX, REG_EAX );
2290 store_fr( REG_EAX, FRn );
2291 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2293 sh4_x86.tstate = TSTATE_NONE;
2295 FMOV FRm, @(R0, Rn) {:
2296 COUNT_INST(I_FMOV4);
2298 load_reg( REG_EAX, Rn );
2299 ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
2300 if( sh4_x86.double_size ) {
2301 check_walign64( REG_EAX );
2302 load_dr0( REG_EDX, FRm );
2303 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2304 load_reg( REG_EAX, Rn );
2305 ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
2306 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2307 load_dr1( REG_EDX, FRm );
2308 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2310 check_walign32( REG_EAX );
2311 load_fr( REG_EDX, FRm );
2312 MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
2314 sh4_x86.tstate = TSTATE_NONE;
2316 FMOV @(R0, Rm), FRn {:
2317 COUNT_INST(I_FMOV7);
2319 load_reg( REG_EAX, Rm );
2320 ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
2321 if( sh4_x86.double_size ) {
2322 check_ralign64( REG_EAX );
2323 MEM_READ_LONG( REG_EAX, REG_EAX );
2324 store_dr0( REG_EAX, FRn );
2325 load_reg( REG_EAX, Rm );
2326 ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
2327 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2328 MEM_READ_LONG( REG_EAX, REG_EAX );
2329 store_dr1( REG_EAX, FRn );
2331 check_ralign32( REG_EAX );
2332 MEM_READ_LONG( REG_EAX, REG_EAX );
2333 store_fr( REG_EAX, FRn );
2335 sh4_x86.tstate = TSTATE_NONE;
2337 FLDI0 FRn {: /* IFF PR=0 */
2338 COUNT_INST(I_FLDI0);
2340 if( sh4_x86.double_prec == 0 ) {
2341 XORL_r32_r32( REG_EAX, REG_EAX );
2342 store_fr( REG_EAX, FRn );
2344 sh4_x86.tstate = TSTATE_NONE;
2346 FLDI1 FRn {: /* IFF PR=0 */
2347 COUNT_INST(I_FLDI1);
2349 if( sh4_x86.double_prec == 0 ) {
2350 MOVL_imm32_r32( 0x3F800000, REG_EAX );
2351 store_fr( REG_EAX, FRn );
2356 COUNT_INST(I_FLOAT);
2358 FILD_rbpdisp(R_FPUL);
2359 if( sh4_x86.double_prec ) {
2368 if( sh4_x86.double_prec ) {
2373 MOVP_immptr_rptr( &min_int, REG_ECX );
2374 FILD_r32disp( REG_ECX, 0 );
2378 MOVP_immptr_rptr( &max_int, REG_ECX );
2379 FILD_r32disp( REG_ECX, 0 );
2382 MOVP_immptr_rptr( &save_fcw, REG_EAX );
2383 FNSTCW_r32disp( REG_EAX, 0 );
2384 MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
2385 FLDCW_r32disp( REG_EDX, 0 );
2386 FISTP_rbpdisp(R_FPUL);
2387 FLDCW_r32disp( REG_EAX, 0 );
2393 MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
2394 MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
2397 sh4_x86.tstate = TSTATE_NONE;
2402 load_fr( REG_EAX, FRm );
2403 MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
2408 MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
2409 store_fr( REG_EAX, FRn );
2412 COUNT_INST(I_FCNVDS);
2414 if( sh4_x86.double_prec ) {
2420 COUNT_INST(I_FCNVSD);
2422 if( sh4_x86.double_prec ) {
2428 /* Floating point instructions */
2432 if( sh4_x86.double_prec ) {
2445 if( sh4_x86.double_prec ) {
2460 if( sh4_x86.double_prec ) {
2472 FMAC FR0, FRm, FRn {:
2475 if( sh4_x86.double_prec ) {
2495 if( sh4_x86.double_prec ) {
2510 if( sh4_x86.double_prec ) {
2521 COUNT_INST(I_FSRRA);
2523 if( sh4_x86.double_prec == 0 ) {
2532 COUNT_INST(I_FSQRT);
2534 if( sh4_x86.double_prec ) {
2547 if( sh4_x86.double_prec ) {
2561 COUNT_INST(I_FCMPEQ);
2563 if( sh4_x86.double_prec ) {
2570 XORL_r32_r32(REG_EAX, REG_EAX);
2571 XORL_r32_r32(REG_EDX, REG_EDX);
2573 SETCCB_cc_r8(X86_COND_NP, REG_DL);
2574 CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
2575 MOVL_r32_rbpdisp(REG_EAX, R_T);
2577 sh4_x86.tstate = TSTATE_NONE;
2580 COUNT_INST(I_FCMPGT);
2582 if( sh4_x86.double_prec ) {
2592 sh4_x86.tstate = TSTATE_A;
2598 if( sh4_x86.double_prec == 0 ) {
2599 LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
2600 MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
2601 CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
2603 sh4_x86.tstate = TSTATE_NONE;
2608 if( sh4_x86.double_prec == 0 ) {
2609 if( sh4_x86.sse3_enabled ) {
2610 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
2611 MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
2612 HADDPS_xmm_xmm( 4, 4 );
2613 HADDPS_xmm_xmm( 4, 4 );
2614 MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
2619 push_fr( (FVm<<2)+1);
2620 push_fr( (FVn<<2)+1);
2623 push_fr( (FVm<<2)+2);
2624 push_fr( (FVn<<2)+2);
2627 push_fr( (FVm<<2)+3);
2628 push_fr( (FVn<<2)+3);
2631 pop_fr( (FVn<<2)+3);
/* FTRV XMTRX, FVn fragment (single-precision only): 4x4 matrix * vector.
 * Inline SSE3 path is skipped under shadow mode (begin_callback != NULL)
 * because of the precision mismatch described in the FIXME below. */
2638 if( sh4_x86.double_prec == 0 ) {
2639 if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
2640 /* FIXME: For now, disable this inlining when we're running in shadow mode -
2641 * it gives slightly different results from the emu core. Need to
2642 * fix the precision so both give the right results.
/* Load the four XMTRX columns from the back bank fr[1] into xmm0-xmm3
 * (element order per the trailing comments reflects the swapped layout). */
2644 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1 M0 M3 M2
2645 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5 M4 M7 M6
2646 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9 M8 M11 M10
2647 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
/* Broadcast each element of vector FVn across a full xmm register:
 * dup even/odd lanes, then MOVLHPS/MOVHLPS to splat one value per reg. */
2649 MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
2650 MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
2651 MOV_xmm_xmm( 4, 6 );
2652 MOV_xmm_xmm( 5, 7 );
2653 MOVLHPS_xmm_xmm( 4, 4 ); // V1 V1 V1 V1
2654 MOVHLPS_xmm_xmm( 6, 6 ); // V3 V3 V3 V3
2655 MOVLHPS_xmm_xmm( 5, 5 ); // V0 V0 V0 V0
2656 MOVHLPS_xmm_xmm( 7, 7 ); // V2 V2 V2 V2
/* column * scalar products, then sum the four partial vectors. */
2657 MULPS_xmm_xmm( 0, 4 );
2658 MULPS_xmm_xmm( 1, 5 );
2659 MULPS_xmm_xmm( 2, 6 );
2660 MULPS_xmm_xmm( 3, 7 );
2661 ADDPS_xmm_xmm( 5, 4 );
2662 ADDPS_xmm_xmm( 7, 6 );
2663 ADDPS_xmm_xmm( 6, 4 );
2664 MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
/* Fallback: call the sh4_ftrv helper with a pointer to the FVn vector. */
2666 LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
2667 CALL1_ptr_r32( sh4_ftrv, REG_EAX );
2670 sh4_x86.tstate = TSTATE_NONE;
/* FRCHG fragment: toggle FPSCR.FR and call the helper that physically
 * swaps the front/back floating-point register banks in sh4r. */
2674 COUNT_INST(I_FRCHG);
2676 XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
2677 CALL_ptr( sh4_switch_fr_banks );
2678 sh4_x86.tstate = TSTATE_NONE;
/* FSCHG fragment: toggle FPSCR.SZ (single/double transfer size) both in
 * FPSCR and in the cached xlat_sh4_mode word, and mirror the change in
 * the translator's own compile-time state so subsequent FP moves in this
 * block are translated with the new size. */
2681 COUNT_INST(I_FSCHG);
2683 XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
2684 XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
2685 sh4_x86.tstate = TSTATE_NONE;
2686 sh4_x86.double_size = !sh4_x86.double_size;
2687 sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
2690 /* Processor control instructions */
/* LDC Rm, SR fragment: SR is special -- it's written through sh4_write_sr
 * (SR changes can alter register banks/privilege), it invalidates the
 * cached FPU-enable check and the cached SH4 mode, and the in_delay_slot
 * guard suggests it raises an illegal-slot condition there (the branch
 * body is missing from this extract). */
2692 COUNT_INST(I_LDCSR);
2693 if( sh4_x86.in_delay_slot ) {
2697 load_reg( REG_EAX, Rm );
2698 CALL1_ptr_r32( sh4_write_sr, REG_EAX );
2699 sh4_x86.fpuen_checked = FALSE;
2700 sh4_x86.tstate = TSTATE_NONE;
2701 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
/* Remaining LDC Rm, <reg> handlers are plain 32-bit copies from general
 * register Rm into the named control register slot in sh4r. */
2707 load_reg( REG_EAX, Rm );
2708 MOVL_r32_rbpdisp( REG_EAX, R_GBR );
2713 load_reg( REG_EAX, Rm );
2714 MOVL_r32_rbpdisp( REG_EAX, R_VBR );
2715 sh4_x86.tstate = TSTATE_NONE;
2720 load_reg( REG_EAX, Rm );
2721 MOVL_r32_rbpdisp( REG_EAX, R_SSR );
2722 sh4_x86.tstate = TSTATE_NONE;
2727 load_reg( REG_EAX, Rm );
2728 MOVL_r32_rbpdisp( REG_EAX, R_SGR );
2729 sh4_x86.tstate = TSTATE_NONE;
2734 load_reg( REG_EAX, Rm );
2735 MOVL_r32_rbpdisp( REG_EAX, R_SPC );
2736 sh4_x86.tstate = TSTATE_NONE;
2741 load_reg( REG_EAX, Rm );
2742 MOVL_r32_rbpdisp( REG_EAX, R_DBR );
2743 sh4_x86.tstate = TSTATE_NONE;
/* LDC Rm, Rn_BANK: target is one of the banked r_bank[] registers. */
2748 load_reg( REG_EAX, Rm );
2749 MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2750 sh4_x86.tstate = TSTATE_NONE;
/* LDC.L @Rm+, <reg> handlers. Common shape: load address from Rm, check
 * 32-bit read alignment (check_ralign32 presumably raises an address
 * error -- verify in sh4trans), read the long, post-increment sh4r.r[Rm]
 * by 4, then store the loaded value into the control register slot. */
2754 load_reg( REG_EAX, Rm );
2755 check_ralign32( REG_EAX );
2756 MEM_READ_LONG( REG_EAX, REG_EAX );
2757 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2758 MOVL_r32_rbpdisp( REG_EAX, R_GBR );
2759 sh4_x86.tstate = TSTATE_NONE;
/* LDC.L @Rm+, SR: delay-slot guard (branch body elided), value routed
 * through sh4_write_sr, and cached FPU/mode state invalidated -- same
 * special-casing as the register form of LDC ..., SR. */
2762 COUNT_INST(I_LDCSRM);
2763 if( sh4_x86.in_delay_slot ) {
2767 load_reg( REG_EAX, Rm );
2768 check_ralign32( REG_EAX );
2769 MEM_READ_LONG( REG_EAX, REG_EAX );
2770 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2771 CALL1_ptr_r32( sh4_write_sr, REG_EAX );
2772 sh4_x86.fpuen_checked = FALSE;
2773 sh4_x86.tstate = TSTATE_NONE;
2774 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
/* VBR */
2781 load_reg( REG_EAX, Rm );
2782 check_ralign32( REG_EAX );
2783 MEM_READ_LONG( REG_EAX, REG_EAX );
2784 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2785 MOVL_r32_rbpdisp( REG_EAX, R_VBR );
2786 sh4_x86.tstate = TSTATE_NONE;
/* SSR */
2791 load_reg( REG_EAX, Rm );
2792 check_ralign32( REG_EAX );
2793 MEM_READ_LONG( REG_EAX, REG_EAX );
2794 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2795 MOVL_r32_rbpdisp( REG_EAX, R_SSR );
2796 sh4_x86.tstate = TSTATE_NONE;
/* SGR */
2801 load_reg( REG_EAX, Rm );
2802 check_ralign32( REG_EAX );
2803 MEM_READ_LONG( REG_EAX, REG_EAX );
2804 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2805 MOVL_r32_rbpdisp( REG_EAX, R_SGR );
2806 sh4_x86.tstate = TSTATE_NONE;
/* SPC */
2811 load_reg( REG_EAX, Rm );
2812 check_ralign32( REG_EAX );
2813 MEM_READ_LONG( REG_EAX, REG_EAX );
2814 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2815 MOVL_r32_rbpdisp( REG_EAX, R_SPC );
2816 sh4_x86.tstate = TSTATE_NONE;
/* DBR */
2821 load_reg( REG_EAX, Rm );
2822 check_ralign32( REG_EAX );
2823 MEM_READ_LONG( REG_EAX, REG_EAX );
2824 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2825 MOVL_r32_rbpdisp( REG_EAX, R_DBR );
2826 sh4_x86.tstate = TSTATE_NONE;
/* Banked register target; the '{:' opens a translator template whose
 * closing ':}' is missing from this extract. */
2828 LDC.L @Rm+, Rn_BANK {:
2831 load_reg( REG_EAX, Rm );
2832 check_ralign32( REG_EAX );
2833 MEM_READ_LONG( REG_EAX, REG_EAX );
2834 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2835 MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2836 sh4_x86.tstate = TSTATE_NONE;
/* LDS Rm, FPSCR: routed through sh4_write_fpscr (FPSCR writes can change
 * FP banks / transfer size), and the cached SH4 mode is invalidated. */
2839 COUNT_INST(I_LDSFPSCR);
2841 load_reg( REG_EAX, Rm );
2842 CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
2843 sh4_x86.tstate = TSTATE_NONE;
2844 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
/* Memory form: read long at @Rm with alignment check, post-increment Rm,
 * then the same sh4_write_fpscr path. */
2847 LDS.L @Rm+, FPSCR {:
2848 COUNT_INST(I_LDSFPSCRM);
2850 load_reg( REG_EAX, Rm );
2851 check_ralign32( REG_EAX );
2852 MEM_READ_LONG( REG_EAX, REG_EAX );
2853 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2854 CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
2855 sh4_x86.tstate = TSTATE_NONE;
2856 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
/* LDS Rm, FPUL / @Rm+ form: plain copies, no helper needed. */
2862 load_reg( REG_EAX, Rm );
2863 MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
2868 load_reg( REG_EAX, Rm );
2869 check_ralign32( REG_EAX );
2870 MEM_READ_LONG( REG_EAX, REG_EAX );
2871 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2872 MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
2873 sh4_x86.tstate = TSTATE_NONE;
/* MACH (high word of the mac pair -- R_MACH is REG_OFFSET(mac)+4). */
2877 load_reg( REG_EAX, Rm );
2878 MOVL_r32_rbpdisp( REG_EAX, R_MACH );
2882 load_reg( REG_EAX, Rm );
2883 check_ralign32( REG_EAX );
2884 MEM_READ_LONG( REG_EAX, REG_EAX );
2885 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2886 MOVL_r32_rbpdisp( REG_EAX, R_MACH );
2887 sh4_x86.tstate = TSTATE_NONE;
/* MACL */
2891 load_reg( REG_EAX, Rm );
2892 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
2896 load_reg( REG_EAX, Rm );
2897 check_ralign32( REG_EAX );
2898 MEM_READ_LONG( REG_EAX, REG_EAX );
2899 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2900 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
2901 sh4_x86.tstate = TSTATE_NONE;
/* PR (procedure return register). */
2905 load_reg( REG_EAX, Rm );
2906 MOVL_r32_rbpdisp( REG_EAX, R_PR );
2910 load_reg( REG_EAX, Rm );
2911 check_ralign32( REG_EAX );
2912 MEM_READ_LONG( REG_EAX, REG_EAX );
2913 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2914 MOVL_r32_rbpdisp( REG_EAX, R_PR );
2915 sh4_x86.tstate = TSTATE_NONE;
/* LDTLB: delegated entirely to the MMU helper. */
2918 COUNT_INST(I_LDTLB);
2919 CALL_ptr( MMU_ldtlb );
2920 sh4_x86.tstate = TSTATE_NONE;
/* Cache-op fragments (OCBWB counted here; the load/MEM_PREFETCH pair below
 * is presumably the PREF @Rn handler -- the template headers between these
 * lines are missing from this extract). */
2929 COUNT_INST(I_OCBWB);
2933 load_reg( REG_EAX, Rn );
2934 MEM_PREFETCH( REG_EAX );
2935 sh4_x86.tstate = TSTATE_NONE;
/* SLEEP: delegated to the sh4_sleep helper; clears delay-slot state since
 * translation of this block ends here. */
2938 COUNT_INST(I_SLEEP);
2940 CALL_ptr( sh4_sleep );
2941 sh4_x86.tstate = TSTATE_NONE;
2942 sh4_x86.in_delay_slot = DELAY_NONE;
/* STC SR, Rn: SR is assembled from its component flags by sh4_read_sr
 * (returned in EAX), then stored to general register Rn. */
2946 COUNT_INST(I_STCSR);
2948 CALL_ptr(sh4_read_sr);
2949 store_reg( REG_EAX, Rn );
2950 sh4_x86.tstate = TSTATE_NONE;
/* Remaining STC <reg>, Rn handlers: plain 32-bit copies from the control
 * register slot in sh4r out to Rn. */
2954 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
2955 store_reg( REG_EAX, Rn );
2960 MOVL_rbpdisp_r32( R_VBR, REG_EAX );
2961 store_reg( REG_EAX, Rn );
2962 sh4_x86.tstate = TSTATE_NONE;
2967 MOVL_rbpdisp_r32( R_SSR, REG_EAX );
2968 store_reg( REG_EAX, Rn );
2969 sh4_x86.tstate = TSTATE_NONE;
2974 MOVL_rbpdisp_r32( R_SPC, REG_EAX );
2975 store_reg( REG_EAX, Rn );
2976 sh4_x86.tstate = TSTATE_NONE;
2981 MOVL_rbpdisp_r32( R_SGR, REG_EAX );
2982 store_reg( REG_EAX, Rn );
2983 sh4_x86.tstate = TSTATE_NONE;
2988 MOVL_rbpdisp_r32( R_DBR, REG_EAX );
2989 store_reg( REG_EAX, Rn );
2990 sh4_x86.tstate = TSTATE_NONE;
/* STC Rm_BANK, Rn: source is a banked r_bank[] register. */
2995 MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
2996 store_reg( REG_EAX, Rn );
2997 sh4_x86.tstate = TSTATE_NONE;
/* STC.L <reg>, @-Rn handlers. Common shape: load Rn, check 32-bit write
 * alignment, compute Rn-4 as the store address, fetch the control-register
 * value into EDX, write the long, then commit the pre-decrement to
 * sh4r.r[Rn] only after the write (so a faulting write leaves Rn intact). */
/* SR form: value comes from sh4_read_sr; note it decrements via
 * LEAL_r32disp_r32 where the others use ADDL_imms_r32 -- same result,
 * LEA just preserves the x86 flags across the computation. */
3000 COUNT_INST(I_STCSRM);
3002 CALL_ptr( sh4_read_sr );
3003 MOVL_r32_r32( REG_EAX, REG_EDX );
3004 load_reg( REG_EAX, Rn );
3005 check_walign32( REG_EAX );
3006 LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
3007 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3008 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3009 sh4_x86.tstate = TSTATE_NONE;
/* VBR */
3014 load_reg( REG_EAX, Rn );
3015 check_walign32( REG_EAX );
3016 ADDL_imms_r32( -4, REG_EAX );
3017 MOVL_rbpdisp_r32( R_VBR, REG_EDX );
3018 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3019 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3020 sh4_x86.tstate = TSTATE_NONE;
/* SSR */
3025 load_reg( REG_EAX, Rn );
3026 check_walign32( REG_EAX );
3027 ADDL_imms_r32( -4, REG_EAX );
3028 MOVL_rbpdisp_r32( R_SSR, REG_EDX );
3029 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3030 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3031 sh4_x86.tstate = TSTATE_NONE;
/* SPC */
3036 load_reg( REG_EAX, Rn );
3037 check_walign32( REG_EAX );
3038 ADDL_imms_r32( -4, REG_EAX );
3039 MOVL_rbpdisp_r32( R_SPC, REG_EDX );
3040 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3041 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3042 sh4_x86.tstate = TSTATE_NONE;
/* SGR */
3047 load_reg( REG_EAX, Rn );
3048 check_walign32( REG_EAX );
3049 ADDL_imms_r32( -4, REG_EAX );
3050 MOVL_rbpdisp_r32( R_SGR, REG_EDX );
3051 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3052 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3053 sh4_x86.tstate = TSTATE_NONE;
/* DBR */
3058 load_reg( REG_EAX, Rn );
3059 check_walign32( REG_EAX );
3060 ADDL_imms_r32( -4, REG_EAX );
3061 MOVL_rbpdisp_r32( R_DBR, REG_EDX );
3062 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3063 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3064 sh4_x86.tstate = TSTATE_NONE;
/* Banked-register source; '{:' opens a translator template whose closing
 * ':}' is missing from this extract. */
3066 STC.L Rm_BANK, @-Rn {:
3069 load_reg( REG_EAX, Rn );
3070 check_walign32( REG_EAX );
3071 ADDL_imms_r32( -4, REG_EAX );
3072 MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
3073 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3074 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3075 sh4_x86.tstate = TSTATE_NONE;
/* GBR */
3079 load_reg( REG_EAX, Rn );
3080 check_walign32( REG_EAX );
3081 ADDL_imms_r32( -4, REG_EAX );
3082 MOVL_rbpdisp_r32( R_GBR, REG_EDX );
3083 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3084 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3085 sh4_x86.tstate = TSTATE_NONE;
/* STS FPSCR, Rn: straight copy of the stored FPSCR word to Rn (reads do
 * not need the sh4_write_fpscr helper that writes do). */
3088 COUNT_INST(I_STSFPSCR);
3090 MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
3091 store_reg( REG_EAX, Rn );
/* STS.L forms share the STC.L shape: alignment-checked pre-decrement
 * store of the system register, with Rn committed after the write. */
3093 STS.L FPSCR, @-Rn {:
3094 COUNT_INST(I_STSFPSCRM);
3096 load_reg( REG_EAX, Rn );
3097 check_walign32( REG_EAX );
3098 ADDL_imms_r32( -4, REG_EAX );
3099 MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
3100 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3101 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3102 sh4_x86.tstate = TSTATE_NONE;
/* FPUL */
3107 MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
3108 store_reg( REG_EAX, Rn );
3113 load_reg( REG_EAX, Rn );
3114 check_walign32( REG_EAX );
3115 ADDL_imms_r32( -4, REG_EAX );
3116 MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
3117 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3118 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3119 sh4_x86.tstate = TSTATE_NONE;
/* MACH */
3123 MOVL_rbpdisp_r32( R_MACH, REG_EAX );
3124 store_reg( REG_EAX, Rn );
3128 load_reg( REG_EAX, Rn );
3129 check_walign32( REG_EAX );
3130 ADDL_imms_r32( -4, REG_EAX );
3131 MOVL_rbpdisp_r32( R_MACH, REG_EDX );
3132 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3133 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3134 sh4_x86.tstate = TSTATE_NONE;
/* MACL */
3138 MOVL_rbpdisp_r32( R_MACL, REG_EAX );
3139 store_reg( REG_EAX, Rn );
3143 load_reg( REG_EAX, Rn );
3144 check_walign32( REG_EAX );
3145 ADDL_imms_r32( -4, REG_EAX );
3146 MOVL_rbpdisp_r32( R_MACL, REG_EDX );
3147 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3148 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3149 sh4_x86.tstate = TSTATE_NONE;
/* PR */
3153 MOVL_rbpdisp_r32( R_PR, REG_EAX );
3154 store_reg( REG_EAX, Rn );
3158 load_reg( REG_EAX, Rn );
3159 check_walign32( REG_EAX );
3160 ADDL_imms_r32( -4, REG_EAX );
3161 MOVL_rbpdisp_r32( R_PR, REG_EDX );
3162 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3163 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3164 sh4_x86.tstate = TSTATE_NONE;
/* NOP fragment: emits no host code at all. */
3169 /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
3172 sh4_x86.in_delay_slot = DELAY_NONE;
/* Start of the native-PC recovery support. Two implementations follow:
 * a DWARF2/libgcc unwinder (HAVE_EXCEPTIONS) and a raw frame-pointer scan
 * for ia32 builds without unwind tables. */
3178 * The unwind methods only work if we compiled with DWARF2 frame information
3179 * (ie -fexceptions), otherwise we have to use the direct frame scan.
3181 #ifdef HAVE_EXCEPTIONS
/* Fields of the UnwindInfo search record (struct header elided in this
 * extract): the [block_start, block_end) native-code range to look for. */
3185 uintptr_t block_start;
3186 uintptr_t block_end;
/* Per-frame callback for _Unwind_Backtrace: stop the walk (_URC_NORMAL_STOP)
 * as soon as the frame's instruction pointer lies inside the translated
 * block's [block_start, block_end) range, otherwise keep unwinding.
 * NOTE(review): the line recording the matched pc into *info is missing
 * from this extract (original line 3195), as are the braces. */
3190 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
3192 struct UnwindInfo *info = arg;
3193 void *pc = (void *)_Unwind_GetIP(context);
3194 if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
3196 return _URC_NORMAL_STOP;
3198 return _URC_NO_REASON;
/* DWARF variant: walk the host stack via _Unwind_Backtrace looking for a
 * return address inside the given translated-code range [code, code+code_size).
 * The initialisation of the result field and the return statement are
 * missing from this extract (original lines around 3205/3209). */
3201 void *xlat_get_native_pc( void *code, uint32_t code_size )
3203 struct UnwindInfo info;
3206 info.block_start = (uintptr_t)code;
3207 info.block_end = info.block_start + code_size;
3208 _Unwind_Backtrace( xlat_check_frame, &info );
/* Frame-scan variant for builds without unwind tables: follow the saved
 * EBP chain (up to 8 frames) looking for the frame whose saved EBP equals
 * &sh4r + 128 -- the value the translator keeps in EBP while executing
 * translated code (matching the -128 bias in REG_OFFSET above) -- and
 * return that frame's saved return address. NOTE(review): the asm(...) 
 * opener, the loop-back jump and the result/return lines are missing
 * from this extract. */
3212 /* Assume this is an ia32 build - amd64 should always have dwarf information */
3213 void *xlat_get_native_pc( void *code, uint32_t code_size )
3215 void *result = NULL;
3217 "mov %%ebp, %%eax\n\t"
3218 "mov $0x8, %%ecx\n\t"
/* Loop: stop on NULL frame pointer, match against %edx (= &sh4r+128),
 * give up after %ecx frames, else step to the caller's frame. */
3220 "frame_loop: test %%eax, %%eax\n\t"
3221 "je frame_not_found\n\t"
3222 "cmp (%%eax), %%edx\n\t"
3223 "je frame_found\n\t"
3224 "sub $0x1, %%ecx\n\t"
3225 "je frame_not_found\n\t"
3226 "movl (%%eax), %%eax\n\t"
/* Saved return address lives 4 bytes above the saved EBP. */
3228 "frame_found: movl 0x4(%%eax), %0\n"
3231 : "r" (((uint8_t *)&sh4r) + 128 )
3232 : "eax", "ecx", "edx" );
.