4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
8 * Copyright (c) 2007 Nathan Keynes.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
29 #include "sh4/sh4core.h"
30 #include "sh4/sh4dasm.h"
31 #include "sh4/sh4trans.h"
32 #include "sh4/sh4stat.h"
33 #include "sh4/sh4mmio.h"
35 #include "xlat/xltcache.h"
36 #include "xlat/x86/x86op.h"
37 #include "xlat/xlatdasm.h"
40 #define DEFAULT_BACKPATCH_SIZE 4096
42 /* Offset of a reg relative to the sh4r structure */
43 #define REG_OFFSET(reg) (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
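/* Note: the -128 bias matches the entry stub (sh4_translate_write_entry_stub below),
 * which points EBP at ((uint8_t *)&sh4r) + 128. The bias presumably lets accesses to
 * the first ~256 bytes of sh4r use a signed 8-bit displacement, e.g.
 * MOVL_rbpdisp_r32( R_PC, REG_EAX ) loads sh4r.pc from [ebp + R_PC]. */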
45 #define R_T REG_OFFSET(t)
46 #define R_Q REG_OFFSET(q)
47 #define R_S REG_OFFSET(s)
48 #define R_M REG_OFFSET(m)
49 #define R_SR REG_OFFSET(sr)
50 #define R_GBR REG_OFFSET(gbr)
51 #define R_SSR REG_OFFSET(ssr)
52 #define R_SPC REG_OFFSET(spc)
53 #define R_VBR REG_OFFSET(vbr)
54 #define R_MACH REG_OFFSET(mac)+4
55 #define R_MACL REG_OFFSET(mac)
56 #define R_PC REG_OFFSET(pc)
57 #define R_NEW_PC REG_OFFSET(new_pc)
58 #define R_PR REG_OFFSET(pr)
59 #define R_SGR REG_OFFSET(sgr)
60 #define R_FPUL REG_OFFSET(fpul)
61 #define R_FPSCR REG_OFFSET(fpscr)
62 #define R_DBR REG_OFFSET(dbr)
63 #define R_R(rn) REG_OFFSET(r[rn])
64 #define R_FR(f) REG_OFFSET(fr[0][(f)^1])
65 #define R_XF(f) REG_OFFSET(fr[1][(f)^1])
66 #define R_DR(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
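/* Note on the FP register indexing: fr[0][] is the current FR bank and fr[1][] the XF
 * bank. The (f)^1 on single-precision indices and the |0x01 / &0x0E split for the two
 * halves of a DR register appear to account for each register pair being stored as one
 * host-order (little-endian) 64-bit value, so the two singles of a pair are swapped in
 * memory relative to their SH4 numbering. */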
74 #define SH4_MODE_UNKNOWN -1
76 struct backpatch_record {
77 uint32_t fixup_offset;
78 uint32_t fixup_icount;
83 * Struct to manage internal translation state. This state is not saved -
84 * it is only valid between calls to sh4_translate_begin_block() and
85 * sh4_translate_end_block()
87 struct sh4_x86_state {
90 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
91 gboolean branch_taken; /* true if we branched unconditionally */
92 gboolean double_prec; /* true if FPU is in double-precision mode */
93 gboolean double_size; /* true if FPU is in double-size mode */
94 gboolean sse3_enabled; /* true if host supports SSE3 instructions */
95 uint32_t block_start_pc;
96 uint32_t stack_posn; /* Trace stack height for alignment purposes */
97 uint32_t sh4_mode; /* Mirror of sh4r.xlat_sh4_mode */
101 gboolean tlb_on; /* True if tlb translation is active */
102 struct mem_region_fn **priv_address_space;
103 struct mem_region_fn **user_address_space;
105 /* Instrumentation */
106 xlat_block_begin_callback_t begin_callback;
107 xlat_block_end_callback_t end_callback;
110 /* Allocated memory for the (block-wide) back-patch list */
111 struct backpatch_record *backpatch_list;
112 uint32_t backpatch_posn;
113 uint32_t backpatch_size;
116 static struct sh4_x86_state sh4_x86;
118 static uint8_t sh4_entry_stub[128];
119 typedef FASTCALL void (*entry_point_t)(void *);
120 entry_point_t sh4_translate_enter;
122 static uint32_t max_int = 0x7FFFFFFF;
123 static uint32_t min_int = 0x80000000;
124 static uint32_t save_fcw; /* save value for fpu control word */
125 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
127 static void sh4_x86_translate_unlink_block( void *use_list );
129 static struct xlat_target_fns x86_target_fns = {
130 sh4_x86_translate_unlink_block
134 gboolean is_sse3_supported()
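    /* CPUID leaf 1 (EAX=1) returns feature flags in ECX; bit 0 indicates SSE3 support. */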
138 __asm__ __volatile__(
139 "mov $0x01, %%eax\n\t"
140 "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
141 return (features & 1) ? TRUE : FALSE;
144 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
146 sh4_x86.priv_address_space = priv;
147 sh4_x86.user_address_space = user;
150 void sh4_translate_write_entry_stub(void)
152 mem_unprotect(sh4_entry_stub, sizeof(sh4_entry_stub));
153 xlat_output = sh4_entry_stub;
155 MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
159 #if SIZEOF_VOID_P == 8
162 CALL_r32( REG_ARG1 );
166 SUBL_imms_r32( 8, REG_ESP );
167 CALL_r32( REG_ARG1 );
168 ADDL_imms_r32( 8, REG_ESP );
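    /* The 8-byte ESP adjustment around the 32-bit call presumably keeps the stack
     * 16-byte aligned at the call into the translated block (assumption - the exact
     * alignment requirement isn't spelled out here). */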
175 sh4_translate_enter = (entry_point_t)sh4_entry_stub;
178 void sh4_translate_init(void)
180 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
181 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
182 sh4_x86.begin_callback = NULL;
183 sh4_x86.end_callback = NULL;
184 sh4_x86.fastmem = TRUE;
185 sh4_x86.sse3_enabled = is_sse3_supported();
186 xlat_set_target_fns(&x86_target_fns);
187 sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
188 sh4_translate_write_entry_stub();
191 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
193 sh4_x86.begin_callback = begin;
194 sh4_x86.end_callback = end;
197 void sh4_translate_set_fastmem( gboolean flag )
199 sh4_x86.fastmem = flag;
202 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
206 if( exc_code == -2 ) {
207 reloc_size = sizeof(void *);
210 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
211 sh4_x86.backpatch_size <<= 1;
212 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
213 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
214 assert( sh4_x86.backpatch_list != NULL );
216 if( sh4_x86.in_delay_slot ) {
220 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
221 (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
222 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
223 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
224 sh4_x86.backpatch_posn++;
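/* Each backpatch record notes where to patch (fixup_offset, relative to the start of the
 * block's code and adjusted for the relocation size), which SH4 instruction the fault
 * belongs to (fixup_icount, in 2-byte instruction units), and an exception code.
 * exc_code == -2 marks a pointer-sized relocation that is later patched with the address
 * of the recovery stub; other values are rel32 fixups resolved in sh4_translate_end_block(). */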
227 #define TSTATE_NONE -1
228 #define TSTATE_O X86_COND_O
229 #define TSTATE_C X86_COND_C
230 #define TSTATE_E X86_COND_E
231 #define TSTATE_NE X86_COND_NE
232 #define TSTATE_G X86_COND_G
233 #define TSTATE_GE X86_COND_GE
234 #define TSTATE_A X86_COND_A
235 #define TSTATE_AE X86_COND_AE
237 #define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
238 #define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
240 /* Convenience instructions */
241 #define LDC_t() CMPB_imms_rbpdisp(1,R_T); CMC()
242 #define SETE_t() SETCCB_cc_rbpdisp(X86_COND_E,R_T)
243 #define SETA_t() SETCCB_cc_rbpdisp(X86_COND_A,R_T)
244 #define SETAE_t() SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
245 #define SETG_t() SETCCB_cc_rbpdisp(X86_COND_G,R_T)
246 #define SETGE_t() SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
247 #define SETC_t() SETCCB_cc_rbpdisp(X86_COND_C,R_T)
248 #define SETO_t() SETCCB_cc_rbpdisp(X86_COND_O,R_T)
249 #define SETNE_t() SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
250 #define SETC_r8(r1) SETCCB_cc_r8(X86_COND_C, r1)
251 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
252 #define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
253 #define JE_label(label) JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
254 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
255 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
256 #define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
257 #define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
258 #define JP_label(label) JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
259 #define JS_label(label) JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
260 #define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
261 #define JNE_exc(exc) JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
263 #define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
264 CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }
266 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
267 #define JT_label(label) LOAD_t() \
268 JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
270 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
271 #define JF_label(label) LOAD_t() \
272 JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
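/* sh4_x86.tstate tracks which host condition code currently mirrors SH4's T flag.
 * TSTATE_NONE means the flags are stale, so LOAD_t() re-derives them by comparing
 * sh4r.t against 1 before the conditional branch is emitted. */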
275 #define load_reg(x86reg,sh4reg) MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
276 #define store_reg(x86reg,sh4reg) MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
279 * Load an FR register (single-precision floating point) into an integer x86
280 * register (e.g. for register-to-register moves)
282 #define load_fr(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
283 #define load_xf(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
286 * Load the low half of a DR register (DR or XD) into an integer x86 register
288 #define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
289 #define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
292 * Store an FR register (single-precision floating point) from an integer x86
293 * register (e.g. for register-to-register moves)
295 #define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
296 #define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
298 #define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
299 #define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
302 #define push_fpul() FLDF_rbpdisp(R_FPUL)
303 #define pop_fpul() FSTPF_rbpdisp(R_FPUL)
304 #define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
305 #define pop_fr(frm) FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
306 #define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
307 #define pop_xf(frm) FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
308 #define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
309 #define pop_dr(frm) FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
310 #define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
311 #define pop_xdr(frm) FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
313 #ifdef ENABLE_SH4STATS
314 #define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
316 #define COUNT_INST(id)
320 /* Exception checks - Note that all exception checks will clobber EAX */
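/* check_priv() aborts the block with an illegal (or slot-illegal) exception when
 * translating in user mode; check_fpuen() emits the SR.FD test only once per block
 * (tracked by fpuen_checked); the check_*align*() macros test the low address bits and
 * branch to a backpatched exception stub via JNE_exc. */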
322 #define check_priv( ) \
323 if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
324 if( sh4_x86.in_delay_slot ) { \
325 exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
327 exit_block_exc(EXC_ILLEGAL, pc, 2); \
329 sh4_x86.branch_taken = TRUE; \
330 sh4_x86.in_delay_slot = DELAY_NONE; \
334 #define check_fpuen( ) \
335 if( !sh4_x86.fpuen_checked ) {\
336 sh4_x86.fpuen_checked = TRUE;\
337 MOVL_rbpdisp_r32( R_SR, REG_EAX );\
338 ANDL_imms_r32( SR_FD, REG_EAX );\
339 if( sh4_x86.in_delay_slot ) {\
340 JNE_exc(EXC_SLOT_FPU_DISABLED);\
342 JNE_exc(EXC_FPU_DISABLED);\
344 sh4_x86.tstate = TSTATE_NONE; \
347 #define check_ralign16( x86reg ) \
348 TESTL_imms_r32( 0x00000001, x86reg ); \
349 JNE_exc(EXC_DATA_ADDR_READ)
351 #define check_walign16( x86reg ) \
352 TESTL_imms_r32( 0x00000001, x86reg ); \
353 JNE_exc(EXC_DATA_ADDR_WRITE);
355 #define check_ralign32( x86reg ) \
356 TESTL_imms_r32( 0x00000003, x86reg ); \
357 JNE_exc(EXC_DATA_ADDR_READ)
359 #define check_walign32( x86reg ) \
360 TESTL_imms_r32( 0x00000003, x86reg ); \
361 JNE_exc(EXC_DATA_ADDR_WRITE);
363 #define check_ralign64( x86reg ) \
364 TESTL_imms_r32( 0x00000007, x86reg ); \
365 JNE_exc(EXC_DATA_ADDR_READ)
367 #define check_walign64( x86reg ) \
368 TESTL_imms_r32( 0x00000007, x86reg ); \
369 JNE_exc(EXC_DATA_ADDR_WRITE);
371 #define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)
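/* Selects the privileged or user memory-region table from the SR.MD bit captured in
 * sh4_x86.sh4_mode at the start of the block, so the generated code is specialised for
 * the mode it was translated under. */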
374 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
375 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
377 #ifdef HAVE_FRAME_ADDRESS
378 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
380 decode_address(address_space(), addr_reg, REG_CALLPTR);
381 if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
382 CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
384 if( addr_reg != REG_ARG1 ) {
385 MOVL_r32_r32( addr_reg, REG_ARG1 );
387 MOVP_immptr_rptr( 0, REG_ARG2 );
388 sh4_x86_add_backpatch( xlat_output, pc, -2 );
389 CALL2_r32disp_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2);
391 if( value_reg != REG_RESULT1 ) {
392 MOVL_r32_r32( REG_RESULT1, value_reg );
396 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
398 decode_address(address_space(), addr_reg, REG_CALLPTR);
399 if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
400 CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
402 if( value_reg != REG_ARG2 ) {
403 MOVL_r32_r32( value_reg, REG_ARG2 );
405 if( addr_reg != REG_ARG1 ) {
406 MOVL_r32_r32( addr_reg, REG_ARG1 );
409 MOVP_immptr_rptr( 0, REG_ARG3 );
410 sh4_x86_add_backpatch( xlat_output, pc, -2 );
411 CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, REG_ARG3);
413 MOVL_imm32_rspdisp( 0, 0 );
414 sh4_x86_add_backpatch( xlat_output, pc, -2 );
415 CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, 0);
420 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
422 decode_address(address_space(), addr_reg, REG_CALLPTR);
423 CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
424 if( value_reg != REG_RESULT1 ) {
425 MOVL_r32_r32( REG_RESULT1, value_reg );
429 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
431 decode_address(address_space(), addr_reg, REG_CALLPTR);
432 CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
436 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
437 #define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
438 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
439 #define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
440 #define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
441 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
442 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
443 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
444 #define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
446 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
448 /** Offset of xlat_sh4_mode field relative to the code pointer */
449 #define XLAT_SH4_MODE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
450 #define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
451 #define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
453 void sh4_translate_begin_block( sh4addr_t pc )
455 sh4_x86.code = xlat_output;
456 sh4_x86.in_delay_slot = FALSE;
457 sh4_x86.fpuen_checked = FALSE;
458 sh4_x86.branch_taken = FALSE;
459 sh4_x86.backpatch_posn = 0;
460 sh4_x86.block_start_pc = pc;
461 sh4_x86.tlb_on = IS_TLB_ENABLED();
462 sh4_x86.tstate = TSTATE_NONE;
463 sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
464 sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
465 sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
466 if( sh4_x86.begin_callback ) {
467 CALL_ptr( sh4_x86.begin_callback );
469 if( sh4_profile_blocks ) {
470 MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
471 ADDL_imms_r32disp( 1, REG_EAX, 0 );
476 uint32_t sh4_translate_end_block_size()
478 uint32_t epilogue_size = EPILOGUE_SIZE;
479 if( sh4_x86.end_callback ) {
480 epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
482 if( sh4_x86.backpatch_posn <= 3 ) {
483 epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
485 epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
487 return epilogue_size;
492 * Embed a breakpoint into the generated code
494 void sh4_translate_emit_breakpoint( sh4vma_t pc )
496 MOVL_imm32_r32( pc, REG_EAX );
497 CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
498 sh4_x86.tstate = TSTATE_NONE;
502 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
505 * Test if the loaded target code pointer in %eax is valid, and if so jump
506 * directly into it, bypassing the normal exit.
508 static void jump_next_block()
510 uint8_t *ptr = xlat_output;
511 TESTP_rptr_rptr(REG_EAX, REG_EAX);
513 if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
514 /* sr/fpscr was changed, possibly updating xlat_sh4_mode, so reload it */
515 MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
516 CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
518 CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
520 JNE_label(wrongmode);
521 if( sh4_x86.end_callback ) {
522 /* Note this does leave the stack out of alignment, but it doesn't matter
523 * for what we're currently using it for.
526 MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
531 JMP_TARGET(wrongmode);
532 MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
533 int rel = ptr - xlat_output;
541 void FASTCALL sh4_translate_link_block( uint32_t pc )
543 uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
544 while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
545 target = XLAT_BLOCK_CHAIN(target);
547 if( target == NULL ) {
548 target = sh4_translate_basic_block( pc );
550 uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
552 *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)-5;
553 *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
554 XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;
556 uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
557 assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
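/* Block linking: sh4_translate_link_block() is reached via the call emitted by
 * emit_translate_and_backpatch() below. It looks up (or translates) the target block,
 * rewrites the caller's call site into a direct 5-byte jmp to that block, and threads
 * the patched site onto the target's use_list so sh4_x86_translate_unlink_block() can
 * restore the call if the target block is later invalidated. */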
561 static void emit_translate_and_backpatch()
563 /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
564 CALL1_ptr_r32(sh4_translate_link_block, REG_ARG1);
566 /* When patched, the jmp instruction will be 5 bytes (either platform) -
567 * we need to reserve sizeof(void*) bytes after it for the use-list pointer.
570 if( sizeof(void*) == 8 ) {
578 * If we're jumping to a fixed address (or at least one fixed relative to the
579 * current PC), we can do a direct branch. REG_ARG1 should contain
580 * the PC at this point.
582 static void jump_next_block_fixed_pc( sh4addr_t pc )
584 if( IS_IN_ICACHE(pc) ) {
585 if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
586 /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
587 * fetch-and-backpatch routine, which will replace the call with a branch */
588 emit_translate_and_backpatch();
591 MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
592 ANDP_imms_rptr( -4, REG_EAX );
594 } else if( sh4_x86.tlb_on ) {
595 CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
597 CALL1_ptr_r32(xlat_get_code, REG_ARG1);
604 static void sh4_x86_translate_unlink_block( void *use_list )
606 uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
607 void *next = use_list;
608 while( next != NULL ) {
609 xlat_output = (uint8_t *)next;
610 next = *(void **)(xlat_output+5);
611 emit_translate_and_backpatch();
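/* The exit_block_*() variants below all account for elapsed time the same way:
 * (pc - block_start_pc) >> 1 converts the byte offset into an instruction count
 * (SH4 instructions are 2 bytes each), which is multiplied by sh4_cpu_period and
 * added to slice_cycle before the next block is looked up. */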
618 static void exit_block()
620 if( sh4_x86.end_callback ) {
621 MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
629 * Exit the block with sh4r.pc already written
631 void exit_block_pcset( sh4addr_t pc )
633 MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
634 ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
635 MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
636 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
638 MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
639 if( sh4_x86.tlb_on ) {
640 CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
642 CALL1_ptr_r32(xlat_get_code,REG_ARG1);
646 JMP_TARGET(exitloop);
651 * Exit the block with sh4r.new_pc written with the target pc
653 void exit_block_newpcset( sh4addr_t pc )
655 MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
656 ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
657 MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
658 MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
659 MOVL_r32_rbpdisp( REG_ARG1, R_PC );
660 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
662 if( sh4_x86.tlb_on ) {
663 CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
665 CALL1_ptr_r32(xlat_get_code,REG_ARG1);
669 JMP_TARGET(exitloop);
675 * Exit the block to an absolute PC
677 void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
679 MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
680 ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
681 MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
683 MOVL_imm32_r32( pc, REG_ARG1 );
684 MOVL_r32_rbpdisp( REG_ARG1, R_PC );
685 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
687 jump_next_block_fixed_pc(pc);
688 JMP_TARGET(exitloop);
693 * Exit the block to a relative PC
695 void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
697 MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
698 ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
699 MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
701 if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
702 /* Special case for tight loops - the PC doesn't change, and
703 * we already know the target address. Just check for pending events before looping.
706 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
707 uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output));
708 JCC_cc_prerel(X86_COND_A, backdisp);
710 MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
711 ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
712 MOVL_r32_rbpdisp( REG_ARG1, R_PC );
713 CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
714 JBE_label(exitloop2);
716 jump_next_block_fixed_pc(pc);
717 JMP_TARGET(exitloop2);
723 * Exit unconditionally with a general exception
725 void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
727 MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
728 ADDL_r32_rbpdisp( REG_ECX, R_PC );
729 MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
730 ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
731 MOVL_imm32_r32( code, REG_ARG1 );
732 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
737 * Embed a call to sh4_execute_instruction for situations that we
738 * can't translate (just page-crossing delay slots at the moment).
739 * Caller is responsible for setting new_pc before calling this function.
743 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
744 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
745 * Call sh4_execute_instruction
746 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
748 void exit_block_emu( sh4vma_t endpc )
750 MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX ); // 5
751 ADDL_r32_rbpdisp( REG_ECX, R_PC );
753 MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
754 ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) ); // 6
755 MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
756 MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
758 CALL_ptr( sh4_execute_instruction );
763 * Write the block trailer (exception handling block)
765 void sh4_translate_end_block( sh4addr_t pc ) {
766 if( sh4_x86.branch_taken == FALSE ) {
767 // Didn't exit unconditionally already, so write the termination here
768 exit_block_rel( pc, pc );
770 if( sh4_x86.backpatch_posn != 0 ) {
772 // Exception raised - cleanup and exit
773 uint8_t *end_ptr = xlat_output;
774 MOVL_r32_r32( REG_EDX, REG_ECX );
775 ADDL_r32_r32( REG_EDX, REG_ECX );
776 ADDL_r32_rbpdisp( REG_ECX, R_SPC );
777 MOVL_moffptr_eax( &sh4_cpu_period );
778 INC_r32( REG_EDX ); /* Add 1 for the aborting instruction itself */
780 ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
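    /* What follows emits one recovery stub per backpatch record. The cleanup code above
     * (at end_ptr) expects the faulting instruction's index in EDX: it adds 2*EDX to SPC
     * and, per the sequence above, (EDX+1)*sh4_cpu_period to slice_cycle. Each stub loads
     * fixup_icount into EDX (raising the SH4 exception first for positive exc_codes) and
     * jumps back to end_ptr. */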
783 for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
784 uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
785 if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
786 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
787 *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
789 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
791 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
792 int rel = end_ptr - xlat_output;
795 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
796 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
797 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
798 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
799 int rel = end_ptr - xlat_output;
807 * Translate a single instruction. Delayed branches are handled specially
808 * by translating the branch and its delayed instruction as a single unit.
810 * The instruction MUST be in the icache (assert check)
812 * @return true if the instruction marks the end of a basic block
815 uint32_t sh4_translate_instruction( sh4vma_t pc )
818 /* Read instruction from icache */
819 assert( IS_IN_ICACHE(pc) );
820 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
822 if( !sh4_x86.in_delay_slot ) {
823 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
826 /* check for breakpoints at this pc */
827 for( int i=0; i<sh4_breakpoint_count; i++ ) {
828 if( sh4_breakpoints[i].address == pc ) {
829 sh4_translate_emit_breakpoint(pc);
837 load_reg( REG_EAX, Rm );
838 load_reg( REG_ECX, Rn );
839 ADDL_r32_r32( REG_EAX, REG_ECX );
840 store_reg( REG_ECX, Rn );
841 sh4_x86.tstate = TSTATE_NONE;
845 ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
846 sh4_x86.tstate = TSTATE_NONE;
850 if( sh4_x86.tstate != TSTATE_C ) {
853 load_reg( REG_EAX, Rm );
854 load_reg( REG_ECX, Rn );
855 ADCL_r32_r32( REG_EAX, REG_ECX );
856 store_reg( REG_ECX, Rn );
858 sh4_x86.tstate = TSTATE_C;
862 load_reg( REG_EAX, Rm );
863 load_reg( REG_ECX, Rn );
864 ADDL_r32_r32( REG_EAX, REG_ECX );
865 store_reg( REG_ECX, Rn );
867 sh4_x86.tstate = TSTATE_O;
871 load_reg( REG_EAX, Rm );
872 load_reg( REG_ECX, Rn );
873 ANDL_r32_r32( REG_EAX, REG_ECX );
874 store_reg( REG_ECX, Rn );
875 sh4_x86.tstate = TSTATE_NONE;
879 load_reg( REG_EAX, 0 );
880 ANDL_imms_r32(imm, REG_EAX);
881 store_reg( REG_EAX, 0 );
882 sh4_x86.tstate = TSTATE_NONE;
884 AND.B #imm, @(R0, GBR) {:
886 load_reg( REG_EAX, 0 );
887 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
888 MOVL_r32_r32(REG_EAX, REG_SAVE1);
889 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
890 MOVL_r32_r32(REG_SAVE1, REG_EAX);
891 ANDL_imms_r32(imm, REG_EDX );
892 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
893 sh4_x86.tstate = TSTATE_NONE;
897 load_reg( REG_EAX, Rm );
898 load_reg( REG_ECX, Rn );
899 CMPL_r32_r32( REG_EAX, REG_ECX );
901 sh4_x86.tstate = TSTATE_E;
904 COUNT_INST(I_CMPEQI);
905 load_reg( REG_EAX, 0 );
906 CMPL_imms_r32(imm, REG_EAX);
908 sh4_x86.tstate = TSTATE_E;
912 load_reg( REG_EAX, Rm );
913 load_reg( REG_ECX, Rn );
914 CMPL_r32_r32( REG_EAX, REG_ECX );
916 sh4_x86.tstate = TSTATE_GE;
920 load_reg( REG_EAX, Rm );
921 load_reg( REG_ECX, Rn );
922 CMPL_r32_r32( REG_EAX, REG_ECX );
924 sh4_x86.tstate = TSTATE_G;
928 load_reg( REG_EAX, Rm );
929 load_reg( REG_ECX, Rn );
930 CMPL_r32_r32( REG_EAX, REG_ECX );
932 sh4_x86.tstate = TSTATE_A;
936 load_reg( REG_EAX, Rm );
937 load_reg( REG_ECX, Rn );
938 CMPL_r32_r32( REG_EAX, REG_ECX );
940 sh4_x86.tstate = TSTATE_AE;
944 load_reg( REG_EAX, Rn );
945 CMPL_imms_r32( 0, REG_EAX );
947 sh4_x86.tstate = TSTATE_G;
951 load_reg( REG_EAX, Rn );
952 CMPL_imms_r32( 0, REG_EAX );
954 sh4_x86.tstate = TSTATE_GE;
957 COUNT_INST(I_CMPSTR);
958 load_reg( REG_EAX, Rm );
959 load_reg( REG_ECX, Rn );
960 XORL_r32_r32( REG_ECX, REG_EAX );
961 TESTB_r8_r8( REG_AL, REG_AL );
963 TESTB_r8_r8( REG_AH, REG_AH );
965 SHRL_imm_r32( 16, REG_EAX );
966 TESTB_r8_r8( REG_AL, REG_AL );
968 TESTB_r8_r8( REG_AH, REG_AH );
973 sh4_x86.tstate = TSTATE_E;
977 load_reg( REG_EAX, Rm );
978 load_reg( REG_ECX, Rn );
979 SHRL_imm_r32( 31, REG_EAX );
980 SHRL_imm_r32( 31, REG_ECX );
981 MOVL_r32_rbpdisp( REG_EAX, R_M );
982 MOVL_r32_rbpdisp( REG_ECX, R_Q );
983 CMPL_r32_r32( REG_EAX, REG_ECX );
985 sh4_x86.tstate = TSTATE_NE;
989 XORL_r32_r32( REG_EAX, REG_EAX );
990 MOVL_r32_rbpdisp( REG_EAX, R_Q );
991 MOVL_r32_rbpdisp( REG_EAX, R_M );
992 MOVL_r32_rbpdisp( REG_EAX, R_T );
993 sh4_x86.tstate = TSTATE_C; // works for DIV1
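/* DIV1 below performs one step of the SH4's 1-bit non-restoring division: the dividend
 * in Rn is rotated left through T, the divisor Rm is added or subtracted depending on
 * whether Q == M, and Q and T are then recomputed from the carry out (see the Q'/tmp1
 * comments in the generated sequence). */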
997 MOVL_rbpdisp_r32( R_M, REG_ECX );
998 load_reg( REG_EAX, Rn );
999 if( sh4_x86.tstate != TSTATE_C ) {
1002 RCLL_imm_r32( 1, REG_EAX );
1003 SETC_r8( REG_DL ); // Q'
1004 CMPL_rbpdisp_r32( R_Q, REG_ECX );
1006 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1008 JMP_TARGET(mqequal);
1009 SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1011 store_reg( REG_EAX, Rn ); // Done with Rn now
1012 SETC_r8(REG_AL); // tmp1
1013 XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
1014 XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
1015 MOVL_r32_rbpdisp( REG_ECX, R_Q );
1016 XORL_imms_r32( 1, REG_AL ); // T = !Q'
1017 MOVZXL_r8_r32( REG_AL, REG_EAX );
1018 MOVL_r32_rbpdisp( REG_EAX, R_T );
1019 sh4_x86.tstate = TSTATE_NONE;
1022 COUNT_INST(I_DMULS);
1023 load_reg( REG_EAX, Rm );
1024 load_reg( REG_ECX, Rn );
1026 MOVL_r32_rbpdisp( REG_EDX, R_MACH );
1027 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1028 sh4_x86.tstate = TSTATE_NONE;
1031 COUNT_INST(I_DMULU);
1032 load_reg( REG_EAX, Rm );
1033 load_reg( REG_ECX, Rn );
1035 MOVL_r32_rbpdisp( REG_EDX, R_MACH );
1036 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1037 sh4_x86.tstate = TSTATE_NONE;
1041 load_reg( REG_EAX, Rn );
1042 ADDL_imms_r32( -1, REG_EAX );
1043 store_reg( REG_EAX, Rn );
1045 sh4_x86.tstate = TSTATE_E;
1048 COUNT_INST(I_EXTSB);
1049 load_reg( REG_EAX, Rm );
1050 MOVSXL_r8_r32( REG_EAX, REG_EAX );
1051 store_reg( REG_EAX, Rn );
1054 COUNT_INST(I_EXTSW);
1055 load_reg( REG_EAX, Rm );
1056 MOVSXL_r16_r32( REG_EAX, REG_EAX );
1057 store_reg( REG_EAX, Rn );
1060 COUNT_INST(I_EXTUB);
1061 load_reg( REG_EAX, Rm );
1062 MOVZXL_r8_r32( REG_EAX, REG_EAX );
1063 store_reg( REG_EAX, Rn );
1066 COUNT_INST(I_EXTUW);
1067 load_reg( REG_EAX, Rm );
1068 MOVZXL_r16_r32( REG_EAX, REG_EAX );
1069 store_reg( REG_EAX, Rn );
1074 load_reg( REG_EAX, Rm );
1075 check_ralign32( REG_EAX );
1076 MEM_READ_LONG( REG_EAX, REG_EAX );
1077 MOVL_r32_r32(REG_EAX, REG_SAVE1);
1078 load_reg( REG_EAX, Rm );
1079 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
1080 MEM_READ_LONG( REG_EAX, REG_EAX );
1081 ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
1083 load_reg( REG_EAX, Rm );
1084 check_ralign32( REG_EAX );
1085 MEM_READ_LONG( REG_EAX, REG_EAX );
1086 MOVL_r32_r32(REG_EAX, REG_SAVE1);
1087 load_reg( REG_EAX, Rn );
1088 check_ralign32( REG_EAX );
1089 MEM_READ_LONG( REG_EAX, REG_EAX );
1090 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
1091 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
1094 IMULL_r32( REG_SAVE1 );
1095 ADDL_r32_rbpdisp( REG_EAX, R_MACL );
1096 ADCL_r32_rbpdisp( REG_EDX, R_MACH );
1098 MOVL_rbpdisp_r32( R_S, REG_ECX );
1099 TESTL_r32_r32(REG_ECX, REG_ECX);
1101 CALL_ptr( signsat48 );
1102 JMP_TARGET( nosat );
1103 sh4_x86.tstate = TSTATE_NONE;
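/* MAC.W below is the 16-bit multiply-accumulate: when S=1 the product accumulates into
 * MACL only, saturating to 0x80000000/0x7FFFFFFF on overflow with MACH set to 1 as the
 * overflow flag; when S=0 it accumulates into the full 64-bit MACH:MACL pair. */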
1108 load_reg( REG_EAX, Rm );
1109 check_ralign16( REG_EAX );
1110 MEM_READ_WORD( REG_EAX, REG_EAX );
1111 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1112 load_reg( REG_EAX, Rm );
1113 LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
1114 MEM_READ_WORD( REG_EAX, REG_EAX );
1115 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
1116 // Note: the address is translated twice in case it crosses a page boundary. Maybe worth
1117 // adding a page-boundary check to skip the second translation
1119 load_reg( REG_EAX, Rn );
1120 check_ralign16( REG_EAX );
1121 MEM_READ_WORD( REG_EAX, REG_EAX );
1122 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1123 load_reg( REG_EAX, Rm );
1124 check_ralign16( REG_EAX );
1125 MEM_READ_WORD( REG_EAX, REG_EAX );
1126 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
1127 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
1129 IMULL_r32( REG_SAVE1 );
1130 MOVL_rbpdisp_r32( R_S, REG_ECX );
1131 TESTL_r32_r32( REG_ECX, REG_ECX );
1134 ADDL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1135 JNO_label( end ); // 2
1136 MOVL_imm32_r32( 1, REG_EDX ); // 5
1137 MOVL_r32_rbpdisp( REG_EDX, R_MACH ); // 6
1138 JS_label( positive ); // 2
1139 MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
1140 MOVL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1141 JMP_label(end2); // 2
1143 JMP_TARGET(positive);
1144 MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
1145 MOVL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1146 JMP_label(end3); // 2
1149 ADDL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1150 ADCL_r32_rbpdisp( REG_EDX, R_MACH ); // 6
1154 sh4_x86.tstate = TSTATE_NONE;
1158 MOVL_rbpdisp_r32( R_T, REG_EAX );
1159 store_reg( REG_EAX, Rn );
1163 load_reg( REG_EAX, Rm );
1164 load_reg( REG_ECX, Rn );
1165 MULL_r32( REG_ECX );
1166 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1167 sh4_x86.tstate = TSTATE_NONE;
1170 COUNT_INST(I_MULSW);
1171 MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
1172 MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
1173 MULL_r32( REG_ECX );
1174 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1175 sh4_x86.tstate = TSTATE_NONE;
1178 COUNT_INST(I_MULUW);
1179 MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
1180 MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
1181 MULL_r32( REG_ECX );
1182 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1183 sh4_x86.tstate = TSTATE_NONE;
1187 load_reg( REG_EAX, Rm );
1188 NEGL_r32( REG_EAX );
1189 store_reg( REG_EAX, Rn );
1190 sh4_x86.tstate = TSTATE_NONE;
1194 load_reg( REG_EAX, Rm );
1195 XORL_r32_r32( REG_ECX, REG_ECX );
1197 SBBL_r32_r32( REG_EAX, REG_ECX );
1198 store_reg( REG_ECX, Rn );
1200 sh4_x86.tstate = TSTATE_C;
1204 load_reg( REG_EAX, Rm );
1205 NOTL_r32( REG_EAX );
1206 store_reg( REG_EAX, Rn );
1207 sh4_x86.tstate = TSTATE_NONE;
1211 load_reg( REG_EAX, Rm );
1212 load_reg( REG_ECX, Rn );
1213 ORL_r32_r32( REG_EAX, REG_ECX );
1214 store_reg( REG_ECX, Rn );
1215 sh4_x86.tstate = TSTATE_NONE;
1219 load_reg( REG_EAX, 0 );
1220 ORL_imms_r32(imm, REG_EAX);
1221 store_reg( REG_EAX, 0 );
1222 sh4_x86.tstate = TSTATE_NONE;
1224 OR.B #imm, @(R0, GBR) {:
1226 load_reg( REG_EAX, 0 );
1227 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1228 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1229 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
1230 MOVL_r32_r32( REG_SAVE1, REG_EAX );
1231 ORL_imms_r32(imm, REG_EDX );
1232 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1233 sh4_x86.tstate = TSTATE_NONE;
1236 COUNT_INST(I_ROTCL);
1237 load_reg( REG_EAX, Rn );
1238 if( sh4_x86.tstate != TSTATE_C ) {
1241 RCLL_imm_r32( 1, REG_EAX );
1242 store_reg( REG_EAX, Rn );
1244 sh4_x86.tstate = TSTATE_C;
1247 COUNT_INST(I_ROTCR);
1248 load_reg( REG_EAX, Rn );
1249 if( sh4_x86.tstate != TSTATE_C ) {
1252 RCRL_imm_r32( 1, REG_EAX );
1253 store_reg( REG_EAX, Rn );
1255 sh4_x86.tstate = TSTATE_C;
1259 load_reg( REG_EAX, Rn );
1260 ROLL_imm_r32( 1, REG_EAX );
1261 store_reg( REG_EAX, Rn );
1263 sh4_x86.tstate = TSTATE_C;
1267 load_reg( REG_EAX, Rn );
1268 RORL_imm_r32( 1, REG_EAX );
1269 store_reg( REG_EAX, Rn );
1271 sh4_x86.tstate = TSTATE_C;
1275 /* Annoyingly enough, not directly convertible */
1276 load_reg( REG_EAX, Rn );
1277 load_reg( REG_ECX, Rm );
1278 CMPL_imms_r32( 0, REG_ECX );
1281 NEGL_r32( REG_ECX ); // 2
1282 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1283 JE_label(emptysar); // 2
1284 SARL_cl_r32( REG_EAX ); // 2
1285 JMP_label(end); // 2
1287 JMP_TARGET(emptysar);
1288 SARL_imm_r32(31, REG_EAX ); // 3
1292 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1293 SHLL_cl_r32( REG_EAX ); // 2
1296 store_reg( REG_EAX, Rn );
1297 sh4_x86.tstate = TSTATE_NONE;
1301 load_reg( REG_EAX, Rn );
1302 load_reg( REG_ECX, Rm );
1303 CMPL_imms_r32( 0, REG_ECX );
1306 NEGL_r32( REG_ECX ); // 2
1307 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1308 JE_label(emptyshr );
1309 SHRL_cl_r32( REG_EAX ); // 2
1310 JMP_label(end); // 2
1312 JMP_TARGET(emptyshr);
1313 XORL_r32_r32( REG_EAX, REG_EAX );
1317 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1318 SHLL_cl_r32( REG_EAX ); // 2
1321 store_reg( REG_EAX, Rn );
1322 sh4_x86.tstate = TSTATE_NONE;
1326 load_reg( REG_EAX, Rn );
1327 SHLL_imm_r32( 1, REG_EAX );
1329 store_reg( REG_EAX, Rn );
1330 sh4_x86.tstate = TSTATE_C;
1334 load_reg( REG_EAX, Rn );
1335 SARL_imm_r32( 1, REG_EAX );
1337 store_reg( REG_EAX, Rn );
1338 sh4_x86.tstate = TSTATE_C;
1342 load_reg( REG_EAX, Rn );
1343 SHLL_imm_r32( 1, REG_EAX );
1345 store_reg( REG_EAX, Rn );
1346 sh4_x86.tstate = TSTATE_C;
1350 load_reg( REG_EAX, Rn );
1351 SHLL_imm_r32( 2, REG_EAX );
1352 store_reg( REG_EAX, Rn );
1353 sh4_x86.tstate = TSTATE_NONE;
1357 load_reg( REG_EAX, Rn );
1358 SHLL_imm_r32( 8, REG_EAX );
1359 store_reg( REG_EAX, Rn );
1360 sh4_x86.tstate = TSTATE_NONE;
1364 load_reg( REG_EAX, Rn );
1365 SHLL_imm_r32( 16, REG_EAX );
1366 store_reg( REG_EAX, Rn );
1367 sh4_x86.tstate = TSTATE_NONE;
1371 load_reg( REG_EAX, Rn );
1372 SHRL_imm_r32( 1, REG_EAX );
1374 store_reg( REG_EAX, Rn );
1375 sh4_x86.tstate = TSTATE_C;
1379 load_reg( REG_EAX, Rn );
1380 SHRL_imm_r32( 2, REG_EAX );
1381 store_reg( REG_EAX, Rn );
1382 sh4_x86.tstate = TSTATE_NONE;
1386 load_reg( REG_EAX, Rn );
1387 SHRL_imm_r32( 8, REG_EAX );
1388 store_reg( REG_EAX, Rn );
1389 sh4_x86.tstate = TSTATE_NONE;
1393 load_reg( REG_EAX, Rn );
1394 SHRL_imm_r32( 16, REG_EAX );
1395 store_reg( REG_EAX, Rn );
1396 sh4_x86.tstate = TSTATE_NONE;
1400 load_reg( REG_EAX, Rm );
1401 load_reg( REG_ECX, Rn );
1402 SUBL_r32_r32( REG_EAX, REG_ECX );
1403 store_reg( REG_ECX, Rn );
1404 sh4_x86.tstate = TSTATE_NONE;
1408 load_reg( REG_EAX, Rm );
1409 load_reg( REG_ECX, Rn );
1410 if( sh4_x86.tstate != TSTATE_C ) {
1413 SBBL_r32_r32( REG_EAX, REG_ECX );
1414 store_reg( REG_ECX, Rn );
1416 sh4_x86.tstate = TSTATE_C;
1420 load_reg( REG_EAX, Rm );
1421 load_reg( REG_ECX, Rn );
1422 SUBL_r32_r32( REG_EAX, REG_ECX );
1423 store_reg( REG_ECX, Rn );
1425 sh4_x86.tstate = TSTATE_O;
1428 COUNT_INST(I_SWAPB);
1429 load_reg( REG_EAX, Rm );
1430 XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
1431 store_reg( REG_EAX, Rn );
1434 COUNT_INST(I_SWAPB);
1435 load_reg( REG_EAX, Rm );
1436 MOVL_r32_r32( REG_EAX, REG_ECX );
1437 SHLL_imm_r32( 16, REG_ECX );
1438 SHRL_imm_r32( 16, REG_EAX );
1439 ORL_r32_r32( REG_EAX, REG_ECX );
1440 store_reg( REG_ECX, Rn );
1441 sh4_x86.tstate = TSTATE_NONE;
1445 load_reg( REG_EAX, Rn );
1446 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1447 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
1448 TESTB_r8_r8( REG_DL, REG_DL );
1450 ORB_imms_r8( 0x80, REG_DL );
1451 MOVL_r32_r32( REG_SAVE1, REG_EAX );
1452 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1453 sh4_x86.tstate = TSTATE_NONE;
1457 load_reg( REG_EAX, Rm );
1458 load_reg( REG_ECX, Rn );
1459 TESTL_r32_r32( REG_EAX, REG_ECX );
1461 sh4_x86.tstate = TSTATE_E;
1465 load_reg( REG_EAX, 0 );
1466 TESTL_imms_r32( imm, REG_EAX );
1468 sh4_x86.tstate = TSTATE_E;
1470 TST.B #imm, @(R0, GBR) {:
1472 load_reg( REG_EAX, 0);
1473 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1474 MEM_READ_BYTE( REG_EAX, REG_EAX );
1475 TESTB_imms_r8( imm, REG_AL );
1477 sh4_x86.tstate = TSTATE_E;
1481 load_reg( REG_EAX, Rm );
1482 load_reg( REG_ECX, Rn );
1483 XORL_r32_r32( REG_EAX, REG_ECX );
1484 store_reg( REG_ECX, Rn );
1485 sh4_x86.tstate = TSTATE_NONE;
1489 load_reg( REG_EAX, 0 );
1490 XORL_imms_r32( imm, REG_EAX );
1491 store_reg( REG_EAX, 0 );
1492 sh4_x86.tstate = TSTATE_NONE;
1494 XOR.B #imm, @(R0, GBR) {:
1496 load_reg( REG_EAX, 0 );
1497 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1498 MOVL_r32_r32( REG_EAX, REG_SAVE1 );
1499 MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
1500 MOVL_r32_r32( REG_SAVE1, REG_EAX );
1501 XORL_imms_r32( imm, REG_EDX );
1502 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1503 sh4_x86.tstate = TSTATE_NONE;
1506 COUNT_INST(I_XTRCT);
1507 load_reg( REG_EAX, Rm );
1508 load_reg( REG_ECX, Rn );
1509 SHLL_imm_r32( 16, REG_EAX );
1510 SHRL_imm_r32( 16, REG_ECX );
1511 ORL_r32_r32( REG_EAX, REG_ECX );
1512 store_reg( REG_ECX, Rn );
1513 sh4_x86.tstate = TSTATE_NONE;
1516 /* Data move instructions */
1519 load_reg( REG_EAX, Rm );
1520 store_reg( REG_EAX, Rn );
1524 MOVL_imm32_r32( imm, REG_EAX );
1525 store_reg( REG_EAX, Rn );
1529 load_reg( REG_EAX, Rn );
1530 load_reg( REG_EDX, Rm );
1531 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1532 sh4_x86.tstate = TSTATE_NONE;
1536 load_reg( REG_EAX, Rn );
1537 LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
1538 load_reg( REG_EDX, Rm );
1539 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1540 ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
1541 sh4_x86.tstate = TSTATE_NONE;
1543 MOV.B Rm, @(R0, Rn) {:
1545 load_reg( REG_EAX, 0 );
1546 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1547 load_reg( REG_EDX, Rm );
1548 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1549 sh4_x86.tstate = TSTATE_NONE;
1551 MOV.B R0, @(disp, GBR) {:
1553 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1554 ADDL_imms_r32( disp, REG_EAX );
1555 load_reg( REG_EDX, 0 );
1556 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1557 sh4_x86.tstate = TSTATE_NONE;
1559 MOV.B R0, @(disp, Rn) {:
1561 load_reg( REG_EAX, Rn );
1562 ADDL_imms_r32( disp, REG_EAX );
1563 load_reg( REG_EDX, 0 );
1564 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1565 sh4_x86.tstate = TSTATE_NONE;
1569 load_reg( REG_EAX, Rm );
1570 MEM_READ_BYTE( REG_EAX, REG_EAX );
1571 store_reg( REG_EAX, Rn );
1572 sh4_x86.tstate = TSTATE_NONE;
1576 load_reg( REG_EAX, Rm );
1577 MEM_READ_BYTE( REG_EAX, REG_EAX );
1579 ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
1581 store_reg( REG_EAX, Rn );
1582 sh4_x86.tstate = TSTATE_NONE;
1584 MOV.B @(R0, Rm), Rn {:
1586 load_reg( REG_EAX, 0 );
1587 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1588 MEM_READ_BYTE( REG_EAX, REG_EAX );
1589 store_reg( REG_EAX, Rn );
1590 sh4_x86.tstate = TSTATE_NONE;
1592 MOV.B @(disp, GBR), R0 {:
1594 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1595 ADDL_imms_r32( disp, REG_EAX );
1596 MEM_READ_BYTE( REG_EAX, REG_EAX );
1597 store_reg( REG_EAX, 0 );
1598 sh4_x86.tstate = TSTATE_NONE;
1600 MOV.B @(disp, Rm), R0 {:
1602 load_reg( REG_EAX, Rm );
1603 ADDL_imms_r32( disp, REG_EAX );
1604 MEM_READ_BYTE( REG_EAX, REG_EAX );
1605 store_reg( REG_EAX, 0 );
1606 sh4_x86.tstate = TSTATE_NONE;
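/* MOV.L Rm, @Rn below special-cases the store-queue region: addresses in
 * 0xE0000000-0xE3FFFFFF are written straight into sh4r.store_queue (indexed by the low
 * address bits & 0x3C) instead of going through the memory-region call. */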
1610 load_reg( REG_EAX, Rn );
1611 check_walign32(REG_EAX);
1612 MOVL_r32_r32( REG_EAX, REG_ECX );
1613 ANDL_imms_r32( 0xFC000000, REG_ECX );
1614 CMPL_imms_r32( 0xE0000000, REG_ECX );
1616 ANDL_imms_r32( 0x3C, REG_EAX );
1617 load_reg( REG_EDX, Rm );
1618 MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
1621 load_reg( REG_EDX, Rm );
1622 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1624 sh4_x86.tstate = TSTATE_NONE;
1628 load_reg( REG_EAX, Rn );
1629 ADDL_imms_r32( -4, REG_EAX );
1630 check_walign32( REG_EAX );
1631 load_reg( REG_EDX, Rm );
1632 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1633 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
1634 sh4_x86.tstate = TSTATE_NONE;
1636 MOV.L Rm, @(R0, Rn) {:
1638 load_reg( REG_EAX, 0 );
1639 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1640 check_walign32( REG_EAX );
1641 load_reg( REG_EDX, Rm );
1642 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1643 sh4_x86.tstate = TSTATE_NONE;
1645 MOV.L R0, @(disp, GBR) {:
1647 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1648 ADDL_imms_r32( disp, REG_EAX );
1649 check_walign32( REG_EAX );
1650 load_reg( REG_EDX, 0 );
1651 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1652 sh4_x86.tstate = TSTATE_NONE;
1654 MOV.L Rm, @(disp, Rn) {:
1656 load_reg( REG_EAX, Rn );
1657 ADDL_imms_r32( disp, REG_EAX );
1658 check_walign32( REG_EAX );
1659 MOVL_r32_r32( REG_EAX, REG_ECX );
1660 ANDL_imms_r32( 0xFC000000, REG_ECX );
1661 CMPL_imms_r32( 0xE0000000, REG_ECX );
1663 ANDL_imms_r32( 0x3C, REG_EAX );
1664 load_reg( REG_EDX, Rm );
1665 MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
1668 load_reg( REG_EDX, Rm );
1669 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1671 sh4_x86.tstate = TSTATE_NONE;
1675 load_reg( REG_EAX, Rm );
1676 check_ralign32( REG_EAX );
1677 MEM_READ_LONG( REG_EAX, REG_EAX );
1678 store_reg( REG_EAX, Rn );
1679 sh4_x86.tstate = TSTATE_NONE;
1683 load_reg( REG_EAX, Rm );
1684 check_ralign32( REG_EAX );
1685 MEM_READ_LONG( REG_EAX, REG_EAX );
1687 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
1689 store_reg( REG_EAX, Rn );
1690 sh4_x86.tstate = TSTATE_NONE;
1692 MOV.L @(R0, Rm), Rn {:
1694 load_reg( REG_EAX, 0 );
1695 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1696 check_ralign32( REG_EAX );
1697 MEM_READ_LONG( REG_EAX, REG_EAX );
1698 store_reg( REG_EAX, Rn );
1699 sh4_x86.tstate = TSTATE_NONE;
1701 MOV.L @(disp, GBR), R0 {:
1703 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1704 ADDL_imms_r32( disp, REG_EAX );
1705 check_ralign32( REG_EAX );
1706 MEM_READ_LONG( REG_EAX, REG_EAX );
1707 store_reg( REG_EAX, 0 );
1708 sh4_x86.tstate = TSTATE_NONE;
1710 MOV.L @(disp, PC), Rn {:
1711 COUNT_INST(I_MOVLPC);
1712 if( sh4_x86.in_delay_slot ) {
1715 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1716 if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
1717 // If the target address is in the same page as the code, it's
1718 // pretty safe to just ref it directly and circumvent the whole
1719 // memory subsystem. (this is a big performance win)
1721 // FIXME: There's a corner-case that's not handled here when
1722 // the current code-page is in the ITLB but not in the UTLB.
1723 // (it should generate a TLB miss, although SH4 behaviour needs to be
1724 // tested to confirm). It's unlikely that anyone depends on this
1725 // behaviour though.
1726 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1727 MOVL_moffptr_eax( ptr );
1729 // Note: we use sh4r.pc for the calc as we could be running at a
1730 // different virtual address than the translation was done with,
1731 // but we can safely assume that the low bits are the same.
1732 MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
1733 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1734 MEM_READ_LONG( REG_EAX, REG_EAX );
1735 sh4_x86.tstate = TSTATE_NONE;
1737 store_reg( REG_EAX, Rn );
1740 MOV.L @(disp, Rm), Rn {:
1742 load_reg( REG_EAX, Rm );
1743 ADDL_imms_r32( disp, REG_EAX );
1744 check_ralign32( REG_EAX );
1745 MEM_READ_LONG( REG_EAX, REG_EAX );
1746 store_reg( REG_EAX, Rn );
1747 sh4_x86.tstate = TSTATE_NONE;
1751 load_reg( REG_EAX, Rn );
1752 check_walign16( REG_EAX );
1753 load_reg( REG_EDX, Rm );
1754 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1755 sh4_x86.tstate = TSTATE_NONE;
1759 load_reg( REG_EAX, Rn );
1760 check_walign16( REG_EAX );
1761 LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
1762 load_reg( REG_EDX, Rm );
1763 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1764 ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
1765 sh4_x86.tstate = TSTATE_NONE;
1767 MOV.W Rm, @(R0, Rn) {:
1769 load_reg( REG_EAX, 0 );
1770 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1771 check_walign16( REG_EAX );
1772 load_reg( REG_EDX, Rm );
1773 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1774 sh4_x86.tstate = TSTATE_NONE;
1776 MOV.W R0, @(disp, GBR) {:
1778 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1779 ADDL_imms_r32( disp, REG_EAX );
1780 check_walign16( REG_EAX );
1781 load_reg( REG_EDX, 0 );
1782 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1783 sh4_x86.tstate = TSTATE_NONE;
1785 MOV.W R0, @(disp, Rn) {:
1787 load_reg( REG_EAX, Rn );
1788 ADDL_imms_r32( disp, REG_EAX );
1789 check_walign16( REG_EAX );
1790 load_reg( REG_EDX, 0 );
1791 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1792 sh4_x86.tstate = TSTATE_NONE;
1796 load_reg( REG_EAX, Rm );
1797 check_ralign16( REG_EAX );
1798 MEM_READ_WORD( REG_EAX, REG_EAX );
1799 store_reg( REG_EAX, Rn );
1800 sh4_x86.tstate = TSTATE_NONE;
1804 load_reg( REG_EAX, Rm );
1805 check_ralign16( REG_EAX );
1806 MEM_READ_WORD( REG_EAX, REG_EAX );
1808 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
1810 store_reg( REG_EAX, Rn );
1811 sh4_x86.tstate = TSTATE_NONE;
1813 MOV.W @(R0, Rm), Rn {:
1815 load_reg( REG_EAX, 0 );
1816 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1817 check_ralign16( REG_EAX );
1818 MEM_READ_WORD( REG_EAX, REG_EAX );
1819 store_reg( REG_EAX, Rn );
1820 sh4_x86.tstate = TSTATE_NONE;
1822 MOV.W @(disp, GBR), R0 {:
1824 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1825 ADDL_imms_r32( disp, REG_EAX );
1826 check_ralign16( REG_EAX );
1827 MEM_READ_WORD( REG_EAX, REG_EAX );
1828 store_reg( REG_EAX, 0 );
1829 sh4_x86.tstate = TSTATE_NONE;
1831 MOV.W @(disp, PC), Rn {:
1833 if( sh4_x86.in_delay_slot ) {
1836 // See comments for MOV.L @(disp, PC), Rn
1837 uint32_t target = pc + disp + 4;
1838 if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
1839 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1840 MOVL_moffptr_eax( ptr );
1841 MOVSXL_r16_r32( REG_EAX, REG_EAX );
1843 MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
1844 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1845 MEM_READ_WORD( REG_EAX, REG_EAX );
1846 sh4_x86.tstate = TSTATE_NONE;
1848 store_reg( REG_EAX, Rn );
1851 MOV.W @(disp, Rm), R0 {:
1853 load_reg( REG_EAX, Rm );
1854 ADDL_imms_r32( disp, REG_EAX );
1855 check_ralign16( REG_EAX );
1856 MEM_READ_WORD( REG_EAX, REG_EAX );
1857 store_reg( REG_EAX, 0 );
1858 sh4_x86.tstate = TSTATE_NONE;
1860 MOVA @(disp, PC), R0 {:
1862 if( sh4_x86.in_delay_slot ) {
1865 MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
1866 ADDL_rbpdisp_r32( R_PC, REG_ECX );
1867 store_reg( REG_ECX, 0 );
1868 sh4_x86.tstate = TSTATE_NONE;
1872 COUNT_INST(I_MOVCA);
1873 load_reg( REG_EAX, Rn );
1874 check_walign32( REG_EAX );
1875 load_reg( REG_EDX, 0 );
1876 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1877 sh4_x86.tstate = TSTATE_NONE;
1880 /* Control transfer instructions */
1883 if( sh4_x86.in_delay_slot ) {
1886 sh4vma_t target = disp + pc + 4;
1887 JT_label( nottaken );
1888 exit_block_rel(target, pc+2 );
1889 JMP_TARGET(nottaken);
1895 if( sh4_x86.in_delay_slot ) {
1898 sh4_x86.in_delay_slot = DELAY_PC;
1899 if( UNTRANSLATABLE(pc+2) ) {
1900 MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1902 ADDL_imms_r32( disp, REG_EAX );
1903 JMP_TARGET(nottaken);
1904 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1905 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1906 exit_block_emu(pc+2);
1907 sh4_x86.branch_taken = TRUE;
1911 sh4vma_t target = disp + pc + 4;
1912 JCC_cc_rel32(sh4_x86.tstate,0);
1913 uint32_t *patch = ((uint32_t *)xlat_output)-1;
1914 int save_tstate = sh4_x86.tstate;
1915 sh4_translate_instruction(pc+2);
1916 sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
1917 exit_block_rel( target, pc+4 );
1920 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1921 sh4_x86.tstate = save_tstate;
1922 sh4_translate_instruction(pc+2);
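/* For the conditional delay-slot branches (BF/S and BT/S) the delay-slot instruction is
 * translated twice: once on the taken path before the inlined block exit, and again on
 * the fall-through path after the JCC is patched - presumably because each path needs
 * its own copy of the slot with different continuation code. */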
1929 if( sh4_x86.in_delay_slot ) {
1932 sh4_x86.in_delay_slot = DELAY_PC;
1933 sh4_x86.branch_taken = TRUE;
1934 if( UNTRANSLATABLE(pc+2) ) {
1935 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1936 ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
1937 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1938 exit_block_emu(pc+2);
1941 sh4_translate_instruction( pc + 2 );
1942 exit_block_rel( disp + pc + 4, pc+4 );
1949 if( sh4_x86.in_delay_slot ) {
1952 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1953 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1954 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1955 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1956 sh4_x86.in_delay_slot = DELAY_PC;
1957 sh4_x86.tstate = TSTATE_NONE;
1958 sh4_x86.branch_taken = TRUE;
1959 if( UNTRANSLATABLE(pc+2) ) {
1960 exit_block_emu(pc+2);
1963 sh4_translate_instruction( pc + 2 );
1964 exit_block_newpcset(pc+4);
1971 if( sh4_x86.in_delay_slot ) {
1974 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1975 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1976 MOVL_r32_rbpdisp( REG_EAX, R_PR );
1977 sh4_x86.in_delay_slot = DELAY_PC;
1978 sh4_x86.branch_taken = TRUE;
1979 sh4_x86.tstate = TSTATE_NONE;
1980 if( UNTRANSLATABLE(pc+2) ) {
1981 ADDL_imms_r32( disp, REG_EAX );
1982 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1983 exit_block_emu(pc+2);
1986 sh4_translate_instruction( pc + 2 );
1987 exit_block_rel( disp + pc + 4, pc+4 );
1994 if( sh4_x86.in_delay_slot ) {
1997 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1998 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1999 MOVL_r32_rbpdisp( REG_EAX, R_PR );
2000 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
2001 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
2003 sh4_x86.in_delay_slot = DELAY_PC;
2004 sh4_x86.tstate = TSTATE_NONE;
2005 sh4_x86.branch_taken = TRUE;
2006 if( UNTRANSLATABLE(pc+2) ) {
2007 exit_block_emu(pc+2);
2010 sh4_translate_instruction( pc + 2 );
2011 exit_block_newpcset(pc+4);
2018 if( sh4_x86.in_delay_slot ) {
2021 sh4vma_t target = disp + pc + 4;
2022 JF_label( nottaken );
2023 exit_block_rel(target, pc+2 );
2024 JMP_TARGET(nottaken);
2030 if( sh4_x86.in_delay_slot ) {
2033 sh4_x86.in_delay_slot = DELAY_PC;
2034 if( UNTRANSLATABLE(pc+2) ) {
2035 MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
2037 ADDL_imms_r32( disp, REG_EAX );
2038 JMP_TARGET(nottaken);
2039 ADDL_rbpdisp_r32( R_PC, REG_EAX );
2040 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
2041 exit_block_emu(pc+2);
2042 sh4_x86.branch_taken = TRUE;
2046 JCC_cc_rel32(sh4_x86.tstate^1,0);
2047 uint32_t *patch = ((uint32_t *)xlat_output)-1;
2049 int save_tstate = sh4_x86.tstate;
2050 sh4_translate_instruction(pc+2);
2051 sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
2052 exit_block_rel( disp + pc + 4, pc+4 );
2054 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
2055 sh4_x86.tstate = save_tstate;
2056 sh4_translate_instruction(pc+2);
2063 if( sh4_x86.in_delay_slot ) {
2066 load_reg( REG_ECX, Rn );
2067 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2068 sh4_x86.in_delay_slot = DELAY_PC;
2069 sh4_x86.branch_taken = TRUE;
2070 if( UNTRANSLATABLE(pc+2) ) {
2071 exit_block_emu(pc+2);
2074 sh4_translate_instruction(pc+2);
2075 exit_block_newpcset(pc+4);
2082 if( sh4_x86.in_delay_slot ) {
2085 MOVL_rbpdisp_r32( R_PC, REG_EAX );
2086 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
2087 MOVL_r32_rbpdisp( REG_EAX, R_PR );
2088 load_reg( REG_ECX, Rn );
2089 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2090 sh4_x86.in_delay_slot = DELAY_PC;
2091 sh4_x86.branch_taken = TRUE;
2092 sh4_x86.tstate = TSTATE_NONE;
2093 if( UNTRANSLATABLE(pc+2) ) {
2094 exit_block_emu(pc+2);
2097 sh4_translate_instruction(pc+2);
2098 exit_block_newpcset(pc+4);
2105 if( sh4_x86.in_delay_slot ) {
2109 MOVL_rbpdisp_r32( R_SPC, REG_ECX );
2110 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2111 MOVL_rbpdisp_r32( R_SSR, REG_EAX );
2112 CALL1_ptr_r32( sh4_write_sr, REG_EAX );
2113 sh4_x86.in_delay_slot = DELAY_PC;
2114 sh4_x86.fpuen_checked = FALSE;
2115 sh4_x86.tstate = TSTATE_NONE;
2116 sh4_x86.branch_taken = TRUE;
2117 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
2118 if( UNTRANSLATABLE(pc+2) ) {
2119 exit_block_emu(pc+2);
2122 sh4_translate_instruction(pc+2);
2123 exit_block_newpcset(pc+4);
2130 if( sh4_x86.in_delay_slot ) {
2133 MOVL_rbpdisp_r32( R_PR, REG_ECX );
2134 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2135 sh4_x86.in_delay_slot = DELAY_PC;
2136 sh4_x86.branch_taken = TRUE;
2137 if( UNTRANSLATABLE(pc+2) ) {
2138 exit_block_emu(pc+2);
2141 sh4_translate_instruction(pc+2);
2142 exit_block_newpcset(pc+4);
2148 COUNT_INST(I_TRAPA);
2149 if( sh4_x86.in_delay_slot ) {
2152 MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX ); // 5
2153 ADDL_r32_rbpdisp( REG_ECX, R_PC );
2154 MOVL_imm32_r32( imm, REG_EAX );
2155 CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
2156 sh4_x86.tstate = TSTATE_NONE;
2157 exit_block_pcset(pc+2);
2158 sh4_x86.branch_taken = TRUE;
2163 COUNT_INST(I_UNDEF);
2164 if( sh4_x86.in_delay_slot ) {
2165 exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);
2167 exit_block_exc(EXC_ILLEGAL, pc, 2);
2173 COUNT_INST(I_CLRMAC);
2174 XORL_r32_r32(REG_EAX, REG_EAX);
2175 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
2176 MOVL_r32_rbpdisp( REG_EAX, R_MACH );
2177 sh4_x86.tstate = TSTATE_NONE;
2182 SETCCB_cc_rbpdisp(X86_COND_C, R_S);
2183 sh4_x86.tstate = TSTATE_NONE;
2189 sh4_x86.tstate = TSTATE_C;
2194 SETCCB_cc_rbpdisp(X86_COND_C, R_S);
2195 sh4_x86.tstate = TSTATE_NONE;
2201 sh4_x86.tstate = TSTATE_C;
2204 /* Floating point moves */
2206 COUNT_INST(I_FMOV1);
2208 if( sh4_x86.double_size ) {
2209 load_dr0( REG_EAX, FRm );
2210 load_dr1( REG_ECX, FRm );
2211 store_dr0( REG_EAX, FRn );
2212 store_dr1( REG_ECX, FRn );
2214 load_fr( REG_EAX, FRm ); // SZ=0 branch
2215 store_fr( REG_EAX, FRn );
2219 COUNT_INST(I_FMOV2);
2221 load_reg( REG_EAX, Rn );
2222 if( sh4_x86.double_size ) {
2223 check_walign64( REG_EAX );
2224 load_dr0( REG_EDX, FRm );
2225 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2226 load_reg( REG_EAX, Rn );
2227 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2228 load_dr1( REG_EDX, FRm );
2229 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2231 check_walign32( REG_EAX );
2232 load_fr( REG_EDX, FRm );
2233 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2235 sh4_x86.tstate = TSTATE_NONE;
2238 COUNT_INST(I_FMOV5);
2240 load_reg( REG_EAX, Rm );
2241 if( sh4_x86.double_size ) {
2242 check_ralign64( REG_EAX );
2243 MEM_READ_LONG( REG_EAX, REG_EAX );
2244 store_dr0( REG_EAX, FRn );
2245 load_reg( REG_EAX, Rm );
2246 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2247 MEM_READ_LONG( REG_EAX, REG_EAX );
2248 store_dr1( REG_EAX, FRn );
2250 check_ralign32( REG_EAX );
2251 MEM_READ_LONG( REG_EAX, REG_EAX );
2252 store_fr( REG_EAX, FRn );
2254 sh4_x86.tstate = TSTATE_NONE;
2257 COUNT_INST(I_FMOV3);
2259 load_reg( REG_EAX, Rn );
2260 if( sh4_x86.double_size ) {
2261 check_walign64( REG_EAX );
2262 LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
2263 load_dr0( REG_EDX, FRm );
2264 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2265 load_reg( REG_EAX, Rn );
2266 LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
2267 load_dr1( REG_EDX, FRm );
2268 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2269 ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
2271 check_walign32( REG_EAX );
2272 LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
2273 load_fr( REG_EDX, FRm );
2274 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2275 ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
2277 sh4_x86.tstate = TSTATE_NONE;
2280 COUNT_INST(I_FMOV6);
2282 load_reg( REG_EAX, Rm );
2283 if( sh4_x86.double_size ) {
2284 check_ralign64( REG_EAX );
2285 MEM_READ_LONG( REG_EAX, REG_EAX );
2286 store_dr0( REG_EAX, FRn );
2287 load_reg( REG_EAX, Rm );
2288 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2289 MEM_READ_LONG( REG_EAX, REG_EAX );
2290 store_dr1( REG_EAX, FRn );
2291 ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
2293 check_ralign32( REG_EAX );
2294 MEM_READ_LONG( REG_EAX, REG_EAX );
2295 store_fr( REG_EAX, FRn );
2296 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2298 sh4_x86.tstate = TSTATE_NONE;
2300 FMOV FRm, @(R0, Rn) {:
2301 COUNT_INST(I_FMOV4);
2303 load_reg( REG_EAX, Rn );
2304 ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
2305 if( sh4_x86.double_size ) {
2306 check_walign64( REG_EAX );
2307 load_dr0( REG_EDX, FRm );
2308 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2309 load_reg( REG_EAX, Rn );
2310 ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
2311 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2312 load_dr1( REG_EDX, FRm );
2313 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2315 check_walign32( REG_EAX );
2316 load_fr( REG_EDX, FRm );
2317 MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
2319 sh4_x86.tstate = TSTATE_NONE;
2321 FMOV @(R0, Rm), FRn {:
2322 COUNT_INST(I_FMOV7);
2324 load_reg( REG_EAX, Rm );
2325 ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
2326 if( sh4_x86.double_size ) {
2327 check_ralign64( REG_EAX );
2328 MEM_READ_LONG( REG_EAX, REG_EAX );
2329 store_dr0( REG_EAX, FRn );
2330 load_reg( REG_EAX, Rm );
2331 ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
2332 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2333 MEM_READ_LONG( REG_EAX, REG_EAX );
2334 store_dr1( REG_EAX, FRn );
2336 check_ralign32( REG_EAX );
2337 MEM_READ_LONG( REG_EAX, REG_EAX );
2338 store_fr( REG_EAX, FRn );
2340 sh4_x86.tstate = TSTATE_NONE;
2342 FLDI0 FRn {: /* IFF PR=0 */
2343 COUNT_INST(I_FLDI0);
2345 if( sh4_x86.double_prec == 0 ) {
2346 XORL_r32_r32( REG_EAX, REG_EAX );
2347 store_fr( REG_EAX, FRn );
2349 sh4_x86.tstate = TSTATE_NONE;
2351 FLDI1 FRn {: /* IFF PR=0 */
2352 COUNT_INST(I_FLDI1);
2354 if( sh4_x86.double_prec == 0 ) {
2355 MOVL_imm32_r32( 0x3F800000, REG_EAX );
2356 store_fr( REG_EAX, FRn );
2361 COUNT_INST(I_FLOAT);
2363 FILD_rbpdisp(R_FPUL);
2364 if( sh4_x86.double_prec ) {
2373 if( sh4_x86.double_prec ) {
2378 MOVP_immptr_rptr( &min_int, REG_ECX );
2379 FILD_r32disp( REG_ECX, 0 );
2383 MOVP_immptr_rptr( &max_int, REG_ECX );
2384 FILD_r32disp( REG_ECX, 0 );
2387 MOVP_immptr_rptr( &save_fcw, REG_EAX );
2388 FNSTCW_r32disp( REG_EAX, 0 );
2389 MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
2390 FLDCW_r32disp( REG_EDX, 0 );
2391 FISTP_rbpdisp(R_FPUL);
2392 FLDCW_r32disp( REG_EAX, 0 );
2398 MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
2399 MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
2402 sh4_x86.tstate = TSTATE_NONE;
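/* FTRC converts the source register to a signed 32-bit integer in FPUL,
 * truncating toward zero and saturating on overflow: the emitted code clamps
 * against the max_int/min_int constants and temporarily swaps the x87 control
 * word (save_fcw/trunc_fcw) so that FISTP truncates. Roughly the value
 * computed is (sketch only, NaN handling omitted, helper name illustrative):
 *
 *     int32_t ftrc_value( double d )
 *     {
 *         if( d >= 2147483647.0 )  return INT32_MAX;  // clamp against max_int
 *         if( d <= -2147483648.0 ) return INT32_MIN;  // clamp against min_int
 *         return (int32_t)d;   // C cast truncates toward zero, like FISTP
 *     }                        // with the truncating control word loaded
 */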
2407 load_fr( REG_EAX, FRm );
2408 MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
2413 MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
2414 store_fr( REG_EAX, FRn );
2417 COUNT_INST(I_FCNVDS);
2419 if( sh4_x86.double_prec ) {
2425 COUNT_INST(I_FCNVSD);
2427 if( sh4_x86.double_prec ) {
2433 /* Floating point instructions */
2437 if( sh4_x86.double_prec ) {
2450 if( sh4_x86.double_prec ) {
2465 if( sh4_x86.double_prec ) {
2477 FMAC FR0, FRm, FRn {:
2480 if( sh4_x86.double_prec ) {
2500 if( sh4_x86.double_prec ) {
2515 if( sh4_x86.double_prec ) {
2526 COUNT_INST(I_FSRRA);
2528 if( sh4_x86.double_prec == 0 ) {
2537 COUNT_INST(I_FSQRT);
2539 if( sh4_x86.double_prec ) {
2552 if( sh4_x86.double_prec ) {
2566 COUNT_INST(I_FCMPEQ);
2568 if( sh4_x86.double_prec ) {
2575 XORL_r32_r32(REG_EAX, REG_EAX);
2576 XORL_r32_r32(REG_EDX, REG_EDX);
2578 SETCCB_cc_r8(X86_COND_NP, REG_DL);
2579 CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
2580 MOVL_r32_rbpdisp(REG_EAX, R_T);
2582 sh4_x86.tstate = TSTATE_NONE;
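/* The ZF/PF dance above distinguishes "equal" from "unordered": the x86
 * compare sets ZF both for equal and for unordered operands, while PF is set
 * only for unordered. SETcc NP leaves 1 in DL only for an ordered compare,
 * and the CMOVE copies it into EAX only when ZF is set, so T ends up 1
 * exactly when the operands compared equal and neither was a NaN. */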
2585 COUNT_INST(I_FCMPGT);
2587 if( sh4_x86.double_prec ) {
2597 sh4_x86.tstate = TSTATE_A;
2603 if( sh4_x86.double_prec == 0 ) {
2604 LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
2605 MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
2606 CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
2608 sh4_x86.tstate = TSTATE_NONE;
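/* sh4_fsca receives FPUL in EAX and a pointer to the destination register
 * pair in EDX. Architecturally FSCA treats FPUL as a fixed-point angle with
 * 65536 units per full rotation, so the helper is expected to compute roughly
 * the following (sketch, not the helper's actual code; FR[] means the
 * architectural registers, not the swizzled in-memory layout):
 *
 *     float angle = (float)FPUL * (2.0f * (float)M_PI / 65536.0f);
 *     FR[n]   = sinf( angle );
 *     FR[n+1] = cosf( angle );
 */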
2613 if( sh4_x86.double_prec == 0 ) {
2614 if( sh4_x86.sse3_enabled ) {
2615 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
2616 MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
2617 HADDPS_xmm_xmm( 4, 4 );
2618 HADDPS_xmm_xmm( 4, 4 );
2619 MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
2624 push_fr( (FVm<<2)+1);
2625 push_fr( (FVn<<2)+1);
2628 push_fr( (FVm<<2)+2);
2629 push_fr( (FVn<<2)+2);
2632 push_fr( (FVm<<2)+3);
2633 push_fr( (FVn<<2)+3);
2636 pop_fr( (FVn<<2)+3);
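/* FIPR FVm, FVn is a 4-element dot product. Architecturally:
 *
 *     FR[n*4+3] = FR[m*4+0]*FR[n*4+0] + FR[m*4+1]*FR[n*4+1]
 *               + FR[m*4+2]*FR[n*4+2] + FR[m*4+3]*FR[n*4+3];
 *
 * The SSE3 path computes this with MULPS plus two HADDPS reductions, while
 * the fallback multiplies and accumulates the four pairs on the x87 stack via
 * push_fr/pop_fr. The +2 offset in the SSE3 store reflects the word-swapped
 * in-memory layout of the float banks (see the M1 M0 M3 M2 ordering noted in
 * FTRV below). */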
2643 if( sh4_x86.double_prec == 0 ) {
2644 if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
2645 /* FIXME: For now, disable this inlining when we're running in shadow mode -
2646 * it gives slightly different results from the emu core. Need to
2647 * fix the precision so both give the right results.
2649 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1 M0 M3 M2
2650 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5 M4 M7 M6
2651 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9 M8 M11 M10
2652 MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
2654 MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
2655 MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
2656 MOV_xmm_xmm( 4, 6 );
2657 MOV_xmm_xmm( 5, 7 );
2658 MOVLHPS_xmm_xmm( 4, 4 ); // V1 V1 V1 V1
2659 MOVHLPS_xmm_xmm( 6, 6 ); // V3 V3 V3 V3
2660 MOVLHPS_xmm_xmm( 5, 5 ); // V0 V0 V0 V0
2661 MOVHLPS_xmm_xmm( 7, 7 ); // V2 V2 V2 V2
2662 MULPS_xmm_xmm( 0, 4 );
2663 MULPS_xmm_xmm( 1, 5 );
2664 MULPS_xmm_xmm( 2, 6 );
2665 MULPS_xmm_xmm( 3, 7 );
2666 ADDPS_xmm_xmm( 5, 4 );
2667 ADDPS_xmm_xmm( 7, 6 );
2668 ADDPS_xmm_xmm( 6, 4 );
2669 MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
2671 LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
2672 CALL1_ptr_r32( sh4_ftrv, REG_EAX );
2675 sh4_x86.tstate = TSTATE_NONE;
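/* FTRV multiplies FVn by the 4x4 back-bank matrix XMTRX (XF0..XF15, stored
 * column-major). Architecturally, in sketch form:
 *
 *     float v[4], r[4];
 *     for( int i = 0; i < 4; i++ ) v[i] = FR[n*4 + i];
 *     for( int i = 0; i < 4; i++ ) {
 *         r[i] = 0.0f;
 *         for( int j = 0; j < 4; j++ )
 *             r[i] += XF[i + 4*j] * v[j];
 *     }
 *     for( int i = 0; i < 4; i++ ) FR[n*4 + i] = r[i];
 *
 * The SSE3 path broadcasts each vector element across an XMM register and
 * accumulates four MULPS/ADDPS products; the word-swapped bank layout is why
 * the loaded rows are annotated M1 M0 M3 M2 and so on. The fallback calls the
 * sh4_ftrv helper with a pointer to the vector. */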
2679 COUNT_INST(I_FRCHG);
2681 XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
2682 CALL_ptr( sh4_switch_fr_banks );
2683 sh4_x86.tstate = TSTATE_NONE;
2686 COUNT_INST(I_FSCHG);
2688 XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
2689 XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
2690 sh4_x86.tstate = TSTATE_NONE;
2691 sh4_x86.double_size = !sh4_x86.double_size;
2692 sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
2695 /* Processor control instructions */
2697 COUNT_INST(I_LDCSR);
2698 if( sh4_x86.in_delay_slot ) {
2702 load_reg( REG_EAX, Rm );
2703 CALL1_ptr_r32( sh4_write_sr, REG_EAX );
2704 sh4_x86.fpuen_checked = FALSE;
2705 sh4_x86.tstate = TSTATE_NONE;
2706 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
2712 load_reg( REG_EAX, Rm );
2713 MOVL_r32_rbpdisp( REG_EAX, R_GBR );
2718 load_reg( REG_EAX, Rm );
2719 MOVL_r32_rbpdisp( REG_EAX, R_VBR );
2720 sh4_x86.tstate = TSTATE_NONE;
2725 load_reg( REG_EAX, Rm );
2726 MOVL_r32_rbpdisp( REG_EAX, R_SSR );
2727 sh4_x86.tstate = TSTATE_NONE;
2732 load_reg( REG_EAX, Rm );
2733 MOVL_r32_rbpdisp( REG_EAX, R_SGR );
2734 sh4_x86.tstate = TSTATE_NONE;
2739 load_reg( REG_EAX, Rm );
2740 MOVL_r32_rbpdisp( REG_EAX, R_SPC );
2741 sh4_x86.tstate = TSTATE_NONE;
2746 load_reg( REG_EAX, Rm );
2747 MOVL_r32_rbpdisp( REG_EAX, R_DBR );
2748 sh4_x86.tstate = TSTATE_NONE;
2753 load_reg( REG_EAX, Rm );
2754 MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2755 sh4_x86.tstate = TSTATE_NONE;
2759 load_reg( REG_EAX, Rm );
2760 check_ralign32( REG_EAX );
2761 MEM_READ_LONG( REG_EAX, REG_EAX );
2762 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2763 MOVL_r32_rbpdisp( REG_EAX, R_GBR );
2764 sh4_x86.tstate = TSTATE_NONE;
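/* The LDC.L/LDS.L @Rm+ forms below all follow the same pattern: read Rm,
 * check 32-bit read alignment, perform the (possibly faulting) MEM_READ_LONG,
 * and only then post-increment r[Rm] by 4 before writing the loaded value to
 * the destination control/system register. Deferring the increment until the
 * read has succeeded lets an MMU exception restart the instruction with Rm
 * unmodified. */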
2767 COUNT_INST(I_LDCSRM);
2768 if( sh4_x86.in_delay_slot ) {
2772 load_reg( REG_EAX, Rm );
2773 check_ralign32( REG_EAX );
2774 MEM_READ_LONG( REG_EAX, REG_EAX );
2775 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2776 CALL1_ptr_r32( sh4_write_sr, REG_EAX );
2777 sh4_x86.fpuen_checked = FALSE;
2778 sh4_x86.tstate = TSTATE_NONE;
2779 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
2786 load_reg( REG_EAX, Rm );
2787 check_ralign32( REG_EAX );
2788 MEM_READ_LONG( REG_EAX, REG_EAX );
2789 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2790 MOVL_r32_rbpdisp( REG_EAX, R_VBR );
2791 sh4_x86.tstate = TSTATE_NONE;
2796 load_reg( REG_EAX, Rm );
2797 check_ralign32( REG_EAX );
2798 MEM_READ_LONG( REG_EAX, REG_EAX );
2799 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2800 MOVL_r32_rbpdisp( REG_EAX, R_SSR );
2801 sh4_x86.tstate = TSTATE_NONE;
2806 load_reg( REG_EAX, Rm );
2807 check_ralign32( REG_EAX );
2808 MEM_READ_LONG( REG_EAX, REG_EAX );
2809 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2810 MOVL_r32_rbpdisp( REG_EAX, R_SGR );
2811 sh4_x86.tstate = TSTATE_NONE;
2816 load_reg( REG_EAX, Rm );
2817 check_ralign32( REG_EAX );
2818 MEM_READ_LONG( REG_EAX, REG_EAX );
2819 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2820 MOVL_r32_rbpdisp( REG_EAX, R_SPC );
2821 sh4_x86.tstate = TSTATE_NONE;
2826 load_reg( REG_EAX, Rm );
2827 check_ralign32( REG_EAX );
2828 MEM_READ_LONG( REG_EAX, REG_EAX );
2829 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2830 MOVL_r32_rbpdisp( REG_EAX, R_DBR );
2831 sh4_x86.tstate = TSTATE_NONE;
2833 LDC.L @Rm+, Rn_BANK {:
2836 load_reg( REG_EAX, Rm );
2837 check_ralign32( REG_EAX );
2838 MEM_READ_LONG( REG_EAX, REG_EAX );
2839 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2840 MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2841 sh4_x86.tstate = TSTATE_NONE;
2844 COUNT_INST(I_LDSFPSCR);
2846 load_reg( REG_EAX, Rm );
2847 CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
2848 sh4_x86.tstate = TSTATE_NONE;
2849 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
2852 LDS.L @Rm+, FPSCR {:
2853 COUNT_INST(I_LDSFPSCRM);
2855 load_reg( REG_EAX, Rm );
2856 check_ralign32( REG_EAX );
2857 MEM_READ_LONG( REG_EAX, REG_EAX );
2858 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2859 CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
2860 sh4_x86.tstate = TSTATE_NONE;
2861 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
2867 load_reg( REG_EAX, Rm );
2868 MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
2873 load_reg( REG_EAX, Rm );
2874 check_ralign32( REG_EAX );
2875 MEM_READ_LONG( REG_EAX, REG_EAX );
2876 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2877 MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
2878 sh4_x86.tstate = TSTATE_NONE;
2882 load_reg( REG_EAX, Rm );
2883 MOVL_r32_rbpdisp( REG_EAX, R_MACH );
2887 load_reg( REG_EAX, Rm );
2888 check_ralign32( REG_EAX );
2889 MEM_READ_LONG( REG_EAX, REG_EAX );
2890 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2891 MOVL_r32_rbpdisp( REG_EAX, R_MACH );
2892 sh4_x86.tstate = TSTATE_NONE;
2896 load_reg( REG_EAX, Rm );
2897 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
2901 load_reg( REG_EAX, Rm );
2902 check_ralign32( REG_EAX );
2903 MEM_READ_LONG( REG_EAX, REG_EAX );
2904 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2905 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
2906 sh4_x86.tstate = TSTATE_NONE;
2910 load_reg( REG_EAX, Rm );
2911 MOVL_r32_rbpdisp( REG_EAX, R_PR );
2915 load_reg( REG_EAX, Rm );
2916 check_ralign32( REG_EAX );
2917 MEM_READ_LONG( REG_EAX, REG_EAX );
2918 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
2919 MOVL_r32_rbpdisp( REG_EAX, R_PR );
2920 sh4_x86.tstate = TSTATE_NONE;
2923 COUNT_INST(I_LDTLB);
2924 CALL_ptr( MMU_ldtlb );
2925 sh4_x86.tstate = TSTATE_NONE;
2934 COUNT_INST(I_OCBWB);
2938 load_reg( REG_EAX, Rn );
2939 MEM_PREFETCH( REG_EAX );
2940 sh4_x86.tstate = TSTATE_NONE;
2943 COUNT_INST(I_SLEEP);
2945 CALL_ptr( sh4_sleep );
2946 sh4_x86.tstate = TSTATE_NONE;
2947 sh4_x86.in_delay_slot = DELAY_NONE;
2951 COUNT_INST(I_STCSR);
2953 CALL_ptr(sh4_read_sr);
2954 store_reg( REG_EAX, Rn );
2955 sh4_x86.tstate = TSTATE_NONE;
2959 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
2960 store_reg( REG_EAX, Rn );
2965 MOVL_rbpdisp_r32( R_VBR, REG_EAX );
2966 store_reg( REG_EAX, Rn );
2967 sh4_x86.tstate = TSTATE_NONE;
2972 MOVL_rbpdisp_r32( R_SSR, REG_EAX );
2973 store_reg( REG_EAX, Rn );
2974 sh4_x86.tstate = TSTATE_NONE;
2979 MOVL_rbpdisp_r32( R_SPC, REG_EAX );
2980 store_reg( REG_EAX, Rn );
2981 sh4_x86.tstate = TSTATE_NONE;
2986 MOVL_rbpdisp_r32( R_SGR, REG_EAX );
2987 store_reg( REG_EAX, Rn );
2988 sh4_x86.tstate = TSTATE_NONE;
2993 MOVL_rbpdisp_r32( R_DBR, REG_EAX );
2994 store_reg( REG_EAX, Rn );
2995 sh4_x86.tstate = TSTATE_NONE;
3000 MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
3001 store_reg( REG_EAX, Rn );
3002 sh4_x86.tstate = TSTATE_NONE;
3005 COUNT_INST(I_STCSRM);
3007 CALL_ptr( sh4_read_sr );
3008 MOVL_r32_r32( REG_EAX, REG_EDX );
3009 load_reg( REG_EAX, Rn );
3010 check_walign32( REG_EAX );
3011 LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
3012 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3013 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3014 sh4_x86.tstate = TSTATE_NONE;
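/* The STC.L/STS.L @-Rn forms mirror the load pattern: the effective address
 * Rn-4 is computed in a scratch register and alignment-checked, the store is
 * performed, and r[Rn] itself is only decremented after the MEM_WRITE_LONG,
 * so a faulting write leaves Rn unchanged for the exception handler. */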
3019 load_reg( REG_EAX, Rn );
3020 check_walign32( REG_EAX );
3021 ADDL_imms_r32( -4, REG_EAX );
3022 MOVL_rbpdisp_r32( R_VBR, REG_EDX );
3023 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3024 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3025 sh4_x86.tstate = TSTATE_NONE;
3030 load_reg( REG_EAX, Rn );
3031 check_walign32( REG_EAX );
3032 ADDL_imms_r32( -4, REG_EAX );
3033 MOVL_rbpdisp_r32( R_SSR, REG_EDX );
3034 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3035 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3036 sh4_x86.tstate = TSTATE_NONE;
3041 load_reg( REG_EAX, Rn );
3042 check_walign32( REG_EAX );
3043 ADDL_imms_r32( -4, REG_EAX );
3044 MOVL_rbpdisp_r32( R_SPC, REG_EDX );
3045 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3046 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3047 sh4_x86.tstate = TSTATE_NONE;
3052 load_reg( REG_EAX, Rn );
3053 check_walign32( REG_EAX );
3054 ADDL_imms_r32( -4, REG_EAX );
3055 MOVL_rbpdisp_r32( R_SGR, REG_EDX );
3056 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3057 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3058 sh4_x86.tstate = TSTATE_NONE;
3063 load_reg( REG_EAX, Rn );
3064 check_walign32( REG_EAX );
3065 ADDL_imms_r32( -4, REG_EAX );
3066 MOVL_rbpdisp_r32( R_DBR, REG_EDX );
3067 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3068 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3069 sh4_x86.tstate = TSTATE_NONE;
3071 STC.L Rm_BANK, @-Rn {:
3074 load_reg( REG_EAX, Rn );
3075 check_walign32( REG_EAX );
3076 ADDL_imms_r32( -4, REG_EAX );
3077 MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
3078 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3079 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3080 sh4_x86.tstate = TSTATE_NONE;
3084 load_reg( REG_EAX, Rn );
3085 check_walign32( REG_EAX );
3086 ADDL_imms_r32( -4, REG_EAX );
3087 MOVL_rbpdisp_r32( R_GBR, REG_EDX );
3088 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3089 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3090 sh4_x86.tstate = TSTATE_NONE;
3093 COUNT_INST(I_STSFPSCR);
3095 MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
3096 store_reg( REG_EAX, Rn );
3098 STS.L FPSCR, @-Rn {:
3099 COUNT_INST(I_STSFPSCRM);
3101 load_reg( REG_EAX, Rn );
3102 check_walign32( REG_EAX );
3103 ADDL_imms_r32( -4, REG_EAX );
3104 MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
3105 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3106 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3107 sh4_x86.tstate = TSTATE_NONE;
3112 MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
3113 store_reg( REG_EAX, Rn );
3118 load_reg( REG_EAX, Rn );
3119 check_walign32( REG_EAX );
3120 ADDL_imms_r32( -4, REG_EAX );
3121 MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
3122 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3123 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3124 sh4_x86.tstate = TSTATE_NONE;
3128 MOVL_rbpdisp_r32( R_MACH, REG_EAX );
3129 store_reg( REG_EAX, Rn );
3133 load_reg( REG_EAX, Rn );
3134 check_walign32( REG_EAX );
3135 ADDL_imms_r32( -4, REG_EAX );
3136 MOVL_rbpdisp_r32( R_MACH, REG_EDX );
3137 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3138 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3139 sh4_x86.tstate = TSTATE_NONE;
3143 MOVL_rbpdisp_r32( R_MACL, REG_EAX );
3144 store_reg( REG_EAX, Rn );
3148 load_reg( REG_EAX, Rn );
3149 check_walign32( REG_EAX );
3150 ADDL_imms_r32( -4, REG_EAX );
3151 MOVL_rbpdisp_r32( R_MACL, REG_EDX );
3152 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3153 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3154 sh4_x86.tstate = TSTATE_NONE;
3158 MOVL_rbpdisp_r32( R_PR, REG_EAX );
3159 store_reg( REG_EAX, Rn );
3163 load_reg( REG_EAX, Rn );
3164 check_walign32( REG_EAX );
3165 ADDL_imms_r32( -4, REG_EAX );
3166 MOVL_rbpdisp_r32( R_PR, REG_EDX );
3167 MEM_WRITE_LONG( REG_EAX, REG_EDX );
3168 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
3169 sh4_x86.tstate = TSTATE_NONE;
3174 /* Do nothing. Well, we could emit a 0x90 (NOP), but what would really be the point? */
3177 sh4_x86.in_delay_slot = DELAY_NONE;
3183 * The unwind methods only work if we were compiled with DWARF2 frame information
3184 * (i.e. -fexceptions); otherwise we have to use the direct frame scan.
3186 #ifdef HAVE_EXCEPTIONS
3190 uintptr_t block_start;
3191 uintptr_t block_end;
3195 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
3197 struct UnwindInfo *info = arg;
3198 void *pc = (void *)_Unwind_GetIP(context);
3199 if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
3201 return _URC_NORMAL_STOP;
3203 return _URC_NO_REASON;
3206 void *xlat_get_native_pc( void *code, uint32_t code_size )
3208 struct UnwindInfo info;
3211 info.block_start = (uintptr_t)code;
3212 info.block_end = info.block_start + code_size;
3213 _Unwind_Backtrace( xlat_check_frame, &info );
3217 /* Assume this is an ia32 build - amd64 should always have DWARF information */
3218 void *xlat_get_native_pc( void *code, uint32_t code_size )
3220 void *result = NULL;
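/* The inline asm below walks the saved-EBP frame chain (at most 8 frames):
 * %edx is loaded with &sh4r + 128, the base pointer value that generated code
 * runs with, so the first frame whose saved EBP equals that value belongs to
 * the translated block, and its saved return address at 4(%eax) is the native
 * PC we are after. If no such frame is found within the limit, result stays
 * NULL. */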
3222 "mov %%ebp, %%eax\n\t"
3223 "mov $0x8, %%ecx\n\t"
3225 "frame_loop: test %%eax, %%eax\n\t"
3226 "je frame_not_found\n\t"
3227 "cmp (%%eax), %%edx\n\t"
3228 "je frame_found\n\t"
3229 "sub $0x1, %%ecx\n\t"
3230 "je frame_not_found\n\t"
3231 "movl (%%eax), %%eax\n\t"
3233 "frame_found: movl 0x4(%%eax), %0\n"
3236 : "r" (((uint8_t *)&sh4r) + 128 )
3237 : "eax", "ecx", "edx" );