/**
 * SH4 => x86 translation. This version performs no real optimization; it
 * just emits straight-line x86 code. It exists mainly to provide a baseline
 * against which to test the optimizing versions.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "xlat/xlatdasm.h"

#define DEFAULT_BACKPATCH_SIZE 4096
/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
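/* Illustrative note (not in the original): the -128 bias puts REG_EBP 128
 * bytes into sh4r, so frequently-used fields fall inside the signed 8-bit
 * displacement range, e.g.
 *     MOVL_rbpdisp_r32( R_T, REG_EAX );
 * encodes as a short mov eax,[ebp+disp8] instead of needing a 4-byte
 * displacement. */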
#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
#define R_MACH   REG_OFFSET(mac)+4
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
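/* Index-arithmetic sketch (assuming fr[][] stores each double as a pair of
 * 32-bit host words): (f)^1 swaps word order within a pair, so R_FR(0) maps
 * to fr[0][1] and R_FR(1) to fr[0][0], while the DR macros use (f)&1 to
 * select the bank and (f)&0x0E / (f)|0x01 to address the high and low words
 * of the 64-bit pair. */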
#define SH4_MODE_UNKNOWN -1

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int tstate;             /* Host condition-code mirror of the SH4 T flag */
    int in_delay_slot;      /* DELAY_NONE, or the delay-slot state being translated */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    uint8_t *code;         /* Start of the current block's generated code */
    gboolean fastmem;      /* True if direct references to guest memory are allowed */
    gboolean tlb_on;       /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;
    struct mem_region_fn **user_address_space;

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback;
    xlat_block_end_callback_t end_callback;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

static struct sh4_x86_state sh4_x86;
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
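/* FTRC sketch: the x87 rounds to nearest by default, but SH4 FTRC must
 * truncate toward zero. trunc_fcw sets the x87 RC field to 11b (round toward
 * zero), and the FTRC translation below brackets the store with it:
 *     FNSTCW_r32disp( REG_EAX, 0 );   // REG_EAX -> &save_fcw
 *     FLDCW_r32disp( REG_EDX, 0 );    // REG_EDX -> &trunc_fcw
 *     FISTP_rbpdisp(R_FPUL);          // convert with truncation
 *     FLDCW_r32disp( REG_EAX, 0 );    // restore the saved control word
 */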
static void sh4_x86_translate_unlink_block( void *use_list );

static struct xlat_target_fns x86_target_fns = {
    sh4_x86_translate_unlink_block
};
gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
{
    sh4_x86.priv_address_space = priv;
    sh4_x86.user_address_space = user;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.begin_callback = NULL;
    sh4_x86.end_callback = NULL;
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
    sh4_x86.fastmem = TRUE;
    sh4_x86.sse3_enabled = is_sse3_supported();
    xlat_set_target_fns(&x86_target_fns);
}
void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.begin_callback = begin;
    sh4_x86.end_callback = end;
}

void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    uint32_t reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
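/* Typical forward-branch pattern (sketch):
 *     JE_label( skip );      // emits jz rel8 with a -1 placeholder
 *     ... not-taken path ...
 *     JMP_TARGET( skip );    // patches the rel8 to land here
 */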
/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
    CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
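/* Usage sketch: conditional code keyed off the SH4 T flag -
 *     JT_label( taken );     // branch if T set
 *     ... fall-through code ...
 *     JMP_TARGET( taken );
 * LOAD_t() only re-tests sh4r.t when the cached tstate doesn't already
 * mirror it in the host condition codes. */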
#define load_reg(x86reg,sh4reg) MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg) MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
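/* Usage sketch: a guarded 32-bit load is emitted as
 *     load_reg( REG_EAX, Rm );
 *     check_ralign32( REG_EAX );         // JNE backpatched to EXC_DATA_ADDR_READ
 *     MEM_READ_LONG( REG_EAX, REG_EAX );
 * with the fixup resolved later in sh4_translate_end_block(). */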
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the
 * exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
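/* Dispatch sketch: decode_address() leaves the region's mem_region_fn
 * pointer in %ecx, so each MEM_* macro above compiles down to an indirect
 * call through the region's function table, roughly
 *     call [ecx + offsetof(struct mem_region_fn, read_long)]
 * with the address (and value, for writes) in the argument registers. */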
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
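/* Example (sketch): with a block's code pointer in %eax, generated code can
 * test the owning block's metadata directly, e.g.
 *     CMPL_imms_r32disp( mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
 * as jump_next_block() does below. */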
void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;

    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_profile_blocks ) {
        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}
uint32_t sh4_translate_end_block_size()
{
    uint32_t epilogue_size = EPILOGUE_SIZE;
    if( sh4_x86.end_callback ) {
        epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
    }
    if( sh4_x86.backpatch_posn <= 3 ) {
        epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
    } else {
        epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
    }
    return epilogue_size;
}
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nojump);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this leaves the stack misaligned, but that doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nojump);
}
void FASTCALL sh4_translate_link_block( uint32_t pc )
{
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
        target = XLAT_BLOCK_CHAIN(target);
    }
    if( target == NULL ) {
        target = sh4_translate_basic_block( pc );
    }
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
    *backpatch = 0xE9;
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;

    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
    *retptr = backpatch;
}
static void emit_translate_and_backpatch()
{
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
    CALL1_ptr_r32(sh4_translate_link_block, REG_ARG1);

    /* When patched, the jmp instruction will be 5 bytes (either platform) -
     * we need to reserve sizeof(void*) bytes for the use-list pointer
     */
    if( sizeof(void*) == 8 ) {
        NOP();
    } else {
        NOP2();
    }
}
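/* Patched-layout sketch (assuming the 5-byte jmp described above):
 *     before:  call sh4_translate_link_block    ; CALL1_PTR_MIN_SIZE bytes
 *     after:   jmp  target+PROLOGUE_SIZE        ; opcode 0xE9, rel32 at +1
 *              <use-list next pointer>          ; sizeof(void*) bytes at +5
 * matching the bytes written back by sh4_translate_link_block() above. */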
/**
 * If we're jumping to a fixed address (or at least fixed relative to the
 * current PC), we can do a direct branch. REG_ARG1 should contain
 * the PC at this point.
 */
static void jump_next_block_fixed_pc( sh4addr_t pc )
{
    if( IS_IN_ICACHE(pc) ) {
        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
             * fetch-and-backpatch routine, which will replace the call with a branch */
            emit_translate_and_backpatch();
            return;
        } else {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        }
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
}
static void sh4_x86_translate_unlink_block( void *use_list )
{
    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
    void *next = use_list;
    while( next != NULL ) {
        xlat_output = (uint8_t *)next;
        next = *(void **)(xlat_output+5);
        emit_translate_and_backpatch();
    }
    xlat_output = tmp;
}
static void exit_block()
{
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}
/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }
    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }
    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    jump_next_block_fixed_pc(pc);
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        jump_next_block_fixed_pc(pc);
        JMP_TARGET(exitloop2);
    }
    exit_block();
}
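/* Cycle-accounting sketch: a block charges (endpc - block_start_pc)>>1
 * instructions at sh4_cpu_period each; e.g. a 6-instruction block
 * (endpc - block_start_pc == 12) with sh4_cpu_period == 5 adds
 * (12>>1)*5 = 30 to sh4r.slice_cycle before testing event_pending. */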
/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}
/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and the delayed instruction as a single
 * unit. The instruction MUST be in the icache (assert check).
 *
 * @return true if the instruction marks the end of a basic block
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
    if( sh4_x86.tstate != TSTATE_C ) {
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_C;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_O;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
AND.B #imm, @(R0, GBR) {:
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32(0, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    sh4_x86.tstate = TSTATE_E;
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    sh4_x86.tstate = TSTATE_E;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    sh4_x86.tstate = TSTATE_GE;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    sh4_x86.tstate = TSTATE_G;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    sh4_x86.tstate = TSTATE_A;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    sh4_x86.tstate = TSTATE_AE;
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    sh4_x86.tstate = TSTATE_G;
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    sh4_x86.tstate = TSTATE_GE;
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    TESTB_r8_r8( REG_AH, REG_AH );
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    TESTB_r8_r8( REG_AH, REG_AH );
    sh4_x86.tstate = TSTATE_E;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    sh4_x86.tstate = TSTATE_NE;
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_E;
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    load_reg( REG_EAX, Rm );
    LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    load_reg( REG_EAX, Rn );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    load_reg( REG_EAX, Rm );
    LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
    // Note: the address is translated twice in case the pair of reads
    // crosses a page boundary. It may be worth adding a page-boundary
    // check to skip the second translation.
    load_reg( REG_EAX, Rn );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    load_reg( REG_EAX, Rm );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
    ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );                     // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JS_label( positive );                 // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JMP_label(end2);                      // 2
    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JMP_label(end3);                      // 2
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    sh4_x86.tstate = TSTATE_NONE;
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_C;
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
OR.B #imm, @(R0, GBR) {:
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32( 0, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    NEGL_r32( REG_ECX );          // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);           // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);               // 2
    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );   // 3
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    NEGL_r32( REG_ECX );          // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr );
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);               // 2
    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_C;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_O;
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
    store_reg( REG_EAX, Rn );
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    SHLL_imm_r32( 16, REG_ECX );
    SHRL_imm_r32( 16, REG_EAX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    TESTB_r8_r8( REG_DL, REG_DL );
    ORB_imms_r8( 0x80, REG_DL );
    MOVL_rspdisp_r32( 0, REG_EAX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    TESTL_r32_r32( REG_EAX, REG_ECX );
    sh4_x86.tstate = TSTATE_E;
    load_reg( REG_EAX, 0 );
    TESTL_imms_r32( imm, REG_EAX );
    sh4_x86.tstate = TSTATE_E;
TST.B #imm, @(R0, GBR) {:
    load_reg( REG_EAX, 0);
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    TESTB_imms_r8( imm, REG_AL );
    sh4_x86.tstate = TSTATE_E;
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, 0 );
    XORL_imms_r32( imm, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
XOR.B #imm, @(R0, GBR) {:
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
    MOVL_rspdisp_r32( 0, REG_EAX );
    XORL_imms_r32( imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    COUNT_INST(I_XTRCT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    SHRL_imm_r32( 16, REG_ECX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;

/* Data move instructions */
    load_reg( REG_EAX, Rm );
    store_reg( REG_EAX, Rn );
    MOVL_imm32_r32( imm, REG_EAX );
    store_reg( REG_EAX, Rn );
    load_reg( REG_EAX, Rn );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
MOV.B Rm, @(R0, Rn) {:
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
MOV.B R0, @(disp, GBR) {:
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
MOV.B R0, @(disp, Rn) {:
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
MOV.B @(R0, Rm), Rn {:
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
MOV.B @(disp, GBR), R0 {:
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
MOV.B @(disp, Rm), R0 {:
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    check_walign32(REG_EAX);
    MOVL_r32_r32( REG_EAX, REG_ECX );
    ANDL_imms_r32( 0xFC000000, REG_ECX );
    CMPL_imms_r32( 0xE0000000, REG_ECX );
    ANDL_imms_r32( 0x3C, REG_EAX );
    load_reg( REG_EDX, Rm );
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -4, REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
MOV.L Rm, @(R0, Rn) {:
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
MOV.L R0, @(disp, GBR) {:
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
MOV.L Rm, @(disp, Rn) {:
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign32( REG_EAX );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    ANDL_imms_r32( 0xFC000000, REG_ECX );
    CMPL_imms_r32( 0xE0000000, REG_ECX );
    ANDL_imms_r32( 0x3C, REG_EAX );
    load_reg( REG_EDX, Rm );
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
MOV.L @(R0, Rm), Rn {:
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
MOV.L @(disp, GBR), R0 {:
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
MOV.L @(disp, PC), Rn {:
    COUNT_INST(I_MOVLPC);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (This should probably generate a TLB miss, although SH4
            // behaviour would need testing to confirm.) It's unlikely
            // anyone depends on this behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOVL_moffptr_eax( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
            ADDL_rbpdisp_r32( R_PC, REG_EAX );
            MEM_READ_LONG( REG_EAX, REG_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( REG_EAX, Rn );
    }
:}
1710 MOV.L @(disp, Rm), Rn {:
1712 load_reg( REG_EAX, Rm );
1713 ADDL_imms_r32( disp, REG_EAX );
1714 check_ralign32( REG_EAX );
1715 MEM_READ_LONG( REG_EAX, REG_EAX );
1716 store_reg( REG_EAX, Rn );
1717 sh4_x86.tstate = TSTATE_NONE;
1721 load_reg( REG_EAX, Rn );
1722 check_walign16( REG_EAX );
1723 load_reg( REG_EDX, Rm );
1724 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1725 sh4_x86.tstate = TSTATE_NONE;
1729 load_reg( REG_EAX, Rn );
1730 check_walign16( REG_EAX );
1731 LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
1732 load_reg( REG_EDX, Rm );
1733 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1734 ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
1735 sh4_x86.tstate = TSTATE_NONE;
1737 MOV.W Rm, @(R0, Rn) {:
1739 load_reg( REG_EAX, 0 );
1740 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1741 check_walign16( REG_EAX );
1742 load_reg( REG_EDX, Rm );
1743 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1744 sh4_x86.tstate = TSTATE_NONE;
1746 MOV.W R0, @(disp, GBR) {:
1748 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1749 ADDL_imms_r32( disp, REG_EAX );
1750 check_walign16( REG_EAX );
1751 load_reg( REG_EDX, 0 );
1752 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1753 sh4_x86.tstate = TSTATE_NONE;
1755 MOV.W R0, @(disp, Rn) {:
1757 load_reg( REG_EAX, Rn );
1758 ADDL_imms_r32( disp, REG_EAX );
1759 check_walign16( REG_EAX );
1760 load_reg( REG_EDX, 0 );
1761 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1762 sh4_x86.tstate = TSTATE_NONE;
1766 load_reg( REG_EAX, Rm );
1767 check_ralign16( REG_EAX );
1768 MEM_READ_WORD( REG_EAX, REG_EAX );
1769 store_reg( REG_EAX, Rn );
1770 sh4_x86.tstate = TSTATE_NONE;
1774 load_reg( REG_EAX, Rm );
1775 check_ralign16( REG_EAX );
1776 MEM_READ_WORD( REG_EAX, REG_EAX );
1778 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
1780 store_reg( REG_EAX, Rn );
1781 sh4_x86.tstate = TSTATE_NONE;
1783 MOV.W @(R0, Rm), Rn {:
1785 load_reg( REG_EAX, 0 );
1786 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1787 check_ralign16( REG_EAX );
1788 MEM_READ_WORD( REG_EAX, REG_EAX );
1789 store_reg( REG_EAX, Rn );
1790 sh4_x86.tstate = TSTATE_NONE;
1792 MOV.W @(disp, GBR), R0 {:
1794 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1795 ADDL_imms_r32( disp, REG_EAX );
1796 check_ralign16( REG_EAX );
1797 MEM_READ_WORD( REG_EAX, REG_EAX );
1798 store_reg( REG_EAX, 0 );
1799 sh4_x86.tstate = TSTATE_NONE;
1801 MOV.W @(disp, PC), Rn {:
1803 if( sh4_x86.in_delay_slot ) {
1806 // See comments for MOV.L @(disp, PC), Rn
1807 uint32_t target = pc + disp + 4;
1808 if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
1809 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1810 MOVL_moffptr_eax( ptr );
1811 MOVSXL_r16_r32( REG_EAX, REG_EAX );
1813 MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
1814 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1815 MEM_READ_WORD( REG_EAX, REG_EAX );
1816 sh4_x86.tstate = TSTATE_NONE;
1818 store_reg( REG_EAX, Rn );
1821 MOV.W @(disp, Rm), R0 {:
1823 load_reg( REG_EAX, Rm );
1824 ADDL_imms_r32( disp, REG_EAX );
1825 check_ralign16( REG_EAX );
1826 MEM_READ_WORD( REG_EAX, REG_EAX );
1827 store_reg( REG_EAX, 0 );
1828 sh4_x86.tstate = TSTATE_NONE;
1830 MOVA @(disp, PC), R0 {:
1832 if( sh4_x86.in_delay_slot ) {
1835 MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
1836 ADDL_rbpdisp_r32( R_PC, REG_ECX );
1837 store_reg( REG_ECX, 0 );
1838 sh4_x86.tstate = TSTATE_NONE;
1842 COUNT_INST(I_MOVCA);
1843 load_reg( REG_EAX, Rn );
1844 check_walign32( REG_EAX );
1845 load_reg( REG_EDX, 0 );
1846 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1847 sh4_x86.tstate = TSTATE_NONE;
1850 /* Control transfer instructions */
1853 if( sh4_x86.in_delay_slot ) {
1856 sh4vma_t target = disp + pc + 4;
1857 JT_label( nottaken );
1858 exit_block_rel(target, pc+2 );
1859 JMP_TARGET(nottaken);
1865 if( sh4_x86.in_delay_slot ) {
1868 sh4_x86.in_delay_slot = DELAY_PC;
1869 if( UNTRANSLATABLE(pc+2) ) {
1870 MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1872 ADDL_imms_r32( disp, REG_EAX );
1873 JMP_TARGET(nottaken);
1874 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1875 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1876 exit_block_emu(pc+2);
1877 sh4_x86.branch_taken = TRUE;
1881 sh4vma_t target = disp + pc + 4;
1882 JCC_cc_rel32(sh4_x86.tstate,0);
1883 uint32_t *patch = ((uint32_t *)xlat_output)-1;
1884 int save_tstate = sh4_x86.tstate;
1885 sh4_translate_instruction(pc+2);
1886 sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
1887 exit_block_rel( target, pc+4 );
1890 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1891 sh4_x86.tstate = save_tstate;
1892 sh4_translate_instruction(pc+2);
1899 if( sh4_x86.in_delay_slot ) {
1902 sh4_x86.in_delay_slot = DELAY_PC;
1903 sh4_x86.branch_taken = TRUE;
1904 if( UNTRANSLATABLE(pc+2) ) {
1905 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1906 ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
1907 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1908 exit_block_emu(pc+2);
1911 sh4_translate_instruction( pc + 2 );
1912 exit_block_rel( disp + pc + 4, pc+4 );
1919 if( sh4_x86.in_delay_slot ) {
1922 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1923 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1924 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1925 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1926 sh4_x86.in_delay_slot = DELAY_PC;
1927 sh4_x86.tstate = TSTATE_NONE;
1928 sh4_x86.branch_taken = TRUE;
1929 if( UNTRANSLATABLE(pc+2) ) {
1930 exit_block_emu(pc+2);
1933 sh4_translate_instruction( pc + 2 );
1934 exit_block_newpcset(pc+4);
1941 if( sh4_x86.in_delay_slot ) {
1944 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1945 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1946 MOVL_r32_rbpdisp( REG_EAX, R_PR );
1947 sh4_x86.in_delay_slot = DELAY_PC;
1948 sh4_x86.branch_taken = TRUE;
1949 sh4_x86.tstate = TSTATE_NONE;
1950 if( UNTRANSLATABLE(pc+2) ) {
1951 ADDL_imms_r32( disp, REG_EAX );
1952 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1953 exit_block_emu(pc+2);
1956 sh4_translate_instruction( pc + 2 );
1957 exit_block_rel( disp + pc + 4, pc+4 );
1964 if( sh4_x86.in_delay_slot ) {
1967 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1968 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1969 MOVL_r32_rbpdisp( REG_EAX, R_PR );
1970 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1971 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1973 sh4_x86.in_delay_slot = DELAY_PC;
1974 sh4_x86.tstate = TSTATE_NONE;
1975 sh4_x86.branch_taken = TRUE;
1976 if( UNTRANSLATABLE(pc+2) ) {
1977 exit_block_emu(pc+2);
1980 sh4_translate_instruction( pc + 2 );
1981 exit_block_newpcset(pc+4);
1988 if( sh4_x86.in_delay_slot ) {
1991 sh4vma_t target = disp + pc + 4;
1992 JF_label( nottaken );
1993 exit_block_rel(target, pc+2 );
1994 JMP_TARGET(nottaken);
2000 if( sh4_x86.in_delay_slot ) {
2003 sh4_x86.in_delay_slot = DELAY_PC;
2004 if( UNTRANSLATABLE(pc+2) ) {
2005 MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
2007 ADDL_imms_r32( disp, REG_EAX );
2008 JMP_TARGET(nottaken);
2009 ADDL_rbpdisp_r32( R_PC, REG_EAX );
2010 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
2011 exit_block_emu(pc+2);
2012 sh4_x86.branch_taken = TRUE;
2016 JCC_cc_rel32(sh4_x86.tstate^1,0);
2017 uint32_t *patch = ((uint32_t *)xlat_output)-1;
2019 int save_tstate = sh4_x86.tstate;
2020 sh4_translate_instruction(pc+2);
2021 sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
2022 exit_block_rel( disp + pc + 4, pc+4 );
2024 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
2025 sh4_x86.tstate = save_tstate;
2026 sh4_translate_instruction(pc+2);
2033 if( sh4_x86.in_delay_slot ) {
2036 load_reg( REG_ECX, Rn );
2037 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2038 sh4_x86.in_delay_slot = DELAY_PC;
2039 sh4_x86.branch_taken = TRUE;
2040 if( UNTRANSLATABLE(pc+2) ) {
2041 exit_block_emu(pc+2);
2044 sh4_translate_instruction(pc+2);
2045 exit_block_newpcset(pc+4);
2052 if( sh4_x86.in_delay_slot ) {
2055 MOVL_rbpdisp_r32( R_PC, REG_EAX );
2056 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
2057 MOVL_r32_rbpdisp( REG_EAX, R_PR );
2058 load_reg( REG_ECX, Rn );
2059 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2060 sh4_x86.in_delay_slot = DELAY_PC;
2061 sh4_x86.branch_taken = TRUE;
2062 sh4_x86.tstate = TSTATE_NONE;
2063 if( UNTRANSLATABLE(pc+2) ) {
2064 exit_block_emu(pc+2);
2067 sh4_translate_instruction(pc+2);
2068 exit_block_newpcset(pc+4);
2075 if( sh4_x86.in_delay_slot ) {
2079 MOVL_rbpdisp_r32( R_SPC, REG_ECX );
2080 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2081 MOVL_rbpdisp_r32( R_SSR, REG_EAX );
2082 CALL1_ptr_r32( sh4_write_sr, REG_EAX );
2083 sh4_x86.in_delay_slot = DELAY_PC;
2084 sh4_x86.fpuen_checked = FALSE;
2085 sh4_x86.tstate = TSTATE_NONE;
2086 sh4_x86.branch_taken = TRUE;
2087 sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
2088 if( UNTRANSLATABLE(pc+2) ) {
2089 exit_block_emu(pc+2);
2092 sh4_translate_instruction(pc+2);
2093 exit_block_newpcset(pc+4);
2100 if( sh4_x86.in_delay_slot ) {
2103 MOVL_rbpdisp_r32( R_PR, REG_ECX );
2104 MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
2105 sh4_x86.in_delay_slot = DELAY_PC;
2106 sh4_x86.branch_taken = TRUE;
2107 if( UNTRANSLATABLE(pc+2) ) {
2108 exit_block_emu(pc+2);
2111 sh4_translate_instruction(pc+2);
2112 exit_block_newpcset(pc+4);
2118 COUNT_INST(I_TRAPA);
2119 if( sh4_x86.in_delay_slot ) {
2122 MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX ); // 5
2123 ADDL_r32_rbpdisp( REG_ECX, R_PC );
2124 MOVL_imm32_r32( imm, REG_EAX );
2125 CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
2126 sh4_x86.tstate = TSTATE_NONE;
2127 exit_block_pcset(pc+2);
2128 sh4_x86.branch_taken = TRUE;
2133 COUNT_INST(I_UNDEF);
2134 if( sh4_x86.in_delay_slot ) {
2135 exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);
2137 exit_block_exc(EXC_ILLEGAL, pc, 2);
2143 COUNT_INST(I_CLRMAC);
2144 XORL_r32_r32(REG_EAX, REG_EAX);
2145 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
2146 MOVL_r32_rbpdisp( REG_EAX, R_MACH );
2147 sh4_x86.tstate = TSTATE_NONE;
2152 SETCCB_cc_rbpdisp(X86_COND_C, R_S);
2153 sh4_x86.tstate = TSTATE_NONE;
2159 sh4_x86.tstate = TSTATE_C;
2164 SETCCB_cc_rbpdisp(X86_COND_C, R_S);
2165 sh4_x86.tstate = TSTATE_NONE;
2171 sh4_x86.tstate = TSTATE_C;
2174 /* Floating point moves */
2176 COUNT_INST(I_FMOV1);
2178 if( sh4_x86.double_size ) {
2179 load_dr0( REG_EAX, FRm );
2180 load_dr1( REG_ECX, FRm );
2181 store_dr0( REG_EAX, FRn );
2182 store_dr1( REG_ECX, FRn );
2184 load_fr( REG_EAX, FRm ); // SZ=0 branch
2185 store_fr( REG_EAX, FRn );
2189 COUNT_INST(I_FMOV2);
2191 load_reg( REG_EAX, Rn );
2192 if( sh4_x86.double_size ) {
2193 check_walign64( REG_EAX );
2194 load_dr0( REG_EDX, FRm );
2195 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2196 load_reg( REG_EAX, Rn );
2197 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
2198 load_dr1( REG_EDX, FRm );
2199 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2201 check_walign32( REG_EAX );
2202 load_fr( REG_EDX, FRm );
2203 MEM_WRITE_LONG( REG_EAX, REG_EDX );
2205 sh4_x86.tstate = TSTATE_NONE;
FMOV @Rm, FRn {:
    COUNT_INST(I_FMOV5);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr0( REG_EAX, FRn );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr1( REG_EAX, FRn );
    } else {
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @-Rn {:
    COUNT_INST(I_FMOV3);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( REG_EAX );
        LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
        load_dr0( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        load_reg( REG_EAX, Rn );
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
        load_dr1( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
    } else {
        check_walign32( REG_EAX );
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
        load_fr( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
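/* Note that the pre-decrement forms compute the effective address first and
 * only write the decremented value back to Rn after the store has completed,
 * so a memory exception leaves Rn unmodified, as the architecture requires.
 * The post-increment loads below follow the same convention. */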
FMOV @Rm+, FRn {:
    COUNT_INST(I_FMOV6);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr0( REG_EAX, FRn );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr1( REG_EAX, FRn );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
    } else {
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @(R0, Rn) {:
    COUNT_INST(I_FMOV4);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
    if( sh4_x86.double_size ) {
        check_walign64( REG_EAX );
        load_dr0( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        load_reg( REG_EAX, Rn );
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        load_dr1( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
    } else {
        check_walign32( REG_EAX );
        load_fr( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @(R0, Rm), FRn {:
    COUNT_INST(I_FMOV7);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
    if( sh4_x86.double_size ) {
        check_ralign64( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr0( REG_EAX, FRn );
        load_reg( REG_EAX, Rm );
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr1( REG_EAX, FRn );
    } else {
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI0 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI0);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        XORL_r32_r32( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI1 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI1);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        MOVL_imm32_r32( 0x3F800000, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
:}
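/* 0x3F800000 is the IEEE754 single-precision encoding of 1.0. FLDI0/FLDI1
 * are only defined for PR=0, so nothing is emitted for the double-precision
 * case. */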
FLOAT FPUL, FRn {:
    COUNT_INST(I_FLOAT);
    check_fpuen();
    FILD_rbpdisp(R_FPUL);
    if( sh4_x86.double_prec ) {
        pop_dr( FRn );
    } else {
        pop_fr( FRn );
    }
:}
FTRC FRm, FPUL {:
    COUNT_INST(I_FTRC);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
    } else {
        push_fr( FRm );
    }
    MOVP_immptr_rptr( &min_int, REG_ECX );
    FILD_r32disp( REG_ECX, 0 );
    FCOMIP_st(1);
    JNA_label( sat );
    MOVP_immptr_rptr( &max_int, REG_ECX );
    FILD_r32disp( REG_ECX, 0 );
    FCOMIP_st(1);
    JAE_label( sat2 );
    MOVP_immptr_rptr( &save_fcw, REG_EAX );
    FNSTCW_r32disp( REG_EAX, 0 );
    MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
    FLDCW_r32disp( REG_EDX, 0 );
    FISTP_rbpdisp(R_FPUL);
    FLDCW_r32disp( REG_EAX, 0 );
    JMP_label(end);

    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
    MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
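/* FTRC saturates rather than wrapping: the operand is compared against
 * min_int/max_int up front, and the out-of-range paths store the clamping
 * bound (still addressed by REG_ECX) directly into FPUL. The x87 control
 * word is swapped to trunc_fcw around the FISTP to get the round-to-zero
 * behaviour the SH4 specifies, then restored from save_fcw. */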
FLDS FRm, FPUL {:
    COUNT_INST(I_FLDS);
    check_fpuen();
    load_fr( REG_EAX, FRm );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
:}
FSTS FPUL, FRn {:
    COUNT_INST(I_FSTS);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
    store_fr( REG_EAX, FRn );
:}
FCNVDS FRm, FPUL {:
    COUNT_INST(I_FCNVDS);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
        pop_fpul();
    }
:}
FCNVSD FPUL, FRn {:
    COUNT_INST(I_FCNVSD);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_fpul();
        pop_dr( FRn );
    }
:}

/* Floating point instructions */
FABS FRn {:
    COUNT_INST(I_FABS);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FABS_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FABS_st0();
        pop_fr(FRn);
    }
:}
FADD FRm, FRn {:
    COUNT_INST(I_FADD);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
        FADDP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
        FADDP_st(1);
        pop_fr(FRn);
    }
:}
FDIV FRm, FRn {:
    COUNT_INST(I_FDIV);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        push_dr(FRm);
        FDIVP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        push_fr(FRm);
        FDIVP_st(1);
        pop_fr(FRn);
    }
:}
FMAC FR0, FRm, FRn {:
    COUNT_INST(I_FMAC);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( 0 );
        push_dr( FRm );
        FMULP_st(1);
        push_dr( FRn );
        FADDP_st(1);
        pop_dr( FRn );
    } else {
        push_fr( 0 );
        push_fr( FRm );
        FMULP_st(1);
        push_fr( FRn );
        FADDP_st(1);
        pop_fr( FRn );
    }
:}
FMUL FRm, FRn {:
    COUNT_INST(I_FMUL);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
        FMULP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
        FMULP_st(1);
        pop_fr(FRn);
    }
:}
FNEG FRn {:
    COUNT_INST(I_FNEG);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FCHS_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FCHS_st0();
        pop_fr(FRn);
    }
:}
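/* The arithmetic group maps directly onto the x87 stack: operands are
 * pushed with push_fr/push_dr, combined with a single FxxxP, and the result
 * popped back into the SH4 register image. */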
FSRRA FRn {:
    COUNT_INST(I_FSRRA);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        FLD1_st0();
        push_fr(FRn);
        FSQRT_st0();
        FDIVP_st(1);
        pop_fr(FRn);
    }
:}
FSQRT FRn {:
    COUNT_INST(I_FSQRT);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FSQRT_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FSQRT_st0();
        pop_fr(FRn);
    }
:}
FSUB FRm, FRn {:
    COUNT_INST(I_FSUB);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        push_dr(FRm);
        FSUBP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        push_fr(FRm);
        FSUBP_st(1);
        pop_fr(FRn);
    }
:}

FCMP/EQ FRm, FRn {:
    COUNT_INST(I_FCMPEQ);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
    }
    XORL_r32_r32(REG_EAX, REG_EAX);
    XORL_r32_r32(REG_EDX, REG_EDX);
    FCOMIP_st(1);
    SETCCB_cc_r8(X86_COND_NP, REG_DL);
    CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
    MOVL_r32_rbpdisp(REG_EAX, R_T);
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
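/* FCOMIP sets ZF for equal *and* for unordered (NaN) operands; the
 * SETcc(NP)/CMOVcc(E) pair above folds the parity flag back in so that a
 * NaN comparison yields T=0 rather than a false equality. */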
FCMP/GT FRm, FRn {:
    COUNT_INST(I_FCMPGT);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
    }
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_A;
:}

FSCA FPUL, FRn {:
    COUNT_INST(I_FSCA);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
        MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
        CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
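/* FSCA writes sin and cos of the FPUL angle into the FRn register pair; it
 * is implemented out of line in sh4_fsca() rather than inlined here. */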
FIPR FVm, FVn {:
    COUNT_INST(I_FIPR);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled ) {
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
            MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
            HADDPS_xmm_xmm( 4, 4 );
            HADDPS_xmm_xmm( 4, 4 );
            MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
        } else {
            push_fr( FVm<<2 );
            push_fr( FVn<<2 );
            FMULP_st(1);
            push_fr( (FVm<<2)+1);
            push_fr( (FVn<<2)+1);
            FMULP_st(1);
            FADDP_st(1);
            push_fr( (FVm<<2)+2);
            push_fr( (FVn<<2)+2);
            FMULP_st(1);
            FADDP_st(1);
            push_fr( (FVm<<2)+3);
            push_fr( (FVn<<2)+3);
            FMULP_st(1);
            FADDP_st(1);
            pop_fr( (FVn<<2)+3);
        }
    }
:}
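/* Both paths leave the dot product in FR[n+3]. The raw offsets differ
 * because the FP registers are stored pairwise-swapped on the little-endian
 * host: push_fr()/pop_fr() apply the swizzle internally, while the SSE3
 * path addresses memory directly, so FR[n+3] lives at raw slot (FVn<<2)+2. */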
FTRV XMTRX, FVn {:
    COUNT_INST(I_FTRV);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
            /* FIXME: For now, disable this inlining when we're running in shadow mode -
             * it gives slightly different results from the emu core. Need to
             * fix the precision so both give the right results.
             */
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 );  // M1 M0 M3 M2
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 );  // M5 M4 M7 M6
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 );  // M9 M8 M11 M10
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 ); // M13 M12 M15 M14

            MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
            MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
            MOV_xmm_xmm( 4, 6 );
            MOV_xmm_xmm( 5, 7 );
            MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
            MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
            MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
            MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
            MULPS_xmm_xmm( 0, 4 );
            MULPS_xmm_xmm( 1, 5 );
            MULPS_xmm_xmm( 2, 6 );
            MULPS_xmm_xmm( 3, 7 );
            ADDPS_xmm_xmm( 5, 4 );
            ADDPS_xmm_xmm( 7, 6 );
            ADDPS_xmm_xmm( 6, 4 );
            MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
        } else {
            LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
            CALL1_ptr_r32( sh4_ftrv, REG_EAX );
        }
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
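/* FTRV computes FVn' = XMTRX * FVn with the 4x4 matrix held in the back
 * bank. The SSE3 path broadcasts each vector element across an XMM register
 * (the MOVS*DUP/MOVLHPS/MOVHLPS shuffles), multiplies against the four
 * matrix columns, and sums the partial products - a full matrix-vector
 * multiply with no loop and no x87 traffic. */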
FRCHG {:
    COUNT_INST(I_FRCHG);
    check_fpuen();
    XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
    CALL_ptr( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:
    COUNT_INST(I_FSCHG);
    check_fpuen();
    XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
    XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_size = !sh4_x86.double_size;
    sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
:}

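/* FSCHG toggles FPSCR.SZ both in the guest state and in the translator's
 * own double_size/sh4_mode mirrors, so the rest of the block can continue
 * to be generated for the new transfer size without ending the block. */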
/* Processor control instructions */
LDC Rm, SR {:
    COUNT_INST(I_LDCSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_reg( REG_EAX, Rm );
        CALL1_ptr_r32( sh4_write_sr, REG_EAX );
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
        return 2;
    }
:}
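/* Writing SR can change the processor mode and the active register bank,
 * so everything the translator caches about the current mode (sh4_mode,
 * the fpuen check, the tracked flag state) is invalidated here. */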
LDC Rm, GBR {:
    COUNT_INST(I_LDC);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
:}
LDC Rm, VBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, GBR {:
    COUNT_INST(I_LDCM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
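/* As with the other post-increment forms, Rm is only bumped after
 * MEM_READ_LONG has completed, so an address error or TLB miss replays the
 * instruction with the original register value. */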
LDC.L @Rm+, SR {:
    COUNT_INST(I_LDCSRM);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
        CALL1_ptr_r32( sh4_write_sr, REG_EAX );
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
        return 2;
    }
:}
LDC.L @Rm+, VBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPSCR {:
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
LDS.L @Rm+, FPSCR {:
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
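/* sh4_write_fpscr() may change PR/SZ/FR, so the cached sh4_mode is thrown
 * away and the translation mode is re-derived before any further FP code
 * is generated. */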
LDS Rm, FPUL {:
    COUNT_INST(I_LDS);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:
    COUNT_INST(I_LDSM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {:
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
:}
LDS.L @Rm+, PR {:
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:
    COUNT_INST(I_LDTLB);
    CALL_ptr( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
:}
OCBI @Rn {:
    COUNT_INST(I_OCBI);
:}
OCBP @Rn {:
    COUNT_INST(I_OCBP);
:}
OCBWB @Rn {:
    COUNT_INST(I_OCBWB);
:}
PREF @Rn {:
    COUNT_INST(I_PREF);
    load_reg( REG_EAX, Rn );
    MEM_PREFETCH( REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {:
    COUNT_INST(I_SLEEP);
    check_priv();
    CALL_ptr( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
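/* The cache-control group above (OCBI/OCBP/OCBWB) is a no-op here since the
 * operand cache itself isn't modelled; PREF, by contrast, is forwarded to
 * the memory region's prefetch hook, which is what services the SH4 store
 * queues. */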
STC SR, Rn {:
    COUNT_INST(I_STCSR);
    check_priv();
    CALL_ptr(sh4_read_sr);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:
    COUNT_INST(I_STC);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STC VBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_VBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SSR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SPC, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SGR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_DBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SR, @-Rn {:
    COUNT_INST(I_STCSRM);
    check_priv();
    CALL_ptr( sh4_read_sr );
    MOVL_r32_r32( REG_EAX, REG_EDX );
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_VBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SSR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SPC, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SGR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_DBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:
    COUNT_INST(I_STCM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_GBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPSCR, Rn {:
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:
    COUNT_INST(I_STS);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPUL, @-Rn {:
    COUNT_INST(I_STSM);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACH, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACH, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACH, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACL, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_PR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L PR, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_PR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
NOP {:
    COUNT_INST(I_NOP);
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
:}
%%
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 0;
}

/**
 * The unwind methods only work if we compiled with DWARF2 frame information
 * (ie -fexceptions), otherwise we have to use the direct frame scan.
 */
#ifdef HAVE_EXCEPTIONS
#include <unwind.h>

struct UnwindInfo {
    uintptr_t block_start;
    uintptr_t block_end;
    void *pc;
};

static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
{
    struct UnwindInfo *info = arg;
    void *pc = (void *)_Unwind_GetIP(context);
    if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
        info->pc = pc;
        return _URC_NORMAL_STOP;
    }
    return _URC_NO_REASON;
}
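/* When a memory exception is taken inside generated code,
 * _Unwind_Backtrace() walks the host stack and xlat_check_frame() picks out
 * the first return address that falls within the faulting translated block;
 * that address is the native PC needed for recovery. */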
void *xlat_get_native_pc( void *code, uint32_t code_size )
{
    struct _Unwind_Exception exc;
    struct UnwindInfo info;

    info.pc = NULL;
    info.block_start = (uintptr_t)code;
    info.block_end = info.block_start + code_size;
    void *result = NULL;
    _Unwind_Backtrace( xlat_check_frame, &info );
    return info.pc;
}
#else
/* Assume this is an ia32 build - amd64 should always have dwarf information */
void *xlat_get_native_pc( void *code, uint32_t code_size )
{
    void *result = NULL;
    asm(
        "mov %%ebp, %%eax\n\t"
        "mov $0x8, %%ecx\n\t"
        "mov %1, %%edx\n"
        "frame_loop: test %%eax, %%eax\n\t"
        "je frame_not_found\n\t"
        "cmp (%%eax), %%edx\n\t"
        "je frame_found\n\t"
        "sub $0x1, %%ecx\n\t"
        "je frame_not_found\n\t"
        "movl (%%eax), %%eax\n\t"
        "jmp frame_loop\n"
        "frame_found: movl 0x4(%%eax), %0\n"
        "frame_not_found:"
        : "=r" (result)
        : "r" (((uint8_t *)&sh4r) + 128 )
        : "eax", "ecx", "edx" );
    return result;
}
#endif
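/* The fallback scans at most 8 frames for one whose saved %ebp equals the
 * fixed register-base pointer the translated code runs with (&sh4r + 128);
 * the word above that slot on the stack is then the return address into the
 * translated block. */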