filename | src/sh4/sh4x86.in |
changeset | 1214:49152b3d8b75 |
prev | 1198:407659e01ef0 |
next | 1216:defbd44429d8 |
author | nkeynes |
date | Mon Feb 13 12:27:09 2012 +1000 |
permissions | -rw-r--r-- |
last change | Setup the unlink_blocks function via a callback, rather than calling directly into sh4/x86 code from xltcache |
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "x86dasm/x86dasm.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096

/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg) (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

#define R_T REG_OFFSET(t)
#define R_Q REG_OFFSET(q)
#define R_S REG_OFFSET(s)
#define R_M REG_OFFSET(m)
#define R_SR REG_OFFSET(sr)
#define R_GBR REG_OFFSET(gbr)
#define R_SSR REG_OFFSET(ssr)
#define R_SPC REG_OFFSET(spc)
#define R_VBR REG_OFFSET(vbr)
#define R_MACH REG_OFFSET(mac)+4
#define R_MACL REG_OFFSET(mac)
#define R_PC REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR REG_OFFSET(pr)
#define R_SGR REG_OFFSET(sgr)
#define R_FPUL REG_OFFSET(fpul)
#define R_FPSCR REG_OFFSET(fpscr)
#define R_DBR REG_OFFSET(dbr)
#define R_R(rn) REG_OFFSET(r[rn])
#define R_FR(f) REG_OFFSET(fr[0][(f)^1])
#define R_XF(f) REG_OFFSET(fr[1][(f)^1])
#define R_DR(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
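
/* The -128 bias here pairs with the "sh4r+128" base address kept in the
 * frame register (see x86_symbol_table below): biasing the base into the
 * middle of the structure keeps most register offsets within a signed
 * 8-bit displacement. For example, by the definition above:
 *   R_T == ((char *)&sh4r.t - (char *)&sh4r) - 128
 * which is a small negative disp8 for fields near the start of sh4r
 * (a reading of the macro, not a documented guarantee).
 */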

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

#define SH4_MODE_UNKNOWN -1

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};
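
/* A backpatch record ties a 32-bit relocation in the generated code to the
 * SH4 instruction that may raise an exception there. Judging from the uses
 * below: fixup_offset is the code offset of the operand to patch,
 * fixup_icount is the instruction count from the block start, and exc_code
 * is either a real exception code (patched as a rel32 branch to a
 * raise-exception stub in the block trailer) or the sentinel -2, meaning a
 * pointer-sized slot that receives the recovery code address directly.
 */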

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    uint8_t *code;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn; /* Trace stack height for alignment purposes */
    uint32_t sh4_mode; /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;
    struct mem_region_fn **user_address_space;

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback;
    xlat_block_end_callback_t end_callback;
    gboolean fastmem;
    gboolean profile_blocks;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc );
static void sh4_x86_translate_unlink_block( void *use_list );

static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },
    { "sh4_user_address_space", NULL },
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
    { "sh4_translate_get_code_and_backpatch", sh4_translate_get_code_and_backpatch },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_raise_exception", sh4_raise_exception },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};

static struct xlat_target_fns x86_target_fns = {
    sh4_x86_translate_unlink_block
};
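
/* is_sse3_supported() executes CPUID leaf 1 and tests ECX bit 0, which is
 * the SSE3 (PNI) feature flag on both Intel and AMD parts. A minimal
 * standalone equivalent, assuming a GCC-style compiler:
 *
 *   unsigned a, b, c, d;
 *   __asm__ __volatile__( "cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d)
 *                                 : "a"(1) );
 *   gboolean sse3 = (c & 1) != 0;
 */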

gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}

void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
{
    sh4_x86.priv_address_space = priv;
    sh4_x86.user_address_space = user;
    x86_symbol_table[2].ptr = priv;
    x86_symbol_table[3].ptr = user;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.begin_callback = NULL;
    sh4_x86.end_callback = NULL;
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
    sh4_x86.fastmem = TRUE;
    sh4_x86.profile_blocks = FALSE;
    sh4_x86.sse3_enabled = is_sse3_supported();
    x86_disasm_init();
    x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
    xlat_set_target_fns(x86_target_fns);
}

void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.begin_callback = begin;
    sh4_x86.end_callback = end;
}

void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}

void sh4_translate_set_profile_blocks( gboolean flag )
{
    sh4_x86.profile_blocks = flag;
}

gboolean sh4_translate_get_profile_blocks()
{
    return sh4_x86.profile_blocks;
}

/**
 * Disassemble the given translated code block, and its source SH4 code block
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end; ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                 target_pc, op, buf );
#else
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                 target_pc, op, buf );
#endif
        if( source_recov_table < source_recov_end &&
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}

static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}

#define TSTATE_NONE -1
#define TSTATE_O X86_COND_O
#define TSTATE_C X86_COND_C
#define TSTATE_E X86_COND_E
#define TSTATE_NE X86_COND_NE
#define TSTATE_G X86_COND_G
#define TSTATE_GE X86_COND_GE
#define TSTATE_A X86_COND_A
#define TSTATE_AE X86_COND_AE

#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
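
/* Forward jumps are emitted with a rel8 displacement of -1 and fixed up when
 * the target is reached: MARK_JMP8 remembers the address of the displacement
 * byte, and JMP_TARGET adds the distance from that byte to the current output
 * position ((-1) + dist == dist - 1, the correct rel8 measured from the end
 * of the 2-byte jump). Typical use, as seen throughout the code below:
 *
 *   JE_label(skip);     // emits "je ." with rel8 = -1, marks the byte
 *   ...code to skip...
 *   JMP_TARGET(skip);   // patches the rel8 to land here
 */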

/* Convenience instructions */
#define LDC_t() CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t() SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t() SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t() SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t() SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t() SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t() SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t() SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t() SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1) SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label) JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JP_label(label) JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
#define JS_label(label) JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc) JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
    CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
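
/* sh4_x86.tstate caches which x86 condition code currently mirrors the SH4
 * T bit, so a conditional branch right after a compare can test EFLAGS
 * directly. When the cache is cold (TSTATE_NONE), LOAD_t() reloads T from
 * sh4r.t with a compare against 1, leaving TSTATE_E valid. x86 condition
 * codes come in even/odd inverse pairs, which is why JF_label can use
 * (tstate^1) for the negated condition.
 */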

#define load_reg(x86reg,sh4reg) MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg) MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

#define push_fpul() FLDF_rbpdisp(R_FPUL)
#define pop_fpul() FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm) FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm) FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm) FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm) FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif

/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif
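
/* Both variants dispatch through the mem_region_fn table selected by
 * decode_address(). With HAVE_FRAME_ADDRESS, calls that can fault pass one
 * extra argument: a host code address for the exception handler to unwind
 * to. It is emitted as an immediate 0 and recorded with the -2 sentinel, so
 * the block trailer can patch in the real recovery address once it is
 * known; without frame-address support the plain call is emitted and this
 * fast unwind path is presumably unavailable.
 */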

#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )

void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_x86.profile_blocks ) {
        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}

uint32_t sh4_translate_end_block_size()
{
    uint32_t epilogue_size = EPILOGUE_SIZE;
    if( sh4_x86.end_callback ) {
        epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
    }
    if( sh4_x86.backpatch_posn <= 3 ) {
        epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
    } else {
        epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
    }
    return epilogue_size;
}
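
/* Worst-case size budget for the block trailer. Each backpatch stub emitted
 * in sh4_translate_end_block() is two 5-byte MOVL_imm32 instructions, a
 * call, and a backward jump; the first three stubs are assumed close enough
 * to the common exit for a 2-byte rel8 jump (5+5+2 = 12 bytes plus the
 * call), while later ones are costed with a 5-byte rel32 jump (15 bytes
 * plus the call).
 */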

/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updating xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but that doesn't
         * matter for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode);
}
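
/* Translated blocks for the same SH4 address but different CPU modes are
 * kept on a chain hanging off the primary block. The code above walks that
 * chain at runtime: on a mode mismatch it loads the next block's code
 * pointer from the chain field and jumps back to the top of the test (the
 * backward JMP_prerel to ptr), falling out through "nocode" when the chain
 * is exhausted.
 */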

/**
 * Fetch (or translate) the target block for the given PC, then patch the
 * calling call-site into a direct jump to it, linking the site onto the
 * target block's use-list so it can be unlinked again later.
 */
static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
{
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
        target = XLAT_BLOCK_CHAIN(target);
    }
    if( target == NULL ) {
        target = sh4_translate_basic_block( pc );
    }
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
    *backpatch = 0xE9;
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;

    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
    *retptr = backpatch;
}
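
/* The patch overwrites the original call with 0xE9 (JMP rel32) whose
 * displacement targets the block body just past its prologue; the
 * pointer-sized slot after the 5-byte jump stores the use-list link. The
 * function also rewrites its own return address on the stack so that, on
 * return, execution resumes at the freshly patched jump and enters the
 * target block immediately.
 */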

static void emit_translate_and_backpatch()
{
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
    CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);

    /* When patched, the jmp instruction will be 5 bytes (either platform) -
     * we need to reserve sizeof(void*) bytes for the use-list
     * pointer
     */
    if( sizeof(void*) == 8 ) {
        NOP();
    } else {
        NOP2();
    }
}
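
/* The padding makes the call site exactly 5 + sizeof(void*) bytes long:
 * on i386 the 7-byte call plus NOP2 gives 9 = 5 + 4, and on x86-64 the
 * 12-byte call plus a 1-byte NOP gives 13 = 5 + 8 - just enough room for
 * the patched jump followed by the use-list pointer.
 */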

/**
 * If we're jumping to a fixed address (or at least one fixed relative to
 * the current PC), we can do a direct branch. REG_ARG1 should contain
 * the PC at this point.
 */
static void jump_next_block_fixed_pc( sh4addr_t pc )
{
    if( IS_IN_ICACHE(pc) ) {
        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
             * fetch-and-backpatch routine, which will replace the call with a branch */
            emit_translate_and_backpatch();
            return;
        } else {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        }
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();

}

static void sh4_x86_translate_unlink_block( void *use_list )
{
    uint8_t *tmp = xlat_output; /* In case a translation is in progress - which should never happen */
    void *next = use_list;
    while( next != NULL ) {
        xlat_output = (uint8_t *)next;
        next = *(void **)(xlat_output+5);
        emit_translate_and_backpatch();
    }
    xlat_output = tmp;
}
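
/* Unlinking re-emits the original call+padding sequence over each patched
 * jump on the block's use-list (the next link is saved before it is
 * overwritten), so every linked call site reverts to the slow
 * fetch-and-backpatch path the next time it executes.
 */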

static void exit_block()
{
    emit_epilogue();
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}

/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    jump_next_block_fixed_pc(pc);
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        jump_next_block_fixed_pc(pc);
        JMP_TARGET(exitloop2);
    }
    exit_block();
}
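
/* In the tight-loop case above, backdisp is the (negative) displacement from
 * the current output position back to the block's first byte, plus
 * PROLOGUE_SIZE, so the conditional branch re-enters the block body just
 * past its prologue and the loop spins in place until an event becomes
 * pending.
 */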

/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}

/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}

/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        INC_r32( REG_EDX ); /* Add 1 for the aborting instruction itself */
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
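
/* Contract for the trailer: each backpatch stub loads the instruction count
 * of the aborting SH4 instruction into EDX before jumping to the common
 * cleanup at end_ptr, which advances SPC by 2*EDX bytes and charges
 * (EDX+1)*sh4_cpu_period cycles (the +1 covering the aborting instruction
 * itself) before leaving through exit_block().
 */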

/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * if they were a single instruction).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an instruction that raises an exception)
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
%%
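/* Everything after the %% separator is input to lxdream's instruction-decoder
 * generator rather than plain C: each "PATTERN {: ... :}" rule pairs an SH4
 * instruction form with the C fragment to emit for it, with operand fields
 * such as Rm, Rn, imm and disp bound from the decoded opcode. The generator
 * appears to expand these rules into the body of sh4_translate_instruction()
 * above.
 */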
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32(0, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
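/* CMP/STR sets T when any of the four corresponding bytes of Rm and Rn are
 * equal. The XOR above makes equal bytes zero, and the chain of byte tests
 * short-circuits to the SETE as soon as a zero byte is found; note that all
 * three jump targets funnel into the same SETE_t().
 */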
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL ); // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
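/* DIV1 performs one step of a non-restoring 1-bit division: the next bit of
 * Rn is rotated in through carry, Rm is added or subtracted depending on
 * whether the Q and M flags agree, and the new quotient bit is folded back
 * into Q and T via the Q'/Q'' algebra in the comments above.
 */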
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp(REG_EAX, 0);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note: we translate the address twice in case it crosses a page
        // boundary. It may be worth adding a page-boundary check to skip
        // the second translation.
    } else {
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }
    IMULL_rspdisp( 0 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );                     // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JS_label( positive );                 // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JMP_label(end2);                      // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JMP_label(end3);                      // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32( 0, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );          // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);           // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);               // 2

    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );   // 3
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );          // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr );
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);               // 2

    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    COUNT_INST(I_SUBV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
    store_reg( REG_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    SHLL_imm_r32( 16, REG_ECX );
    SHRL_imm_r32( 16, REG_EAX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    COUNT_INST(I_TASB);
    load_reg( REG_EAX, Rn );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    TESTB_r8_r8( REG_DL, REG_DL );
    SETE_t();
    ORB_imms_r8( 0x80, REG_DL );
    MOVL_rspdisp_r32( 0, REG_EAX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    COUNT_INST(I_TST);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    TESTL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    COUNT_INST(I_TSTI);
    load_reg( REG_EAX, 0 );
    TESTL_imms_r32( imm, REG_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    COUNT_INST(I_TSTB);
    load_reg( REG_EAX, 0);
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    TESTB_imms_r8( imm, REG_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    COUNT_INST(I_XOR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    COUNT_INST(I_XORI);
    load_reg( REG_EAX, 0 );
    XORL_imms_r32( imm, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_XORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
    MOVL_rspdisp_r32( 0, REG_EAX );
    XORL_imms_r32( imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    COUNT_INST(I_XTRCT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    SHRL_imm_r32( 16, REG_ECX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Data move instructions */
MOV Rm, Rn {:
    COUNT_INST(I_MOV);
    load_reg( REG_EAX, Rm );
    store_reg( REG_EAX, Rn );
:}
MOV #imm, Rn {:
    COUNT_INST(I_MOVI);
    MOVL_imm32_r32( imm, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    if( Rm != Rn ) {
        ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
    }
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    check_walign32(REG_EAX);
    MOVL_r32_r32( REG_EAX, REG_ECX );
    ANDL_imms_r32( 0xFC000000, REG_ECX );
    CMPL_imms_r32( 0xE0000000, REG_ECX );
    JNE_label( notsq );
    ANDL_imms_r32( 0x3C, REG_EAX );
    load_reg( REG_EDX, Rm );
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
    JMP_label(end);
    JMP_TARGET(notsq);
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
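/* Addresses in the 0xE0000000-0xE3FFFFFF range are the SH4 store queues, so
 * long writes there are special-cased: the masked compare above detects the
 * region, and the store lands directly in the sh4r.store_queue buffer
 * (indexed by address bits 2..5 via the 0x3C mask) instead of going through
 * the memory subsystem. The same fast path appears again in
 * MOV.L Rm, @(disp, Rn) below.
 */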
1684 MOV.L Rm, @-Rn {:
1685 COUNT_INST(I_MOVL);
1686 load_reg( REG_EAX, Rn );
1687 ADDL_imms_r32( -4, REG_EAX );
1688 check_walign32( REG_EAX );
1689 load_reg( REG_EDX, Rm );
1690 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1691 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
1692 sh4_x86.tstate = TSTATE_NONE;
1693 :}
1694 MOV.L Rm, @(R0, Rn) {:
1695 COUNT_INST(I_MOVL);
1696 load_reg( REG_EAX, 0 );
1697 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1698 check_walign32( REG_EAX );
1699 load_reg( REG_EDX, Rm );
1700 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1701 sh4_x86.tstate = TSTATE_NONE;
1702 :}
1703 MOV.L R0, @(disp, GBR) {:
1704 COUNT_INST(I_MOVL);
1705 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1706 ADDL_imms_r32( disp, REG_EAX );
1707 check_walign32( REG_EAX );
1708 load_reg( REG_EDX, 0 );
1709 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1710 sh4_x86.tstate = TSTATE_NONE;
1711 :}
1712 MOV.L Rm, @(disp, Rn) {:
1713 COUNT_INST(I_MOVL);
1714 load_reg( REG_EAX, Rn );
1715 ADDL_imms_r32( disp, REG_EAX );
1716 check_walign32( REG_EAX );
1717 MOVL_r32_r32( REG_EAX, REG_ECX );
1718 ANDL_imms_r32( 0xFC000000, REG_ECX );
1719 CMPL_imms_r32( 0xE0000000, REG_ECX );
1720 JNE_label( notsq );
1721 ANDL_imms_r32( 0x3C, REG_EAX );
1722 load_reg( REG_EDX, Rm );
1723 MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
1724 JMP_label(end);
1725 JMP_TARGET(notsq);
1726 load_reg( REG_EDX, Rm );
1727 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1728 JMP_TARGET(end);
1729 sh4_x86.tstate = TSTATE_NONE;
1730 :}
1731 MOV.L @Rm, Rn {:
1732 COUNT_INST(I_MOVL);
1733 load_reg( REG_EAX, Rm );
1734 check_ralign32( REG_EAX );
1735 MEM_READ_LONG( REG_EAX, REG_EAX );
1736 store_reg( REG_EAX, Rn );
1737 sh4_x86.tstate = TSTATE_NONE;
1738 :}
1739 MOV.L @Rm+, Rn {:
1740 COUNT_INST(I_MOVL);
1741 load_reg( REG_EAX, Rm );
1742 check_ralign32( REG_EAX );
1743 MEM_READ_LONG( REG_EAX, REG_EAX );
1744 if( Rm != Rn ) {
1745 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
1746 }
1747 store_reg( REG_EAX, Rn );
1748 sh4_x86.tstate = TSTATE_NONE;
1749 :}
1750 MOV.L @(R0, Rm), Rn {:
1751 COUNT_INST(I_MOVL);
1752 load_reg( REG_EAX, 0 );
1753 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1754 check_ralign32( REG_EAX );
1755 MEM_READ_LONG( REG_EAX, REG_EAX );
1756 store_reg( REG_EAX, Rn );
1757 sh4_x86.tstate = TSTATE_NONE;
1758 :}
1759 MOV.L @(disp, GBR), R0 {:
1760 COUNT_INST(I_MOVL);
1761 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1762 ADDL_imms_r32( disp, REG_EAX );
1763 check_ralign32( REG_EAX );
1764 MEM_READ_LONG( REG_EAX, REG_EAX );
1765 store_reg( REG_EAX, 0 );
1766 sh4_x86.tstate = TSTATE_NONE;
1767 :}
1768 MOV.L @(disp, PC), Rn {:
1769 COUNT_INST(I_MOVLPC);
1770 if( sh4_x86.in_delay_slot ) {
1771 SLOTILLEGAL();
1772 } else {
1773 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1774 if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just reference it directly and circumvent the
            // whole memory subsystem (a big performance win).

            // FIXME: There's a corner case that isn't handled here, when the
            // current code page is in the ITLB but not in the UTLB: this
            // should probably generate a TLB miss, although the real SH4's
            // behaviour needs testing to confirm. It's unlikely anything
            // depends on this behaviour, though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOVL_moffptr_eax( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
            ADDL_rbpdisp_r32( R_PC, REG_EAX );
            MEM_READ_LONG( REG_EAX, REG_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( REG_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    check_walign16( REG_EAX );
    LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    COUNT_INST(I_MOVW);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rm );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rm );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    if( Rm != Rn ) {
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    COUNT_INST(I_MOVW);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    COUNT_INST(I_MOVW);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOVL_moffptr_eax( ptr );
            MOVSXL_r16_r32( REG_EAX, REG_EAX );
        } else {
            MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
            ADDL_rbpdisp_r32( R_PC, REG_EAX );
            MEM_READ_WORD( REG_EAX, REG_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( REG_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    COUNT_INST(I_MOVA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
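        /* R0 = (PC & ~3) + disp + 4, computed relative to the runtime sh4r.pc */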
        MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
        ADDL_rbpdisp_r32( R_PC, REG_ECX );
        store_reg( REG_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
MOVCA.L R0, @Rn {:
    COUNT_INST(I_MOVCA);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Control transfer instructions */
BF disp {:
    COUNT_INST(I_BF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JT_label( nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BF/S disp {:
    COUNT_INST(I_BFS);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
            JT_label(nottaken);
            ADDL_imms_r32( disp, REG_EAX );
            JMP_TARGET(nottaken);
            ADDL_rbpdisp_r32( R_PC, REG_EAX );
            MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            LOAD_t();
            sh4vma_t target = disp + pc + 4;
            JCC_cc_rel32(sh4_x86.tstate,0);
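            /* Emit the conditional jump with a zero displacement for now,
             * then backpatch it below once the not-taken address is known. */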
            uint32_t *patch = ((uint32_t *)xlat_output)-1;
            int save_tstate = sh4_x86.tstate;
            sh4_translate_instruction(pc+2);
            sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
            exit_block_rel( target, pc+4 );

            // not taken
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_x86.tstate = save_tstate;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
BRA disp {:
    COUNT_INST(I_BRA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            MOVL_rbpdisp_r32( R_PC, REG_EAX );
            ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
            MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BRAF Rn {:
    COUNT_INST(I_BRAF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        MOVL_rbpdisp_r32( R_PC, REG_EAX );
        ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
        ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
        MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
BSR disp {:
    COUNT_INST(I_BSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        MOVL_rbpdisp_r32( R_PC, REG_EAX );
        ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
        MOVL_r32_rbpdisp( REG_EAX, R_PR );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            ADDL_imms_r32( disp, REG_EAX );
            MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BSRF Rn {:
    COUNT_INST(I_BSRF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        MOVL_rbpdisp_r32( R_PC, REG_EAX );
        ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
        MOVL_r32_rbpdisp( REG_EAX, R_PR );
        ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
        MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );

        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
BT disp {:
    COUNT_INST(I_BT);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JF_label( nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BT/S disp {:
    COUNT_INST(I_BTS);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
            JF_label(nottaken);
            ADDL_imms_r32( disp, REG_EAX );
            JMP_TARGET(nottaken);
            ADDL_rbpdisp_r32( R_PC, REG_EAX );
            MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            LOAD_t();
            JCC_cc_rel32(sh4_x86.tstate^1,0);
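            /* Same forward-branch backpatch trick as BF/S above, with the
             * condition inverted. */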
            uint32_t *patch = ((uint32_t *)xlat_output)-1;

            int save_tstate = sh4_x86.tstate;
            sh4_translate_instruction(pc+2);
            sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
            exit_block_rel( disp + pc + 4, pc+4 );
            // not taken
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_x86.tstate = save_tstate;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
JMP @Rn {:
    COUNT_INST(I_JMP);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( REG_ECX, Rn );
        MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
JSR @Rn {:
    COUNT_INST(I_JSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        MOVL_rbpdisp_r32( R_PC, REG_EAX );
        ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
        MOVL_r32_rbpdisp( REG_EAX, R_PR );
        load_reg( REG_ECX, Rn );
        MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
RTE {:
    COUNT_INST(I_RTE);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        MOVL_rbpdisp_r32( R_SPC, REG_ECX );
        MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
        MOVL_rbpdisp_r32( R_SSR, REG_EAX );
        CALL1_ptr_r32( sh4_write_sr, REG_EAX );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
RTS {:
    COUNT_INST(I_RTS);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        MOVL_rbpdisp_r32( R_PR, REG_ECX );
        MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
TRAPA #imm {:
    COUNT_INST(I_TRAPA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX ); // 5
        ADDL_r32_rbpdisp( REG_ECX, R_PC );
        MOVL_imm32_r32( imm, REG_EAX );
        CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
        sh4_x86.tstate = TSTATE_NONE;
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 2;
    }
:}
UNDEF {:
    COUNT_INST(I_UNDEF);
    if( sh4_x86.in_delay_slot ) {
        exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);
    } else {
        exit_block_exc(EXC_ILLEGAL, pc, 2);
        return 2;
    }
:}

CLRMAC {:
    COUNT_INST(I_CLRMAC);
    XORL_r32_r32(REG_EAX, REG_EAX);
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRS {:
    COUNT_INST(I_CLRS);
    CLC();
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRT {:
    COUNT_INST(I_CLRT);
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SETS {:
    COUNT_INST(I_SETS);
    STC();
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
SETT {:
    COUNT_INST(I_SETT);
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}

/* Floating point moves */
FMOV FRm, FRn {:
    COUNT_INST(I_FMOV1);
    check_fpuen();
    if( sh4_x86.double_size ) {
        load_dr0( REG_EAX, FRm );
        load_dr1( REG_ECX, FRm );
        store_dr0( REG_EAX, FRn );
        store_dr1( REG_ECX, FRn );
    } else {
        load_fr( REG_EAX, FRm ); // SZ=0 branch
        store_fr( REG_EAX, FRn );
    }
:}
FMOV FRm, @Rn {:
    COUNT_INST(I_FMOV2);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( REG_EAX );
        load_dr0( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        load_reg( REG_EAX, Rn );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        load_dr1( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
    } else {
        check_walign32( REG_EAX );
        load_fr( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @Rm, FRn {:
    COUNT_INST(I_FMOV5);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr0( REG_EAX, FRn );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr1( REG_EAX, FRn );
    } else {
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @-Rn {:
    COUNT_INST(I_FMOV3);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( REG_EAX );
        LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
        load_dr0( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        load_reg( REG_EAX, Rn );
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
        load_dr1( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
    } else {
        check_walign32( REG_EAX );
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
        load_fr( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @Rm+, FRn {:
    COUNT_INST(I_FMOV6);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr0( REG_EAX, FRn );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr1( REG_EAX, FRn );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
    } else {
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @(R0, Rn) {:
    COUNT_INST(I_FMOV4);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
    if( sh4_x86.double_size ) {
        check_walign64( REG_EAX );
        load_dr0( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        load_reg( REG_EAX, Rn );
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        load_dr1( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
    } else {
        check_walign32( REG_EAX );
        load_fr( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @(R0, Rm), FRn {:
    COUNT_INST(I_FMOV7);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
    if( sh4_x86.double_size ) {
        check_ralign64( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr0( REG_EAX, FRn );
        load_reg( REG_EAX, Rm );
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr1( REG_EAX, FRn );
    } else {
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI0 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI0);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        XORL_r32_r32( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI1 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI1);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        MOVL_imm32_r32( 0x3F800000, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
:}

FLOAT FPUL, FRn {:
    COUNT_INST(I_FLOAT);
    check_fpuen();
    FILD_rbpdisp(R_FPUL);
    if( sh4_x86.double_prec ) {
        pop_dr( FRn );
    } else {
        pop_fr( FRn );
    }
:}
FTRC FRm, FPUL {:
    COUNT_INST(I_FTRC);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
    } else {
        push_fr( FRm );
    }
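    /* Saturate out-of-range results to min_int/max_int (NaN goes to min_int)
     * as the hardware does, then convert with the FPU control word
     * temporarily switched to round-to-zero. */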
    MOVP_immptr_rptr( &min_int, REG_ECX );
    FILD_r32disp( REG_ECX, 0 );
    FCOMIP_st(1);
    JAE_label( sat );
    JP_label( sat2 );
    MOVP_immptr_rptr( &max_int, REG_ECX );
    FILD_r32disp( REG_ECX, 0 );
    FCOMIP_st(1);
    JNA_label( sat3 );
    MOVP_immptr_rptr( &save_fcw, REG_EAX );
    FNSTCW_r32disp( REG_EAX, 0 );
    MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
    FLDCW_r32disp( REG_EDX, 0 );
    FISTP_rbpdisp(R_FPUL);
    FLDCW_r32disp( REG_EAX, 0 );
    JMP_label(end);

    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    JMP_TARGET(sat3);
    MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
    MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDS FRm, FPUL {:
    COUNT_INST(I_FLDS);
    check_fpuen();
    load_fr( REG_EAX, FRm );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
:}
FSTS FPUL, FRn {:
    COUNT_INST(I_FSTS);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
    store_fr( REG_EAX, FRn );
:}
FCNVDS FRm, FPUL {:
    COUNT_INST(I_FCNVDS);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
        pop_fpul();
    }
:}
FCNVSD FPUL, FRn {:
    COUNT_INST(I_FCNVSD);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_fpul();
        pop_dr( FRn );
    }
:}

/* Floating point instructions */
FABS FRn {:
    COUNT_INST(I_FABS);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FABS_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FABS_st0();
        pop_fr(FRn);
    }
:}
FADD FRm, FRn {:
    COUNT_INST(I_FADD);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
        FADDP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
        FADDP_st(1);
        pop_fr(FRn);
    }
:}
FDIV FRm, FRn {:
    COUNT_INST(I_FDIV);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        push_dr(FRm);
        FDIVP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        push_fr(FRm);
        FDIVP_st(1);
        pop_fr(FRn);
    }
:}
FMAC FR0, FRm, FRn {:
    COUNT_INST(I_FMAC);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( 0 );
        push_dr( FRm );
        FMULP_st(1);
        push_dr( FRn );
        FADDP_st(1);
        pop_dr( FRn );
    } else {
        push_fr( 0 );
        push_fr( FRm );
        FMULP_st(1);
        push_fr( FRn );
        FADDP_st(1);
        pop_fr( FRn );
    }
:}

FMUL FRm, FRn {:
    COUNT_INST(I_FMUL);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
        FMULP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
        FMULP_st(1);
        pop_fr(FRn);
    }
:}
FNEG FRn {:
    COUNT_INST(I_FNEG);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FCHS_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FCHS_st0();
        pop_fr(FRn);
    }
:}
FSRRA FRn {:
    COUNT_INST(I_FSRRA);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
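        /* 1/sqrt(FRn), computed on the x87 stack as 1.0 / sqrt(FRn) */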
        FLD1_st0();
        push_fr(FRn);
        FSQRT_st0();
        FDIVP_st(1);
        pop_fr(FRn);
    }
:}
FSQRT FRn {:
    COUNT_INST(I_FSQRT);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FSQRT_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FSQRT_st0();
        pop_fr(FRn);
    }
:}
FSUB FRm, FRn {:
    COUNT_INST(I_FSUB);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        push_dr(FRm);
        FSUBP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        push_fr(FRm);
        FSUBP_st(1);
        pop_fr(FRn);
    }
:}

FCMP/EQ FRm, FRn {:
    COUNT_INST(I_FCMPEQ);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
    }
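    /* FCOMIP sets PF on an unordered (NaN) compare, so T is set only when
     * ZF=1 and PF=0, i.e. an ordered equal. */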
    XORL_r32_r32(REG_EAX, REG_EAX);
    XORL_r32_r32(REG_EDX, REG_EDX);
    FCOMIP_st(1);
    SETCCB_cc_r8(X86_COND_NP, REG_DL);
    CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
    MOVL_r32_rbpdisp(REG_EAX, R_T);
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
FCMP/GT FRm, FRn {:
    COUNT_INST(I_FCMPGT);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
    }
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_A;
:}

FSCA FPUL, FRn {:
    COUNT_INST(I_FSCA);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
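        /* sh4_fsca() writes the sine and cosine of the FPUL angle into the
         * FRn register pair. */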
        LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
        MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
        CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FIPR FVm, FVn {:
    COUNT_INST(I_FIPR);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled ) {
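            /* Dot product: multiply the vectors, then two horizontal adds
             * collapse the four products into the last element of FVn. */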
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
            MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
            HADDPS_xmm_xmm( 4, 4 );
            HADDPS_xmm_xmm( 4, 4 );
            MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
        } else {
            push_fr( FVm<<2 );
            push_fr( FVn<<2 );
            FMULP_st(1);
            push_fr( (FVm<<2)+1);
            push_fr( (FVn<<2)+1);
            FMULP_st(1);
            FADDP_st(1);
            push_fr( (FVm<<2)+2);
            push_fr( (FVn<<2)+2);
            FMULP_st(1);
            FADDP_st(1);
            push_fr( (FVm<<2)+3);
            push_fr( (FVn<<2)+3);
            FMULP_st(1);
            FADDP_st(1);
            pop_fr( (FVn<<2)+3);
        }
    }
:}
FTRV XMTRX, FVn {:
    COUNT_INST(I_FTRV);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
            /* FIXME: For now, disable this inlining when we're running in shadow mode -
             * it gives slightly different results from the emu core. Need to
             * fix the precision so both give the right results.
             */
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 );  // M1 M0 M3 M2
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 );  // M5 M4 M7 M6
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 );  // M9 M8 M11 M10
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 ); // M13 M12 M15 M14
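
            /* Broadcast each element of FVn across an xmm register, multiply
             * by the matching group of XMTRX entries loaded above, and sum
             * the four partial products. */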
            MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
            MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
            MOV_xmm_xmm( 4, 6 );
            MOV_xmm_xmm( 5, 7 );
            MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
            MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
            MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
            MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
            MULPS_xmm_xmm( 0, 4 );
            MULPS_xmm_xmm( 1, 5 );
            MULPS_xmm_xmm( 2, 6 );
            MULPS_xmm_xmm( 3, 7 );
            ADDPS_xmm_xmm( 5, 4 );
            ADDPS_xmm_xmm( 7, 6 );
            ADDPS_xmm_xmm( 6, 4 );
            MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
        } else {
            LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
            CALL1_ptr_r32( sh4_ftrv, REG_EAX );
        }
    }
    sh4_x86.tstate = TSTATE_NONE;
:}

FRCHG {:
    COUNT_INST(I_FRCHG);
    check_fpuen();
    XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
    CALL_ptr( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:
    COUNT_INST(I_FSCHG);
    check_fpuen();
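    /* Toggle FPSCR.SZ, keeping both cached copies of the mode in sync. */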
    XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR );
    XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_size = !sh4_x86.double_size;
    sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
:}

/* Processor control instructions */
LDC Rm, SR {:
    COUNT_INST(I_LDCSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_reg( REG_EAX, Rm );
        CALL1_ptr_r32( sh4_write_sr, REG_EAX );
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
        return 2;
    }
:}
LDC Rm, GBR {:
    COUNT_INST(I_LDC);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
:}
LDC Rm, VBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, GBR {:
    COUNT_INST(I_LDCM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SR {:
    COUNT_INST(I_LDCSRM);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
        CALL1_ptr_r32( sh4_write_sr, REG_EAX );
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
        return 2;
    }
:}
LDC.L @Rm+, VBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPSCR {:
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
LDS.L @Rm+, FPSCR {:
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
LDS Rm, FPUL {:
    COUNT_INST(I_LDS);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:
    COUNT_INST(I_LDSM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {:
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
:}
LDS.L @Rm+, PR {:
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:
    COUNT_INST(I_LDTLB);
    CALL_ptr( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
:}
OCBI @Rn {:
    COUNT_INST(I_OCBI);
:}
OCBP @Rn {:
    COUNT_INST(I_OCBP);
:}
OCBWB @Rn {:
    COUNT_INST(I_OCBWB);
:}
PREF @Rn {:
    COUNT_INST(I_PREF);
    load_reg( REG_EAX, Rn );
    MEM_PREFETCH( REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {:
    COUNT_INST(I_SLEEP);
    check_priv();
    CALL_ptr( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
STC SR, Rn {:
    COUNT_INST(I_STCSR);
    check_priv();
    CALL_ptr(sh4_read_sr);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:
    COUNT_INST(I_STC);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STC VBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_VBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SSR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SPC, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SGR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_DBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SR, @-Rn {:
    COUNT_INST(I_STCSRM);
    check_priv();
    CALL_ptr( sh4_read_sr );
    MOVL_r32_r32( REG_EAX, REG_EDX );
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_VBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SSR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SPC, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SGR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_DBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:
    COUNT_INST(I_STCM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_GBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPSCR, Rn {:
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:
    COUNT_INST(I_STS);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPUL, @-Rn {:
    COUNT_INST(I_STSM);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACH, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACH, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACH, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACL, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_PR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L PR, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_PR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}

NOP {:
    COUNT_INST(I_NOP);
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
:}
%%
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 0;
}

/**
 * The unwind methods only work if we compiled with DWARF2 frame information
 * (i.e. -fexceptions); otherwise we have to fall back on the direct frame scan.
 */
#ifdef HAVE_EXCEPTIONS
#include <unwind.h>

struct UnwindInfo {
    uintptr_t block_start;
    uintptr_t block_end;
    void *pc;
};
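
/* Stop the backtrace at the first frame whose return address lies inside
 * the translated block. */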
static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
{
    struct UnwindInfo *info = arg;
    void *pc = (void *)_Unwind_GetIP(context);
    if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
        info->pc = pc;
        return _URC_NORMAL_STOP;
    }
    return _URC_NO_REASON;
}

void *xlat_get_native_pc( void *code, uint32_t code_size )
{
    struct UnwindInfo info;

    info.pc = NULL;
    info.block_start = (uintptr_t)code;
    info.block_end = info.block_start + code_size;
    _Unwind_Backtrace( xlat_check_frame, &info );
    return info.pc;
}
#else
/* Assume this is an ia32 build - amd64 should always have dwarf information */
void *xlat_get_native_pc( void *code, uint32_t code_size )
{
    void *result = NULL;
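    /* Walk up to 8 frames of the saved-EBP chain. Translated code keeps
     * EBP = &sh4r + 128, so the first frame whose saved EBP matches that
     * value has the block's native return address at frame+4. */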
    __asm__(
        "mov %%ebp, %%eax\n\t"
        "mov $0x8, %%ecx\n\t"
        "mov %1, %%edx\n"
        "frame_loop: test %%eax, %%eax\n\t"
        "je frame_not_found\n\t"
        "cmp (%%eax), %%edx\n\t"
        "je frame_found\n\t"
        "sub $0x1, %%ecx\n\t"
        "je frame_not_found\n\t"
        "movl (%%eax), %%eax\n\t"
        "jmp frame_loop\n"
        "frame_found: movl 0x4(%%eax), %0\n"
        "frame_not_found:"
        : "=r" (result)
        : "r" (((uint8_t *)&sh4r) + 128 )
        : "eax", "ecx", "edx" );
    return result;
}
#endif