filename | src/sh4/sh4x86.in |
changeset | 1182:b38a327ad8fa |
prev | 1176:70feb1749427 |
next | 1186:2dc47c67bb93 |
author | nkeynes |
date | Sun Nov 27 18:20:21 2011 +1000 (12 years ago) |
permissions | -rw-r--r-- |
last change | Add block profiling option to count the number of executions of each block, and dump them out from most-to-least used. |
view | annotate | diff | log | raw |
1 /**
2 * $Id$
3 *
4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
7 *
8 * Copyright (c) 2007 Nathan Keynes.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 */
21 #include <assert.h>
22 #include <math.h>
24 #ifndef NDEBUG
25 #define DEBUG_JUMPS 1
26 #endif
28 #include "lxdream.h"
29 #include "sh4/sh4core.h"
30 #include "sh4/sh4dasm.h"
31 #include "sh4/sh4trans.h"
32 #include "sh4/sh4stat.h"
33 #include "sh4/sh4mmio.h"
34 #include "sh4/mmu.h"
35 #include "xlat/xltcache.h"
36 #include "xlat/x86/x86op.h"
37 #include "x86dasm/x86dasm.h"
38 #include "clock.h"
/* Initial byte size of the per-block backpatch list (grown by doubling) */
#define DEFAULT_BACKPATCH_SIZE 4096

/* Offset of a reg relative to the sh4r structure.
 * The -128 bias allows more registers to be reached with single-byte
 * displacements from the base pointer (which points at sh4r+128). */
#define REG_OFFSET(reg) (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

#define R_T REG_OFFSET(t)
#define R_Q REG_OFFSET(q)
#define R_S REG_OFFSET(s)
#define R_M REG_OFFSET(m)
#define R_SR REG_OFFSET(sr)
#define R_GBR REG_OFFSET(gbr)
#define R_SSR REG_OFFSET(ssr)
#define R_SPC REG_OFFSET(spc)
#define R_VBR REG_OFFSET(vbr)
/* NOTE(review): MACH at mac+4 / MACL at mac+0 assumes a little-endian host
 * layout of the 64-bit mac field - confirm for big-endian ports */
#define R_MACH REG_OFFSET(mac)+4
#define R_MACL REG_OFFSET(mac)
#define R_PC REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR REG_OFFSET(pr)
#define R_SGR REG_OFFSET(sgr)
#define R_FPUL REG_OFFSET(fpul)
#define R_FPSCR REG_OFFSET(fpscr)
#define R_DBR REG_OFFSET(dbr)
#define R_R(rn) REG_OFFSET(r[rn])
/* FR registers: the (f)^1 index swap pairs words within each double slot
 * (presumably to match host endianness of the fr banks - TODO confirm) */
#define R_FR(f) REG_OFFSET(fr[0][(f)^1])
#define R_XF(f) REG_OFFSET(fr[1][(f)^1])
#define R_DR(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])

/* Delay-slot state for sh4_x86.in_delay_slot */
#define DELAY_NONE 0    /* not in a delay slot */
#define DELAY_PC 1      /* delay slot of a branch */
#define DELAY_PC_PR 2   /* delay slot of a branch that also writes PR */

/* Sentinel: sh4r.xlat_sh4_mode is unknown (sr/fpscr may have changed) */
#define SH4_MODE_UNKNOWN -1
/**
 * One pending fixup within the current block's generated code.
 * Recorded by sh4_x86_add_backpatch() and resolved when the block
 * trailer is written in sh4_translate_end_block().
 */
struct backpatch_record {
    uint32_t fixup_offset;  /* byte offset of the relocation within the block code */
    uint32_t fixup_icount;  /* SH4 instruction count from block start (for PC/cycle recovery) */
    int32_t exc_code;       /* exception code to raise, or negative for special fixups:
                             * -2 = pointer-sized relocation patched with the handler address */
};
/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;      /* DELAY_NONE / DELAY_PC / DELAY_PC_PR */
    uint8_t *code;          /* start of the emitted code for the current block */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc; /* SH4 PC of the first instruction in the block */
    uint32_t stack_posn; /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;            /* TSTATE_NONE or x86 cond code currently holding T */

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;  /* privileged-mode memory map */
    struct mem_region_fn **user_address_space;  /* user-mode memory map */

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback; /* called at block entry if non-NULL */
    xlat_block_end_callback_t end_callback;     /* called at block exit if non-NULL */
    gboolean fastmem;        /* enable fast memory access path */
    gboolean profile_blocks; /* emit per-block execution counters */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;  /* number of records currently in use */
    uint32_t backpatch_size;  /* capacity of backpatch_list, in records */
};
/* Global translator state - only valid while translating a block */
static struct sh4_x86_state sh4_x86;

/* Constants used by generated FPU code (presumably FTRC clamping and
 * rounding-mode control - TODO confirm against the template section) */
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

/* Symbols exposed to the x86 disassembler for annotated output.
 * Entries 2 and 3 are filled in by sh4_translate_set_address_space(). */
static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },
    { "sh4_user_address_space", NULL },
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};
/**
 * Query the host CPU for SSE3 support via CPUID leaf 1.
 * @return TRUE if bit 0 of ECX (the SSE3/PNI feature flag) is set.
 */
gboolean is_sse3_supported()
{
    uint32_t features;

    /* CPUID with EAX=1 returns feature flags in ECX (captured) and EDX;
     * EAX/EBX/EDX are clobbered by the instruction. */
    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
154 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
155 {
156 sh4_x86.priv_address_space = priv;
157 sh4_x86.user_address_space = user;
158 x86_symbol_table[2].ptr = priv;
159 x86_symbol_table[3].ptr = user;
160 }
162 void sh4_translate_init(void)
163 {
164 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
165 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
166 sh4_x86.begin_callback = NULL;
167 sh4_x86.end_callback = NULL;
168 sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
169 sh4_x86.fastmem = TRUE;
170 sh4_x86.profile_blocks = FALSE;
171 sh4_x86.sse3_enabled = is_sse3_supported();
172 x86_disasm_init();
173 x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
174 }
176 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
177 {
178 sh4_x86.begin_callback = begin;
179 sh4_x86.end_callback = end;
180 }
/**
 * Enable or disable the fast memory access path for subsequently
 * translated blocks.
 */
void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}
/**
 * Enable or disable per-block execution counting (see the counter
 * increment emitted in sh4_translate_begin_block()).
 */
void sh4_translate_set_profile_blocks( gboolean flag )
{
    sh4_x86.profile_blocks = flag;
}
192 gboolean sh4_translate_get_profile_blocks()
193 {
194 return sh4_x86.profile_blocks;
195 }
/**
 * Disassemble the given translated code block, and its source SH4 code block
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;
    /* The recovery table maps x86 offsets back to SH4 instruction counts;
     * it is walked in step with the x86 disassembly to advance the SH4 side. */
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end; ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
        /* NOTE(review): %lx assumes uintptr_t == unsigned long - confirm on LLP64 hosts */
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                 target_pc, op, buf );
#else
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                 target_pc, op, buf );
#endif
        /* Advance the SH4 window when the x86 pc passes the next recovery point */
        if( source_recov_table < source_recov_end &&
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        /* Print one SH4 instruction alongside, if any remain in the window */
        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    /* Flush any remaining SH4 instructions (padded to the right-hand column) */
    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}
/**
 * Record a backpatch site in the current block. fixup_addr points just past
 * the relocation (a 4-byte rel32, or a pointer-sized immediate when
 * exc_code == -2); the actual patching happens in sh4_translate_end_block().
 *
 * @param fixup_addr output position immediately after the relocation
 * @param fixup_pc   SH4 pc of the instruction that may fault
 * @param exc_code   exception to raise, or -2 for an address fixup
 */
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    /* Grow the list by doubling when full */
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    /* In a delay slot the faulting pc is the branch instruction's pc */
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    /* fixup_offset is the start of the relocation, hence the -reloc_size */
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
/* Values for sh4_x86.tstate: TSTATE_NONE means the T bit is only in sh4r.t;
 * otherwise it names the x86 condition code that currently reflects T in
 * the host flags, allowing a redundant compare to be skipped. */
#define TSTATE_NONE -1
#define TSTATE_O X86_COND_O
#define TSTATE_C X86_COND_C
#define TSTATE_E X86_COND_E
#define TSTATE_NE X86_COND_NE
#define TSTATE_G X86_COND_G
#define TSTATE_GE X86_COND_GE
#define TSTATE_A X86_COND_A
#define TSTATE_AE X86_COND_AE

/* Remember the address of the just-emitted rel8 displacement byte, so that
 * JMP_TARGET() can patch it once the jump target is reached. */
#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)

/* Convenience instructions */
#define LDC_t() CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t() SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t() SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t() SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t() SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t() SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t() SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t() SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t() SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1) SETCCB_cc_r8(X86_COND_C, r1)
/* Forward rel8 jumps: emitted with a -1 placeholder displacement, later
 * resolved by JMP_TARGET(label). */
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label) JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JS_label(label) JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
/* Conditional jump to the block's exception trailer, via the backpatch list */
#define JNE_exc(exc) JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) -
 * xor-ing the x86 condition code with 1 inverts the condition. */
#define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
/* Move an SH4 general register to/from an x86 register */
#define load_reg(x86reg,sh4reg) MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg) MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/* x87 FPU stack push/pop of SH4 FP registers (FPUL, FR/XF singles, DR/XD doubles) */
#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

/* Optional per-opcode statistics counter (clobbers EAX and the tstate cache) */
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/* Exception checks - Note that all exception checks will clobber EAX */

/* Abort translation with an illegal-instruction exception if the block is
 * being translated in user mode (SR.MD clear). Used by privileged opcodes. */
#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

/* Emit an FPU-disabled check once per block (SR.FD set => raise exception) */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
	ANDL_imms_r32( SR_FD, REG_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exc(EXC_FPU_DISABLED);\
	}\
	sh4_x86.tstate = TSTATE_NONE; \
    }

/* Alignment checks: raise an address error if the low bits of the
 * address register are non-zero (16/32/64-bit read/write variants). */
#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

/* Select the decode table for the mode the block is being translated in */
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
/**
 * Emit a memory read through the mem_region_fn table entry at the given
 * offset. When exceptions are possible (TLB on, or user mode), a second
 * argument carrying the exception return address is passed; the immediate
 * is backpatched (exc_code -2) with the handler address at block end.
 */
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        /* Privileged mode, TLB off: no memory exceptions possible */
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        /* Placeholder pointer, patched with the exception handler address */
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

/**
 * Emit a memory write, mirroring call_read_func(). The exception return
 * address goes in the third argument register, or on the stack when the
 * ABI passes fewer than three arguments in registers.
 */
static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
/* Without frame-address support exceptions cannot be unwound from the
 * memory handlers, so plain calls are emitted unconditionally. */
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif
/* Byte offset of a handler pointer within struct mem_region_fn */
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
/* Memory access wrappers - all route through call_read_func/call_write_func */
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

/* Terminate translation with a slot-illegal exception (pc-2 = the branch) */
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
/**
 * Begin translation of a new block starting at the given SH4 pc: reset the
 * per-block translator state, snapshot the current FPU/CPU mode flags, and
 * emit the prologue plus any instrumentation preamble.
 */
void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_x86.profile_blocks ) {
        /* Increment this block's execution counter (the 'active' field of
         * its xlat_cache_block) on every entry. */
        MOVP_immptr_rptr( ((uintptr_t)sh4_x86.code) + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}
/**
 * Upper bound on the size (in bytes) of the block trailer that
 * sh4_translate_end_block() will emit, used to reserve output space.
 * NOTE(review): the 12/15-byte constants presumably cover the per-record
 * fixup sequences (MOV + JMP, larger encoding beyond the first three
 * records) - confirm against sh4_translate_end_block().
 */
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
    } else {
        return EPILOGUE_SIZE + (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
    }
}
/**
 * Embed a breakpoint into the generated code - emits a call to
 * sh4_translate_breakpoint_hit with the SH4 pc as argument.
 * Clobbers EAX and invalidates the cached tstate.
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

/* A pc is untranslatable when its instruction bytes are not in the icache */
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 *
 * The candidate block is accepted only if its recorded xlat_sh4_mode
 * matches the current mode; otherwise the chain of alternate translations
 * is walked (looping back to the top of this emitted sequence).
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;   /* loop-back point for the chain walk */
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    /* Skip the target's prologue - we jump straight into its body */
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    /* Follow the chain pointer to the next candidate translation and retry */
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode);
}
/**
 * Emit the standard block exit: epilogue followed by either a jump to the
 * end callback (when instrumentation is enabled) or a plain return.
 */
static void exit_block()
{
    emit_epilogue();
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}
/**
 * Exit the block with sh4r.pc already written.
 * Accounts elapsed cycles, then either falls out to the event loop (when
 * an event is pending) or looks up and chains to the next block.
 */
void exit_block_pcset( sh4addr_t pc )
{
    /* slice_cycle += instructions-executed * sh4_cpu_period */
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block with sh4r.new_pc written with the target pc.
 * Copies new_pc into pc, accounts cycles, then chains to the next block
 * unless an event is pending.
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    /* pc = new_pc */
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to an absolute PC.
 * @param pc    the branch target
 * @param endpc pc of the end of the block (for cycle accounting)
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);

    if( IS_IN_ICACHE(pc) ) {
        /* Target is in the icache: look up its translation directly from the
         * LUT (low 2 bits of the entry are flags, hence the mask). */
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
        ANDP_imms_rptr( -4, REG_EAX );
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to a relative PC.
 * @param pc    the branch target
 * @param endpc pc of the end of the block (for cycle accounting)
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        /* Backward jump to just after this block's own prologue */
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        /* pc += (target - block start) */
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        if( IS_IN_ICACHE(pc) ) {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        } else if( sh4_x86.tlb_on ) {
            CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
        } else {
            CALL1_ptr_r32(xlat_get_code, REG_ARG1);
        }
        jump_next_block();
        JMP_TARGET(exitloop2);
    }
    exit_block();
}
/**
 * Exit unconditionally with a general exception: advance pc and
 * slice_cycle to the faulting instruction, raise the exception, and
 * take the normal block exit.
 */
void exit_block_exc( int code, sh4addr_t pc )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    /* +1 instruction: account for the emulated instruction itself */
    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}
/**
 * Write the block trailer (exception handling block).
 * Emits a shared cleanup stub (expects the SH4 instruction count in EDX,
 * used to recover SPC and slice_cycle), then resolves every backpatch
 * record: address fixups (-2) are patched with the handler address, rel32
 * fixups are pointed at a per-record sequence that raises the exception
 * and jumps into the shared stub.
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        /* SPC += icount*2 (EDX holds the instruction count on entry) */
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        /* slice_cycle += icount * sh4_cpu_period */
        MOVL_moffptr_eax( &sh4_cpu_period );
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    /* Pointer fixup: store the handler's absolute address */
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    /* rel32 fixup: redirect the jump to the handler */
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                /* Raise the recorded exception, then enter the shared stub */
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and the delayed instruction as a single
 * unit (as the delay-slot instruction must execute before the branch
 * takes effect).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an instruction that otherwise terminates translation).
 */
792 uint32_t sh4_translate_instruction( sh4vma_t pc )
793 {
794 uint32_t ir;
795 /* Read instruction from icache */
796 assert( IS_IN_ICACHE(pc) );
797 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
799 if( !sh4_x86.in_delay_slot ) {
800 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
801 }
803 /* check for breakpoints at this pc */
804 for( int i=0; i<sh4_breakpoint_count; i++ ) {
805 if( sh4_breakpoints[i].address == pc ) {
806 sh4_translate_emit_breakpoint(pc);
807 break;
808 }
809 }
810 %%
811 /* ALU operations */
812 ADD Rm, Rn {:
813 COUNT_INST(I_ADD);
814 load_reg( REG_EAX, Rm );
815 load_reg( REG_ECX, Rn );
816 ADDL_r32_r32( REG_EAX, REG_ECX );
817 store_reg( REG_ECX, Rn );
818 sh4_x86.tstate = TSTATE_NONE;
819 :}
820 ADD #imm, Rn {:
821 COUNT_INST(I_ADDI);
822 ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
823 sh4_x86.tstate = TSTATE_NONE;
824 :}
825 ADDC Rm, Rn {:
826 COUNT_INST(I_ADDC);
827 if( sh4_x86.tstate != TSTATE_C ) {
828 LDC_t();
829 }
830 load_reg( REG_EAX, Rm );
831 load_reg( REG_ECX, Rn );
832 ADCL_r32_r32( REG_EAX, REG_ECX );
833 store_reg( REG_ECX, Rn );
834 SETC_t();
835 sh4_x86.tstate = TSTATE_C;
836 :}
/* ALU and compare instructions. Each rule emits straight-line x86 for one
 * SH4 instruction. sh4_x86.tstate records which x86 condition code currently
 * mirrors the SH4 T flag (TSTATE_NONE once EFLAGS have been clobbered), so a
 * later rule can reuse EFLAGS instead of reloading T from sh4r. */
837 ADDV Rm, Rn {:
838 COUNT_INST(I_ADDV);
839 load_reg( REG_EAX, Rm );
840 load_reg( REG_ECX, Rn );
841 ADDL_r32_r32( REG_EAX, REG_ECX );
842 store_reg( REG_ECX, Rn );
/* T = signed overflow of the add */
843 SETO_t();
844 sh4_x86.tstate = TSTATE_O;
845 :}
846 AND Rm, Rn {:
847 COUNT_INST(I_AND);
848 load_reg( REG_EAX, Rm );
849 load_reg( REG_ECX, Rn );
850 ANDL_r32_r32( REG_EAX, REG_ECX );
851 store_reg( REG_ECX, Rn );
852 sh4_x86.tstate = TSTATE_NONE;
853 :}
854 AND #imm, R0 {:
855 COUNT_INST(I_ANDI);
856 load_reg( REG_EAX, 0 );
857 ANDL_imms_r32(imm, REG_EAX);
858 store_reg( REG_EAX, 0 );
859 sh4_x86.tstate = TSTATE_NONE;
860 :}
/* Read-modify-write on the byte at R0+GBR: the effective address is spilled
 * to the stack slot (rsp+0) across the MEM_READ call, then reloaded for the
 * write-back. */
861 AND.B #imm, @(R0, GBR) {:
862 COUNT_INST(I_ANDB);
863 load_reg( REG_EAX, 0 );
864 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
865 MOVL_r32_rspdisp(REG_EAX, 0);
866 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
867 MOVL_rspdisp_r32(0, REG_EAX);
868 ANDL_imms_r32(imm, REG_EDX );
869 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
870 sh4_x86.tstate = TSTATE_NONE;
871 :}
/* CMP/xx: compare and set T from the matching x86 condition
 * (EQ->E, GE->GE, GT->G, HI->A, HS->AE — signed vs unsigned as per SH4). */
872 CMP/EQ Rm, Rn {:
873 COUNT_INST(I_CMPEQ);
874 load_reg( REG_EAX, Rm );
875 load_reg( REG_ECX, Rn );
876 CMPL_r32_r32( REG_EAX, REG_ECX );
877 SETE_t();
878 sh4_x86.tstate = TSTATE_E;
879 :}
880 CMP/EQ #imm, R0 {:
881 COUNT_INST(I_CMPEQI);
882 load_reg( REG_EAX, 0 );
883 CMPL_imms_r32(imm, REG_EAX);
884 SETE_t();
885 sh4_x86.tstate = TSTATE_E;
886 :}
887 CMP/GE Rm, Rn {:
888 COUNT_INST(I_CMPGE);
889 load_reg( REG_EAX, Rm );
890 load_reg( REG_ECX, Rn );
891 CMPL_r32_r32( REG_EAX, REG_ECX );
892 SETGE_t();
893 sh4_x86.tstate = TSTATE_GE;
894 :}
895 CMP/GT Rm, Rn {:
896 COUNT_INST(I_CMPGT);
897 load_reg( REG_EAX, Rm );
898 load_reg( REG_ECX, Rn );
899 CMPL_r32_r32( REG_EAX, REG_ECX );
900 SETG_t();
901 sh4_x86.tstate = TSTATE_G;
902 :}
903 CMP/HI Rm, Rn {:
904 COUNT_INST(I_CMPHI);
905 load_reg( REG_EAX, Rm );
906 load_reg( REG_ECX, Rn );
907 CMPL_r32_r32( REG_EAX, REG_ECX );
908 SETA_t();
909 sh4_x86.tstate = TSTATE_A;
910 :}
911 CMP/HS Rm, Rn {:
912 COUNT_INST(I_CMPHS);
913 load_reg( REG_EAX, Rm );
914 load_reg( REG_ECX, Rn );
915 CMPL_r32_r32( REG_EAX, REG_ECX );
916 SETAE_t();
917 sh4_x86.tstate = TSTATE_AE;
918 :}
919 CMP/PL Rn {:
920 COUNT_INST(I_CMPPL);
921 load_reg( REG_EAX, Rn );
922 CMPL_imms_r32( 0, REG_EAX );
923 SETG_t();
924 sh4_x86.tstate = TSTATE_G;
925 :}
926 CMP/PZ Rn {:
927 COUNT_INST(I_CMPPZ);
928 load_reg( REG_EAX, Rn );
929 CMPL_imms_r32( 0, REG_EAX );
930 SETGE_t();
931 sh4_x86.tstate = TSTATE_GE;
932 :}
/* CMP/STR: T = 1 if any byte of Rm equals the corresponding byte of Rn.
 * XOR makes equal bytes zero, then the four bytes are zero-tested in turn;
 * the first zero byte short-circuits to the SETE (ZF already set). */
933 CMP/STR Rm, Rn {:
934 COUNT_INST(I_CMPSTR);
935 load_reg( REG_EAX, Rm );
936 load_reg( REG_ECX, Rn );
937 XORL_r32_r32( REG_ECX, REG_EAX );
938 TESTB_r8_r8( REG_AL, REG_AL );
939 JE_label(target1);
940 TESTB_r8_r8( REG_AH, REG_AH );
941 JE_label(target2);
942 SHRL_imm_r32( 16, REG_EAX );
943 TESTB_r8_r8( REG_AL, REG_AL );
944 JE_label(target3);
945 TESTB_r8_r8( REG_AH, REG_AH );
946 JMP_TARGET(target1);
947 JMP_TARGET(target2);
948 JMP_TARGET(target3);
949 SETE_t();
950 sh4_x86.tstate = TSTATE_E;
951 :}
/* Division setup/step, 64-bit multiplies, decrement-test and extensions. */
/* DIV0S: Q = sign(Rn), M = sign(Rm), T = Q ^ M (set via != compare). */
952 DIV0S Rm, Rn {:
953 COUNT_INST(I_DIV0S);
954 load_reg( REG_EAX, Rm );
955 load_reg( REG_ECX, Rn );
956 SHRL_imm_r32( 31, REG_EAX );
957 SHRL_imm_r32( 31, REG_ECX );
958 MOVL_r32_rbpdisp( REG_EAX, R_M );
959 MOVL_r32_rbpdisp( REG_ECX, R_Q );
960 CMPL_r32_r32( REG_EAX, REG_ECX );
961 SETNE_t();
962 sh4_x86.tstate = TSTATE_NE;
963 :}
/* DIV0U: clear Q, M and T. The XOR leaves CF=0 matching T=0, so tstate can
 * claim TSTATE_C and let a following DIV1 skip its LDC_t reload. */
964 DIV0U {:
965 COUNT_INST(I_DIV0U);
966 XORL_r32_r32( REG_EAX, REG_EAX );
967 MOVL_r32_rbpdisp( REG_EAX, R_Q );
968 MOVL_r32_rbpdisp( REG_EAX, R_M );
969 MOVL_r32_rbpdisp( REG_EAX, R_T );
970 sh4_x86.tstate = TSTATE_C; // works for DIV1
971 :}
/* DIV1: one step of the non-restoring division. T is rotated into Rn via
 * RCL (carry must hold T first — hence the conditional LDC_t), then Rm is
 * added or subtracted depending on whether M == Q, and Q/T are recomputed
 * from the carry out. Statement order here is flag-critical. */
972 DIV1 Rm, Rn {:
973 COUNT_INST(I_DIV1);
974 MOVL_rbpdisp_r32( R_M, REG_ECX );
975 load_reg( REG_EAX, Rn );
976 if( sh4_x86.tstate != TSTATE_C ) {
977 LDC_t();
978 }
979 RCLL_imm_r32( 1, REG_EAX );
980 SETC_r8( REG_DL ); // Q'
981 CMPL_rbpdisp_r32( R_Q, REG_ECX );
982 JE_label(mqequal);
983 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
984 JMP_label(end);
985 JMP_TARGET(mqequal);
986 SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
987 JMP_TARGET(end);
988 store_reg( REG_EAX, Rn ); // Done with Rn now
989 SETC_r8(REG_AL); // tmp1
990 XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
991 XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
992 MOVL_r32_rbpdisp( REG_ECX, R_Q );
993 XORL_imms_r32( 1, REG_AL ); // T = !Q'
994 MOVZXL_r8_r32( REG_AL, REG_EAX );
995 MOVL_r32_rbpdisp( REG_EAX, R_T );
996 sh4_x86.tstate = TSTATE_NONE;
997 :}
/* DMULS/DMULU: full 64-bit product into MACH:MACL (x86 one-operand
 * signed/unsigned multiply leaves the result in EDX:EAX). */
998 DMULS.L Rm, Rn {:
999 COUNT_INST(I_DMULS);
1000 load_reg( REG_EAX, Rm );
1001 load_reg( REG_ECX, Rn );
1002 IMULL_r32(REG_ECX);
1003 MOVL_r32_rbpdisp( REG_EDX, R_MACH );
1004 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1005 sh4_x86.tstate = TSTATE_NONE;
1006 :}
1007 DMULU.L Rm, Rn {:
1008 COUNT_INST(I_DMULU);
1009 load_reg( REG_EAX, Rm );
1010 load_reg( REG_ECX, Rn );
1011 MULL_r32(REG_ECX);
1012 MOVL_r32_rbpdisp( REG_EDX, R_MACH );
1013 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1014 sh4_x86.tstate = TSTATE_NONE;
1015 :}
/* DT: decrement Rn, T = (Rn == 0). */
1016 DT Rn {:
1017 COUNT_INST(I_DT);
1018 load_reg( REG_EAX, Rn );
1019 ADDL_imms_r32( -1, REG_EAX );
1020 store_reg( REG_EAX, Rn );
1021 SETE_t();
1022 sh4_x86.tstate = TSTATE_E;
1023 :}
/* Sign/zero extensions map directly onto MOVSX/MOVZX; EFLAGS untouched,
 * so tstate is deliberately left alone. */
1024 EXTS.B Rm, Rn {:
1025 COUNT_INST(I_EXTSB);
1026 load_reg( REG_EAX, Rm );
1027 MOVSXL_r8_r32( REG_EAX, REG_EAX );
1028 store_reg( REG_EAX, Rn );
1029 :}
1030 EXTS.W Rm, Rn {:
1031 COUNT_INST(I_EXTSW);
1032 load_reg( REG_EAX, Rm );
1033 MOVSXL_r16_r32( REG_EAX, REG_EAX );
1034 store_reg( REG_EAX, Rn );
1035 :}
1036 EXTU.B Rm, Rn {:
1037 COUNT_INST(I_EXTUB);
1038 load_reg( REG_EAX, Rm );
1039 MOVZXL_r8_r32( REG_EAX, REG_EAX );
1040 store_reg( REG_EAX, Rn );
1041 :}
1042 EXTU.W Rm, Rn {:
1043 COUNT_INST(I_EXTUW);
1044 load_reg( REG_EAX, Rm );
1045 MOVZXL_r16_r32( REG_EAX, REG_EAX );
1046 store_reg( REG_EAX, Rn );
1047 :}
/* Multiply-accumulate. The first operand is read into the stack slot
 * (rsp+0) so the second MEM_READ doesn't clobber it; when Rm == Rn the same
 * register is read twice (addr, addr+size) and post-incremented once by the
 * combined amount. */
1048 MAC.L @Rm+, @Rn+ {:
1049 COUNT_INST(I_MACL);
1050 if( Rm == Rn ) {
1051 load_reg( REG_EAX, Rm );
1052 check_ralign32( REG_EAX );
1053 MEM_READ_LONG( REG_EAX, REG_EAX );
1054 MOVL_r32_rspdisp(REG_EAX, 0);
1055 load_reg( REG_EAX, Rm );
1056 LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
1057 MEM_READ_LONG( REG_EAX, REG_EAX );
1058 ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
1059 } else {
1060 load_reg( REG_EAX, Rm );
1061 check_ralign32( REG_EAX );
1062 MEM_READ_LONG( REG_EAX, REG_EAX );
1063 MOVL_r32_rspdisp( REG_EAX, 0 );
1064 load_reg( REG_EAX, Rn );
1065 check_ralign32( REG_EAX );
1066 MEM_READ_LONG( REG_EAX, REG_EAX );
1067 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
1068 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
1069 }
/* 64-bit accumulate into MACH:MACL; if S is set, call out-of-line helper
 * to saturate to 48 bits. */
1071 IMULL_rspdisp( 0 );
1072 ADDL_r32_rbpdisp( REG_EAX, R_MACL );
1073 ADCL_r32_rbpdisp( REG_EDX, R_MACH );
1075 MOVL_rbpdisp_r32( R_S, REG_ECX );
1076 TESTL_r32_r32(REG_ECX, REG_ECX);
1077 JE_label( nosat );
1078 CALL_ptr( signsat48 );
1079 JMP_TARGET( nosat );
1080 sh4_x86.tstate = TSTATE_NONE;
1081 :}
1082 MAC.W @Rm+, @Rn+ {:
1083 COUNT_INST(I_MACW);
1084 if( Rm == Rn ) {
1085 load_reg( REG_EAX, Rm );
1086 check_ralign16( REG_EAX );
1087 MEM_READ_WORD( REG_EAX, REG_EAX );
1088 MOVL_r32_rspdisp( REG_EAX, 0 );
1089 load_reg( REG_EAX, Rm );
1090 LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
1091 MEM_READ_WORD( REG_EAX, REG_EAX );
1092 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
1093 // Note translate twice in case of page boundaries. Maybe worth
1094 // adding a page-boundary check to skip the second translation
1095 } else {
1096 load_reg( REG_EAX, Rm );
1097 check_ralign16( REG_EAX );
1098 MEM_READ_WORD( REG_EAX, REG_EAX );
1099 MOVL_r32_rspdisp( REG_EAX, 0 );
1100 load_reg( REG_EAX, Rn );
1101 check_ralign16( REG_EAX );
1102 MEM_READ_WORD( REG_EAX, REG_EAX );
1103 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
1104 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
1105 }
/* S set: saturate the accumulate to 32 bits inline (MACL clamped to
 * 0x80000000/0x7FFFFFFF on overflow, MACH set to 1 as overflow marker).
 * S clear: plain 64-bit accumulate. Comments give emitted byte counts. */
1106 IMULL_rspdisp( 0 );
1107 MOVL_rbpdisp_r32( R_S, REG_ECX );
1108 TESTL_r32_r32( REG_ECX, REG_ECX );
1109 JE_label( nosat );
1111 ADDL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1112 JNO_label( end ); // 2
1113 MOVL_imm32_r32( 1, REG_EDX ); // 5
1114 MOVL_r32_rbpdisp( REG_EDX, R_MACH ); // 6
1115 JS_label( positive ); // 2
1116 MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
1117 MOVL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1118 JMP_label(end2); // 2
1120 JMP_TARGET(positive);
1121 MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
1122 MOVL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1123 JMP_label(end3); // 2
1125 JMP_TARGET(nosat);
1126 ADDL_r32_rbpdisp( REG_EAX, R_MACL ); // 6
1127 ADCL_r32_rbpdisp( REG_EDX, R_MACH ); // 6
1128 JMP_TARGET(end);
1129 JMP_TARGET(end2);
1130 JMP_TARGET(end3);
1131 sh4_x86.tstate = TSTATE_NONE;
1132 :}
/* MOVT, 32/16-bit multiplies (low word into MACL only), and unary/logical
 * register ops. */
1133 MOVT Rn {:
1134 COUNT_INST(I_MOVT);
1135 MOVL_rbpdisp_r32( R_T, REG_EAX );
1136 store_reg( REG_EAX, Rn );
1137 :}
1138 MUL.L Rm, Rn {:
1139 COUNT_INST(I_MULL);
1140 load_reg( REG_EAX, Rm );
1141 load_reg( REG_ECX, Rn );
1142 MULL_r32( REG_ECX );
1143 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1144 sh4_x86.tstate = TSTATE_NONE;
1145 :}
/* MULS/MULU: operands are sign-/zero-extended 16-bit loads straight from
 * sh4r; only the low 32 bits of the product are kept, so MULL suffices for
 * both. */
1146 MULS.W Rm, Rn {:
1147 COUNT_INST(I_MULSW);
1148 MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
1149 MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
1150 MULL_r32( REG_ECX );
1151 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1152 sh4_x86.tstate = TSTATE_NONE;
1153 :}
1154 MULU.W Rm, Rn {:
1155 COUNT_INST(I_MULUW);
1156 MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
1157 MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
1158 MULL_r32( REG_ECX );
1159 MOVL_r32_rbpdisp( REG_EAX, R_MACL );
1160 sh4_x86.tstate = TSTATE_NONE;
1161 :}
1162 NEG Rm, Rn {:
1163 COUNT_INST(I_NEG);
1164 load_reg( REG_EAX, Rm );
1165 NEGL_r32( REG_EAX );
1166 store_reg( REG_EAX, Rn );
1167 sh4_x86.tstate = TSTATE_NONE;
1168 :}
/* NEGC: Rn = 0 - Rm - T, computed as SBB against a zeroed ECX with carry
 * preloaded from T; new T = borrow out. */
1169 NEGC Rm, Rn {:
1170 COUNT_INST(I_NEGC);
1171 load_reg( REG_EAX, Rm );
1172 XORL_r32_r32( REG_ECX, REG_ECX );
1173 LDC_t();
1174 SBBL_r32_r32( REG_EAX, REG_ECX );
1175 store_reg( REG_ECX, Rn );
1176 SETC_t();
1177 sh4_x86.tstate = TSTATE_C;
1178 :}
1179 NOT Rm, Rn {:
1180 COUNT_INST(I_NOT);
1181 load_reg( REG_EAX, Rm );
1182 NOTL_r32( REG_EAX );
1183 store_reg( REG_EAX, Rn );
1184 sh4_x86.tstate = TSTATE_NONE;
1185 :}
1186 OR Rm, Rn {:
1187 COUNT_INST(I_OR);
1188 load_reg( REG_EAX, Rm );
1189 load_reg( REG_ECX, Rn );
1190 ORL_r32_r32( REG_EAX, REG_ECX );
1191 store_reg( REG_ECX, Rn );
1192 sh4_x86.tstate = TSTATE_NONE;
1193 :}
1194 OR #imm, R0 {:
1195 COUNT_INST(I_ORI);
1196 load_reg( REG_EAX, 0 );
1197 ORL_imms_r32(imm, REG_EAX);
1198 store_reg( REG_EAX, 0 );
1199 sh4_x86.tstate = TSTATE_NONE;
1200 :}
/* RMW byte at R0+GBR — same stack-slot address spill as AND.B above. */
1201 OR.B #imm, @(R0, GBR) {:
1202 COUNT_INST(I_ORB);
1203 load_reg( REG_EAX, 0 );
1204 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1205 MOVL_r32_rspdisp( REG_EAX, 0 );
1206 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
1207 MOVL_rspdisp_r32( 0, REG_EAX );
1208 ORL_imms_r32(imm, REG_EDX );
1209 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1210 sh4_x86.tstate = TSTATE_NONE;
1211 :}
/* Rotate and shift instructions. ROTCL/ROTCR rotate through T: x86 carry
 * must hold T before the RCL/RCR, so T is reloaded unless tstate says the
 * carry flag already mirrors it. All single-bit forms set T from the x86
 * carry out; the fixed multi-bit shifts don't touch T. */
1212 ROTCL Rn {:
1213 COUNT_INST(I_ROTCL);
1214 load_reg( REG_EAX, Rn );
1215 if( sh4_x86.tstate != TSTATE_C ) {
1216 LDC_t();
1217 }
1218 RCLL_imm_r32( 1, REG_EAX );
1219 store_reg( REG_EAX, Rn );
1220 SETC_t();
1221 sh4_x86.tstate = TSTATE_C;
1222 :}
1223 ROTCR Rn {:
1224 COUNT_INST(I_ROTCR);
1225 load_reg( REG_EAX, Rn );
1226 if( sh4_x86.tstate != TSTATE_C ) {
1227 LDC_t();
1228 }
1229 RCRL_imm_r32( 1, REG_EAX );
1230 store_reg( REG_EAX, Rn );
1231 SETC_t();
1232 sh4_x86.tstate = TSTATE_C;
1233 :}
1234 ROTL Rn {:
1235 COUNT_INST(I_ROTL);
1236 load_reg( REG_EAX, Rn );
1237 ROLL_imm_r32( 1, REG_EAX );
1238 store_reg( REG_EAX, Rn );
1239 SETC_t();
1240 sh4_x86.tstate = TSTATE_C;
1241 :}
1242 ROTR Rn {:
1243 COUNT_INST(I_ROTR);
1244 load_reg( REG_EAX, Rn );
1245 RORL_imm_r32( 1, REG_EAX );
1246 store_reg( REG_EAX, Rn );
1247 SETC_t();
1248 sh4_x86.tstate = TSTATE_C;
1249 :}
/* SHAD/SHLD: dynamic shift by Rm. Positive Rm shifts left; negative Rm
 * shifts right by -Rm. x86 CL shifts are mod 32 while SH4's count field is
 * 5 bits with the count==0-after-negate case meaning "shift by 32" —
 * handled by the emptysar/emptyshr branch (arithmetic: fill with sign;
 * logical: result 0). Comments give emitted byte counts. */
1250 SHAD Rm, Rn {:
1251 COUNT_INST(I_SHAD);
1252 /* Annoyingly enough, not directly convertible */
1253 load_reg( REG_EAX, Rn );
1254 load_reg( REG_ECX, Rm );
1255 CMPL_imms_r32( 0, REG_ECX );
1256 JGE_label(doshl);
1258 NEGL_r32( REG_ECX ); // 2
1259 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1260 JE_label(emptysar); // 2
1261 SARL_cl_r32( REG_EAX ); // 2
1262 JMP_label(end); // 2
1264 JMP_TARGET(emptysar);
1265 SARL_imm_r32(31, REG_EAX ); // 3
1266 JMP_label(end2);
1268 JMP_TARGET(doshl);
1269 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1270 SHLL_cl_r32( REG_EAX ); // 2
1271 JMP_TARGET(end);
1272 JMP_TARGET(end2);
1273 store_reg( REG_EAX, Rn );
1274 sh4_x86.tstate = TSTATE_NONE;
1275 :}
1276 SHLD Rm, Rn {:
1277 COUNT_INST(I_SHLD);
1278 load_reg( REG_EAX, Rn );
1279 load_reg( REG_ECX, Rm );
1280 CMPL_imms_r32( 0, REG_ECX );
1281 JGE_label(doshl);
1283 NEGL_r32( REG_ECX ); // 2
1284 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1285 JE_label(emptyshr );
1286 SHRL_cl_r32( REG_EAX ); // 2
1287 JMP_label(end); // 2
1289 JMP_TARGET(emptyshr);
1290 XORL_r32_r32( REG_EAX, REG_EAX );
1291 JMP_label(end2);
1293 JMP_TARGET(doshl);
1294 ANDB_imms_r8( 0x1F, REG_CL ); // 3
1295 SHLL_cl_r32( REG_EAX ); // 2
1296 JMP_TARGET(end);
1297 JMP_TARGET(end2);
1298 store_reg( REG_EAX, Rn );
1299 sh4_x86.tstate = TSTATE_NONE;
1300 :}
1301 SHAL Rn {:
1302 COUNT_INST(I_SHAL);
1303 load_reg( REG_EAX, Rn );
1304 SHLL_imm_r32( 1, REG_EAX );
1305 SETC_t();
1306 store_reg( REG_EAX, Rn );
1307 sh4_x86.tstate = TSTATE_C;
1308 :}
1309 SHAR Rn {:
1310 COUNT_INST(I_SHAR);
1311 load_reg( REG_EAX, Rn );
1312 SARL_imm_r32( 1, REG_EAX );
1313 SETC_t();
1314 store_reg( REG_EAX, Rn );
1315 sh4_x86.tstate = TSTATE_C;
1316 :}
1317 SHLL Rn {:
1318 COUNT_INST(I_SHLL);
1319 load_reg( REG_EAX, Rn );
1320 SHLL_imm_r32( 1, REG_EAX );
1321 SETC_t();
1322 store_reg( REG_EAX, Rn );
1323 sh4_x86.tstate = TSTATE_C;
1324 :}
/* SHLLn/SHLRn share the single-bit form's counter (I_SHLL/I_SHLR). */
1325 SHLL2 Rn {:
1326 COUNT_INST(I_SHLL);
1327 load_reg( REG_EAX, Rn );
1328 SHLL_imm_r32( 2, REG_EAX );
1329 store_reg( REG_EAX, Rn );
1330 sh4_x86.tstate = TSTATE_NONE;
1331 :}
1332 SHLL8 Rn {:
1333 COUNT_INST(I_SHLL);
1334 load_reg( REG_EAX, Rn );
1335 SHLL_imm_r32( 8, REG_EAX );
1336 store_reg( REG_EAX, Rn );
1337 sh4_x86.tstate = TSTATE_NONE;
1338 :}
1339 SHLL16 Rn {:
1340 COUNT_INST(I_SHLL);
1341 load_reg( REG_EAX, Rn );
1342 SHLL_imm_r32( 16, REG_EAX );
1343 store_reg( REG_EAX, Rn );
1344 sh4_x86.tstate = TSTATE_NONE;
1345 :}
1346 SHLR Rn {:
1347 COUNT_INST(I_SHLR);
1348 load_reg( REG_EAX, Rn );
1349 SHRL_imm_r32( 1, REG_EAX );
1350 SETC_t();
1351 store_reg( REG_EAX, Rn );
1352 sh4_x86.tstate = TSTATE_C;
1353 :}
1354 SHLR2 Rn {:
1355 COUNT_INST(I_SHLR);
1356 load_reg( REG_EAX, Rn );
1357 SHRL_imm_r32( 2, REG_EAX );
1358 store_reg( REG_EAX, Rn );
1359 sh4_x86.tstate = TSTATE_NONE;
1360 :}
1361 SHLR8 Rn {:
1362 COUNT_INST(I_SHLR);
1363 load_reg( REG_EAX, Rn );
1364 SHRL_imm_r32( 8, REG_EAX );
1365 store_reg( REG_EAX, Rn );
1366 sh4_x86.tstate = TSTATE_NONE;
1367 :}
1368 SHLR16 Rn {:
1369 COUNT_INST(I_SHLR);
1370 load_reg( REG_EAX, Rn );
1371 SHRL_imm_r32( 16, REG_EAX );
1372 store_reg( REG_EAX, Rn );
1373 sh4_x86.tstate = TSTATE_NONE;
1374 :}
/* Subtraction family and byte swap. SUBC borrows through T (carry preloaded
 * unless tstate already mirrors it); SUBV sets T from signed overflow. */
1375 SUB Rm, Rn {:
1376 COUNT_INST(I_SUB);
1377 load_reg( REG_EAX, Rm );
1378 load_reg( REG_ECX, Rn );
1379 SUBL_r32_r32( REG_EAX, REG_ECX );
1380 store_reg( REG_ECX, Rn );
1381 sh4_x86.tstate = TSTATE_NONE;
1382 :}
1383 SUBC Rm, Rn {:
1384 COUNT_INST(I_SUBC);
1385 load_reg( REG_EAX, Rm );
1386 load_reg( REG_ECX, Rn );
1387 if( sh4_x86.tstate != TSTATE_C ) {
1388 LDC_t();
1389 }
1390 SBBL_r32_r32( REG_EAX, REG_ECX );
1391 store_reg( REG_ECX, Rn );
1392 SETC_t();
1393 sh4_x86.tstate = TSTATE_C;
1394 :}
1395 SUBV Rm, Rn {:
1396 COUNT_INST(I_SUBV);
1397 load_reg( REG_EAX, Rm );
1398 load_reg( REG_ECX, Rn );
1399 SUBL_r32_r32( REG_EAX, REG_ECX );
1400 store_reg( REG_ECX, Rn );
1401 SETO_t();
1402 sh4_x86.tstate = TSTATE_O;
1403 :}
/* SWAP.B: exchange the low two bytes of Rm into Rn; XCHG preserves EFLAGS
 * so tstate deliberately carries over. */
1404 SWAP.B Rm, Rn {:
1405 COUNT_INST(I_SWAPB);
1406 load_reg( REG_EAX, Rm );
1407 XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
1408 store_reg( REG_EAX, Rn );
1409 :}
/* SWAP.W: exchange the upper and lower 16-bit halves of Rm into Rn.
 * Fix: the profiling counter was I_SWAPB (copy-paste from the SWAP.B rule
 * above); count it as I_SWAPW so per-instruction statistics attribute
 * SWAP.W executions to the right bucket. Emitted code is unchanged. */
1410 SWAP.W Rm, Rn {:
1411 COUNT_INST(I_SWAPW);
1412 load_reg( REG_EAX, Rm );
1413 MOVL_r32_r32( REG_EAX, REG_ECX );
1414 SHLL_imm_r32( 16, REG_ECX );
1415 SHRL_imm_r32( 16, REG_EAX );
1416 ORL_r32_r32( REG_EAX, REG_ECX );
1417 store_reg( REG_ECX, Rn );
1418 sh4_x86.tstate = TSTATE_NONE;
1419 :}
/* Test/bit-test instructions and XOR family. */
/* TAS.B: T = (@Rn == 0), then set bit 7 of the byte and write it back
 * (read-modify-write; address spilled to the stack slot across the read). */
1420 TAS.B @Rn {:
1421 COUNT_INST(I_TASB);
1422 load_reg( REG_EAX, Rn );
1423 MOVL_r32_rspdisp( REG_EAX, 0 );
1424 MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
1425 TESTB_r8_r8( REG_DL, REG_DL );
1426 SETE_t();
1427 ORB_imms_r8( 0x80, REG_DL );
1428 MOVL_rspdisp_r32( 0, REG_EAX );
1429 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1430 sh4_x86.tstate = TSTATE_NONE;
1431 :}
/* TST: T = ((a & b) == 0), result discarded. */
1432 TST Rm, Rn {:
1433 COUNT_INST(I_TST);
1434 load_reg( REG_EAX, Rm );
1435 load_reg( REG_ECX, Rn );
1436 TESTL_r32_r32( REG_EAX, REG_ECX );
1437 SETE_t();
1438 sh4_x86.tstate = TSTATE_E;
1439 :}
1440 TST #imm, R0 {:
1441 COUNT_INST(I_TSTI);
1442 load_reg( REG_EAX, 0 );
1443 TESTL_imms_r32( imm, REG_EAX );
1444 SETE_t();
1445 sh4_x86.tstate = TSTATE_E;
1446 :}
/* TST.B only reads memory, so no address spill is needed here. */
1447 TST.B #imm, @(R0, GBR) {:
1448 COUNT_INST(I_TSTB);
1449 load_reg( REG_EAX, 0);
1450 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1451 MEM_READ_BYTE( REG_EAX, REG_EAX );
1452 TESTB_imms_r8( imm, REG_AL );
1453 SETE_t();
1454 sh4_x86.tstate = TSTATE_E;
1455 :}
1456 XOR Rm, Rn {:
1457 COUNT_INST(I_XOR);
1458 load_reg( REG_EAX, Rm );
1459 load_reg( REG_ECX, Rn );
1460 XORL_r32_r32( REG_EAX, REG_ECX );
1461 store_reg( REG_ECX, Rn );
1462 sh4_x86.tstate = TSTATE_NONE;
1463 :}
1464 XOR #imm, R0 {:
1465 COUNT_INST(I_XORI);
1466 load_reg( REG_EAX, 0 );
1467 XORL_imms_r32( imm, REG_EAX );
1468 store_reg( REG_EAX, 0 );
1469 sh4_x86.tstate = TSTATE_NONE;
1470 :}
/* RMW byte at R0+GBR — same pattern as AND.B/OR.B. */
1471 XOR.B #imm, @(R0, GBR) {:
1472 COUNT_INST(I_XORB);
1473 load_reg( REG_EAX, 0 );
1474 ADDL_rbpdisp_r32( R_GBR, REG_EAX );
1475 MOVL_r32_rspdisp( REG_EAX, 0 );
1476 MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
1477 MOVL_rspdisp_r32( 0, REG_EAX );
1478 XORL_imms_r32( imm, REG_EDX );
1479 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1480 sh4_x86.tstate = TSTATE_NONE;
1481 :}
/* XTRCT: Rn = (Rm << 16) | (Rn >> 16) — middle 32 bits of Rm:Rn. */
1482 XTRCT Rm, Rn {:
1483 COUNT_INST(I_XTRCT);
1484 load_reg( REG_EAX, Rm );
1485 load_reg( REG_ECX, Rn );
1486 SHLL_imm_r32( 16, REG_EAX );
1487 SHRL_imm_r32( 16, REG_ECX );
1488 ORL_r32_r32( REG_EAX, REG_ECX );
1489 store_reg( REG_ECX, Rn );
1490 sh4_x86.tstate = TSTATE_NONE;
1491 :}
1493 /* Data move instructions */
/* Register moves and byte-sized loads/stores. MEM_WRITE_* takes (address,
 * data); MEM_READ_* takes (address, destination). Pre-decrement stores only
 * commit the decremented Rn after the write succeeds (so an MMU exception
 * leaves Rn unmodified); post-increment loads skip the increment when
 * Rm == Rn since the loaded value overwrites the register. */
1494 MOV Rm, Rn {:
1495 COUNT_INST(I_MOV);
1496 load_reg( REG_EAX, Rm );
1497 store_reg( REG_EAX, Rn );
1498 :}
1499 MOV #imm, Rn {:
1500 COUNT_INST(I_MOVI);
1501 MOVL_imm32_r32( imm, REG_EAX );
1502 store_reg( REG_EAX, Rn );
1503 :}
1504 MOV.B Rm, @Rn {:
1505 COUNT_INST(I_MOVB);
1506 load_reg( REG_EAX, Rn );
1507 load_reg( REG_EDX, Rm );
1508 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1509 sh4_x86.tstate = TSTATE_NONE;
1510 :}
1511 MOV.B Rm, @-Rn {:
1512 COUNT_INST(I_MOVB);
1513 load_reg( REG_EAX, Rn );
1514 LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
1515 load_reg( REG_EDX, Rm );
1516 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1517 ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
1518 sh4_x86.tstate = TSTATE_NONE;
1519 :}
1520 MOV.B Rm, @(R0, Rn) {:
1521 COUNT_INST(I_MOVB);
1522 load_reg( REG_EAX, 0 );
1523 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1524 load_reg( REG_EDX, Rm );
1525 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1526 sh4_x86.tstate = TSTATE_NONE;
1527 :}
1528 MOV.B R0, @(disp, GBR) {:
1529 COUNT_INST(I_MOVB);
1530 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1531 ADDL_imms_r32( disp, REG_EAX );
1532 load_reg( REG_EDX, 0 );
1533 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1534 sh4_x86.tstate = TSTATE_NONE;
1535 :}
1536 MOV.B R0, @(disp, Rn) {:
1537 COUNT_INST(I_MOVB);
1538 load_reg( REG_EAX, Rn );
1539 ADDL_imms_r32( disp, REG_EAX );
1540 load_reg( REG_EDX, 0 );
1541 MEM_WRITE_BYTE( REG_EAX, REG_EDX );
1542 sh4_x86.tstate = TSTATE_NONE;
1543 :}
1544 MOV.B @Rm, Rn {:
1545 COUNT_INST(I_MOVB);
1546 load_reg( REG_EAX, Rm );
1547 MEM_READ_BYTE( REG_EAX, REG_EAX );
1548 store_reg( REG_EAX, Rn );
1549 sh4_x86.tstate = TSTATE_NONE;
1550 :}
1551 MOV.B @Rm+, Rn {:
1552 COUNT_INST(I_MOVB);
1553 load_reg( REG_EAX, Rm );
1554 MEM_READ_BYTE( REG_EAX, REG_EAX );
/* When Rm == Rn the loaded value wins; no post-increment. */
1555 if( Rm != Rn ) {
1556 ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
1557 }
1558 store_reg( REG_EAX, Rn );
1559 sh4_x86.tstate = TSTATE_NONE;
1560 :}
1561 MOV.B @(R0, Rm), Rn {:
1562 COUNT_INST(I_MOVB);
1563 load_reg( REG_EAX, 0 );
1564 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1565 MEM_READ_BYTE( REG_EAX, REG_EAX );
1566 store_reg( REG_EAX, Rn );
1567 sh4_x86.tstate = TSTATE_NONE;
1568 :}
1569 MOV.B @(disp, GBR), R0 {:
1570 COUNT_INST(I_MOVB);
1571 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1572 ADDL_imms_r32( disp, REG_EAX );
1573 MEM_READ_BYTE( REG_EAX, REG_EAX );
1574 store_reg( REG_EAX, 0 );
1575 sh4_x86.tstate = TSTATE_NONE;
1576 :}
1577 MOV.B @(disp, Rm), R0 {:
1578 COUNT_INST(I_MOVB);
1579 load_reg( REG_EAX, Rm );
1580 ADDL_imms_r32( disp, REG_EAX );
1581 MEM_READ_BYTE( REG_EAX, REG_EAX );
1582 store_reg( REG_EAX, 0 );
1583 sh4_x86.tstate = TSTATE_NONE;
1584 :}
/* 32-bit loads/stores. Stores that can hit the store-queue region detect it
 * by masking the address with 0xFC000000 and comparing to 0xE0000000; SQ
 * writes go straight into sh4r.store_queue (indexed by addr & 0x3C) instead
 * of through the memory subsystem. All accesses are alignment-checked
 * first (check_walign32 / check_ralign32). */
1585 MOV.L Rm, @Rn {:
1586 COUNT_INST(I_MOVL);
1587 load_reg( REG_EAX, Rn );
1588 check_walign32(REG_EAX);
1589 MOVL_r32_r32( REG_EAX, REG_ECX );
1590 ANDL_imms_r32( 0xFC000000, REG_ECX );
1591 CMPL_imms_r32( 0xE0000000, REG_ECX );
1592 JNE_label( notsq );
1593 ANDL_imms_r32( 0x3C, REG_EAX );
1594 load_reg( REG_EDX, Rm );
1595 MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
1596 JMP_label(end);
1597 JMP_TARGET(notsq);
1598 load_reg( REG_EDX, Rm );
1599 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1600 JMP_TARGET(end);
1601 sh4_x86.tstate = TSTATE_NONE;
1602 :}
/* Pre-decrement: Rn is only committed after the write (exception-safe). */
1603 MOV.L Rm, @-Rn {:
1604 COUNT_INST(I_MOVL);
1605 load_reg( REG_EAX, Rn );
1606 ADDL_imms_r32( -4, REG_EAX );
1607 check_walign32( REG_EAX );
1608 load_reg( REG_EDX, Rm );
1609 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1610 ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
1611 sh4_x86.tstate = TSTATE_NONE;
1612 :}
1613 MOV.L Rm, @(R0, Rn) {:
1614 COUNT_INST(I_MOVL);
1615 load_reg( REG_EAX, 0 );
1616 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1617 check_walign32( REG_EAX );
1618 load_reg( REG_EDX, Rm );
1619 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1620 sh4_x86.tstate = TSTATE_NONE;
1621 :}
1622 MOV.L R0, @(disp, GBR) {:
1623 COUNT_INST(I_MOVL);
1624 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1625 ADDL_imms_r32( disp, REG_EAX );
1626 check_walign32( REG_EAX );
1627 load_reg( REG_EDX, 0 );
1628 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1629 sh4_x86.tstate = TSTATE_NONE;
1630 :}
/* Same store-queue fast path as MOV.L Rm, @Rn above. */
1631 MOV.L Rm, @(disp, Rn) {:
1632 COUNT_INST(I_MOVL);
1633 load_reg( REG_EAX, Rn );
1634 ADDL_imms_r32( disp, REG_EAX );
1635 check_walign32( REG_EAX );
1636 MOVL_r32_r32( REG_EAX, REG_ECX );
1637 ANDL_imms_r32( 0xFC000000, REG_ECX );
1638 CMPL_imms_r32( 0xE0000000, REG_ECX );
1639 JNE_label( notsq );
1640 ANDL_imms_r32( 0x3C, REG_EAX );
1641 load_reg( REG_EDX, Rm );
1642 MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
1643 JMP_label(end);
1644 JMP_TARGET(notsq);
1645 load_reg( REG_EDX, Rm );
1646 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1647 JMP_TARGET(end);
1648 sh4_x86.tstate = TSTATE_NONE;
1649 :}
1650 MOV.L @Rm, Rn {:
1651 COUNT_INST(I_MOVL);
1652 load_reg( REG_EAX, Rm );
1653 check_ralign32( REG_EAX );
1654 MEM_READ_LONG( REG_EAX, REG_EAX );
1655 store_reg( REG_EAX, Rn );
1656 sh4_x86.tstate = TSTATE_NONE;
1657 :}
1658 MOV.L @Rm+, Rn {:
1659 COUNT_INST(I_MOVL);
1660 load_reg( REG_EAX, Rm );
1661 check_ralign32( REG_EAX );
1662 MEM_READ_LONG( REG_EAX, REG_EAX );
/* When Rm == Rn the loaded value wins; no post-increment. */
1663 if( Rm != Rn ) {
1664 ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
1665 }
1666 store_reg( REG_EAX, Rn );
1667 sh4_x86.tstate = TSTATE_NONE;
1668 :}
1669 MOV.L @(R0, Rm), Rn {:
1670 COUNT_INST(I_MOVL);
1671 load_reg( REG_EAX, 0 );
1672 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1673 check_ralign32( REG_EAX );
1674 MEM_READ_LONG( REG_EAX, REG_EAX );
1675 store_reg( REG_EAX, Rn );
1676 sh4_x86.tstate = TSTATE_NONE;
1677 :}
1678 MOV.L @(disp, GBR), R0 {:
1679 COUNT_INST(I_MOVL);
1680 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1681 ADDL_imms_r32( disp, REG_EAX );
1682 check_ralign32( REG_EAX );
1683 MEM_READ_LONG( REG_EAX, REG_EAX );
1684 store_reg( REG_EAX, 0 );
1685 sh4_x86.tstate = TSTATE_NONE;
1686 :}
/* PC-relative load: when the target lies in the same translated page
 * (icache), read it directly from host memory; otherwise compute the
 * address from the runtime PC and go through the memory subsystem. */
1687 MOV.L @(disp, PC), Rn {:
1688 COUNT_INST(I_MOVLPC);
1689 if( sh4_x86.in_delay_slot ) {
1690 SLOTILLEGAL();
1691 } else {
1692 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1693 if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
1694 // If the target address is in the same page as the code, it's
1695 // pretty safe to just ref it directly and circumvent the whole
1696 // memory subsystem. (this is a big performance win)
1698 // FIXME: There's a corner-case that's not handled here when
1699 // the current code-page is in the ITLB but not in the UTLB.
1700 // (should generate a TLB miss although need to test SH4
1701 // behaviour to confirm) Unlikely to be anyone depending on this
1702 // behaviour though.
1703 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1704 MOVL_moffptr_eax( ptr );
1705 } else {
1706 // Note: we use sh4r.pc for the calc as we could be running at a
1707 // different virtual address than the translation was done with,
1708 // but we can safely assume that the low bits are the same.
1709 MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
1710 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1711 MEM_READ_LONG( REG_EAX, REG_EAX );
1712 sh4_x86.tstate = TSTATE_NONE;
1713 }
1714 store_reg( REG_EAX, Rn );
1715 }
1716 :}
1717 MOV.L @(disp, Rm), Rn {:
1718 COUNT_INST(I_MOVL);
1719 load_reg( REG_EAX, Rm );
1720 ADDL_imms_r32( disp, REG_EAX );
1721 check_ralign32( REG_EAX );
1722 MEM_READ_LONG( REG_EAX, REG_EAX );
1723 store_reg( REG_EAX, Rn );
1724 sh4_x86.tstate = TSTATE_NONE;
1725 :}
/* 16-bit loads/stores: same patterns as the byte/long forms but with
 * 2-byte alignment checks and sign-extending reads. */
1726 MOV.W Rm, @Rn {:
1727 COUNT_INST(I_MOVW);
1728 load_reg( REG_EAX, Rn );
1729 check_walign16( REG_EAX );
1730 load_reg( REG_EDX, Rm );
1731 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1732 sh4_x86.tstate = TSTATE_NONE;
1733 :}
/* Pre-decrement: Rn committed only after the write succeeds. */
1734 MOV.W Rm, @-Rn {:
1735 COUNT_INST(I_MOVW);
1736 load_reg( REG_EAX, Rn );
1737 check_walign16( REG_EAX );
1738 LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
1739 load_reg( REG_EDX, Rm );
1740 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1741 ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
1742 sh4_x86.tstate = TSTATE_NONE;
1743 :}
1744 MOV.W Rm, @(R0, Rn) {:
1745 COUNT_INST(I_MOVW);
1746 load_reg( REG_EAX, 0 );
1747 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1748 check_walign16( REG_EAX );
1749 load_reg( REG_EDX, Rm );
1750 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1751 sh4_x86.tstate = TSTATE_NONE;
1752 :}
1753 MOV.W R0, @(disp, GBR) {:
1754 COUNT_INST(I_MOVW);
1755 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1756 ADDL_imms_r32( disp, REG_EAX );
1757 check_walign16( REG_EAX );
1758 load_reg( REG_EDX, 0 );
1759 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1760 sh4_x86.tstate = TSTATE_NONE;
1761 :}
1762 MOV.W R0, @(disp, Rn) {:
1763 COUNT_INST(I_MOVW);
1764 load_reg( REG_EAX, Rn );
1765 ADDL_imms_r32( disp, REG_EAX );
1766 check_walign16( REG_EAX );
1767 load_reg( REG_EDX, 0 );
1768 MEM_WRITE_WORD( REG_EAX, REG_EDX );
1769 sh4_x86.tstate = TSTATE_NONE;
1770 :}
1771 MOV.W @Rm, Rn {:
1772 COUNT_INST(I_MOVW);
1773 load_reg( REG_EAX, Rm );
1774 check_ralign16( REG_EAX );
1775 MEM_READ_WORD( REG_EAX, REG_EAX );
1776 store_reg( REG_EAX, Rn );
1777 sh4_x86.tstate = TSTATE_NONE;
1778 :}
1779 MOV.W @Rm+, Rn {:
1780 COUNT_INST(I_MOVW);
1781 load_reg( REG_EAX, Rm );
1782 check_ralign16( REG_EAX );
1783 MEM_READ_WORD( REG_EAX, REG_EAX );
/* When Rm == Rn the loaded value wins; no post-increment. */
1784 if( Rm != Rn ) {
1785 ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
1786 }
1787 store_reg( REG_EAX, Rn );
1788 sh4_x86.tstate = TSTATE_NONE;
1789 :}
1790 MOV.W @(R0, Rm), Rn {:
1791 COUNT_INST(I_MOVW);
1792 load_reg( REG_EAX, 0 );
1793 ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
1794 check_ralign16( REG_EAX );
1795 MEM_READ_WORD( REG_EAX, REG_EAX );
1796 store_reg( REG_EAX, Rn );
1797 sh4_x86.tstate = TSTATE_NONE;
1798 :}
1799 MOV.W @(disp, GBR), R0 {:
1800 COUNT_INST(I_MOVW);
1801 MOVL_rbpdisp_r32( R_GBR, REG_EAX );
1802 ADDL_imms_r32( disp, REG_EAX );
1803 check_ralign16( REG_EAX );
1804 MEM_READ_WORD( REG_EAX, REG_EAX );
1805 store_reg( REG_EAX, 0 );
1806 sh4_x86.tstate = TSTATE_NONE;
1807 :}
/* PC-relative word load: icache fast path when target is in the current
 * translated page, sign-extended to 32 bits either way. */
1808 MOV.W @(disp, PC), Rn {:
1809 COUNT_INST(I_MOVW);
1810 if( sh4_x86.in_delay_slot ) {
1811 SLOTILLEGAL();
1812 } else {
1813 // See comments for MOV.L @(disp, PC), Rn
1814 uint32_t target = pc + disp + 4;
1815 if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
1816 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1817 MOVL_moffptr_eax( ptr );
1818 MOVSXL_r16_r32( REG_EAX, REG_EAX );
1819 } else {
1820 MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
1821 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1822 MEM_READ_WORD( REG_EAX, REG_EAX );
1823 sh4_x86.tstate = TSTATE_NONE;
1824 }
1825 store_reg( REG_EAX, Rn );
1826 }
1827 :}
1828 MOV.W @(disp, Rm), R0 {:
1829 COUNT_INST(I_MOVW);
1830 load_reg( REG_EAX, Rm );
1831 ADDL_imms_r32( disp, REG_EAX );
1832 check_ralign16( REG_EAX );
1833 MEM_READ_WORD( REG_EAX, REG_EAX );
1834 store_reg( REG_EAX, 0 );
1835 sh4_x86.tstate = TSTATE_NONE;
1836 :}
/* MOVA: R0 = (PC & ~3) + disp + 4, computed against the runtime PC so the
 * block works at any virtual address. MOVCA.L writes like a normal long
 * store (cache-allocation semantics not modelled here). */
1837 MOVA @(disp, PC), R0 {:
1838 COUNT_INST(I_MOVA);
1839 if( sh4_x86.in_delay_slot ) {
1840 SLOTILLEGAL();
1841 } else {
1842 MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
1843 ADDL_rbpdisp_r32( R_PC, REG_ECX );
1844 store_reg( REG_ECX, 0 );
1845 sh4_x86.tstate = TSTATE_NONE;
1846 }
1847 :}
1848 MOVCA.L R0, @Rn {:
1849 COUNT_INST(I_MOVCA);
1850 load_reg( REG_EAX, Rn );
1851 check_walign32( REG_EAX );
1852 load_reg( REG_EDX, 0 );
1853 MEM_WRITE_LONG( REG_EAX, REG_EDX );
1854 sh4_x86.tstate = TSTATE_NONE;
1855 :}
1857 /* Control transfer instructions */
/* Conditional branches on T == 0. BF without a delay slot emits the block
 * exit under a JT skip. BF/S translates the delay-slot instruction twice —
 * once on the taken path before the exit, once on the fall-through — and
 * back-patches the 32-bit jcc displacement once the taken path's length is
 * known. If the delay slot can't be translated, it falls back to computing
 * NEW_PC at runtime and exiting to the emulator core. */
1858 BF disp {:
1859 COUNT_INST(I_BF);
1860 if( sh4_x86.in_delay_slot ) {
1861 SLOTILLEGAL();
1862 } else {
1863 sh4vma_t target = disp + pc + 4;
1864 JT_label( nottaken );
1865 exit_block_rel(target, pc+2 );
1866 JMP_TARGET(nottaken);
1867 return 2;
1868 }
1869 :}
1870 BF/S disp {:
1871 COUNT_INST(I_BFS);
1872 if( sh4_x86.in_delay_slot ) {
1873 SLOTILLEGAL();
1874 } else {
1875 sh4_x86.in_delay_slot = DELAY_PC;
1876 if( UNTRANSLATABLE(pc+2) ) {
1877 MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1878 JT_label(nottaken);
1879 ADDL_imms_r32( disp, REG_EAX );
1880 JMP_TARGET(nottaken);
1881 ADDL_rbpdisp_r32( R_PC, REG_EAX );
1882 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1883 exit_block_emu(pc+2);
1884 sh4_x86.branch_taken = TRUE;
1885 return 2;
1886 } else {
/* Ensure EFLAGS reflect T before the conditional jump. */
1887 if( sh4_x86.tstate == TSTATE_NONE ) {
1888 CMPL_imms_rbpdisp( 1, R_T );
1889 sh4_x86.tstate = TSTATE_E;
1890 }
1891 sh4vma_t target = disp + pc + 4;
/* Emit jcc with a zero rel32, remember its location for back-patching. */
1892 JCC_cc_rel32(sh4_x86.tstate,0);
1893 uint32_t *patch = ((uint32_t *)xlat_output)-1;
1894 int save_tstate = sh4_x86.tstate;
1895 sh4_translate_instruction(pc+2);
1896 sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
1897 exit_block_rel( target, pc+4 );
1899 // not taken
1900 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1901 sh4_x86.tstate = save_tstate;
1902 sh4_translate_instruction(pc+2);
1903 return 4;
1904 }
1905 }
1906 :}
/* Unconditional branches and subroutine calls. Each one marks the delay
 * slot (DELAY_PC) and branch_taken; if the delay-slot instruction can't be
 * translated the rule stores NEW_PC and exits to the emulator core,
 * otherwise it translates the slot inline and exits the block directly.
 * BSR/BSRF/JSR save the return address (pc+4, runtime-relative) in PR. */
1907 BRA disp {:
1908 COUNT_INST(I_BRA);
1909 if( sh4_x86.in_delay_slot ) {
1910 SLOTILLEGAL();
1911 } else {
1912 sh4_x86.in_delay_slot = DELAY_PC;
1913 sh4_x86.branch_taken = TRUE;
1914 if( UNTRANSLATABLE(pc+2) ) {
1915 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1916 ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
1917 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1918 exit_block_emu(pc+2);
1919 return 2;
1920 } else {
1921 sh4_translate_instruction( pc + 2 );
1922 exit_block_rel( disp + pc + 4, pc+4 );
1923 return 4;
1924 }
1925 }
1926 :}
/* BRAF: target = runtime PC + 4 + Rn, stored in NEW_PC before the slot. */
1927 BRAF Rn {:
1928 COUNT_INST(I_BRAF);
1929 if( sh4_x86.in_delay_slot ) {
1930 SLOTILLEGAL();
1931 } else {
1932 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1933 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1934 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1935 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1936 sh4_x86.in_delay_slot = DELAY_PC;
1937 sh4_x86.tstate = TSTATE_NONE;
1938 sh4_x86.branch_taken = TRUE;
1939 if( UNTRANSLATABLE(pc+2) ) {
1940 exit_block_emu(pc+2);
1941 return 2;
1942 } else {
1943 sh4_translate_instruction( pc + 2 );
1944 exit_block_newpcset(pc+4);
1945 return 4;
1946 }
1947 }
1948 :}
1949 BSR disp {:
1950 COUNT_INST(I_BSR);
1951 if( sh4_x86.in_delay_slot ) {
1952 SLOTILLEGAL();
1953 } else {
1954 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1955 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1956 MOVL_r32_rbpdisp( REG_EAX, R_PR );
1957 sh4_x86.in_delay_slot = DELAY_PC;
1958 sh4_x86.branch_taken = TRUE;
1959 sh4_x86.tstate = TSTATE_NONE;
1960 if( UNTRANSLATABLE(pc+2) ) {
1961 ADDL_imms_r32( disp, REG_EAX );
1962 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1963 exit_block_emu(pc+2);
1964 return 2;
1965 } else {
1966 sh4_translate_instruction( pc + 2 );
1967 exit_block_rel( disp + pc + 4, pc+4 );
1968 return 4;
1969 }
1970 }
1971 :}
/* BSRF: PR = runtime PC + 4, NEW_PC = PR + Rn (EAX reused for both). */
1972 BSRF Rn {:
1973 COUNT_INST(I_BSRF);
1974 if( sh4_x86.in_delay_slot ) {
1975 SLOTILLEGAL();
1976 } else {
1977 MOVL_rbpdisp_r32( R_PC, REG_EAX );
1978 ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
1979 MOVL_r32_rbpdisp( REG_EAX, R_PR );
1980 ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
1981 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
1983 sh4_x86.in_delay_slot = DELAY_PC;
1984 sh4_x86.tstate = TSTATE_NONE;
1985 sh4_x86.branch_taken = TRUE;
1986 if( UNTRANSLATABLE(pc+2) ) {
1987 exit_block_emu(pc+2);
1988 return 2;
1989 } else {
1990 sh4_translate_instruction( pc + 2 );
1991 exit_block_newpcset(pc+4);
1992 return 4;
1993 }
1994 }
1995 :}
/* Conditional branches on T == 1 — mirror images of BF/BF-S: JF skips the
 * exit for plain BT; BT/S inverts the cached condition (tstate^1) for the
 * not-taken jump and back-patches its rel32 after emitting the taken path.
 * The delay slot is translated twice (taken + fall-through paths). */
1996 BT disp {:
1997 COUNT_INST(I_BT);
1998 if( sh4_x86.in_delay_slot ) {
1999 SLOTILLEGAL();
2000 } else {
2001 sh4vma_t target = disp + pc + 4;
2002 JF_label( nottaken );
2003 exit_block_rel(target, pc+2 );
2004 JMP_TARGET(nottaken);
2005 return 2;
2006 }
2007 :}
2008 BT/S disp {:
2009 COUNT_INST(I_BTS);
2010 if( sh4_x86.in_delay_slot ) {
2011 SLOTILLEGAL();
2012 } else {
2013 sh4_x86.in_delay_slot = DELAY_PC;
2014 if( UNTRANSLATABLE(pc+2) ) {
2015 MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
2016 JF_label(nottaken);
2017 ADDL_imms_r32( disp, REG_EAX );
2018 JMP_TARGET(nottaken);
2019 ADDL_rbpdisp_r32( R_PC, REG_EAX );
2020 MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
2021 exit_block_emu(pc+2);
2022 sh4_x86.branch_taken = TRUE;
2023 return 2;
2024 } else {
/* Ensure EFLAGS reflect T before the conditional jump. */
2025 if( sh4_x86.tstate == TSTATE_NONE ) {
2026 CMPL_imms_rbpdisp( 1, R_T );
2027 sh4_x86.tstate = TSTATE_E;
2028 }
/* Inverted condition, zero rel32; remember location for back-patch. */
2029 JCC_cc_rel32(sh4_x86.tstate^1,0);
2030 uint32_t *patch = ((uint32_t *)xlat_output)-1;
2032 int save_tstate = sh4_x86.tstate;
2033 sh4_translate_instruction(pc+2);
2034 sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
2035 exit_block_rel( disp + pc + 4, pc+4 );
2036 // not taken
2037 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
2038 sh4_x86.tstate = save_tstate;
2039 sh4_translate_instruction(pc+2);
2040 return 4;
2041 }
2042 }
2043 :}
JMP @Rn {:
    COUNT_INST(I_JMP);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        /* new_pc := Rn */
        load_reg( REG_ECX, Rn );
        MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
JSR @Rn {:
    COUNT_INST(I_JSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        /* PR := PC + 4; new_pc := Rn */
        MOVL_rbpdisp_r32( R_PC, REG_EAX );
        ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
        MOVL_r32_rbpdisp( REG_EAX, R_PR );
        load_reg( REG_ECX, Rn );
        MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
RTE {:
    COUNT_INST(I_RTE);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        /* new_pc := SPC, SR := SSR (via sh4_write_sr, which may change mode) */
        MOVL_rbpdisp_r32( R_SPC, REG_ECX );
        MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
        MOVL_rbpdisp_r32( R_SSR, REG_EAX );
        CALL1_ptr_r32( sh4_write_sr, REG_EAX );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.fpuen_checked = FALSE;  /* SR.FD may have changed */
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
RTS {:
    COUNT_INST(I_RTS);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        /* new_pc := PR */
        MOVL_rbpdisp_r32( R_PR, REG_ECX );
        MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
TRAPA #imm {:
    COUNT_INST(I_TRAPA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        /* Commit PC := pc+2 before raising the trap */
        MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX ); // 5
        ADDL_r32_rbpdisp( REG_ECX, R_PC );
        MOVL_imm32_r32( imm, REG_EAX );
        CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
        sh4_x86.tstate = TSTATE_NONE;
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 2;
    }
:}
UNDEF {:
    COUNT_INST(I_UNDEF);
    if( sh4_x86.in_delay_slot ) {
        /* NOTE(review): unlike every other path in this file, this branch
         * has no explicit return, so control falls through to the generated
         * end-of-decode epilogue -- confirm this is intentional. */
        exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);
    } else {
        exit_block_exc(EXC_ILLEGAL, pc);
        return 2;
    }
:}
CLRMAC {:
    COUNT_INST(I_CLRMAC);
    /* MACH := MACL := 0 */
    XORL_r32_r32(REG_EAX, REG_EAX);
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRS {:
    COUNT_INST(I_CLRS);
    /* S := 0, written via the (cleared) host carry flag */
    CLC();
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRT {:
    COUNT_INST(I_CLRT);
    /* T := 0; host carry now mirrors T, so tstate = TSTATE_C */
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SETS {:
    COUNT_INST(I_SETS);
    /* S := 1 */
    STC();
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
SETT {:
    COUNT_INST(I_SETT);
    /* T := 1; host carry mirrors T */
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* Floating point moves.  Each rule has a double_size (SZ=1, 64-bit pair)
 * path and a single (SZ=0, 32-bit) path. */
FMOV FRm, FRn {:
    COUNT_INST(I_FMOV1);
    check_fpuen();
    if( sh4_x86.double_size ) {
        /* Copy both 32-bit halves of the double-precision pair */
        load_dr0( REG_EAX, FRm );
        load_dr1( REG_ECX, FRm );
        store_dr0( REG_EAX, FRn );
        store_dr1( REG_ECX, FRn );
    } else {
        load_fr( REG_EAX, FRm ); // SZ=0 branch
        store_fr( REG_EAX, FRn );
    }
:}
FMOV FRm, @Rn {:
    COUNT_INST(I_FMOV2);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( REG_EAX );
        load_dr0( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        /* Reload Rn: MEM_WRITE_LONG may clobber registers */
        load_reg( REG_EAX, Rn );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        load_dr1( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
    } else {
        check_walign32( REG_EAX );
        load_fr( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @Rm, FRn {:
    COUNT_INST(I_FMOV5);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr0( REG_EAX, FRn );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr1( REG_EAX, FRn );
    } else {
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @-Rn {:
    COUNT_INST(I_FMOV3);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    if( sh4_x86.double_size ) {
        check_walign64( REG_EAX );
        LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
        load_dr0( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        load_reg( REG_EAX, Rn );
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
        load_dr1( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        /* Rn is only decremented after both writes succeed */
        ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
    } else {
        check_walign32( REG_EAX );
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
        load_fr( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @Rm+, FRn {:
    COUNT_INST(I_FMOV6);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    if( sh4_x86.double_size ) {
        check_ralign64( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr0( REG_EAX, FRn );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr1( REG_EAX, FRn );
        /* Post-increment committed after both reads succeed */
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
    } else {
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @(R0, Rn) {:
    COUNT_INST(I_FMOV4);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
    if( sh4_x86.double_size ) {
        check_walign64( REG_EAX );
        load_dr0( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
        /* Recompute R0+Rn for the high word */
        load_reg( REG_EAX, Rn );
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        load_dr1( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
    } else {
        check_walign32( REG_EAX );
        load_fr( REG_EDX, FRm );
        MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @(R0, Rm), FRn {:
    COUNT_INST(I_FMOV7);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
    if( sh4_x86.double_size ) {
        check_ralign64( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr0( REG_EAX, FRn );
        load_reg( REG_EAX, Rm );
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_dr1( REG_EAX, FRn );
    } else {
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI0 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI0);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        XORL_r32_r32( REG_EAX, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
    sh4_x86.tstate = TSTATE_NONE;   /* XOR clobbers host flags */
:}
FLDI1 FRn {: /* IFF PR=0 */
    COUNT_INST(I_FLDI1);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        /* 0x3F800000 is the IEEE-754 single-precision encoding of 1.0f.
         * MOV doesn't touch host flags, so tstate is left unchanged. */
        MOVL_imm32_r32( 0x3F800000, REG_EAX );
        store_fr( REG_EAX, FRn );
    }
:}
FLOAT FPUL, FRn {:
    COUNT_INST(I_FLOAT);
    check_fpuen();
    /* Integer FPUL -> float/double FRn via the x87 stack */
    FILD_rbpdisp(R_FPUL);
    if( sh4_x86.double_prec ) {
        pop_dr( FRn );
    } else {
        pop_fr( FRn );
    }
:}
FTRC FRm, FPUL {:
    COUNT_INST(I_FTRC);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
    } else {
        push_fr( FRm );
    }
    /* Saturate: if value > max_int or <= min_int, store the bound directly */
    MOVP_immptr_rptr( &max_int, REG_ECX );
    FILD_r32disp( REG_ECX, 0 );
    FCOMIP_st(1);
    JNA_label( sat );
    MOVP_immptr_rptr( &min_int, REG_ECX );
    FILD_r32disp( REG_ECX, 0 );
    FCOMIP_st(1);
    JAE_label( sat2 );
    /* In range: temporarily switch the FPU control word to truncate mode */
    MOVP_immptr_rptr( &save_fcw, REG_EAX );
    FNSTCW_r32disp( REG_EAX, 0 );
    MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
    FLDCW_r32disp( REG_EDX, 0 );
    FISTP_rbpdisp(R_FPUL);
    FLDCW_r32disp( REG_EAX, 0 );    /* restore saved control word */
    JMP_label(end);

    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    /* ECX still points at the applicable bound (max_int or min_int) */
    MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
    MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDS FRm, FPUL {:
    COUNT_INST(I_FLDS);
    check_fpuen();
    /* Raw 32-bit move FRm -> FPUL (no conversion) */
    load_fr( REG_EAX, FRm );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
:}
FSTS FPUL, FRn {:
    COUNT_INST(I_FSTS);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
    store_fr( REG_EAX, FRn );
:}
FCNVDS FRm, FPUL {:
    COUNT_INST(I_FCNVDS);
    check_fpuen();
    /* double -> single; only defined when PR=1 */
    if( sh4_x86.double_prec ) {
        push_dr( FRm );
        pop_fpul();
    }
:}
FCNVSD FPUL, FRn {:
    COUNT_INST(I_FCNVSD);
    check_fpuen();
    /* single -> double; only defined when PR=1 */
    if( sh4_x86.double_prec ) {
        push_fpul();
        pop_dr( FRn );
    }
:}
/* Floating point instructions.  All implemented on the x87 stack; each rule
 * pushes operand(s), applies the operation, and pops the result back. */
FABS FRn {:
    COUNT_INST(I_FABS);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FABS_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FABS_st0();
        pop_fr(FRn);
    }
:}
FADD FRm, FRn {:
    COUNT_INST(I_FADD);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
        FADDP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
        FADDP_st(1);
        pop_fr(FRn);
    }
:}
FDIV FRm, FRn {:
    COUNT_INST(I_FDIV);
    check_fpuen();
    /* FRn := FRn / FRm -- note operand push order differs from FADD/FMUL */
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        push_dr(FRm);
        FDIVP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        push_fr(FRm);
        FDIVP_st(1);
        pop_fr(FRn);
    }
:}
FMAC FR0, FRm, FRn {:
    COUNT_INST(I_FMAC);
    check_fpuen();
    /* FRn := FR0 * FRm + FRn */
    if( sh4_x86.double_prec ) {
        push_dr( 0 );
        push_dr( FRm );
        FMULP_st(1);
        push_dr( FRn );
        FADDP_st(1);
        pop_dr( FRn );
    } else {
        push_fr( 0 );
        push_fr( FRm );
        FMULP_st(1);
        push_fr( FRn );
        FADDP_st(1);
        pop_fr( FRn );
    }
:}

FMUL FRm, FRn {:
    COUNT_INST(I_FMUL);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
        FMULP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
        FMULP_st(1);
        pop_fr(FRn);
    }
:}
FNEG FRn {:
    COUNT_INST(I_FNEG);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FCHS_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FCHS_st0();
        pop_fr(FRn);
    }
:}
FSRRA FRn {:
    COUNT_INST(I_FSRRA);
    check_fpuen();
    /* FRn := 1.0 / sqrt(FRn); single precision only */
    if( sh4_x86.double_prec == 0 ) {
        FLD1_st0();
        push_fr(FRn);
        FSQRT_st0();
        FDIVP_st(1);
        pop_fr(FRn);
    }
:}
FSQRT FRn {:
    COUNT_INST(I_FSQRT);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        FSQRT_st0();
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        FSQRT_st0();
        pop_fr(FRn);
    }
:}
FSUB FRm, FRn {:
    COUNT_INST(I_FSUB);
    check_fpuen();
    /* FRn := FRn - FRm */
    if( sh4_x86.double_prec ) {
        push_dr(FRn);
        push_dr(FRm);
        FSUBP_st(1);
        pop_dr(FRn);
    } else {
        push_fr(FRn);
        push_fr(FRm);
        FSUBP_st(1);
        pop_fr(FRn);
    }
:}
FCMP/EQ FRm, FRn {:
    COUNT_INST(I_FCMPEQ);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
    }
    /* T := (FRn == FRm); FCOMIP leaves the result in host flags */
    FCOMIP_st(1);
    SETE_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_E;
:}
FCMP/GT FRm, FRn {:
    COUNT_INST(I_FCMPGT);
    check_fpuen();
    if( sh4_x86.double_prec ) {
        push_dr(FRm);
        push_dr(FRn);
    } else {
        push_fr(FRm);
        push_fr(FRn);
    }
    /* T := (FRn > FRm) */
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_A;
:}

FSCA FPUL, FRn {:
    COUNT_INST(I_FSCA);
    check_fpuen();
    /* sin/cos of FPUL into the even-aligned register pair, via helper */
    if( sh4_x86.double_prec == 0 ) {
        LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
        MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
        CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FIPR FVm, FVn {:
    COUNT_INST(I_FIPR);
    check_fpuen();
    /* 4-element dot product: FVn[3] := FVm . FVn (single precision only) */
    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled ) {
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
            MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
            /* Two horizontal adds reduce the 4 products to one sum */
            HADDPS_xmm_xmm( 4, 4 );
            HADDPS_xmm_xmm( 4, 4 );
            MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
        } else {
            /* x87 fallback: accumulate the four products pairwise */
            push_fr( FVm<<2 );
            push_fr( FVn<<2 );
            FMULP_st(1);
            push_fr( (FVm<<2)+1);
            push_fr( (FVn<<2)+1);
            FMULP_st(1);
            FADDP_st(1);
            push_fr( (FVm<<2)+2);
            push_fr( (FVn<<2)+2);
            FMULP_st(1);
            FADDP_st(1);
            push_fr( (FVm<<2)+3);
            push_fr( (FVn<<2)+3);
            FMULP_st(1);
            FADDP_st(1);
            pop_fr( (FVn<<2)+3);
        }
    }
:}
FTRV XMTRX, FVn {:
    COUNT_INST(I_FTRV);
    check_fpuen();
    /* 4x4 matrix (XMTRX, bank 1) * vector FVn (single precision only) */
    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled ) {
            /* Load the four matrix columns, then broadcast each vector
             * element across a register and accumulate column * element */
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 );  // M1 M0 M3 M2
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 );  // M5 M4 M7 M6
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 );  // M9 M8 M11 M10
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14

            MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
            MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
            MOV_xmm_xmm( 4, 6 );
            MOV_xmm_xmm( 5, 7 );
            MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
            MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
            MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
            MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
            MULPS_xmm_xmm( 0, 4 );
            MULPS_xmm_xmm( 1, 5 );
            MULPS_xmm_xmm( 2, 6 );
            MULPS_xmm_xmm( 3, 7 );
            ADDPS_xmm_xmm( 5, 4 );
            ADDPS_xmm_xmm( 7, 6 );
            ADDPS_xmm_xmm( 6, 4 );
            MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
        } else {
            /* No SSE3: call out to the C implementation */
            LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
            CALL1_ptr_r32( sh4_ftrv, REG_EAX );
        }
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FRCHG {:
    COUNT_INST(I_FRCHG);
    check_fpuen();
    /* Toggle FPSCR.FR and swap the FP register banks */
    XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
    CALL_ptr( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:
    COUNT_INST(I_FSCHG);
    check_fpuen();
    /* Toggle FPSCR.SZ in both the live register and the translation mode word,
     * and keep the translator's cached copies in sync */
    XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
    XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_size = !sh4_x86.double_size;
    sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
:}
/* Processor control instructions */
LDC Rm, SR {:
    COUNT_INST(I_LDCSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        /* SR writes go through sh4_write_sr (may switch banks/mode), so the
         * translator's cached state must be invalidated and the block ended */
        load_reg( REG_EAX, Rm );
        CALL1_ptr_r32( sh4_write_sr, REG_EAX );
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
        return 2;
    }
:}
LDC Rm, GBR {:
    COUNT_INST(I_LDC);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
:}
LDC Rm, VBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, GBR {:
    COUNT_INST(I_LDCM);
    /* Pattern for all LDC.L rules: read *Rm, post-increment Rm, store target */
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SR {:
    COUNT_INST(I_LDCSRM);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
        CALL1_ptr_r32( sh4_write_sr, REG_EAX );
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
        return 2;   /* SR changed - end the block */
    }
:}
LDC.L @Rm+, VBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPSCR {:
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    /* FPSCR writes go through sh4_write_fpscr (may change PR/SZ/banks), so
     * invalidate the cached mode and end the block */
    load_reg( REG_EAX, Rm );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
LDS.L @Rm+, FPSCR {:
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
LDS Rm, FPUL {:
    COUNT_INST(I_LDS);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:
    COUNT_INST(I_LDSM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {:
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
:}
LDS.L @Rm+, PR {:
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:
    COUNT_INST(I_LDTLB);
    /* Load the current PTEH/PTEL pair into the UTLB via the MMU helper */
    CALL_ptr( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Cache-block hint instructions are no-ops here (only counted) */
OCBI @Rn {:
    COUNT_INST(I_OCBI);
:}
OCBP @Rn {:
    COUNT_INST(I_OCBP);
:}
OCBWB @Rn {:
    COUNT_INST(I_OCBWB);
:}
PREF @Rn {:
    COUNT_INST(I_PREF);
    load_reg( REG_EAX, Rn );
    MEM_PREFETCH( REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {:
    COUNT_INST(I_SLEEP);
    check_priv();
    CALL_ptr( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;   /* end the block - execution suspends here */
:}
STC SR, Rn {:
    COUNT_INST(I_STCSR);
    check_priv();
    /* SR is assembled from its component fields by sh4_read_sr */
    CALL_ptr(sh4_read_sr);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:
    COUNT_INST(I_STC);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STC VBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_VBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SSR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SPC, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SGR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_DBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SR, @-Rn {:
    COUNT_INST(I_STCSRM);
    check_priv();
    CALL_ptr( sh4_read_sr );
    MOVL_r32_r32( REG_EAX, REG_EDX );
    /* Pattern for all STC.L rules: compute Rn-4, write the value, and only
     * then commit the pre-decrement (so a faulting write leaves Rn intact) */
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_VBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SSR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SPC, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SGR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_DBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:
    COUNT_INST(I_STCM);
    /* GBR is accessible in user mode - no check_priv() here */
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_GBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPSCR, Rn {:
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    /* As with STC.L: write first, commit the pre-decrement of Rn last */
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:
    COUNT_INST(I_STS);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPUL, @-Rn {:
    COUNT_INST(I_STSM);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACH, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACH, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACH, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACL, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_PR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L PR, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_PR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}

NOP {:
    COUNT_INST(I_NOP);
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
:}
3149 %%
3150 sh4_x86.in_delay_slot = DELAY_NONE;
3151 return 0;
3152 }
3155 /**
3156 * The unwind methods only work if we compiled with DWARF2 frame information
3157 * (ie -fexceptions), otherwise we have to use the direct frame scan.
3158 */
3159 #ifdef HAVE_EXCEPTIONS
3160 #include <unwind.h>
/* State shared with the xlat_check_frame unwind callback: the native code
 * range being searched, and the PC found within it (NULL if none). */
struct UnwindInfo {
    uintptr_t block_start;  /* first byte of the translated block */
    uintptr_t block_end;    /* one past the last byte of the block */
    void *pc;               /* out: PC found in [block_start, block_end) */
};
3168 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
3169 {
3170 struct UnwindInfo *info = arg;
3171 void *pc = (void *)_Unwind_GetIP(context);
3172 if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
3173 info->pc = pc;
3174 return _URC_NORMAL_STOP;
3175 }
3176 return _URC_NO_REASON;
3177 }
3179 void *xlat_get_native_pc( void *code, uint32_t code_size )
3180 {
3181 struct _Unwind_Exception exc;
3182 struct UnwindInfo info;
3184 info.pc = NULL;
3185 info.block_start = (uintptr_t)code;
3186 info.block_end = info.block_start + code_size;
3187 void *result = NULL;
3188 _Unwind_Backtrace( xlat_check_frame, &info );
3189 return info.pc;
3190 }
3191 #else
/* Assume this is an ia32 build - amd64 should always have dwarf information */
void *xlat_get_native_pc( void *code, uint32_t code_size )
{
    void *result = NULL;
    /* Walk the saved-%ebp frame chain, at most 8 frames, looking for the
     * frame whose saved %ebp equals &sh4r + 128 (the value translated code
     * keeps in %ebp - see REG_OFFSET's -128 bias).  That frame's return
     * address (at 4(%eax)) is the native PC inside the translated block.
     * Note the `code`/`code_size` parameters are unused in this variant.
     * NOTE(review): relies on frames being laid out by a frame-pointer-
     * preserving build (-fno-omit-frame-pointer) - confirm build flags. */
    __asm__(
        "mov %%ebp, %%eax\n\t"
        "mov $0x8, %%ecx\n\t"
        "mov %1, %%edx\n"
        "frame_loop: test %%eax, %%eax\n\t"
        "je frame_not_found\n\t"
        "cmp (%%eax), %%edx\n\t"
        "je frame_found\n\t"
        "sub $0x1, %%ecx\n\t"
        "je frame_not_found\n\t"
        "movl (%%eax), %%eax\n\t"
        "jmp frame_loop\n"
        "frame_found: movl 0x4(%%eax), %0\n"
        "frame_not_found:"
        : "=r" (result)
        : "r" (((uint8_t *)&sh4r) + 128 )
        : "eax", "ecx", "edx" );
    return result;
}
3215 #endif
.