filename | src/sh4/sh4x86.in |
changeset | 956:4c1ed9e03985 |
prev | 953:f4a156508ad1 |
next | 974:16b079ed11bb |
author | nkeynes |
date | Thu Jan 15 04:15:11 2009 +0000 (15 years ago) |
permissions | -rw-r--r-- |
last change | Add support for the Intel ICC compiler (C only, icc doesn't support Obj-C) - Rename Obj-C source to .m - Separate paths.c into paths_unix.c and paths_osx.m - Add configuration detection of ICC, along with specific opt flags |
1 /**
2 * $Id$
3 *
4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
7 *
8 * Copyright (c) 2007 Nathan Keynes.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 */
21 #include <assert.h>
22 #include <math.h>
24 #ifndef NDEBUG
25 #define DEBUG_JUMPS 1
26 #endif
28 #include "lxdream.h"
29 #include "sh4/xltcache.h"
30 #include "sh4/sh4core.h"
31 #include "sh4/sh4trans.h"
32 #include "sh4/sh4stat.h"
33 #include "sh4/sh4mmio.h"
34 #include "sh4/x86op.h"
35 #include "sh4/mmu.h"
36 #include "clock.h"
38 #define DEFAULT_BACKPATCH_SIZE 4096
40 struct backpatch_record {
41 uint32_t fixup_offset;
42 uint32_t fixup_icount;
43 int32_t exc_code;
44 };
46 #define DELAY_NONE 0
47 #define DELAY_PC 1
48 #define DELAY_PC_PR 2
50 /**
51 * Struct to manage internal translation state. This state is not saved -
52 * it is only valid between calls to sh4_translate_begin_block() and
53 * sh4_translate_end_block()
54 */
55 struct sh4_x86_state {
56 int in_delay_slot;
57 gboolean fpuen_checked; /* true if we've already checked that the FPU is enabled. */
58 gboolean branch_taken; /* true if we branched unconditionally */
59 gboolean double_prec; /* true if FPU is in double-precision mode */
60 gboolean double_size; /* true if FPU is in double-size mode */
61 gboolean sse3_enabled; /* true if host supports SSE3 instructions */
62 uint32_t block_start_pc;
63 uint32_t stack_posn; /* Trace stack height for alignment purposes */
64 int tstate;
66 /* mode flags */
67 gboolean tlb_on; /* True if tlb translation is active */
69 /* Allocated memory for the (block-wide) back-patch list */
70 struct backpatch_record *backpatch_list;
71 uint32_t backpatch_posn;
72 uint32_t backpatch_size;
73 };
75 #define TSTATE_NONE -1
76 #define TSTATE_O 0
77 #define TSTATE_C 2
78 #define TSTATE_E 4
79 #define TSTATE_NE 5
80 #define TSTATE_G 0xF
81 #define TSTATE_GE 0xD
82 #define TSTATE_A 7
83 #define TSTATE_AE 3
85 #ifdef ENABLE_SH4STATS
86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
87 #else
88 #define COUNT_INST(id)
89 #endif
91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
93 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
94 OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
98 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
99 OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
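/* A minimal sketch of how the tstate cache is used (illustrative only, not
 * emitted code): a compare records both the SH4 T result and the x86 condition
 * code that still reproduces it in EFLAGS:
 *
 *     SETE_t();                     // CMP/EQ: store the ZF result into sh4r.t
 *     sh4_x86.tstate = TSTATE_E;    // remember that a plain JE tests the same thing
 *     ...
 *     JT_rel8(taken);               // emits just "jz taken" - no CMP_imm8s_sh4r needed
 *
 * Any instruction that clobbers EFLAGS sets tstate back to TSTATE_NONE, which
 * forces JT/JF to re-test sh4r.t with CMP_imm8s_sh4r(1, R_T) first.
 */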
101 static struct sh4_x86_state sh4_x86;
103 static uint32_t max_int = 0x7FFFFFFF;
104 static uint32_t min_int = 0x80000000;
105 static uint32_t save_fcw; /* save value for fpu control word */
106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
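/* 0x0F7F masks all x87 exceptions and keeps double-extended precision, but sets
 * the rounding-control field to round-toward-zero; FTRC below temporarily loads
 * it to get C-style truncation. */

/* CPUID leaf 1 (EAX=1) returns the standard feature flags in ECX; bit 0 of ECX
 * indicates SSE3 support. */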
108 gboolean is_sse3_supported()
109 {
110 uint32_t features;
112 __asm__ __volatile__(
113 "mov $0x01, %%eax\n\t"
114 "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
115 return (features & 1) ? TRUE : FALSE;
116 }
118 void sh4_translate_init(void)
119 {
120 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
121 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
122 sh4_x86.sse3_enabled = is_sse3_supported();
123 }
126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
127 {
128 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
129 sh4_x86.backpatch_size <<= 1;
130 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
131 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
132 assert( sh4_x86.backpatch_list != NULL );
133 }
134 if( sh4_x86.in_delay_slot ) {
135 fixup_pc -= 2;
136 }
137 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
138 ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
139 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
140 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
141 sh4_x86.backpatch_posn++;
142 }
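/* Each record holds the offset (within the generated block) of the 32-bit
 * displacement to patch, the instruction count from the start of the block
 * (fixup_pc is wound back by 2 in a delay slot, presumably so the fault is
 * attributed to the branch), and the SH4 exception code to raise. The
 * per-record size allowance in sh4_translate_end_block_size() below suggests
 * these records are expanded into exception-exit stubs when the block is
 * finalised. */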
144 /**
145 * Emit an instruction to load an SH4 reg into a real register
146 */
147 static inline void load_reg( int x86reg, int sh4reg )
148 {
149 /* mov [bp+n], reg */
150 OP(0x8B);
151 OP(0x45 + (x86reg<<3));
152 OP(REG_OFFSET(r[sh4reg]));
153 }
155 static inline void load_reg16s( int x86reg, int sh4reg )
156 {
157 OP(0x0F);
158 OP(0xBF);
159 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
160 }
162 static inline void load_reg16u( int x86reg, int sh4reg )
163 {
164 OP(0x0F);
165 OP(0xB7);
166 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
168 }
170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
172 /**
173 * Emit an instruction to load an immediate value into a register
174 */
175 static inline void load_imm32( int x86reg, uint32_t value ) {
176 /* mov #value, reg */
177 OP(0xB8 + x86reg);
178 OP32(value);
179 }
182 /**
183 * Load an immediate 64-bit quantity (note: x86-64 only)
184 */
185 static inline void load_imm64( int x86reg, uint64_t value ) {
186 /* mov #value, reg */
187 REXW();
188 OP(0xB8 + x86reg);
189 OP64(value);
190 }
192 /**
193 * Emit an instruction to store an SH4 reg (RN)
194 */
195 static inline void store_reg( int x86reg, int sh4reg ) {
196 /* mov reg, [bp+n] */
197 OP(0x89);
198 OP(0x45 + (x86reg<<3));
199 OP(REG_OFFSET(r[sh4reg]));
200 }
202 /**
203 * Load an FR register (single-precision floating point) into an integer x86
204 * register (eg for register-to-register moves)
205 */
206 #define load_fr(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
207 #define load_xf(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
209 /**
210 * Load the low half of a DR register (DR or XD) into an integer x86 register
211 */
212 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
213 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
215 /**
216 * Store an FR register (single-precision floating point) from an integer x86
217 * register (eg for register-to-register moves)
218 */
219 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
220 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
222 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
223 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
226 #define push_fpul() FLDF_sh4r(R_FPUL)
227 #define pop_fpul() FSTPF_sh4r(R_FPUL)
228 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
229 #define pop_fr(frm) FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
230 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
231 #define pop_xf(frm) FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
232 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
233 #define pop_dr(frm) FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
234 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
235 #define pop_xdr(frm) FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
239 /* Exception checks - Note that all exception checks will clobber EAX */
241 #define check_priv( ) \
242 if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
243 if( sh4_x86.in_delay_slot ) { \
244 exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
245 } else { \
246 exit_block_exc(EXC_ILLEGAL, pc); \
247 } \
248 sh4_x86.branch_taken = TRUE; \
249 sh4_x86.in_delay_slot = DELAY_NONE; \
250 return 2; \
251 }
253 #define check_fpuen( ) \
254 if( !sh4_x86.fpuen_checked ) {\
255 sh4_x86.fpuen_checked = TRUE;\
256 load_spreg( R_EAX, R_SR );\
257 AND_imm32_r32( SR_FD, R_EAX );\
258 if( sh4_x86.in_delay_slot ) {\
259 JNE_exc(EXC_SLOT_FPU_DISABLED);\
260 } else {\
261 JNE_exc(EXC_FPU_DISABLED);\
262 }\
263 sh4_x86.tstate = TSTATE_NONE; \
264 }
266 #define check_ralign16( x86reg ) \
267 TEST_imm32_r32( 0x00000001, x86reg ); \
268 JNE_exc(EXC_DATA_ADDR_READ)
270 #define check_walign16( x86reg ) \
271 TEST_imm32_r32( 0x00000001, x86reg ); \
272 JNE_exc(EXC_DATA_ADDR_WRITE);
274 #define check_ralign32( x86reg ) \
275 TEST_imm32_r32( 0x00000003, x86reg ); \
276 JNE_exc(EXC_DATA_ADDR_READ)
278 #define check_walign32( x86reg ) \
279 TEST_imm32_r32( 0x00000003, x86reg ); \
280 JNE_exc(EXC_DATA_ADDR_WRITE);
282 #define check_ralign64( x86reg ) \
283 TEST_imm32_r32( 0x00000007, x86reg ); \
284 JNE_exc(EXC_DATA_ADDR_READ)
286 #define check_walign64( x86reg ) \
287 TEST_imm32_r32( 0x00000007, x86reg ); \
288 JNE_exc(EXC_DATA_ADDR_WRITE);
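/* Each alignment check just tests the low address bits; the forward jump that
 * JNE_exc emits is presumably resolved through the backpatch list above, landing
 * in an exception stub that raises EXC_DATA_ADDR_READ/WRITE for the faulting
 * instruction. */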
290 #define UNDEF(ir)
291 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
292 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
293 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
294 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
295 */
297 #ifdef HAVE_FRAME_ADDRESS
298 #define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
299 call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
300 call_func1_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, pc); }
301 #define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
302 call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
303 call_func2_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
304 #else
305 #define _CALL_READ(addr_reg, fn) call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg)
306 #define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
307 #endif
309 #define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
310 #define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
311 #define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
312 #define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
313 #define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
314 #define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)
315 #define MEM_PREFETCH( addr_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, prefetch)
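/* For illustration (assuming HAVE_FRAME_ADDRESS, the TLB off and SR.MD set),
 * MEM_READ_LONG( R_EAX, R_ECX ) expands to roughly:
 *
 *     decode_address(R_EAX);         // leaves the mem_region_fn pointer in ECX
 *     call_func1_r32disp8(R_ECX, MEM_REGION_PTR(read_long), R_EAX);
 *     MOV_r32_r32(R_EAX, R_ECX);     // MEM_RESULT: copy the result out of EAX
 *
 * In every other configuration the _exc variant is used instead so the access
 * can raise an MMU exception against the current pc.
 */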
317 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
319 /****** Import appropriate calling conventions ******/
320 #if SIZEOF_VOID_P == 8
321 #include "sh4/ia64abi.h"
322 #else /* 32-bit system */
323 #include "sh4/ia32abi.h"
324 #endif
326 void sh4_translate_begin_block( sh4addr_t pc )
327 {
328 enter_block();
329 sh4_x86.in_delay_slot = FALSE;
330 sh4_x86.fpuen_checked = FALSE;
331 sh4_x86.branch_taken = FALSE;
332 sh4_x86.backpatch_posn = 0;
333 sh4_x86.block_start_pc = pc;
334 sh4_x86.tlb_on = IS_TLB_ENABLED();
335 sh4_x86.tstate = TSTATE_NONE;
336 sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
337 sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
338 }
341 uint32_t sh4_translate_end_block_size()
342 {
343 if( sh4_x86.backpatch_posn <= 3 ) {
344 return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
345 } else {
346 return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
347 }
348 }
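/* Worked example: with 2 outstanding backpatch records the estimate is
 * EPILOGUE_SIZE + 24 bytes; with 5 records it is EPILOGUE_SIZE + 48 + 2*15 =
 * EPILOGUE_SIZE + 78 bytes. */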
351 /**
352 * Embed a breakpoint into the generated code
353 */
354 void sh4_translate_emit_breakpoint( sh4vma_t pc )
355 {
356 load_imm32( R_EAX, pc );
357 call_func1( sh4_translate_breakpoint_hit, R_EAX );
358 sh4_x86.tstate = TSTATE_NONE;
359 }
362 #define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
364 /**
365 * Embed a call to sh4_execute_instruction for situations that we
366 * can't translate (just page-crossing delay slots at the moment).
367 * Caller is responsible for setting new_pc before calling this function.
368 *
369 * Performs:
370 * Set PC = endpc
371 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
372 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
373 * Call sh4_execute_instruction
374 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
375 */
376 void exit_block_emu( sh4vma_t endpc )
377 {
378 load_imm32( R_ECX, endpc - sh4_x86.block_start_pc ); // 5
379 ADD_r32_sh4r( R_ECX, R_PC );
381 load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
382 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
383 load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
384 store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
386 call_func0( sh4_execute_instruction );
387 load_spreg( R_EAX, R_PC );
388 if( sh4_x86.tlb_on ) {
389 call_func1(xlat_get_code_by_vma,R_EAX);
390 } else {
391 call_func1(xlat_get_code,R_EAX);
392 }
393 exit_block();
394 }
396 /**
397 * Translate a single instruction. Delayed branches are handled specially
398 * by translating both the branch and its delayed instruction as a single unit.
399 *
400 * The instruction MUST be in the icache (assert check)
401 *
402 * @return true if the instruction marks the end of a basic block
403 * (eg a branch or other control transfer).
404 */
405 uint32_t sh4_translate_instruction( sh4vma_t pc )
406 {
407 uint32_t ir;
408 /* Read instruction from icache */
409 assert( IS_IN_ICACHE(pc) );
410 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
412 if( !sh4_x86.in_delay_slot ) {
413 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
414 }
415 %%
416 /* ALU operations */
417 ADD Rm, Rn {:
418 COUNT_INST(I_ADD);
419 load_reg( R_EAX, Rm );
420 load_reg( R_ECX, Rn );
421 ADD_r32_r32( R_EAX, R_ECX );
422 store_reg( R_ECX, Rn );
423 sh4_x86.tstate = TSTATE_NONE;
424 :}
425 ADD #imm, Rn {:
426 COUNT_INST(I_ADDI);
427 ADD_imm8s_sh4r( imm, REG_OFFSET(r[Rn]) );
428 sh4_x86.tstate = TSTATE_NONE;
429 :}
430 ADDC Rm, Rn {:
431 COUNT_INST(I_ADDC);
432 if( sh4_x86.tstate != TSTATE_C ) {
433 LDC_t();
434 }
435 load_reg( R_EAX, Rm );
436 load_reg( R_ECX, Rn );
437 ADC_r32_r32( R_EAX, R_ECX );
438 store_reg( R_ECX, Rn );
439 SETC_t();
440 sh4_x86.tstate = TSTATE_C;
441 :}
442 ADDV Rm, Rn {:
443 COUNT_INST(I_ADDV);
444 load_reg( R_EAX, Rm );
445 load_reg( R_ECX, Rn );
446 ADD_r32_r32( R_EAX, R_ECX );
447 store_reg( R_ECX, Rn );
448 SETO_t();
449 sh4_x86.tstate = TSTATE_O;
450 :}
451 AND Rm, Rn {:
452 COUNT_INST(I_AND);
453 load_reg( R_EAX, Rm );
454 load_reg( R_ECX, Rn );
455 AND_r32_r32( R_EAX, R_ECX );
456 store_reg( R_ECX, Rn );
457 sh4_x86.tstate = TSTATE_NONE;
458 :}
459 AND #imm, R0 {:
460 COUNT_INST(I_ANDI);
461 load_reg( R_EAX, 0 );
462 AND_imm32_r32(imm, R_EAX);
463 store_reg( R_EAX, 0 );
464 sh4_x86.tstate = TSTATE_NONE;
465 :}
466 AND.B #imm, @(R0, GBR) {:
467 COUNT_INST(I_ANDB);
468 load_reg( R_EAX, 0 );
469 ADD_sh4r_r32( R_GBR, R_EAX );
470 MOV_r32_esp8(R_EAX, 0);
471 MEM_READ_BYTE( R_EAX, R_EDX );
472 MOV_esp8_r32(0, R_EAX);
473 AND_imm32_r32(imm, R_EDX );
474 MEM_WRITE_BYTE( R_EAX, R_EDX );
475 sh4_x86.tstate = TSTATE_NONE;
476 :}
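/* The @(R0, GBR) read-modify-write pattern above (also used by OR.B, XOR.B and
 * TAS.B below): compute the effective address, park it in stack temporary slot 0
 * across the MEM_READ_BYTE call (whose result comes back through EAX), restore
 * it, modify the loaded byte in EDX, then write it back. */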
477 CMP/EQ Rm, Rn {:
478 COUNT_INST(I_CMPEQ);
479 load_reg( R_EAX, Rm );
480 load_reg( R_ECX, Rn );
481 CMP_r32_r32( R_EAX, R_ECX );
482 SETE_t();
483 sh4_x86.tstate = TSTATE_E;
484 :}
485 CMP/EQ #imm, R0 {:
486 COUNT_INST(I_CMPEQI);
487 load_reg( R_EAX, 0 );
488 CMP_imm8s_r32(imm, R_EAX);
489 SETE_t();
490 sh4_x86.tstate = TSTATE_E;
491 :}
492 CMP/GE Rm, Rn {:
493 COUNT_INST(I_CMPGE);
494 load_reg( R_EAX, Rm );
495 load_reg( R_ECX, Rn );
496 CMP_r32_r32( R_EAX, R_ECX );
497 SETGE_t();
498 sh4_x86.tstate = TSTATE_GE;
499 :}
500 CMP/GT Rm, Rn {:
501 COUNT_INST(I_CMPGT);
502 load_reg( R_EAX, Rm );
503 load_reg( R_ECX, Rn );
504 CMP_r32_r32( R_EAX, R_ECX );
505 SETG_t();
506 sh4_x86.tstate = TSTATE_G;
507 :}
508 CMP/HI Rm, Rn {:
509 COUNT_INST(I_CMPHI);
510 load_reg( R_EAX, Rm );
511 load_reg( R_ECX, Rn );
512 CMP_r32_r32( R_EAX, R_ECX );
513 SETA_t();
514 sh4_x86.tstate = TSTATE_A;
515 :}
516 CMP/HS Rm, Rn {:
517 COUNT_INST(I_CMPHS);
518 load_reg( R_EAX, Rm );
519 load_reg( R_ECX, Rn );
520 CMP_r32_r32( R_EAX, R_ECX );
521 SETAE_t();
522 sh4_x86.tstate = TSTATE_AE;
523 :}
524 CMP/PL Rn {:
525 COUNT_INST(I_CMPPL);
526 load_reg( R_EAX, Rn );
527 CMP_imm8s_r32( 0, R_EAX );
528 SETG_t();
529 sh4_x86.tstate = TSTATE_G;
530 :}
531 CMP/PZ Rn {:
532 COUNT_INST(I_CMPPZ);
533 load_reg( R_EAX, Rn );
534 CMP_imm8s_r32( 0, R_EAX );
535 SETGE_t();
536 sh4_x86.tstate = TSTATE_GE;
537 :}
538 CMP/STR Rm, Rn {:
539 COUNT_INST(I_CMPSTR);
540 load_reg( R_EAX, Rm );
541 load_reg( R_ECX, Rn );
542 XOR_r32_r32( R_ECX, R_EAX );
543 TEST_r8_r8( R_AL, R_AL );
544 JE_rel8(target1);
545 TEST_r8_r8( R_AH, R_AH );
546 JE_rel8(target2);
547 SHR_imm8_r32( 16, R_EAX );
548 TEST_r8_r8( R_AL, R_AL );
549 JE_rel8(target3);
550 TEST_r8_r8( R_AH, R_AH );
551 JMP_TARGET(target1);
552 JMP_TARGET(target2);
553 JMP_TARGET(target3);
554 SETE_t();
555 sh4_x86.tstate = TSTATE_E;
556 :}
557 DIV0S Rm, Rn {:
558 COUNT_INST(I_DIV0S);
559 load_reg( R_EAX, Rm );
560 load_reg( R_ECX, Rn );
561 SHR_imm8_r32( 31, R_EAX );
562 SHR_imm8_r32( 31, R_ECX );
563 store_spreg( R_EAX, R_M );
564 store_spreg( R_ECX, R_Q );
565 CMP_r32_r32( R_EAX, R_ECX );
566 SETNE_t();
567 sh4_x86.tstate = TSTATE_NE;
568 :}
569 DIV0U {:
570 COUNT_INST(I_DIV0U);
571 XOR_r32_r32( R_EAX, R_EAX );
572 store_spreg( R_EAX, R_Q );
573 store_spreg( R_EAX, R_M );
574 store_spreg( R_EAX, R_T );
575 sh4_x86.tstate = TSTATE_C; // works for DIV1
576 :}
577 DIV1 Rm, Rn {:
578 COUNT_INST(I_DIV1);
579 load_spreg( R_ECX, R_M );
580 load_reg( R_EAX, Rn );
581 if( sh4_x86.tstate != TSTATE_C ) {
582 LDC_t();
583 }
584 RCL1_r32( R_EAX );
585 SETC_r8( R_DL ); // Q'
586 CMP_sh4r_r32( R_Q, R_ECX );
587 JE_rel8(mqequal);
588 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
589 JMP_rel8(end);
590 JMP_TARGET(mqequal);
591 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
592 JMP_TARGET(end);
593 store_reg( R_EAX, Rn ); // Done with Rn now
594 SETC_r8(R_AL); // tmp1
595 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
596 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
597 store_spreg( R_ECX, R_Q );
598 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
599 MOVZX_r8_r32( R_AL, R_EAX );
600 store_spreg( R_EAX, R_T );
601 sh4_x86.tstate = TSTATE_NONE;
602 :}
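/* Sketch of the DIV1 step above: the T flag (the next dividend bit) is rotated
 * into Rn, Rm is then added or subtracted depending on whether M == Q, and the
 * resulting carries are folded back into Q and T - one iteration of the SH4's
 * 1-bit non-restoring divide, normally executed 32 times after DIV0S/DIV0U. */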
603 DMULS.L Rm, Rn {:
604 COUNT_INST(I_DMULS);
605 load_reg( R_EAX, Rm );
606 load_reg( R_ECX, Rn );
607 IMUL_r32(R_ECX);
608 store_spreg( R_EDX, R_MACH );
609 store_spreg( R_EAX, R_MACL );
610 sh4_x86.tstate = TSTATE_NONE;
611 :}
612 DMULU.L Rm, Rn {:
613 COUNT_INST(I_DMULU);
614 load_reg( R_EAX, Rm );
615 load_reg( R_ECX, Rn );
616 MUL_r32(R_ECX);
617 store_spreg( R_EDX, R_MACH );
618 store_spreg( R_EAX, R_MACL );
619 sh4_x86.tstate = TSTATE_NONE;
620 :}
621 DT Rn {:
622 COUNT_INST(I_DT);
623 load_reg( R_EAX, Rn );
624 ADD_imm8s_r32( -1, R_EAX );
625 store_reg( R_EAX, Rn );
626 SETE_t();
627 sh4_x86.tstate = TSTATE_E;
628 :}
629 EXTS.B Rm, Rn {:
630 COUNT_INST(I_EXTSB);
631 load_reg( R_EAX, Rm );
632 MOVSX_r8_r32( R_EAX, R_EAX );
633 store_reg( R_EAX, Rn );
634 :}
635 EXTS.W Rm, Rn {:
636 COUNT_INST(I_EXTSW);
637 load_reg( R_EAX, Rm );
638 MOVSX_r16_r32( R_EAX, R_EAX );
639 store_reg( R_EAX, Rn );
640 :}
641 EXTU.B Rm, Rn {:
642 COUNT_INST(I_EXTUB);
643 load_reg( R_EAX, Rm );
644 MOVZX_r8_r32( R_EAX, R_EAX );
645 store_reg( R_EAX, Rn );
646 :}
647 EXTU.W Rm, Rn {:
648 COUNT_INST(I_EXTUW);
649 load_reg( R_EAX, Rm );
650 MOVZX_r16_r32( R_EAX, R_EAX );
651 store_reg( R_EAX, Rn );
652 :}
653 MAC.L @Rm+, @Rn+ {:
654 COUNT_INST(I_MACL);
655 if( Rm == Rn ) {
656 load_reg( R_EAX, Rm );
657 check_ralign32( R_EAX );
658 MEM_READ_LONG( R_EAX, R_EAX );
659 MOV_r32_esp8(R_EAX, 0);
660 load_reg( R_EAX, Rm );
661 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
662 MEM_READ_LONG( R_EAX, R_EAX );
663 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
664 } else {
665 load_reg( R_EAX, Rm );
666 check_ralign32( R_EAX );
667 MEM_READ_LONG( R_EAX, R_EAX );
668 MOV_r32_esp8( R_EAX, 0 );
669 load_reg( R_EAX, Rn );
670 check_ralign32( R_EAX );
671 MEM_READ_LONG( R_EAX, R_EAX );
672 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
673 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
674 }
676 IMUL_esp8( 0 );
677 ADD_r32_sh4r( R_EAX, R_MACL );
678 ADC_r32_sh4r( R_EDX, R_MACH );
680 load_spreg( R_ECX, R_S );
681 TEST_r32_r32(R_ECX, R_ECX);
682 JE_rel8( nosat );
683 call_func0( signsat48 );
684 JMP_TARGET( nosat );
685 sh4_x86.tstate = TSTATE_NONE;
686 :}
687 MAC.W @Rm+, @Rn+ {:
688 COUNT_INST(I_MACW);
689 if( Rm == Rn ) {
690 load_reg( R_EAX, Rm );
691 check_ralign16( R_EAX );
692 MEM_READ_WORD( R_EAX, R_EAX );
693 MOV_r32_esp8( R_EAX, 0 );
694 load_reg( R_EAX, Rm );
695 LEA_r32disp8_r32( R_EAX, 2, R_EAX );
696 MEM_READ_WORD( R_EAX, R_EAX );
697 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
698 // Note: the address is translated twice in case it crosses a page boundary.
699 // It may be worth adding a page-boundary check to skip the second translation.
700 } else {
701 load_reg( R_EAX, Rm );
702 check_ralign16( R_EAX );
703 MEM_READ_WORD( R_EAX, R_EAX );
704 MOV_r32_esp8( R_EAX, 0 );
705 load_reg( R_EAX, Rn );
706 check_ralign16( R_EAX );
707 MEM_READ_WORD( R_EAX, R_EAX );
708 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
709 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
710 }
711 IMUL_esp8( 0 );
712 load_spreg( R_ECX, R_S );
713 TEST_r32_r32( R_ECX, R_ECX );
714 JE_rel8( nosat );
716 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
717 JNO_rel8( end ); // 2
718 load_imm32( R_EDX, 1 ); // 5
719 store_spreg( R_EDX, R_MACH ); // 6
720 JS_rel8( positive ); // 2
721 load_imm32( R_EAX, 0x80000000 );// 5
722 store_spreg( R_EAX, R_MACL ); // 6
723 JMP_rel8(end2); // 2
725 JMP_TARGET(positive);
726 load_imm32( R_EAX, 0x7FFFFFFF );// 5
727 store_spreg( R_EAX, R_MACL ); // 6
728 JMP_rel8(end3); // 2
730 JMP_TARGET(nosat);
731 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
732 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
733 JMP_TARGET(end);
734 JMP_TARGET(end2);
735 JMP_TARGET(end3);
736 sh4_x86.tstate = TSTATE_NONE;
737 :}
738 MOVT Rn {:
739 COUNT_INST(I_MOVT);
740 load_spreg( R_EAX, R_T );
741 store_reg( R_EAX, Rn );
742 :}
743 MUL.L Rm, Rn {:
744 COUNT_INST(I_MULL);
745 load_reg( R_EAX, Rm );
746 load_reg( R_ECX, Rn );
747 MUL_r32( R_ECX );
748 store_spreg( R_EAX, R_MACL );
749 sh4_x86.tstate = TSTATE_NONE;
750 :}
751 MULS.W Rm, Rn {:
752 COUNT_INST(I_MULSW);
753 load_reg16s( R_EAX, Rm );
754 load_reg16s( R_ECX, Rn );
755 MUL_r32( R_ECX );
756 store_spreg( R_EAX, R_MACL );
757 sh4_x86.tstate = TSTATE_NONE;
758 :}
759 MULU.W Rm, Rn {:
760 COUNT_INST(I_MULUW);
761 load_reg16u( R_EAX, Rm );
762 load_reg16u( R_ECX, Rn );
763 MUL_r32( R_ECX );
764 store_spreg( R_EAX, R_MACL );
765 sh4_x86.tstate = TSTATE_NONE;
766 :}
767 NEG Rm, Rn {:
768 COUNT_INST(I_NEG);
769 load_reg( R_EAX, Rm );
770 NEG_r32( R_EAX );
771 store_reg( R_EAX, Rn );
772 sh4_x86.tstate = TSTATE_NONE;
773 :}
774 NEGC Rm, Rn {:
775 COUNT_INST(I_NEGC);
776 load_reg( R_EAX, Rm );
777 XOR_r32_r32( R_ECX, R_ECX );
778 LDC_t();
779 SBB_r32_r32( R_EAX, R_ECX );
780 store_reg( R_ECX, Rn );
781 SETC_t();
782 sh4_x86.tstate = TSTATE_C;
783 :}
784 NOT Rm, Rn {:
785 COUNT_INST(I_NOT);
786 load_reg( R_EAX, Rm );
787 NOT_r32( R_EAX );
788 store_reg( R_EAX, Rn );
789 sh4_x86.tstate = TSTATE_NONE;
790 :}
791 OR Rm, Rn {:
792 COUNT_INST(I_OR);
793 load_reg( R_EAX, Rm );
794 load_reg( R_ECX, Rn );
795 OR_r32_r32( R_EAX, R_ECX );
796 store_reg( R_ECX, Rn );
797 sh4_x86.tstate = TSTATE_NONE;
798 :}
799 OR #imm, R0 {:
800 COUNT_INST(I_ORI);
801 load_reg( R_EAX, 0 );
802 OR_imm32_r32(imm, R_EAX);
803 store_reg( R_EAX, 0 );
804 sh4_x86.tstate = TSTATE_NONE;
805 :}
806 OR.B #imm, @(R0, GBR) {:
807 COUNT_INST(I_ORB);
808 load_reg( R_EAX, 0 );
809 ADD_sh4r_r32( R_GBR, R_EAX );
810 MOV_r32_esp8( R_EAX, 0 );
811 MEM_READ_BYTE( R_EAX, R_EDX );
812 MOV_esp8_r32( 0, R_EAX );
813 OR_imm32_r32(imm, R_EDX );
814 MEM_WRITE_BYTE( R_EAX, R_EDX );
815 sh4_x86.tstate = TSTATE_NONE;
816 :}
817 ROTCL Rn {:
818 COUNT_INST(I_ROTCL);
819 load_reg( R_EAX, Rn );
820 if( sh4_x86.tstate != TSTATE_C ) {
821 LDC_t();
822 }
823 RCL1_r32( R_EAX );
824 store_reg( R_EAX, Rn );
825 SETC_t();
826 sh4_x86.tstate = TSTATE_C;
827 :}
828 ROTCR Rn {:
829 COUNT_INST(I_ROTCR);
830 load_reg( R_EAX, Rn );
831 if( sh4_x86.tstate != TSTATE_C ) {
832 LDC_t();
833 }
834 RCR1_r32( R_EAX );
835 store_reg( R_EAX, Rn );
836 SETC_t();
837 sh4_x86.tstate = TSTATE_C;
838 :}
839 ROTL Rn {:
840 COUNT_INST(I_ROTL);
841 load_reg( R_EAX, Rn );
842 ROL1_r32( R_EAX );
843 store_reg( R_EAX, Rn );
844 SETC_t();
845 sh4_x86.tstate = TSTATE_C;
846 :}
847 ROTR Rn {:
848 COUNT_INST(I_ROTR);
849 load_reg( R_EAX, Rn );
850 ROR1_r32( R_EAX );
851 store_reg( R_EAX, Rn );
852 SETC_t();
853 sh4_x86.tstate = TSTATE_C;
854 :}
855 SHAD Rm, Rn {:
856 COUNT_INST(I_SHAD);
857 /* Annoyingly enough, not directly convertible */
858 load_reg( R_EAX, Rn );
859 load_reg( R_ECX, Rm );
860 CMP_imm32_r32( 0, R_ECX );
861 JGE_rel8(doshl);
863 NEG_r32( R_ECX ); // 2
864 AND_imm8_r8( 0x1F, R_CL ); // 3
865 JE_rel8(emptysar); // 2
866 SAR_r32_CL( R_EAX ); // 2
867 JMP_rel8(end); // 2
869 JMP_TARGET(emptysar);
870 SAR_imm8_r32(31, R_EAX ); // 3
871 JMP_rel8(end2);
873 JMP_TARGET(doshl);
874 AND_imm8_r8( 0x1F, R_CL ); // 3
875 SHL_r32_CL( R_EAX ); // 2
876 JMP_TARGET(end);
877 JMP_TARGET(end2);
878 store_reg( R_EAX, Rn );
879 sh4_x86.tstate = TSTATE_NONE;
880 :}
881 SHLD Rm, Rn {:
882 COUNT_INST(I_SHLD);
883 load_reg( R_EAX, Rn );
884 load_reg( R_ECX, Rm );
885 CMP_imm32_r32( 0, R_ECX );
886 JGE_rel8(doshl);
888 NEG_r32( R_ECX ); // 2
889 AND_imm8_r8( 0x1F, R_CL ); // 3
890 JE_rel8(emptyshr );
891 SHR_r32_CL( R_EAX ); // 2
892 JMP_rel8(end); // 2
894 JMP_TARGET(emptyshr);
895 XOR_r32_r32( R_EAX, R_EAX );
896 JMP_rel8(end2);
898 JMP_TARGET(doshl);
899 AND_imm8_r8( 0x1F, R_CL ); // 3
900 SHL_r32_CL( R_EAX ); // 2
901 JMP_TARGET(end);
902 JMP_TARGET(end2);
903 store_reg( R_EAX, Rn );
904 sh4_x86.tstate = TSTATE_NONE;
905 :}
906 SHAL Rn {:
907 COUNT_INST(I_SHAL);
908 load_reg( R_EAX, Rn );
909 SHL1_r32( R_EAX );
910 SETC_t();
911 store_reg( R_EAX, Rn );
912 sh4_x86.tstate = TSTATE_C;
913 :}
914 SHAR Rn {:
915 COUNT_INST(I_SHAR);
916 load_reg( R_EAX, Rn );
917 SAR1_r32( R_EAX );
918 SETC_t();
919 store_reg( R_EAX, Rn );
920 sh4_x86.tstate = TSTATE_C;
921 :}
922 SHLL Rn {:
923 COUNT_INST(I_SHLL);
924 load_reg( R_EAX, Rn );
925 SHL1_r32( R_EAX );
926 SETC_t();
927 store_reg( R_EAX, Rn );
928 sh4_x86.tstate = TSTATE_C;
929 :}
930 SHLL2 Rn {:
931 COUNT_INST(I_SHLL);
932 load_reg( R_EAX, Rn );
933 SHL_imm8_r32( 2, R_EAX );
934 store_reg( R_EAX, Rn );
935 sh4_x86.tstate = TSTATE_NONE;
936 :}
937 SHLL8 Rn {:
938 COUNT_INST(I_SHLL);
939 load_reg( R_EAX, Rn );
940 SHL_imm8_r32( 8, R_EAX );
941 store_reg( R_EAX, Rn );
942 sh4_x86.tstate = TSTATE_NONE;
943 :}
944 SHLL16 Rn {:
945 COUNT_INST(I_SHLL);
946 load_reg( R_EAX, Rn );
947 SHL_imm8_r32( 16, R_EAX );
948 store_reg( R_EAX, Rn );
949 sh4_x86.tstate = TSTATE_NONE;
950 :}
951 SHLR Rn {:
952 COUNT_INST(I_SHLR);
953 load_reg( R_EAX, Rn );
954 SHR1_r32( R_EAX );
955 SETC_t();
956 store_reg( R_EAX, Rn );
957 sh4_x86.tstate = TSTATE_C;
958 :}
959 SHLR2 Rn {:
960 COUNT_INST(I_SHLR);
961 load_reg( R_EAX, Rn );
962 SHR_imm8_r32( 2, R_EAX );
963 store_reg( R_EAX, Rn );
964 sh4_x86.tstate = TSTATE_NONE;
965 :}
966 SHLR8 Rn {:
967 COUNT_INST(I_SHLR);
968 load_reg( R_EAX, Rn );
969 SHR_imm8_r32( 8, R_EAX );
970 store_reg( R_EAX, Rn );
971 sh4_x86.tstate = TSTATE_NONE;
972 :}
973 SHLR16 Rn {:
974 COUNT_INST(I_SHLR);
975 load_reg( R_EAX, Rn );
976 SHR_imm8_r32( 16, R_EAX );
977 store_reg( R_EAX, Rn );
978 sh4_x86.tstate = TSTATE_NONE;
979 :}
980 SUB Rm, Rn {:
981 COUNT_INST(I_SUB);
982 load_reg( R_EAX, Rm );
983 load_reg( R_ECX, Rn );
984 SUB_r32_r32( R_EAX, R_ECX );
985 store_reg( R_ECX, Rn );
986 sh4_x86.tstate = TSTATE_NONE;
987 :}
988 SUBC Rm, Rn {:
989 COUNT_INST(I_SUBC);
990 load_reg( R_EAX, Rm );
991 load_reg( R_ECX, Rn );
992 if( sh4_x86.tstate != TSTATE_C ) {
993 LDC_t();
994 }
995 SBB_r32_r32( R_EAX, R_ECX );
996 store_reg( R_ECX, Rn );
997 SETC_t();
998 sh4_x86.tstate = TSTATE_C;
999 :}
1000 SUBV Rm, Rn {:
1001 COUNT_INST(I_SUBV);
1002 load_reg( R_EAX, Rm );
1003 load_reg( R_ECX, Rn );
1004 SUB_r32_r32( R_EAX, R_ECX );
1005 store_reg( R_ECX, Rn );
1006 SETO_t();
1007 sh4_x86.tstate = TSTATE_O;
1008 :}
1009 SWAP.B Rm, Rn {:
1010 COUNT_INST(I_SWAPB);
1011 load_reg( R_EAX, Rm );
1012 XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
1013 store_reg( R_EAX, Rn );
1014 :}
1015 SWAP.W Rm, Rn {:
1016 COUNT_INST(I_SWAPB);
1017 load_reg( R_EAX, Rm );
1018 MOV_r32_r32( R_EAX, R_ECX );
1019 SHL_imm8_r32( 16, R_ECX );
1020 SHR_imm8_r32( 16, R_EAX );
1021 OR_r32_r32( R_EAX, R_ECX );
1022 store_reg( R_ECX, Rn );
1023 sh4_x86.tstate = TSTATE_NONE;
1024 :}
1025 TAS.B @Rn {:
1026 COUNT_INST(I_TASB);
1027 load_reg( R_EAX, Rn );
1028 MOV_r32_esp8( R_EAX, 0 );
1029 MEM_READ_BYTE( R_EAX, R_EDX );
1030 TEST_r8_r8( R_DL, R_DL );
1031 SETE_t();
1032 OR_imm8_r8( 0x80, R_DL );
1033 MOV_esp8_r32( 0, R_EAX );
1034 MEM_WRITE_BYTE( R_EAX, R_EDX );
1035 sh4_x86.tstate = TSTATE_NONE;
1036 :}
1037 TST Rm, Rn {:
1038 COUNT_INST(I_TST);
1039 load_reg( R_EAX, Rm );
1040 load_reg( R_ECX, Rn );
1041 TEST_r32_r32( R_EAX, R_ECX );
1042 SETE_t();
1043 sh4_x86.tstate = TSTATE_E;
1044 :}
1045 TST #imm, R0 {:
1046 COUNT_INST(I_TSTI);
1047 load_reg( R_EAX, 0 );
1048 TEST_imm32_r32( imm, R_EAX );
1049 SETE_t();
1050 sh4_x86.tstate = TSTATE_E;
1051 :}
1052 TST.B #imm, @(R0, GBR) {:
1053 COUNT_INST(I_TSTB);
1054 load_reg( R_EAX, 0);
1055 ADD_sh4r_r32( R_GBR, R_EAX );
1056 MEM_READ_BYTE( R_EAX, R_EAX );
1057 TEST_imm8_r8( imm, R_AL );
1058 SETE_t();
1059 sh4_x86.tstate = TSTATE_E;
1060 :}
1061 XOR Rm, Rn {:
1062 COUNT_INST(I_XOR);
1063 load_reg( R_EAX, Rm );
1064 load_reg( R_ECX, Rn );
1065 XOR_r32_r32( R_EAX, R_ECX );
1066 store_reg( R_ECX, Rn );
1067 sh4_x86.tstate = TSTATE_NONE;
1068 :}
1069 XOR #imm, R0 {:
1070 COUNT_INST(I_XORI);
1071 load_reg( R_EAX, 0 );
1072 XOR_imm32_r32( imm, R_EAX );
1073 store_reg( R_EAX, 0 );
1074 sh4_x86.tstate = TSTATE_NONE;
1075 :}
1076 XOR.B #imm, @(R0, GBR) {:
1077 COUNT_INST(I_XORB);
1078 load_reg( R_EAX, 0 );
1079 ADD_sh4r_r32( R_GBR, R_EAX );
1080 MOV_r32_esp8( R_EAX, 0 );
1081 MEM_READ_BYTE(R_EAX, R_EDX);
1082 MOV_esp8_r32( 0, R_EAX );
1083 XOR_imm32_r32( imm, R_EDX );
1084 MEM_WRITE_BYTE( R_EAX, R_EDX );
1085 sh4_x86.tstate = TSTATE_NONE;
1086 :}
1087 XTRCT Rm, Rn {:
1088 COUNT_INST(I_XTRCT);
1089 load_reg( R_EAX, Rm );
1090 load_reg( R_ECX, Rn );
1091 SHL_imm8_r32( 16, R_EAX );
1092 SHR_imm8_r32( 16, R_ECX );
1093 OR_r32_r32( R_EAX, R_ECX );
1094 store_reg( R_ECX, Rn );
1095 sh4_x86.tstate = TSTATE_NONE;
1096 :}
1098 /* Data move instructions */
1099 MOV Rm, Rn {:
1100 COUNT_INST(I_MOV);
1101 load_reg( R_EAX, Rm );
1102 store_reg( R_EAX, Rn );
1103 :}
1104 MOV #imm, Rn {:
1105 COUNT_INST(I_MOVI);
1106 load_imm32( R_EAX, imm );
1107 store_reg( R_EAX, Rn );
1108 :}
1109 MOV.B Rm, @Rn {:
1110 COUNT_INST(I_MOVB);
1111 load_reg( R_EAX, Rn );
1112 load_reg( R_EDX, Rm );
1113 MEM_WRITE_BYTE( R_EAX, R_EDX );
1114 sh4_x86.tstate = TSTATE_NONE;
1115 :}
1116 MOV.B Rm, @-Rn {:
1117 COUNT_INST(I_MOVB);
1118 load_reg( R_EAX, Rn );
1119 LEA_r32disp8_r32( R_EAX, -1, R_EAX );
1120 load_reg( R_EDX, Rm );
1121 MEM_WRITE_BYTE( R_EAX, R_EDX );
1122 ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
1123 sh4_x86.tstate = TSTATE_NONE;
1124 :}
1125 MOV.B Rm, @(R0, Rn) {:
1126 COUNT_INST(I_MOVB);
1127 load_reg( R_EAX, 0 );
1128 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1129 load_reg( R_EDX, Rm );
1130 MEM_WRITE_BYTE( R_EAX, R_EDX );
1131 sh4_x86.tstate = TSTATE_NONE;
1132 :}
1133 MOV.B R0, @(disp, GBR) {:
1134 COUNT_INST(I_MOVB);
1135 load_spreg( R_EAX, R_GBR );
1136 ADD_imm32_r32( disp, R_EAX );
1137 load_reg( R_EDX, 0 );
1138 MEM_WRITE_BYTE( R_EAX, R_EDX );
1139 sh4_x86.tstate = TSTATE_NONE;
1140 :}
1141 MOV.B R0, @(disp, Rn) {:
1142 COUNT_INST(I_MOVB);
1143 load_reg( R_EAX, Rn );
1144 ADD_imm32_r32( disp, R_EAX );
1145 load_reg( R_EDX, 0 );
1146 MEM_WRITE_BYTE( R_EAX, R_EDX );
1147 sh4_x86.tstate = TSTATE_NONE;
1148 :}
1149 MOV.B @Rm, Rn {:
1150 COUNT_INST(I_MOVB);
1151 load_reg( R_EAX, Rm );
1152 MEM_READ_BYTE( R_EAX, R_EAX );
1153 store_reg( R_EAX, Rn );
1154 sh4_x86.tstate = TSTATE_NONE;
1155 :}
1156 MOV.B @Rm+, Rn {:
1157 COUNT_INST(I_MOVB);
1158 load_reg( R_EAX, Rm );
1159 MEM_READ_BYTE( R_EAX, R_EAX );
1160 if( Rm != Rn ) {
1161 ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
1162 }
1163 store_reg( R_EAX, Rn );
1164 sh4_x86.tstate = TSTATE_NONE;
1165 :}
1166 MOV.B @(R0, Rm), Rn {:
1167 COUNT_INST(I_MOVB);
1168 load_reg( R_EAX, 0 );
1169 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1170 MEM_READ_BYTE( R_EAX, R_EAX );
1171 store_reg( R_EAX, Rn );
1172 sh4_x86.tstate = TSTATE_NONE;
1173 :}
1174 MOV.B @(disp, GBR), R0 {:
1175 COUNT_INST(I_MOVB);
1176 load_spreg( R_EAX, R_GBR );
1177 ADD_imm32_r32( disp, R_EAX );
1178 MEM_READ_BYTE( R_EAX, R_EAX );
1179 store_reg( R_EAX, 0 );
1180 sh4_x86.tstate = TSTATE_NONE;
1181 :}
1182 MOV.B @(disp, Rm), R0 {:
1183 COUNT_INST(I_MOVB);
1184 load_reg( R_EAX, Rm );
1185 ADD_imm32_r32( disp, R_EAX );
1186 MEM_READ_BYTE( R_EAX, R_EAX );
1187 store_reg( R_EAX, 0 );
1188 sh4_x86.tstate = TSTATE_NONE;
1189 :}
1190 MOV.L Rm, @Rn {:
1191 COUNT_INST(I_MOVL);
1192 load_reg( R_EAX, Rn );
1193 check_walign32(R_EAX);
1194 MOV_r32_r32( R_EAX, R_ECX );
1195 AND_imm32_r32( 0xFC000000, R_ECX );
1196 CMP_imm32_r32( 0xE0000000, R_ECX );
1197 JNE_rel8( notsq );
1198 AND_imm8s_r32( 0x3C, R_EAX );
1199 load_reg( R_EDX, Rm );
1200 MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
1201 JMP_rel8(end);
1202 JMP_TARGET(notsq);
1203 load_reg( R_EDX, Rm );
1204 MEM_WRITE_LONG( R_EAX, R_EDX );
1205 JMP_TARGET(end);
1206 sh4_x86.tstate = TSTATE_NONE;
1207 :}
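/* The mask/compare above detects the store-queue area (0xE0000000-0xE3FFFFFF):
 * writes there are copied straight into sh4r.store_queue rather than going
 * through the normal memory path. MOV.L Rm, @(disp, Rn) below repeats the same
 * fast path. */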
1208 MOV.L Rm, @-Rn {:
1209 COUNT_INST(I_MOVL);
1210 load_reg( R_EAX, Rn );
1211 ADD_imm8s_r32( -4, R_EAX );
1212 check_walign32( R_EAX );
1213 load_reg( R_EDX, Rm );
1214 MEM_WRITE_LONG( R_EAX, R_EDX );
1215 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
1216 sh4_x86.tstate = TSTATE_NONE;
1217 :}
1218 MOV.L Rm, @(R0, Rn) {:
1219 COUNT_INST(I_MOVL);
1220 load_reg( R_EAX, 0 );
1221 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1222 check_walign32( R_EAX );
1223 load_reg( R_EDX, Rm );
1224 MEM_WRITE_LONG( R_EAX, R_EDX );
1225 sh4_x86.tstate = TSTATE_NONE;
1226 :}
1227 MOV.L R0, @(disp, GBR) {:
1228 COUNT_INST(I_MOVL);
1229 load_spreg( R_EAX, R_GBR );
1230 ADD_imm32_r32( disp, R_EAX );
1231 check_walign32( R_EAX );
1232 load_reg( R_EDX, 0 );
1233 MEM_WRITE_LONG( R_EAX, R_EDX );
1234 sh4_x86.tstate = TSTATE_NONE;
1235 :}
1236 MOV.L Rm, @(disp, Rn) {:
1237 COUNT_INST(I_MOVL);
1238 load_reg( R_EAX, Rn );
1239 ADD_imm32_r32( disp, R_EAX );
1240 check_walign32( R_EAX );
1241 MOV_r32_r32( R_EAX, R_ECX );
1242 AND_imm32_r32( 0xFC000000, R_ECX );
1243 CMP_imm32_r32( 0xE0000000, R_ECX );
1244 JNE_rel8( notsq );
1245 AND_imm8s_r32( 0x3C, R_EAX );
1246 load_reg( R_EDX, Rm );
1247 MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
1248 JMP_rel8(end);
1249 JMP_TARGET(notsq);
1250 load_reg( R_EDX, Rm );
1251 MEM_WRITE_LONG( R_EAX, R_EDX );
1252 JMP_TARGET(end);
1253 sh4_x86.tstate = TSTATE_NONE;
1254 :}
1255 MOV.L @Rm, Rn {:
1256 COUNT_INST(I_MOVL);
1257 load_reg( R_EAX, Rm );
1258 check_ralign32( R_EAX );
1259 MEM_READ_LONG( R_EAX, R_EAX );
1260 store_reg( R_EAX, Rn );
1261 sh4_x86.tstate = TSTATE_NONE;
1262 :}
1263 MOV.L @Rm+, Rn {:
1264 COUNT_INST(I_MOVL);
1265 load_reg( R_EAX, Rm );
1266 check_ralign32( R_EAX );
1267 MEM_READ_LONG( R_EAX, R_EAX );
1268 if( Rm != Rn ) {
1269 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1270 }
1271 store_reg( R_EAX, Rn );
1272 sh4_x86.tstate = TSTATE_NONE;
1273 :}
1274 MOV.L @(R0, Rm), Rn {:
1275 COUNT_INST(I_MOVL);
1276 load_reg( R_EAX, 0 );
1277 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1278 check_ralign32( R_EAX );
1279 MEM_READ_LONG( R_EAX, R_EAX );
1280 store_reg( R_EAX, Rn );
1281 sh4_x86.tstate = TSTATE_NONE;
1282 :}
1283 MOV.L @(disp, GBR), R0 {:
1284 COUNT_INST(I_MOVL);
1285 load_spreg( R_EAX, R_GBR );
1286 ADD_imm32_r32( disp, R_EAX );
1287 check_ralign32( R_EAX );
1288 MEM_READ_LONG( R_EAX, R_EAX );
1289 store_reg( R_EAX, 0 );
1290 sh4_x86.tstate = TSTATE_NONE;
1291 :}
1292 MOV.L @(disp, PC), Rn {:
1293 COUNT_INST(I_MOVLPC);
1294 if( sh4_x86.in_delay_slot ) {
1295 SLOTILLEGAL();
1296 } else {
1297 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1298 if( IS_IN_ICACHE(target) ) {
1299 // If the target address is in the same page as the code, it's
1300 // pretty safe to just ref it directly and circumvent the whole
1301 // memory subsystem. (this is a big performance win)
1303 // FIXME: There's a corner-case that's not handled here when
1304 // the current code-page is in the ITLB but not in the UTLB.
1305 // (should generate a TLB miss although need to test SH4
1306 // behaviour to confirm) Unlikely to be anyone depending on this
1307 // behaviour though.
1308 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1309 MOV_moff32_EAX( ptr );
1310 } else {
1311 // Note: we use sh4r.pc for the calc as we could be running at a
1312 // different virtual address than the translation was done with,
1313 // but we can safely assume that the low bits are the same.
1314 load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1315 ADD_sh4r_r32( R_PC, R_EAX );
1316 MEM_READ_LONG( R_EAX, R_EAX );
1317 sh4_x86.tstate = TSTATE_NONE;
1318 }
1319 store_reg( R_EAX, Rn );
1320 }
1321 :}
1322 MOV.L @(disp, Rm), Rn {:
1323 COUNT_INST(I_MOVL);
1324 load_reg( R_EAX, Rm );
1325 ADD_imm8s_r32( disp, R_EAX );
1326 check_ralign32( R_EAX );
1327 MEM_READ_LONG( R_EAX, R_EAX );
1328 store_reg( R_EAX, Rn );
1329 sh4_x86.tstate = TSTATE_NONE;
1330 :}
1331 MOV.W Rm, @Rn {:
1332 COUNT_INST(I_MOVW);
1333 load_reg( R_EAX, Rn );
1334 check_walign16( R_EAX );
1335 load_reg( R_EDX, Rm );
1336 MEM_WRITE_WORD( R_EAX, R_EDX );
1337 sh4_x86.tstate = TSTATE_NONE;
1338 :}
1339 MOV.W Rm, @-Rn {:
1340 COUNT_INST(I_MOVW);
1341 load_reg( R_EAX, Rn );
1342 check_walign16( R_EAX );
1343 LEA_r32disp8_r32( R_EAX, -2, R_EAX );
1344 load_reg( R_EDX, Rm );
1345 MEM_WRITE_WORD( R_EAX, R_EDX );
1346 ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
1347 sh4_x86.tstate = TSTATE_NONE;
1348 :}
1349 MOV.W Rm, @(R0, Rn) {:
1350 COUNT_INST(I_MOVW);
1351 load_reg( R_EAX, 0 );
1352 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1353 check_walign16( R_EAX );
1354 load_reg( R_EDX, Rm );
1355 MEM_WRITE_WORD( R_EAX, R_EDX );
1356 sh4_x86.tstate = TSTATE_NONE;
1357 :}
1358 MOV.W R0, @(disp, GBR) {:
1359 COUNT_INST(I_MOVW);
1360 load_spreg( R_EAX, R_GBR );
1361 ADD_imm32_r32( disp, R_EAX );
1362 check_walign16( R_EAX );
1363 load_reg( R_EDX, 0 );
1364 MEM_WRITE_WORD( R_EAX, R_EDX );
1365 sh4_x86.tstate = TSTATE_NONE;
1366 :}
1367 MOV.W R0, @(disp, Rn) {:
1368 COUNT_INST(I_MOVW);
1369 load_reg( R_EAX, Rn );
1370 ADD_imm32_r32( disp, R_EAX );
1371 check_walign16( R_EAX );
1372 load_reg( R_EDX, 0 );
1373 MEM_WRITE_WORD( R_EAX, R_EDX );
1374 sh4_x86.tstate = TSTATE_NONE;
1375 :}
1376 MOV.W @Rm, Rn {:
1377 COUNT_INST(I_MOVW);
1378 load_reg( R_EAX, Rm );
1379 check_ralign16( R_EAX );
1380 MEM_READ_WORD( R_EAX, R_EAX );
1381 store_reg( R_EAX, Rn );
1382 sh4_x86.tstate = TSTATE_NONE;
1383 :}
1384 MOV.W @Rm+, Rn {:
1385 COUNT_INST(I_MOVW);
1386 load_reg( R_EAX, Rm );
1387 check_ralign16( R_EAX );
1388 MEM_READ_WORD( R_EAX, R_EAX );
1389 if( Rm != Rn ) {
1390 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1391 }
1392 store_reg( R_EAX, Rn );
1393 sh4_x86.tstate = TSTATE_NONE;
1394 :}
1395 MOV.W @(R0, Rm), Rn {:
1396 COUNT_INST(I_MOVW);
1397 load_reg( R_EAX, 0 );
1398 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1399 check_ralign16( R_EAX );
1400 MEM_READ_WORD( R_EAX, R_EAX );
1401 store_reg( R_EAX, Rn );
1402 sh4_x86.tstate = TSTATE_NONE;
1403 :}
1404 MOV.W @(disp, GBR), R0 {:
1405 COUNT_INST(I_MOVW);
1406 load_spreg( R_EAX, R_GBR );
1407 ADD_imm32_r32( disp, R_EAX );
1408 check_ralign16( R_EAX );
1409 MEM_READ_WORD( R_EAX, R_EAX );
1410 store_reg( R_EAX, 0 );
1411 sh4_x86.tstate = TSTATE_NONE;
1412 :}
1413 MOV.W @(disp, PC), Rn {:
1414 COUNT_INST(I_MOVW);
1415 if( sh4_x86.in_delay_slot ) {
1416 SLOTILLEGAL();
1417 } else {
1418 // See comments for MOV.L @(disp, PC), Rn
1419 uint32_t target = pc + disp + 4;
1420 if( IS_IN_ICACHE(target) ) {
1421 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1422 MOV_moff32_EAX( ptr );
1423 MOVSX_r16_r32( R_EAX, R_EAX );
1424 } else {
1425 load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1426 ADD_sh4r_r32( R_PC, R_EAX );
1427 MEM_READ_WORD( R_EAX, R_EAX );
1428 sh4_x86.tstate = TSTATE_NONE;
1429 }
1430 store_reg( R_EAX, Rn );
1431 }
1432 :}
1433 MOV.W @(disp, Rm), R0 {:
1434 COUNT_INST(I_MOVW);
1435 load_reg( R_EAX, Rm );
1436 ADD_imm32_r32( disp, R_EAX );
1437 check_ralign16( R_EAX );
1438 MEM_READ_WORD( R_EAX, R_EAX );
1439 store_reg( R_EAX, 0 );
1440 sh4_x86.tstate = TSTATE_NONE;
1441 :}
1442 MOVA @(disp, PC), R0 {:
1443 COUNT_INST(I_MOVA);
1444 if( sh4_x86.in_delay_slot ) {
1445 SLOTILLEGAL();
1446 } else {
1447 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1448 ADD_sh4r_r32( R_PC, R_ECX );
1449 store_reg( R_ECX, 0 );
1450 sh4_x86.tstate = TSTATE_NONE;
1451 }
1452 :}
1453 MOVCA.L R0, @Rn {:
1454 COUNT_INST(I_MOVCA);
1455 load_reg( R_EAX, Rn );
1456 check_walign32( R_EAX );
1457 load_reg( R_EDX, 0 );
1458 MEM_WRITE_LONG( R_EAX, R_EDX );
1459 sh4_x86.tstate = TSTATE_NONE;
1460 :}
1462 /* Control transfer instructions */
1463 BF disp {:
1464 COUNT_INST(I_BF);
1465 if( sh4_x86.in_delay_slot ) {
1466 SLOTILLEGAL();
1467 } else {
1468 sh4vma_t target = disp + pc + 4;
1469 JT_rel8( nottaken );
1470 exit_block_rel(target, pc+2 );
1471 JMP_TARGET(nottaken);
1472 return 2;
1473 }
1474 :}
1475 BF/S disp {:
1476 COUNT_INST(I_BFS);
1477 if( sh4_x86.in_delay_slot ) {
1478 SLOTILLEGAL();
1479 } else {
1480 sh4_x86.in_delay_slot = DELAY_PC;
1481 if( UNTRANSLATABLE(pc+2) ) {
1482 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1483 JT_rel8(nottaken);
1484 ADD_imm32_r32( disp, R_EAX );
1485 JMP_TARGET(nottaken);
1486 ADD_sh4r_r32( R_PC, R_EAX );
1487 store_spreg( R_EAX, R_NEW_PC );
1488 exit_block_emu(pc+2);
1489 sh4_x86.branch_taken = TRUE;
1490 return 2;
1491 } else {
1492 if( sh4_x86.tstate == TSTATE_NONE ) {
1493 CMP_imm8s_sh4r( 1, R_T );
1494 sh4_x86.tstate = TSTATE_E;
1495 }
1496 sh4vma_t target = disp + pc + 4;
1497 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
1498 int save_tstate = sh4_x86.tstate;
1499 sh4_translate_instruction(pc+2);
1500 exit_block_rel( target, pc+4 );
1502 // not taken
1503 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1504 sh4_x86.tstate = save_tstate;
1505 sh4_translate_instruction(pc+2);
1506 return 4;
1507 }
1508 }
1509 :}
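/* In the translatable case above the delay-slot instruction is emitted twice:
 * once on the branch-taken path (before exit_block_rel) and once on the
 * not-taken path reached through the patched rel32 displacement, since the two
 * paths continue at different addresses. BT/S below uses the same scheme with
 * the sense of the condition inverted. */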
1510 BRA disp {:
1511 COUNT_INST(I_BRA);
1512 if( sh4_x86.in_delay_slot ) {
1513 SLOTILLEGAL();
1514 } else {
1515 sh4_x86.in_delay_slot = DELAY_PC;
1516 sh4_x86.branch_taken = TRUE;
1517 if( UNTRANSLATABLE(pc+2) ) {
1518 load_spreg( R_EAX, R_PC );
1519 ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
1520 store_spreg( R_EAX, R_NEW_PC );
1521 exit_block_emu(pc+2);
1522 return 2;
1523 } else {
1524 sh4_translate_instruction( pc + 2 );
1525 exit_block_rel( disp + pc + 4, pc+4 );
1526 return 4;
1527 }
1528 }
1529 :}
1530 BRAF Rn {:
1531 COUNT_INST(I_BRAF);
1532 if( sh4_x86.in_delay_slot ) {
1533 SLOTILLEGAL();
1534 } else {
1535 load_spreg( R_EAX, R_PC );
1536 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1537 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1538 store_spreg( R_EAX, R_NEW_PC );
1539 sh4_x86.in_delay_slot = DELAY_PC;
1540 sh4_x86.tstate = TSTATE_NONE;
1541 sh4_x86.branch_taken = TRUE;
1542 if( UNTRANSLATABLE(pc+2) ) {
1543 exit_block_emu(pc+2);
1544 return 2;
1545 } else {
1546 sh4_translate_instruction( pc + 2 );
1547 exit_block_newpcset(pc+2);
1548 return 4;
1549 }
1550 }
1551 :}
1552 BSR disp {:
1553 COUNT_INST(I_BSR);
1554 if( sh4_x86.in_delay_slot ) {
1555 SLOTILLEGAL();
1556 } else {
1557 load_spreg( R_EAX, R_PC );
1558 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1559 store_spreg( R_EAX, R_PR );
1560 sh4_x86.in_delay_slot = DELAY_PC;
1561 sh4_x86.branch_taken = TRUE;
1562 sh4_x86.tstate = TSTATE_NONE;
1563 if( UNTRANSLATABLE(pc+2) ) {
1564 ADD_imm32_r32( disp, R_EAX );
1565 store_spreg( R_EAX, R_NEW_PC );
1566 exit_block_emu(pc+2);
1567 return 2;
1568 } else {
1569 sh4_translate_instruction( pc + 2 );
1570 exit_block_rel( disp + pc + 4, pc+4 );
1571 return 4;
1572 }
1573 }
1574 :}
1575 BSRF Rn {:
1576 COUNT_INST(I_BSRF);
1577 if( sh4_x86.in_delay_slot ) {
1578 SLOTILLEGAL();
1579 } else {
1580 load_spreg( R_EAX, R_PC );
1581 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1582 store_spreg( R_EAX, R_PR );
1583 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1584 store_spreg( R_EAX, R_NEW_PC );
1586 sh4_x86.in_delay_slot = DELAY_PC;
1587 sh4_x86.tstate = TSTATE_NONE;
1588 sh4_x86.branch_taken = TRUE;
1589 if( UNTRANSLATABLE(pc+2) ) {
1590 exit_block_emu(pc+2);
1591 return 2;
1592 } else {
1593 sh4_translate_instruction( pc + 2 );
1594 exit_block_newpcset(pc+2);
1595 return 4;
1596 }
1597 }
1598 :}
1599 BT disp {:
1600 COUNT_INST(I_BT);
1601 if( sh4_x86.in_delay_slot ) {
1602 SLOTILLEGAL();
1603 } else {
1604 sh4vma_t target = disp + pc + 4;
1605 JF_rel8( nottaken );
1606 exit_block_rel(target, pc+2 );
1607 JMP_TARGET(nottaken);
1608 return 2;
1609 }
1610 :}
1611 BT/S disp {:
1612 COUNT_INST(I_BTS);
1613 if( sh4_x86.in_delay_slot ) {
1614 SLOTILLEGAL();
1615 } else {
1616 sh4_x86.in_delay_slot = DELAY_PC;
1617 if( UNTRANSLATABLE(pc+2) ) {
1618 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1619 JF_rel8(nottaken);
1620 ADD_imm32_r32( disp, R_EAX );
1621 JMP_TARGET(nottaken);
1622 ADD_sh4r_r32( R_PC, R_EAX );
1623 store_spreg( R_EAX, R_NEW_PC );
1624 exit_block_emu(pc+2);
1625 sh4_x86.branch_taken = TRUE;
1626 return 2;
1627 } else {
1628 if( sh4_x86.tstate == TSTATE_NONE ) {
1629 CMP_imm8s_sh4r( 1, R_T );
1630 sh4_x86.tstate = TSTATE_E;
1631 }
1632 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
1633 int save_tstate = sh4_x86.tstate;
1634 sh4_translate_instruction(pc+2);
1635 exit_block_rel( disp + pc + 4, pc+4 );
1636 // not taken
1637 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1638 sh4_x86.tstate = save_tstate;
1639 sh4_translate_instruction(pc+2);
1640 return 4;
1641 }
1642 }
1643 :}
1644 JMP @Rn {:
1645 COUNT_INST(I_JMP);
1646 if( sh4_x86.in_delay_slot ) {
1647 SLOTILLEGAL();
1648 } else {
1649 load_reg( R_ECX, Rn );
1650 store_spreg( R_ECX, R_NEW_PC );
1651 sh4_x86.in_delay_slot = DELAY_PC;
1652 sh4_x86.branch_taken = TRUE;
1653 if( UNTRANSLATABLE(pc+2) ) {
1654 exit_block_emu(pc+2);
1655 return 2;
1656 } else {
1657 sh4_translate_instruction(pc+2);
1658 exit_block_newpcset(pc+2);
1659 return 4;
1660 }
1661 }
1662 :}
1663 JSR @Rn {:
1664 COUNT_INST(I_JSR);
1665 if( sh4_x86.in_delay_slot ) {
1666 SLOTILLEGAL();
1667 } else {
1668 load_spreg( R_EAX, R_PC );
1669 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1670 store_spreg( R_EAX, R_PR );
1671 load_reg( R_ECX, Rn );
1672 store_spreg( R_ECX, R_NEW_PC );
1673 sh4_x86.in_delay_slot = DELAY_PC;
1674 sh4_x86.branch_taken = TRUE;
1675 sh4_x86.tstate = TSTATE_NONE;
1676 if( UNTRANSLATABLE(pc+2) ) {
1677 exit_block_emu(pc+2);
1678 return 2;
1679 } else {
1680 sh4_translate_instruction(pc+2);
1681 exit_block_newpcset(pc+2);
1682 return 4;
1683 }
1684 }
1685 :}
1686 RTE {:
1687 COUNT_INST(I_RTE);
1688 if( sh4_x86.in_delay_slot ) {
1689 SLOTILLEGAL();
1690 } else {
1691 check_priv();
1692 load_spreg( R_ECX, R_SPC );
1693 store_spreg( R_ECX, R_NEW_PC );
1694 load_spreg( R_EAX, R_SSR );
1695 call_func1( sh4_write_sr, R_EAX );
1696 sh4_x86.in_delay_slot = DELAY_PC;
1697 sh4_x86.fpuen_checked = FALSE;
1698 sh4_x86.tstate = TSTATE_NONE;
1699 sh4_x86.branch_taken = TRUE;
1700 if( UNTRANSLATABLE(pc+2) ) {
1701 exit_block_emu(pc+2);
1702 return 2;
1703 } else {
1704 sh4_translate_instruction(pc+2);
1705 exit_block_newpcset(pc+2);
1706 return 4;
1707 }
1708 }
1709 :}
1710 RTS {:
1711 COUNT_INST(I_RTS);
1712 if( sh4_x86.in_delay_slot ) {
1713 SLOTILLEGAL();
1714 } else {
1715 load_spreg( R_ECX, R_PR );
1716 store_spreg( R_ECX, R_NEW_PC );
1717 sh4_x86.in_delay_slot = DELAY_PC;
1718 sh4_x86.branch_taken = TRUE;
1719 if( UNTRANSLATABLE(pc+2) ) {
1720 exit_block_emu(pc+2);
1721 return 2;
1722 } else {
1723 sh4_translate_instruction(pc+2);
1724 exit_block_newpcset(pc+2);
1725 return 4;
1726 }
1727 }
1728 :}
1729 TRAPA #imm {:
1730 COUNT_INST(I_TRAPA);
1731 if( sh4_x86.in_delay_slot ) {
1732 SLOTILLEGAL();
1733 } else {
1734 load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1735 ADD_r32_sh4r( R_ECX, R_PC );
1736 load_imm32( R_EAX, imm );
1737 call_func1( sh4_raise_trap, R_EAX );
1738 sh4_x86.tstate = TSTATE_NONE;
1739 exit_block_pcset(pc);
1740 sh4_x86.branch_taken = TRUE;
1741 return 2;
1742 }
1743 :}
1744 UNDEF {:
1745 COUNT_INST(I_UNDEF);
1746 if( sh4_x86.in_delay_slot ) {
1747 exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);
1748 } else {
1749 exit_block_exc(EXC_ILLEGAL, pc);
1750 return 2;
1751 }
1752 :}
1754 CLRMAC {:
1755 COUNT_INST(I_CLRMAC);
1756 XOR_r32_r32(R_EAX, R_EAX);
1757 store_spreg( R_EAX, R_MACL );
1758 store_spreg( R_EAX, R_MACH );
1759 sh4_x86.tstate = TSTATE_NONE;
1760 :}
1761 CLRS {:
1762 COUNT_INST(I_CLRS);
1763 CLC();
1764 SETC_sh4r(R_S);
1765 sh4_x86.tstate = TSTATE_NONE;
1766 :}
1767 CLRT {:
1768 COUNT_INST(I_CLRT);
1769 CLC();
1770 SETC_t();
1771 sh4_x86.tstate = TSTATE_C;
1772 :}
1773 SETS {:
1774 COUNT_INST(I_SETS);
1775 STC();
1776 SETC_sh4r(R_S);
1777 sh4_x86.tstate = TSTATE_NONE;
1778 :}
1779 SETT {:
1780 COUNT_INST(I_SETT);
1781 STC();
1782 SETC_t();
1783 sh4_x86.tstate = TSTATE_C;
1784 :}
1786 /* Floating point moves */
1787 FMOV FRm, FRn {:
1788 COUNT_INST(I_FMOV1);
1789 check_fpuen();
1790 if( sh4_x86.double_size ) {
1791 load_dr0( R_EAX, FRm );
1792 load_dr1( R_ECX, FRm );
1793 store_dr0( R_EAX, FRn );
1794 store_dr1( R_ECX, FRn );
1795 } else {
1796 load_fr( R_EAX, FRm ); // SZ=0 branch
1797 store_fr( R_EAX, FRn );
1798 }
1799 :}
1800 FMOV FRm, @Rn {:
1801 COUNT_INST(I_FMOV2);
1802 check_fpuen();
1803 load_reg( R_EAX, Rn );
1804 if( sh4_x86.double_size ) {
1805 check_walign64( R_EAX );
1806 load_dr0( R_EDX, FRm );
1807 MEM_WRITE_LONG( R_EAX, R_EDX );
1808 load_reg( R_EAX, Rn );
1809 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1810 load_dr1( R_EDX, FRm );
1811 MEM_WRITE_LONG( R_EAX, R_EDX );
1812 } else {
1813 check_walign32( R_EAX );
1814 load_fr( R_EDX, FRm );
1815 MEM_WRITE_LONG( R_EAX, R_EDX );
1816 }
1817 sh4_x86.tstate = TSTATE_NONE;
1818 :}
1819 FMOV @Rm, FRn {:
1820 COUNT_INST(I_FMOV5);
1821 check_fpuen();
1822 load_reg( R_EAX, Rm );
1823 if( sh4_x86.double_size ) {
1824 check_ralign64( R_EAX );
1825 MEM_READ_LONG( R_EAX, R_EAX );
1826 store_dr0( R_EAX, FRn );
1827 load_reg( R_EAX, Rm );
1828 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1829 MEM_READ_LONG( R_EAX, R_EAX );
1830 store_dr1( R_EAX, FRn );
1831 } else {
1832 check_ralign32( R_EAX );
1833 MEM_READ_LONG( R_EAX, R_EAX );
1834 store_fr( R_EAX, FRn );
1835 }
1836 sh4_x86.tstate = TSTATE_NONE;
1837 :}
1838 FMOV FRm, @-Rn {:
1839 COUNT_INST(I_FMOV3);
1840 check_fpuen();
1841 load_reg( R_EAX, Rn );
1842 if( sh4_x86.double_size ) {
1843 check_walign64( R_EAX );
1844 LEA_r32disp8_r32( R_EAX, -8, R_EAX );
1845 load_dr0( R_EDX, FRm );
1846 MEM_WRITE_LONG( R_EAX, R_EDX );
1847 load_reg( R_EAX, Rn );
1848 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
1849 load_dr1( R_EDX, FRm );
1850 MEM_WRITE_LONG( R_EAX, R_EDX );
1851 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1852 } else {
1853 check_walign32( R_EAX );
1854 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
1855 load_fr( R_EDX, FRm );
1856 MEM_WRITE_LONG( R_EAX, R_EDX );
1857 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1858 }
1859 sh4_x86.tstate = TSTATE_NONE;
1860 :}
1861 FMOV @Rm+, FRn {:
1862 COUNT_INST(I_FMOV6);
1863 check_fpuen();
1864 load_reg( R_EAX, Rm );
1865 if( sh4_x86.double_size ) {
1866 check_ralign64( R_EAX );
1867 MEM_READ_LONG( R_EAX, R_EAX );
1868 store_dr0( R_EAX, FRn );
1869 load_reg( R_EAX, Rm );
1870 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1871 MEM_READ_LONG( R_EAX, R_EAX );
1872 store_dr1( R_EAX, FRn );
1873 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1874 } else {
1875 check_ralign32( R_EAX );
1876 MEM_READ_LONG( R_EAX, R_EAX );
1877 store_fr( R_EAX, FRn );
1878 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1879 }
1880 sh4_x86.tstate = TSTATE_NONE;
1881 :}
1882 FMOV FRm, @(R0, Rn) {:
1883 COUNT_INST(I_FMOV4);
1884 check_fpuen();
1885 load_reg( R_EAX, Rn );
1886 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1887 if( sh4_x86.double_size ) {
1888 check_walign64( R_EAX );
1889 load_dr0( R_EDX, FRm );
1890 MEM_WRITE_LONG( R_EAX, R_EDX );
1891 load_reg( R_EAX, Rn );
1892 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1893 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1894 load_dr1( R_EDX, FRm );
1895 MEM_WRITE_LONG( R_EAX, R_EDX );
1896 } else {
1897 check_walign32( R_EAX );
1898 load_fr( R_EDX, FRm );
1899 MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
1900 }
1901 sh4_x86.tstate = TSTATE_NONE;
1902 :}
1903 FMOV @(R0, Rm), FRn {:
1904 COUNT_INST(I_FMOV7);
1905 check_fpuen();
1906 load_reg( R_EAX, Rm );
1907 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1908 if( sh4_x86.double_size ) {
1909 check_ralign64( R_EAX );
1910 MEM_READ_LONG( R_EAX, R_EAX );
1911 store_dr0( R_EAX, FRn );
1912 load_reg( R_EAX, Rm );
1913 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1914 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1915 MEM_READ_LONG( R_EAX, R_EAX );
1916 store_dr1( R_EAX, FRn );
1917 } else {
1918 check_ralign32( R_EAX );
1919 MEM_READ_LONG( R_EAX, R_EAX );
1920 store_fr( R_EAX, FRn );
1921 }
1922 sh4_x86.tstate = TSTATE_NONE;
1923 :}
1924 FLDI0 FRn {: /* IFF PR=0 */
1925 COUNT_INST(I_FLDI0);
1926 check_fpuen();
1927 if( sh4_x86.double_prec == 0 ) {
1928 XOR_r32_r32( R_EAX, R_EAX );
1929 store_fr( R_EAX, FRn );
1930 }
1931 sh4_x86.tstate = TSTATE_NONE;
1932 :}
1933 FLDI1 FRn {: /* IFF PR=0 */
1934 COUNT_INST(I_FLDI1);
1935 check_fpuen();
1936 if( sh4_x86.double_prec == 0 ) {
1937 load_imm32(R_EAX, 0x3F800000);
1938 store_fr( R_EAX, FRn );
1939 }
1940 :}
1942 FLOAT FPUL, FRn {:
1943 COUNT_INST(I_FLOAT);
1944 check_fpuen();
1945 FILD_sh4r(R_FPUL);
1946 if( sh4_x86.double_prec ) {
1947 pop_dr( FRn );
1948 } else {
1949 pop_fr( FRn );
1950 }
1951 :}
1952 FTRC FRm, FPUL {:
1953 COUNT_INST(I_FTRC);
1954 check_fpuen();
1955 if( sh4_x86.double_prec ) {
1956 push_dr( FRm );
1957 } else {
1958 push_fr( FRm );
1959 }
1960 load_ptr( R_ECX, &max_int );
1961 FILD_r32ind( R_ECX );
1962 FCOMIP_st(1);
1963 JNA_rel8( sat );
1964 load_ptr( R_ECX, &min_int ); // 5
1965 FILD_r32ind( R_ECX ); // 2
1966 FCOMIP_st(1); // 2
1967 JAE_rel8( sat2 ); // 2
1968 load_ptr( R_EAX, &save_fcw );
1969 FNSTCW_r32ind( R_EAX );
1970 load_ptr( R_EDX, &trunc_fcw );
1971 FLDCW_r32ind( R_EDX );
1972 FISTP_sh4r(R_FPUL); // 3
1973 FLDCW_r32ind( R_EAX );
1974 JMP_rel8(end); // 2
1976 JMP_TARGET(sat);
1977 JMP_TARGET(sat2);
1978 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
1979 store_spreg( R_ECX, R_FPUL );
1980 FPOP_st();
1981 JMP_TARGET(end);
1982 sh4_x86.tstate = TSTATE_NONE;
1983 :}
1984 FLDS FRm, FPUL {:
1985 COUNT_INST(I_FLDS);
1986 check_fpuen();
1987 load_fr( R_EAX, FRm );
1988 store_spreg( R_EAX, R_FPUL );
1989 :}
1990 FSTS FPUL, FRn {:
1991 COUNT_INST(I_FSTS);
1992 check_fpuen();
1993 load_spreg( R_EAX, R_FPUL );
1994 store_fr( R_EAX, FRn );
1995 :}
1996 FCNVDS FRm, FPUL {:
1997 COUNT_INST(I_FCNVDS);
1998 check_fpuen();
1999 if( sh4_x86.double_prec ) {
2000 push_dr( FRm );
2001 pop_fpul();
2002 }
2003 :}
2004 FCNVSD FPUL, FRn {:
2005 COUNT_INST(I_FCNVSD);
2006 check_fpuen();
2007 if( sh4_x86.double_prec ) {
2008 push_fpul();
2009 pop_dr( FRn );
2010 }
2011 :}
2013 /* Floating point instructions */
2014 FABS FRn {:
2015 COUNT_INST(I_FABS);
2016 check_fpuen();
2017 if( sh4_x86.double_prec ) {
2018 push_dr(FRn);
2019 FABS_st0();
2020 pop_dr(FRn);
2021 } else {
2022 push_fr(FRn);
2023 FABS_st0();
2024 pop_fr(FRn);
2025 }
2026 :}
2027 FADD FRm, FRn {:
2028 COUNT_INST(I_FADD);
2029 check_fpuen();
2030 if( sh4_x86.double_prec ) {
2031 push_dr(FRm);
2032 push_dr(FRn);
2033 FADDP_st(1);
2034 pop_dr(FRn);
2035 } else {
2036 push_fr(FRm);
2037 push_fr(FRn);
2038 FADDP_st(1);
2039 pop_fr(FRn);
2040 }
2041 :}
2042 FDIV FRm, FRn {:
2043 COUNT_INST(I_FDIV);
2044 check_fpuen();
2045 if( sh4_x86.double_prec ) {
2046 push_dr(FRn);
2047 push_dr(FRm);
2048 FDIVP_st(1);
2049 pop_dr(FRn);
2050 } else {
2051 push_fr(FRn);
2052 push_fr(FRm);
2053 FDIVP_st(1);
2054 pop_fr(FRn);
2055 }
2056 :}
2057 FMAC FR0, FRm, FRn {:
2058 COUNT_INST(I_FMAC);
2059 check_fpuen();
2060 if( sh4_x86.double_prec ) {
2061 push_dr( 0 );
2062 push_dr( FRm );
2063 FMULP_st(1);
2064 push_dr( FRn );
2065 FADDP_st(1);
2066 pop_dr( FRn );
2067 } else {
2068 push_fr( 0 );
2069 push_fr( FRm );
2070 FMULP_st(1);
2071 push_fr( FRn );
2072 FADDP_st(1);
2073 pop_fr( FRn );
2074 }
2075 :}
2077 FMUL FRm, FRn {:
2078 COUNT_INST(I_FMUL);
2079 check_fpuen();
2080 if( sh4_x86.double_prec ) {
2081 push_dr(FRm);
2082 push_dr(FRn);
2083 FMULP_st(1);
2084 pop_dr(FRn);
2085 } else {
2086 push_fr(FRm);
2087 push_fr(FRn);
2088 FMULP_st(1);
2089 pop_fr(FRn);
2090 }
2091 :}
2092 FNEG FRn {:
2093 COUNT_INST(I_FNEG);
2094 check_fpuen();
2095 if( sh4_x86.double_prec ) {
2096 push_dr(FRn);
2097 FCHS_st0();
2098 pop_dr(FRn);
2099 } else {
2100 push_fr(FRn);
2101 FCHS_st0();
2102 pop_fr(FRn);
2103 }
2104 :}
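/* FSRRA approximates 1/sqrt(FRn) on the SH4; the translation below computes it
 * exactly with FSQRT followed by a divide into the 1.0 pushed by FLD1. The
 * instruction is only defined for single precision, hence the double_prec == 0
 * guard.
 */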
2105 FSRRA FRn {:
2106 COUNT_INST(I_FSRRA);
2107 check_fpuen();
2108 if( sh4_x86.double_prec == 0 ) {
2109 FLD1_st0();
2110 push_fr(FRn);
2111 FSQRT_st0();
2112 FDIVP_st(1);
2113 pop_fr(FRn);
2114 }
2115 :}
2116 FSQRT FRn {:
2117 COUNT_INST(I_FSQRT);
2118 check_fpuen();
2119 if( sh4_x86.double_prec ) {
2120 push_dr(FRn);
2121 FSQRT_st0();
2122 pop_dr(FRn);
2123 } else {
2124 push_fr(FRn);
2125 FSQRT_st0();
2126 pop_fr(FRn);
2127 }
2128 :}
2129 FSUB FRm, FRn {:
2130 COUNT_INST(I_FSUB);
2131 check_fpuen();
2132 if( sh4_x86.double_prec ) {
2133 push_dr(FRn);
2134 push_dr(FRm);
2135 FSUBP_st(1);
2136 pop_dr(FRn);
2137 } else {
2138 push_fr(FRn);
2139 push_fr(FRm);
2140 FSUBP_st(1);
2141 pop_fr(FRn);
2142 }
2143 :}
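/* The two FCMP forms below compare with FCOMIP and latch the result into the
 * SH4 T bit via SETE_t()/SETA_t(). Recording TSTATE_E / TSTATE_A notes which
 * host condition code currently mirrors T, so a following conditional branch
 * can presumably test the live flags instead of re-reading sh4r.t.
 */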
2145 FCMP/EQ FRm, FRn {:
2146 COUNT_INST(I_FCMPEQ);
2147 check_fpuen();
2148 if( sh4_x86.double_prec ) {
2149 push_dr(FRm);
2150 push_dr(FRn);
2151 } else {
2152 push_fr(FRm);
2153 push_fr(FRn);
2154 }
2155 FCOMIP_st(1);
2156 SETE_t();
2157 FPOP_st();
2158 sh4_x86.tstate = TSTATE_E;
2159 :}
2160 FCMP/GT FRm, FRn {:
2161 COUNT_INST(I_FCMPGT);
2162 check_fpuen();
2163 if( sh4_x86.double_prec ) {
2164 push_dr(FRm);
2165 push_dr(FRn);
2166 } else {
2167 push_fr(FRm);
2168 push_fr(FRn);
2169 }
2170 FCOMIP_st(1);
2171 SETA_t();
2172 FPOP_st();
2173 sh4_x86.tstate = TSTATE_A;
2174 :}
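/* FSCA: sine/cosine lookup. FPUL holds a fixed-point angle (65536 == one full
 * turn) and the results go to the even/odd register pair FRn/FRn+1, which is
 * why the destination offset is masked to an even index (FRn & 0x0E). The
 * actual computation is delegated to the sh4_fsca() helper rather than inlined.
 */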
2176 FSCA FPUL, FRn {:
2177 COUNT_INST(I_FSCA);
2178 check_fpuen();
2179 if( sh4_x86.double_prec == 0 ) {
2180 LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
2181 load_spreg( R_EAX, R_FPUL );
2182 call_func2( sh4_fsca, R_EAX, R_EDX );
2183 }
2184 sh4_x86.tstate = TSTATE_NONE;
2185 :}
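/* FIPR: 4-element inner product of FVm and FVn, result in the last element of
 * FVn. With SSE3 the four products are summed by two HADDPS passes and a single
 * lane is stored back; otherwise the sum is accumulated on the x87 stack.
 */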
2186 FIPR FVm, FVn {:
2187 COUNT_INST(I_FIPR);
2188 check_fpuen();
2189 if( sh4_x86.double_prec == 0 ) {
2190 if( sh4_x86.sse3_enabled ) {
2191 MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
2192 MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
2193 HADDPS_xmm_xmm( 4, 4 );
2194 HADDPS_xmm_xmm( 4, 4 );
2195 MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
2196 } else {
2197 push_fr( FVm<<2 );
2198 push_fr( FVn<<2 );
2199 FMULP_st(1);
2200 push_fr( (FVm<<2)+1);
2201 push_fr( (FVn<<2)+1);
2202 FMULP_st(1);
2203 FADDP_st(1);
2204 push_fr( (FVm<<2)+2);
2205 push_fr( (FVn<<2)+2);
2206 FMULP_st(1);
2207 FADDP_st(1);
2208 push_fr( (FVm<<2)+3);
2209 push_fr( (FVn<<2)+3);
2210 FMULP_st(1);
2211 FADDP_st(1);
2212 pop_fr( (FVn<<2)+3);
2213 }
2214 }
2215 :}
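/* FTRV: multiply the 4x4 back-bank matrix XMTRX by vector FVn. The SSE3 path
 * loads the four matrix columns (one group of four XF registers each) into
 * xmm0-xmm3, broadcasts each vector component with the MOVSLDUP/MOVSHDUP plus
 * MOVLHPS/MOVHLPS shuffles, then multiplies and accumulates into xmm4. Without
 * SSE3 it falls back to the sh4_ftrv() helper.
 */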
2216 FTRV XMTRX, FVn {:
2217 COUNT_INST(I_FTRV);
2218 check_fpuen();
2219 if( sh4_x86.double_prec == 0 ) {
2220 if( sh4_x86.sse3_enabled ) {
2221 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1 M0 M3 M2
2222 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5 M4 M7 M6
2223 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9 M8 M11 M10
2224 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 ); // M13 M12 M15 M14
2226 MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
2227 MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
2228 MOVAPS_xmm_xmm( 4, 6 );
2229 MOVAPS_xmm_xmm( 5, 7 );
2230 MOVLHPS_xmm_xmm( 4, 4 ); // V1 V1 V1 V1
2231 MOVHLPS_xmm_xmm( 6, 6 ); // V3 V3 V3 V3
2232 MOVLHPS_xmm_xmm( 5, 5 ); // V0 V0 V0 V0
2233 MOVHLPS_xmm_xmm( 7, 7 ); // V2 V2 V2 V2
2234 MULPS_xmm_xmm( 0, 4 );
2235 MULPS_xmm_xmm( 1, 5 );
2236 MULPS_xmm_xmm( 2, 6 );
2237 MULPS_xmm_xmm( 3, 7 );
2238 ADDPS_xmm_xmm( 5, 4 );
2239 ADDPS_xmm_xmm( 7, 6 );
2240 ADDPS_xmm_xmm( 6, 4 );
2241 MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
2242 } else {
2243 LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
2244 call_func1( sh4_ftrv, R_EAX );
2245 }
2246 }
2247 sh4_x86.tstate = TSTATE_NONE;
2248 :}
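/* FRCHG and FSCHG below toggle FPSCR.FR (register bank) and FPSCR.SZ (transfer
 * size) respectively. FSCHG also flips the bit in xlat_sh4_mode, which appears
 * to be the copy of the mode flags used to key translated blocks, and updates
 * sh4_x86.double_size so the rest of this block is translated with the new size.
 */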
2250 FRCHG {:
2251 COUNT_INST(I_FRCHG);
2252 check_fpuen();
2253 XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
2254 call_func0( sh4_switch_fr_banks );
2255 sh4_x86.tstate = TSTATE_NONE;
2256 :}
2257 FSCHG {:
2258 COUNT_INST(I_FSCHG);
2259 check_fpuen();
2260 XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR );
2261 XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
2262 sh4_x86.tstate = TSTATE_NONE;
2263 sh4_x86.double_size = !sh4_x86.double_size;
2264 :}
2266 /* Processor control instructions */
2267 LDC Rm, SR {:
2268 COUNT_INST(I_LDCSR);
2269 if( sh4_x86.in_delay_slot ) {
2270 SLOTILLEGAL();
2271 } else {
2272 check_priv();
2273 load_reg( R_EAX, Rm );
2274 call_func1( sh4_write_sr, R_EAX );
2275 sh4_x86.fpuen_checked = FALSE;
2276 sh4_x86.tstate = TSTATE_NONE;
2277 return 2;
2278 }
2279 :}
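/* Writes to SR (here and in the LDC.L @Rm+, SR form below) go through
 * sh4_write_sr(), since changing SR can switch register banks, privilege level
 * and interrupt masking. fpuen_checked is reset because the FPU-disable bit may
 * have changed, and the early non-zero return presumably ends the translation
 * block so the new mode takes effect from the next instruction.
 */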
2280 LDC Rm, GBR {:
2281 COUNT_INST(I_LDC);
2282 load_reg( R_EAX, Rm );
2283 store_spreg( R_EAX, R_GBR );
2284 :}
2285 LDC Rm, VBR {:
2286 COUNT_INST(I_LDC);
2287 check_priv();
2288 load_reg( R_EAX, Rm );
2289 store_spreg( R_EAX, R_VBR );
2290 sh4_x86.tstate = TSTATE_NONE;
2291 :}
2292 LDC Rm, SSR {:
2293 COUNT_INST(I_LDC);
2294 check_priv();
2295 load_reg( R_EAX, Rm );
2296 store_spreg( R_EAX, R_SSR );
2297 sh4_x86.tstate = TSTATE_NONE;
2298 :}
2299 LDC Rm, SGR {:
2300 COUNT_INST(I_LDC);
2301 check_priv();
2302 load_reg( R_EAX, Rm );
2303 store_spreg( R_EAX, R_SGR );
2304 sh4_x86.tstate = TSTATE_NONE;
2305 :}
2306 LDC Rm, SPC {:
2307 COUNT_INST(I_LDC);
2308 check_priv();
2309 load_reg( R_EAX, Rm );
2310 store_spreg( R_EAX, R_SPC );
2311 sh4_x86.tstate = TSTATE_NONE;
2312 :}
2313 LDC Rm, DBR {:
2314 COUNT_INST(I_LDC);
2315 check_priv();
2316 load_reg( R_EAX, Rm );
2317 store_spreg( R_EAX, R_DBR );
2318 sh4_x86.tstate = TSTATE_NONE;
2319 :}
2320 LDC Rm, Rn_BANK {:
2321 COUNT_INST(I_LDC);
2322 check_priv();
2323 load_reg( R_EAX, Rm );
2324 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2325 sh4_x86.tstate = TSTATE_NONE;
2326 :}
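/* The LDC.L @Rm+ forms below share one pattern: load the address from Rm, check
 * 32-bit alignment, read the long, and only then post-increment Rm. Deferring
 * the increment until after MEM_READ_LONG presumably leaves Rm untouched if the
 * access raises an exception, so the instruction can be restarted cleanly.
 */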
2327 LDC.L @Rm+, GBR {:
2328 COUNT_INST(I_LDCM);
2329 load_reg( R_EAX, Rm );
2330 check_ralign32( R_EAX );
2331 MEM_READ_LONG( R_EAX, R_EAX );
2332 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2333 store_spreg( R_EAX, R_GBR );
2334 sh4_x86.tstate = TSTATE_NONE;
2335 :}
2336 LDC.L @Rm+, SR {:
2337 COUNT_INST(I_LDCSRM);
2338 if( sh4_x86.in_delay_slot ) {
2339 SLOTILLEGAL();
2340 } else {
2341 check_priv();
2342 load_reg( R_EAX, Rm );
2343 check_ralign32( R_EAX );
2344 MEM_READ_LONG( R_EAX, R_EAX );
2345 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2346 call_func1( sh4_write_sr, R_EAX );
2347 sh4_x86.fpuen_checked = FALSE;
2348 sh4_x86.tstate = TSTATE_NONE;
2349 return 2;
2350 }
2351 :}
2352 LDC.L @Rm+, VBR {:
2353 COUNT_INST(I_LDCM);
2354 check_priv();
2355 load_reg( R_EAX, Rm );
2356 check_ralign32( R_EAX );
2357 MEM_READ_LONG( R_EAX, R_EAX );
2358 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2359 store_spreg( R_EAX, R_VBR );
2360 sh4_x86.tstate = TSTATE_NONE;
2361 :}
2362 LDC.L @Rm+, SSR {:
2363 COUNT_INST(I_LDCM);
2364 check_priv();
2365 load_reg( R_EAX, Rm );
2366 check_ralign32( R_EAX );
2367 MEM_READ_LONG( R_EAX, R_EAX );
2368 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2369 store_spreg( R_EAX, R_SSR );
2370 sh4_x86.tstate = TSTATE_NONE;
2371 :}
2372 LDC.L @Rm+, SGR {:
2373 COUNT_INST(I_LDCM);
2374 check_priv();
2375 load_reg( R_EAX, Rm );
2376 check_ralign32( R_EAX );
2377 MEM_READ_LONG( R_EAX, R_EAX );
2378 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2379 store_spreg( R_EAX, R_SGR );
2380 sh4_x86.tstate = TSTATE_NONE;
2381 :}
2382 LDC.L @Rm+, SPC {:
2383 COUNT_INST(I_LDCM);
2384 check_priv();
2385 load_reg( R_EAX, Rm );
2386 check_ralign32( R_EAX );
2387 MEM_READ_LONG( R_EAX, R_EAX );
2388 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2389 store_spreg( R_EAX, R_SPC );
2390 sh4_x86.tstate = TSTATE_NONE;
2391 :}
2392 LDC.L @Rm+, DBR {:
2393 COUNT_INST(I_LDCM);
2394 check_priv();
2395 load_reg( R_EAX, Rm );
2396 check_ralign32( R_EAX );
2397 MEM_READ_LONG( R_EAX, R_EAX );
2398 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2399 store_spreg( R_EAX, R_DBR );
2400 sh4_x86.tstate = TSTATE_NONE;
2401 :}
2402 LDC.L @Rm+, Rn_BANK {:
2403 COUNT_INST(I_LDCM);
2404 check_priv();
2405 load_reg( R_EAX, Rm );
2406 check_ralign32( R_EAX );
2407 MEM_READ_LONG( R_EAX, R_EAX );
2408 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2409 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2410 sh4_x86.tstate = TSTATE_NONE;
2411 :}
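/* FPSCR writes (register and @Rm+ forms below) are routed through
 * sh4_write_fpscr() and, like the SR writes above, return early to end the
 * block, since changes to PR, SZ or FR affect how later FP instructions must be
 * translated.
 */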
2412 LDS Rm, FPSCR {:
2413 COUNT_INST(I_LDSFPSCR);
2414 check_fpuen();
2415 load_reg( R_EAX, Rm );
2416 call_func1( sh4_write_fpscr, R_EAX );
2417 sh4_x86.tstate = TSTATE_NONE;
2418 return 2;
2419 :}
2420 LDS.L @Rm+, FPSCR {:
2421 COUNT_INST(I_LDSFPSCRM);
2422 check_fpuen();
2423 load_reg( R_EAX, Rm );
2424 check_ralign32( R_EAX );
2425 MEM_READ_LONG( R_EAX, R_EAX );
2426 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2427 call_func1( sh4_write_fpscr, R_EAX );
2428 sh4_x86.tstate = TSTATE_NONE;
2429 return 2;
2430 :}
2431 LDS Rm, FPUL {:
2432 COUNT_INST(I_LDS);
2433 check_fpuen();
2434 load_reg( R_EAX, Rm );
2435 store_spreg( R_EAX, R_FPUL );
2436 :}
2437 LDS.L @Rm+, FPUL {:
2438 COUNT_INST(I_LDSM);
2439 check_fpuen();
2440 load_reg( R_EAX, Rm );
2441 check_ralign32( R_EAX );
2442 MEM_READ_LONG( R_EAX, R_EAX );
2443 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2444 store_spreg( R_EAX, R_FPUL );
2445 sh4_x86.tstate = TSTATE_NONE;
2446 :}
2447 LDS Rm, MACH {:
2448 COUNT_INST(I_LDS);
2449 load_reg( R_EAX, Rm );
2450 store_spreg( R_EAX, R_MACH );
2451 :}
2452 LDS.L @Rm+, MACH {:
2453 COUNT_INST(I_LDSM);
2454 load_reg( R_EAX, Rm );
2455 check_ralign32( R_EAX );
2456 MEM_READ_LONG( R_EAX, R_EAX );
2457 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2458 store_spreg( R_EAX, R_MACH );
2459 sh4_x86.tstate = TSTATE_NONE;
2460 :}
2461 LDS Rm, MACL {:
2462 COUNT_INST(I_LDS);
2463 load_reg( R_EAX, Rm );
2464 store_spreg( R_EAX, R_MACL );
2465 :}
2466 LDS.L @Rm+, MACL {:
2467 COUNT_INST(I_LDSM);
2468 load_reg( R_EAX, Rm );
2469 check_ralign32( R_EAX );
2470 MEM_READ_LONG( R_EAX, R_EAX );
2471 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2472 store_spreg( R_EAX, R_MACL );
2473 sh4_x86.tstate = TSTATE_NONE;
2474 :}
2475 LDS Rm, PR {:
2476 COUNT_INST(I_LDS);
2477 load_reg( R_EAX, Rm );
2478 store_spreg( R_EAX, R_PR );
2479 :}
2480 LDS.L @Rm+, PR {:
2481 COUNT_INST(I_LDSM);
2482 load_reg( R_EAX, Rm );
2483 check_ralign32( R_EAX );
2484 MEM_READ_LONG( R_EAX, R_EAX );
2485 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2486 store_spreg( R_EAX, R_PR );
2487 sh4_x86.tstate = TSTATE_NONE;
2488 :}
2489 LDTLB {:
2490 COUNT_INST(I_LDTLB);
2491 call_func0( MMU_ldtlb );
2492 sh4_x86.tstate = TSTATE_NONE;
2493 :}
2494 OCBI @Rn {:
2495 COUNT_INST(I_OCBI);
2496 :}
2497 OCBP @Rn {:
2498 COUNT_INST(I_OCBP);
2499 :}
2500 OCBWB @Rn {:
2501 COUNT_INST(I_OCBWB);
2502 :}
2503 PREF @Rn {:
2504 COUNT_INST(I_PREF);
2505 load_reg( R_EAX, Rn );
2506 MEM_PREFETCH( R_EAX );
2507 sh4_x86.tstate = TSTATE_NONE;
2508 :}
2509 SLEEP {:
2510 COUNT_INST(I_SLEEP);
2511 check_priv();
2512 call_func0( sh4_sleep );
2513 sh4_x86.tstate = TSTATE_NONE;
2514 sh4_x86.in_delay_slot = DELAY_NONE;
2515 return 2;
2516 :}
2517 STC SR, Rn {:
2518 COUNT_INST(I_STCSR);
2519 check_priv();
2520 call_func0(sh4_read_sr);
2521 store_reg( R_EAX, Rn );
2522 sh4_x86.tstate = TSTATE_NONE;
2523 :}
2524 STC GBR, Rn {:
2525 COUNT_INST(I_STC);
2526 load_spreg( R_EAX, R_GBR );
2527 store_reg( R_EAX, Rn );
2528 :}
2529 STC VBR, Rn {:
2530 COUNT_INST(I_STC);
2531 check_priv();
2532 load_spreg( R_EAX, R_VBR );
2533 store_reg( R_EAX, Rn );
2534 sh4_x86.tstate = TSTATE_NONE;
2535 :}
2536 STC SSR, Rn {:
2537 COUNT_INST(I_STC);
2538 check_priv();
2539 load_spreg( R_EAX, R_SSR );
2540 store_reg( R_EAX, Rn );
2541 sh4_x86.tstate = TSTATE_NONE;
2542 :}
2543 STC SPC, Rn {:
2544 COUNT_INST(I_STC);
2545 check_priv();
2546 load_spreg( R_EAX, R_SPC );
2547 store_reg( R_EAX, Rn );
2548 sh4_x86.tstate = TSTATE_NONE;
2549 :}
2550 STC SGR, Rn {:
2551 COUNT_INST(I_STC);
2552 check_priv();
2553 load_spreg( R_EAX, R_SGR );
2554 store_reg( R_EAX, Rn );
2555 sh4_x86.tstate = TSTATE_NONE;
2556 :}
2557 STC DBR, Rn {:
2558 COUNT_INST(I_STC);
2559 check_priv();
2560 load_spreg( R_EAX, R_DBR );
2561 store_reg( R_EAX, Rn );
2562 sh4_x86.tstate = TSTATE_NONE;
2563 :}
2564 STC Rm_BANK, Rn {:
2565 COUNT_INST(I_STC);
2566 check_priv();
2567 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2568 store_reg( R_EAX, Rn );
2569 sh4_x86.tstate = TSTATE_NONE;
2570 :}
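/* The STC.L @-Rn forms below use the mirror-image pattern: compute Rn-4, check
 * write alignment, store the value, and only then commit the decrement to Rn,
 * again presumably so that a faulting store leaves the register file unchanged.
 */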
2571 STC.L SR, @-Rn {:
2572 COUNT_INST(I_STCSRM);
2573 check_priv();
2574 call_func0( sh4_read_sr );
2575 MOV_r32_r32( R_EAX, R_EDX );
2576 load_reg( R_EAX, Rn );
2577 check_walign32( R_EAX );
2578 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
2579 MEM_WRITE_LONG( R_EAX, R_EDX );
2580 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2581 sh4_x86.tstate = TSTATE_NONE;
2582 :}
2583 STC.L VBR, @-Rn {:
2584 COUNT_INST(I_STCM);
2585 check_priv();
2586 load_reg( R_EAX, Rn );
2587 check_walign32( R_EAX );
2588 ADD_imm8s_r32( -4, R_EAX );
2589 load_spreg( R_EDX, R_VBR );
2590 MEM_WRITE_LONG( R_EAX, R_EDX );
2591 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2592 sh4_x86.tstate = TSTATE_NONE;
2593 :}
2594 STC.L SSR, @-Rn {:
2595 COUNT_INST(I_STCM);
2596 check_priv();
2597 load_reg( R_EAX, Rn );
2598 check_walign32( R_EAX );
2599 ADD_imm8s_r32( -4, R_EAX );
2600 load_spreg( R_EDX, R_SSR );
2601 MEM_WRITE_LONG( R_EAX, R_EDX );
2602 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2603 sh4_x86.tstate = TSTATE_NONE;
2604 :}
2605 STC.L SPC, @-Rn {:
2606 COUNT_INST(I_STCM);
2607 check_priv();
2608 load_reg( R_EAX, Rn );
2609 check_walign32( R_EAX );
2610 ADD_imm8s_r32( -4, R_EAX );
2611 load_spreg( R_EDX, R_SPC );
2612 MEM_WRITE_LONG( R_EAX, R_EDX );
2613 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2614 sh4_x86.tstate = TSTATE_NONE;
2615 :}
2616 STC.L SGR, @-Rn {:
2617 COUNT_INST(I_STCM);
2618 check_priv();
2619 load_reg( R_EAX, Rn );
2620 check_walign32( R_EAX );
2621 ADD_imm8s_r32( -4, R_EAX );
2622 load_spreg( R_EDX, R_SGR );
2623 MEM_WRITE_LONG( R_EAX, R_EDX );
2624 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2625 sh4_x86.tstate = TSTATE_NONE;
2626 :}
2627 STC.L DBR, @-Rn {:
2628 COUNT_INST(I_STCM);
2629 check_priv();
2630 load_reg( R_EAX, Rn );
2631 check_walign32( R_EAX );
2632 ADD_imm8s_r32( -4, R_EAX );
2633 load_spreg( R_EDX, R_DBR );
2634 MEM_WRITE_LONG( R_EAX, R_EDX );
2635 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2636 sh4_x86.tstate = TSTATE_NONE;
2637 :}
2638 STC.L Rm_BANK, @-Rn {:
2639 COUNT_INST(I_STCM);
2640 check_priv();
2641 load_reg( R_EAX, Rn );
2642 check_walign32( R_EAX );
2643 ADD_imm8s_r32( -4, R_EAX );
2644 load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
2645 MEM_WRITE_LONG( R_EAX, R_EDX );
2646 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2647 sh4_x86.tstate = TSTATE_NONE;
2648 :}
2649 STC.L GBR, @-Rn {:
2650 COUNT_INST(I_STCM);
2651 load_reg( R_EAX, Rn );
2652 check_walign32( R_EAX );
2653 ADD_imm8s_r32( -4, R_EAX );
2654 load_spreg( R_EDX, R_GBR );
2655 MEM_WRITE_LONG( R_EAX, R_EDX );
2656 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2657 sh4_x86.tstate = TSTATE_NONE;
2658 :}
2659 STS FPSCR, Rn {:
2660 COUNT_INST(I_STSFPSCR);
2661 check_fpuen();
2662 load_spreg( R_EAX, R_FPSCR );
2663 store_reg( R_EAX, Rn );
2664 :}
2665 STS.L FPSCR, @-Rn {:
2666 COUNT_INST(I_STSFPSCRM);
2667 check_fpuen();
2668 load_reg( R_EAX, Rn );
2669 check_walign32( R_EAX );
2670 ADD_imm8s_r32( -4, R_EAX );
2671 load_spreg( R_EDX, R_FPSCR );
2672 MEM_WRITE_LONG( R_EAX, R_EDX );
2673 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2674 sh4_x86.tstate = TSTATE_NONE;
2675 :}
2676 STS FPUL, Rn {:
2677 COUNT_INST(I_STS);
2678 check_fpuen();
2679 load_spreg( R_EAX, R_FPUL );
2680 store_reg( R_EAX, Rn );
2681 :}
2682 STS.L FPUL, @-Rn {:
2683 COUNT_INST(I_STSM);
2684 check_fpuen();
2685 load_reg( R_EAX, Rn );
2686 check_walign32( R_EAX );
2687 ADD_imm8s_r32( -4, R_EAX );
2688 load_spreg( R_EDX, R_FPUL );
2689 MEM_WRITE_LONG( R_EAX, R_EDX );
2690 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2691 sh4_x86.tstate = TSTATE_NONE;
2692 :}
2693 STS MACH, Rn {:
2694 COUNT_INST(I_STS);
2695 load_spreg( R_EAX, R_MACH );
2696 store_reg( R_EAX, Rn );
2697 :}
2698 STS.L MACH, @-Rn {:
2699 COUNT_INST(I_STSM);
2700 load_reg( R_EAX, Rn );
2701 check_walign32( R_EAX );
2702 ADD_imm8s_r32( -4, R_EAX );
2703 load_spreg( R_EDX, R_MACH );
2704 MEM_WRITE_LONG( R_EAX, R_EDX );
2705 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2706 sh4_x86.tstate = TSTATE_NONE;
2707 :}
2708 STS MACL, Rn {:
2709 COUNT_INST(I_STS);
2710 load_spreg( R_EAX, R_MACL );
2711 store_reg( R_EAX, Rn );
2712 :}
2713 STS.L MACL, @-Rn {:
2714 COUNT_INST(I_STSM);
2715 load_reg( R_EAX, Rn );
2716 check_walign32( R_EAX );
2717 ADD_imm8s_r32( -4, R_EAX );
2718 load_spreg( R_EDX, R_MACL );
2719 MEM_WRITE_LONG( R_EAX, R_EDX );
2720 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2721 sh4_x86.tstate = TSTATE_NONE;
2722 :}
2723 STS PR, Rn {:
2724 COUNT_INST(I_STS);
2725 load_spreg( R_EAX, R_PR );
2726 store_reg( R_EAX, Rn );
2727 :}
2728 STS.L PR, @-Rn {:
2729 COUNT_INST(I_STSM);
2730 load_reg( R_EAX, Rn );
2731 check_walign32( R_EAX );
2732 ADD_imm8s_r32( -4, R_EAX );
2733 load_spreg( R_EDX, R_PR );
2734 MEM_WRITE_LONG( R_EAX, R_EDX );
2735 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2736 sh4_x86.tstate = TSTATE_NONE;
2737 :}
2739 NOP {:
2740 COUNT_INST(I_NOP);
2741 /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
2742 :}
2743 %%
2744 sh4_x86.in_delay_slot = DELAY_NONE;
2745 return 0;
2746 }