filename | src/sh4/sh4x86.in |
changeset | 975:007bf7eb944f |
prev | 974:16b079ed11bb |
next | 991:60c7fab9c880 |
author | nkeynes |
date | Wed Feb 25 09:00:05 2009 +0000 (15 years ago) |
permissions | -rw-r--r-- |
last change | Argh. Apparently we still do really need _BSD_SOURCE and _GNU_SOURCE I think that's everything now... |
view | annotate | diff | log | raw |
1 /**
2 * $Id$
3 *
4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
7 *
8 * Copyright (c) 2007 Nathan Keynes.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 */
21 #include <assert.h>
22 #include <math.h>
24 #ifndef NDEBUG
25 #define DEBUG_JUMPS 1
26 #endif
28 #include "lxdream.h"
29 #include "sh4/xltcache.h"
30 #include "sh4/sh4core.h"
31 #include "sh4/sh4trans.h"
32 #include "sh4/sh4stat.h"
33 #include "sh4/sh4mmio.h"
34 #include "sh4/x86op.h"
35 #include "sh4/mmu.h"
36 #include "clock.h"
/* Initial allocation (in bytes) for the per-block backpatch list. */
38 #define DEFAULT_BACKPATCH_SIZE 4096
/* One pending exception fixup recorded while translating a block; the list
 * of these is resolved when the block is finalized. */
40 struct backpatch_record {
41 uint32_t fixup_offset; /* byte offset of the fixup within the block's emitted code */
42 uint32_t fixup_icount; /* SH4 instruction count from block start (see sh4_x86_add_backpatch) */
43 int32_t exc_code; /* SH4 exception code associated with this fixup */
44 };
/* Values for sh4_x86.in_delay_slot: DELAY_NONE = not in a delay slot;
 * the PC/PC_PR variants distinguish the kind of pending branch. */
46 #define DELAY_NONE 0
47 #define DELAY_PC 1
48 #define DELAY_PC_PR 2
50 /**
51 * Struct to manage internal translation state. This state is not saved -
52 * it is only valid between calls to sh4_translate_begin_block() and
53 * sh4_translate_end_block()
54 */
55 struct sh4_x86_state {
56 int in_delay_slot; /* one of the DELAY_* values above */
57 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
58 gboolean branch_taken; /* true if we branched unconditionally */
59 gboolean double_prec; /* true if FPU is in double-precision mode */
60 gboolean double_size; /* true if FPU is in double-size mode */
61 gboolean sse3_enabled; /* true if host supports SSE3 instructions */
62 uint32_t block_start_pc; /* SH4 PC of the first instruction in the block */
63 uint32_t stack_posn; /* Trace stack height for alignment purposes */
64 int tstate; /* x86 condition code currently mirroring sh4r.t, or TSTATE_NONE */
66 /* mode flags */
67 gboolean tlb_on; /* True if tlb translation is active */
69 /* Allocated memory for the (block-wide) back-patch list */
70 struct backpatch_record *backpatch_list;
71 uint32_t backpatch_posn; /* number of records currently in use */
72 uint32_t backpatch_size; /* capacity of backpatch_list, in records */
73 };
/* TSTATE_* values are x86 condition-code numbers, chosen so that
 * OP(0x70 + tstate) emits the Jcc instruction testing that condition
 * (see JT_rel8/JF_rel8 below).  TSTATE_NONE means the T bit lives only
 * in sh4r.t and must be reloaded before a conditional branch. */
75 #define TSTATE_NONE -1
76 #define TSTATE_O 0
77 #define TSTATE_C 2
78 #define TSTATE_E 4
79 #define TSTATE_NE 5
80 #define TSTATE_G 0xF
81 #define TSTATE_GE 0xD
82 #define TSTATE_A 7
83 #define TSTATE_AE 3
/* Per-instruction profiling hook: when ENABLE_SH4STATS is defined, emit a
 * call to sh4_stats_add(id).  Clobbers EAX and invalidates the cached
 * tstate; compiles to nothing otherwise. */
85 #ifdef ENABLE_SH4STATS
86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
87 #else
88 #define COUNT_INST(id)
89 #endif
91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
/* If no condition code is cached (TSTATE_NONE), reload T from sh4r and
 * cache TSTATE_E; then emit Jcc rel8 (opcode 0x70 + condition number).
 * OP(-1) leaves a placeholder displacement for MARK_JMP8 to patch. */
92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
93 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
94 OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
/* Same as JT_rel8 but with the condition inverted: flipping the low bit
 * of an x86 condition code selects the negated condition. */
97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
98 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
99 OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
/* The (single, non-reentrant) translator state instance. */
101 static struct sh4_x86_state sh4_x86;
/* Integer saturation bounds and FPU control words.  Not referenced in this
 * part of the file — presumably operands for emitted FP/saturation code
 * (e.g. FTRC); confirm against the FP instruction templates. */
103 static uint32_t max_int = 0x7FFFFFFF;
104 static uint32_t min_int = 0x80000000;
105 static uint32_t save_fcw; /* save value for fpu control word */
106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
108 gboolean is_sse3_supported()
109 {
110 uint32_t features;
112 __asm__ __volatile__(
113 "mov $0x01, %%eax\n\t"
114 "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
115 return (features & 1) ? TRUE : FALSE;
116 }
118 void sh4_translate_init(void)
119 {
120 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
121 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
122 sh4_x86.sse3_enabled = is_sse3_supported();
123 }
/**
 * Record a pending exception fixup for the current block, growing the
 * backpatch list (doubling) as needed.
 *
 * @param fixup_addr  host-code address of the site to patch
 * @param fixup_pc    SH4 PC of the instruction the fixup belongs to
 * @param exc_code    SH4 exception code to associate with the site
 */
126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
127 {
128 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
129 sh4_x86.backpatch_size <<= 1;
130 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
131 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
132 assert( sh4_x86.backpatch_list != NULL );
133 }
134 if( sh4_x86.in_delay_slot ) {
135 fixup_pc -= 2; /* attribute delay-slot fixups to the branch at pc-2 */
136 }
137 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
138 ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
139 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
140 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
141 sh4_x86.backpatch_posn++;
142 }
144 /**
145 * Emit an instruction to load an SH4 reg into a real register
146 */
static inline void load_reg( int x86reg, int sh4reg )
{
/* mov [bp+n], reg */
OP(0x8B); /* MOV r32, r/m32 */
OP(0x45 + (x86reg<<3)); /* ModRM: mod=01 [ebp+disp8], reg field = x86reg */
OP(REG_OFFSET(r[sh4reg])); /* disp8: offset of r[sh4reg] within sh4r */
}
/** Emit code to sign-extend the low 16 bits of an SH4 reg into an x86 reg. */
static inline void load_reg16s( int x86reg, int sh4reg )
{
OP(0x0F); /* two-byte opcode prefix */
OP(0xBF); /* MOVSX r32, r/m16 */
MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
/** Emit code to zero-extend the low 16 bits of an SH4 reg into an x86 reg. */
static inline void load_reg16u( int x86reg, int sh4reg )
{
OP(0x0F); /* two-byte opcode prefix */
OP(0xB7); /* MOVZX r32, r/m16 */
MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
/* Load/store an arbitrary sh4r field (special register) by byte offset. */
170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
172 /**
173 * Emit an instruction to load an immediate value into a register
174 */
static inline void load_imm32( int x86reg, uint32_t value ) {
/* mov #value, reg */
OP(0xB8 + x86reg); /* MOV r32, imm32 (opcode encodes the register) */
OP32(value);
}
182 /**
183 * Load an immediate 64-bit quantity (note: x86-64 only)
184 */
static inline void load_imm64( int x86reg, uint64_t value ) {
/* mov #value, reg */
REXW(); /* REX.W prefix selects the 64-bit operand size */
OP(0xB8 + x86reg); /* MOV r64, imm64 (opcode encodes the register) */
OP64(value);
}
192 /**
193 * Emit an instruction to store an SH4 reg (RN)
194 */
195 void static inline store_reg( int x86reg, int sh4reg ) {
196 /* mov reg, [bp+n] */
197 OP(0x89);
198 OP(0x45 + (x86reg<<3));
199 OP(REG_OFFSET(r[sh4reg]));
200 }
202 /**
203 * Load an FR register (single-precision floating point) into an integer x86
204 * register (eg for register-to-register moves)
205 */
/* NOTE(review): the (frm)^1 index swaps adjacent singles within the banked
 * fr[][] array — presumably to match the host's in-memory ordering of the
 * FR pairs; confirm against the sh4r fr[] layout. */
206 #define load_fr(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
207 #define load_xf(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
209 /**
210 * Load the low half of a DR register (DR or XD) into an integer x86 register
211 */
/* frm&1 selects the bank (DR vs XD); frm|0x01 / frm&0x0E pick the low/high
 * word of the pair. */
212 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
213 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
215 /**
216 * Store an FR register (single-precision floating point) from an integer x86+
217 * register (eg for register-to-register moves)
218 */
219 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
220 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
222 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
223 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
/* x87 stack transfers to/from sh4r: push_* emits an FLD of the given
 * FR/XF/DR/XD register (or FPUL), pop_* emits the matching FSTP. */
226 #define push_fpul() FLDF_sh4r(R_FPUL)
227 #define pop_fpul() FSTPF_sh4r(R_FPUL)
228 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
229 #define pop_fr(frm) FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
230 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
231 #define pop_xf(frm) FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
232 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
233 #define pop_dr(frm) FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
234 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
235 #define pop_xdr(frm) FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
239 /* Exception checks - Note that all exception checks will clobber EAX */
/* Translation-time privilege check: if the block was translated in user
 * mode, emit an illegal-instruction (or slot-illegal, in a delay slot)
 * exit and abort translation of the instruction ("return 2" returns from
 * the enclosing sh4_translate_instruction). */
241 #define check_priv( ) \
242 if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
243 if( sh4_x86.in_delay_slot ) { \
244 exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
245 } else { \
246 exit_block_exc(EXC_ILLEGAL, pc); \
247 } \
248 sh4_x86.branch_taken = TRUE; \
249 sh4_x86.in_delay_slot = DELAY_NONE; \
250 return 2; \
251 }
/* Emit a run-time FPU-enabled check (SR.FD) once per block; subsequent FP
 * instructions in the same block skip it (fpuen_checked). */
253 #define check_fpuen( ) \
254 if( !sh4_x86.fpuen_checked ) {\
255 sh4_x86.fpuen_checked = TRUE;\
256 load_spreg( R_EAX, R_SR );\
257 AND_imm32_r32( SR_FD, R_EAX );\
258 if( sh4_x86.in_delay_slot ) {\
259 JNE_exc(EXC_SLOT_FPU_DISABLED);\
260 } else {\
261 JNE_exc(EXC_FPU_DISABLED);\
262 }\
263 sh4_x86.tstate = TSTATE_NONE; \
264 }
/* Run-time alignment checks: test the low address bits and raise a data
 * address error on misalignment.  (Note: the walign variants end with a
 * trailing semicolon while the ralign ones don't — harmless but
 * inconsistent at use sites.) */
266 #define check_ralign16( x86reg ) \
267 TEST_imm32_r32( 0x00000001, x86reg ); \
268 JNE_exc(EXC_DATA_ADDR_READ)
270 #define check_walign16( x86reg ) \
271 TEST_imm32_r32( 0x00000001, x86reg ); \
272 JNE_exc(EXC_DATA_ADDR_WRITE);
274 #define check_ralign32( x86reg ) \
275 TEST_imm32_r32( 0x00000003, x86reg ); \
276 JNE_exc(EXC_DATA_ADDR_READ)
278 #define check_walign32( x86reg ) \
279 TEST_imm32_r32( 0x00000003, x86reg ); \
280 JNE_exc(EXC_DATA_ADDR_WRITE);
282 #define check_ralign64( x86reg ) \
283 TEST_imm32_r32( 0x00000007, x86reg ); \
284 JNE_exc(EXC_DATA_ADDR_READ)
286 #define check_walign64( x86reg ) \
287 TEST_imm32_r32( 0x00000007, x86reg ); \
288 JNE_exc(EXC_DATA_ADDR_WRITE);
/* Undefined instruction — no code emitted here (handled elsewhere). */
290 #define UNDEF(ir)
/* Offset of a memory-region callback within struct mem_region_fn, for
 * indirect dispatch through the region table loaded by decode_address. */
291 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
/* Move a read result out of EAX into the caller's chosen register. */
292 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
293 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
294 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
295 */
297 #ifdef HAVE_FRAME_ADDRESS
/* With frame-address support, use the _exc call variants (which record pc
 * for exception recovery) whenever an access can fault: i.e. unless we're
 * in privileged mode with the TLB off. */
298 #define _CALL_READ(addr_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
299 call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg); } else { \
300 call_func1_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, pc); }
301 #define _CALL_WRITE(addr_reg, val_reg, fn) if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { \
302 call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg); } else { \
303 call_func2_r32disp8_exc(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg, pc); }
304 #else
305 #define _CALL_READ(addr_reg, fn) call_func1_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg)
306 #define _CALL_WRITE(addr_reg, val_reg, fn) call_func2_r32disp8(R_ECX, MEM_REGION_PTR(fn), addr_reg, val_reg)
307 #endif
/* Top-level memory access emitters: decode the address (region lookup into
 * ECX), dispatch through the region's read/write callback, and for reads
 * move the result into value_reg. */
309 #define MEM_READ_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte); MEM_RESULT(value_reg)
310 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_byte_for_write); MEM_RESULT(value_reg)
311 #define MEM_READ_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_word); MEM_RESULT(value_reg)
312 #define MEM_READ_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, read_long); MEM_RESULT(value_reg)
313 #define MEM_WRITE_BYTE( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_byte)
314 #define MEM_WRITE_WORD( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_word)
315 #define MEM_WRITE_LONG( addr_reg, value_reg ) decode_address(addr_reg); _CALL_WRITE(addr_reg, value_reg, write_long)
316 #define MEM_PREFETCH( addr_reg ) decode_address(addr_reg); _CALL_READ(addr_reg, prefetch)
/* Abort translation of an instruction illegally placed in a delay slot. */
318 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
320 /****** Import appropriate calling conventions ******/
321 #if SIZEOF_VOID_P == 8
322 #include "sh4/ia64abi.h"
323 #else /* 32-bit system */
324 #include "sh4/ia32abi.h"
325 #endif
327 void sh4_translate_begin_block( sh4addr_t pc )
328 {
329 enter_block();
330 sh4_x86.in_delay_slot = FALSE;
331 sh4_x86.fpuen_checked = FALSE;
332 sh4_x86.branch_taken = FALSE;
333 sh4_x86.backpatch_posn = 0;
334 sh4_x86.block_start_pc = pc;
335 sh4_x86.tlb_on = IS_TLB_ENABLED();
336 sh4_x86.tstate = TSTATE_NONE;
337 sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
338 sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
339 }
342 uint32_t sh4_translate_end_block_size()
343 {
344 if( sh4_x86.backpatch_posn <= 3 ) {
345 return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
346 } else {
347 return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
348 }
349 }
352 /**
353 * Embed a breakpoint into the generated code
354 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
/* Emit: sh4_translate_breakpoint_hit(pc) */
load_imm32( R_EAX, pc );
call_func1( sh4_translate_breakpoint_hit, R_EAX );
/* The call clobbers flags, so any cached condition code is invalid. */
sh4_x86.tstate = TSTATE_NONE;
}
/* An address is untranslatable if it isn't backed by the icache. */
363 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
365 /**
366 * Embed a call to sh4_execute_instruction for situations that we
367 * can't translate (just page-crossing delay slots at the moment).
368 * Caller is responsible for setting new_pc before calling this function.
369 *
370 * Performs:
371 * Set PC = endpc
372 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
373 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
374 * Call sh4_execute_instruction
375 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
376 */
void exit_block_emu( sh4vma_t endpc )
{
/* (The trailing "// 5" / "// 6" comments appear to be emitted-code byte
 * counts — see the sized sequences in the MAC.W template.) */
load_imm32( R_ECX, endpc - sh4_x86.block_start_pc ); // 5
ADD_r32_sh4r( R_ECX, R_PC );
load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
call_func0( sh4_execute_instruction );
/* Chain to the translated code for wherever the interpreter left PC */
load_spreg( R_EAX, R_PC );
if( sh4_x86.tlb_on ) {
call_func1(xlat_get_code_by_vma,R_EAX);
} else {
call_func1(xlat_get_code,R_EAX);
}
exit_block();
}
397 /**
398 * Translate a single instruction. Delayed branches are handled specially
399 * by translating both branch and delayed instruction as a single unit (as
400 *
401 * The instruction MUST be in the icache (assert check)
402 *
403 * @return true if the instruction marks the end of a basic block
404 * (eg a branch or
405 */
406 uint32_t sh4_translate_instruction( sh4vma_t pc )
407 {
408 uint32_t ir;
409 /* Read instruction from icache */
410 assert( IS_IN_ICACHE(pc) );
411 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
413 if( !sh4_x86.in_delay_slot ) {
414 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
415 }
416 %%
417 /* ALU operations */
418 ADD Rm, Rn {:
419 COUNT_INST(I_ADD);
420 load_reg( R_EAX, Rm );
421 load_reg( R_ECX, Rn );
422 ADD_r32_r32( R_EAX, R_ECX );
423 store_reg( R_ECX, Rn );
424 sh4_x86.tstate = TSTATE_NONE;
425 :}
426 ADD #imm, Rn {:
427 COUNT_INST(I_ADDI);
428 ADD_imm8s_sh4r( imm, REG_OFFSET(r[Rn]) );
429 sh4_x86.tstate = TSTATE_NONE;
430 :}
431 ADDC Rm, Rn {:
432 COUNT_INST(I_ADDC);
433 if( sh4_x86.tstate != TSTATE_C ) {
434 LDC_t();
435 }
436 load_reg( R_EAX, Rm );
437 load_reg( R_ECX, Rn );
438 ADC_r32_r32( R_EAX, R_ECX );
439 store_reg( R_ECX, Rn );
440 SETC_t();
441 sh4_x86.tstate = TSTATE_C;
442 :}
443 ADDV Rm, Rn {:
444 COUNT_INST(I_ADDV);
445 load_reg( R_EAX, Rm );
446 load_reg( R_ECX, Rn );
447 ADD_r32_r32( R_EAX, R_ECX );
448 store_reg( R_ECX, Rn );
449 SETO_t();
450 sh4_x86.tstate = TSTATE_O;
451 :}
452 AND Rm, Rn {:
453 COUNT_INST(I_AND);
454 load_reg( R_EAX, Rm );
455 load_reg( R_ECX, Rn );
456 AND_r32_r32( R_EAX, R_ECX );
457 store_reg( R_ECX, Rn );
458 sh4_x86.tstate = TSTATE_NONE;
459 :}
460 AND #imm, R0 {:
461 COUNT_INST(I_ANDI);
462 load_reg( R_EAX, 0 );
463 AND_imm32_r32(imm, R_EAX);
464 store_reg( R_EAX, 0 );
465 sh4_x86.tstate = TSTATE_NONE;
466 :}
467 AND.B #imm, @(R0, GBR) {:
468 COUNT_INST(I_ANDB);
469 load_reg( R_EAX, 0 );
470 ADD_sh4r_r32( R_GBR, R_EAX );
471 MOV_r32_esp8(R_EAX, 0);
472 MEM_READ_BYTE_FOR_WRITE( R_EAX, R_EDX );
473 MOV_esp8_r32(0, R_EAX);
474 AND_imm32_r32(imm, R_EDX );
475 MEM_WRITE_BYTE( R_EAX, R_EDX );
476 sh4_x86.tstate = TSTATE_NONE;
477 :}
478 CMP/EQ Rm, Rn {:
479 COUNT_INST(I_CMPEQ);
480 load_reg( R_EAX, Rm );
481 load_reg( R_ECX, Rn );
482 CMP_r32_r32( R_EAX, R_ECX );
483 SETE_t();
484 sh4_x86.tstate = TSTATE_E;
485 :}
486 CMP/EQ #imm, R0 {:
487 COUNT_INST(I_CMPEQI);
488 load_reg( R_EAX, 0 );
489 CMP_imm8s_r32(imm, R_EAX);
490 SETE_t();
491 sh4_x86.tstate = TSTATE_E;
492 :}
493 CMP/GE Rm, Rn {:
494 COUNT_INST(I_CMPGE);
495 load_reg( R_EAX, Rm );
496 load_reg( R_ECX, Rn );
497 CMP_r32_r32( R_EAX, R_ECX );
498 SETGE_t();
499 sh4_x86.tstate = TSTATE_GE;
500 :}
501 CMP/GT Rm, Rn {:
502 COUNT_INST(I_CMPGT);
503 load_reg( R_EAX, Rm );
504 load_reg( R_ECX, Rn );
505 CMP_r32_r32( R_EAX, R_ECX );
506 SETG_t();
507 sh4_x86.tstate = TSTATE_G;
508 :}
509 CMP/HI Rm, Rn {:
510 COUNT_INST(I_CMPHI);
511 load_reg( R_EAX, Rm );
512 load_reg( R_ECX, Rn );
513 CMP_r32_r32( R_EAX, R_ECX );
514 SETA_t();
515 sh4_x86.tstate = TSTATE_A;
516 :}
517 CMP/HS Rm, Rn {:
518 COUNT_INST(I_CMPHS);
519 load_reg( R_EAX, Rm );
520 load_reg( R_ECX, Rn );
521 CMP_r32_r32( R_EAX, R_ECX );
522 SETAE_t();
523 sh4_x86.tstate = TSTATE_AE;
524 :}
525 CMP/PL Rn {:
526 COUNT_INST(I_CMPPL);
527 load_reg( R_EAX, Rn );
528 CMP_imm8s_r32( 0, R_EAX );
529 SETG_t();
530 sh4_x86.tstate = TSTATE_G;
531 :}
532 CMP/PZ Rn {:
533 COUNT_INST(I_CMPPZ);
534 load_reg( R_EAX, Rn );
535 CMP_imm8s_r32( 0, R_EAX );
536 SETGE_t();
537 sh4_x86.tstate = TSTATE_GE;
538 :}
539 CMP/STR Rm, Rn {:
540 COUNT_INST(I_CMPSTR);
541 load_reg( R_EAX, Rm );
542 load_reg( R_ECX, Rn );
543 XOR_r32_r32( R_ECX, R_EAX );
544 TEST_r8_r8( R_AL, R_AL );
545 JE_rel8(target1);
546 TEST_r8_r8( R_AH, R_AH );
547 JE_rel8(target2);
548 SHR_imm8_r32( 16, R_EAX );
549 TEST_r8_r8( R_AL, R_AL );
550 JE_rel8(target3);
551 TEST_r8_r8( R_AH, R_AH );
552 JMP_TARGET(target1);
553 JMP_TARGET(target2);
554 JMP_TARGET(target3);
555 SETE_t();
556 sh4_x86.tstate = TSTATE_E;
557 :}
558 DIV0S Rm, Rn {:
559 COUNT_INST(I_DIV0S);
560 load_reg( R_EAX, Rm );
561 load_reg( R_ECX, Rn );
562 SHR_imm8_r32( 31, R_EAX );
563 SHR_imm8_r32( 31, R_ECX );
564 store_spreg( R_EAX, R_M );
565 store_spreg( R_ECX, R_Q );
566 CMP_r32_r32( R_EAX, R_ECX );
567 SETNE_t();
568 sh4_x86.tstate = TSTATE_NE;
569 :}
570 DIV0U {:
571 COUNT_INST(I_DIV0U);
572 XOR_r32_r32( R_EAX, R_EAX );
573 store_spreg( R_EAX, R_Q );
574 store_spreg( R_EAX, R_M );
575 store_spreg( R_EAX, R_T );
576 sh4_x86.tstate = TSTATE_C; // works for DIV1
577 :}
578 DIV1 Rm, Rn {:
579 COUNT_INST(I_DIV1);
580 load_spreg( R_ECX, R_M );
581 load_reg( R_EAX, Rn );
582 if( sh4_x86.tstate != TSTATE_C ) {
583 LDC_t();
584 }
585 RCL1_r32( R_EAX );
586 SETC_r8( R_DL ); // Q'
587 CMP_sh4r_r32( R_Q, R_ECX );
588 JE_rel8(mqequal);
589 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
590 JMP_rel8(end);
591 JMP_TARGET(mqequal);
592 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
593 JMP_TARGET(end);
594 store_reg( R_EAX, Rn ); // Done with Rn now
595 SETC_r8(R_AL); // tmp1
596 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
597 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
598 store_spreg( R_ECX, R_Q );
599 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
600 MOVZX_r8_r32( R_AL, R_EAX );
601 store_spreg( R_EAX, R_T );
602 sh4_x86.tstate = TSTATE_NONE;
603 :}
604 DMULS.L Rm, Rn {:
605 COUNT_INST(I_DMULS);
606 load_reg( R_EAX, Rm );
607 load_reg( R_ECX, Rn );
608 IMUL_r32(R_ECX);
609 store_spreg( R_EDX, R_MACH );
610 store_spreg( R_EAX, R_MACL );
611 sh4_x86.tstate = TSTATE_NONE;
612 :}
613 DMULU.L Rm, Rn {:
614 COUNT_INST(I_DMULU);
615 load_reg( R_EAX, Rm );
616 load_reg( R_ECX, Rn );
617 MUL_r32(R_ECX);
618 store_spreg( R_EDX, R_MACH );
619 store_spreg( R_EAX, R_MACL );
620 sh4_x86.tstate = TSTATE_NONE;
621 :}
622 DT Rn {:
623 COUNT_INST(I_DT);
624 load_reg( R_EAX, Rn );
625 ADD_imm8s_r32( -1, R_EAX );
626 store_reg( R_EAX, Rn );
627 SETE_t();
628 sh4_x86.tstate = TSTATE_E;
629 :}
630 EXTS.B Rm, Rn {:
631 COUNT_INST(I_EXTSB);
632 load_reg( R_EAX, Rm );
633 MOVSX_r8_r32( R_EAX, R_EAX );
634 store_reg( R_EAX, Rn );
635 :}
636 EXTS.W Rm, Rn {:
637 COUNT_INST(I_EXTSW);
638 load_reg( R_EAX, Rm );
639 MOVSX_r16_r32( R_EAX, R_EAX );
640 store_reg( R_EAX, Rn );
641 :}
642 EXTU.B Rm, Rn {:
643 COUNT_INST(I_EXTUB);
644 load_reg( R_EAX, Rm );
645 MOVZX_r8_r32( R_EAX, R_EAX );
646 store_reg( R_EAX, Rn );
647 :}
648 EXTU.W Rm, Rn {:
649 COUNT_INST(I_EXTUW);
650 load_reg( R_EAX, Rm );
651 MOVZX_r16_r32( R_EAX, R_EAX );
652 store_reg( R_EAX, Rn );
653 :}
654 MAC.L @Rm+, @Rn+ {:
655 COUNT_INST(I_MACL);
656 if( Rm == Rn ) {
657 load_reg( R_EAX, Rm );
658 check_ralign32( R_EAX );
659 MEM_READ_LONG( R_EAX, R_EAX );
660 MOV_r32_esp8(R_EAX, 0);
661 load_reg( R_EAX, Rm );
662 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
663 MEM_READ_LONG( R_EAX, R_EAX );
664 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
665 } else {
666 load_reg( R_EAX, Rm );
667 check_ralign32( R_EAX );
668 MEM_READ_LONG( R_EAX, R_EAX );
669 MOV_r32_esp8( R_EAX, 0 );
670 load_reg( R_EAX, Rn );
671 check_ralign32( R_EAX );
672 MEM_READ_LONG( R_EAX, R_EAX );
673 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
674 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
675 }
677 IMUL_esp8( 0 );
678 ADD_r32_sh4r( R_EAX, R_MACL );
679 ADC_r32_sh4r( R_EDX, R_MACH );
681 load_spreg( R_ECX, R_S );
682 TEST_r32_r32(R_ECX, R_ECX);
683 JE_rel8( nosat );
684 call_func0( signsat48 );
685 JMP_TARGET( nosat );
686 sh4_x86.tstate = TSTATE_NONE;
687 :}
688 MAC.W @Rm+, @Rn+ {:
689 COUNT_INST(I_MACW);
690 if( Rm == Rn ) {
691 load_reg( R_EAX, Rm );
692 check_ralign16( R_EAX );
693 MEM_READ_WORD( R_EAX, R_EAX );
694 MOV_r32_esp8( R_EAX, 0 );
695 load_reg( R_EAX, Rm );
696 LEA_r32disp8_r32( R_EAX, 2, R_EAX );
697 MEM_READ_WORD( R_EAX, R_EAX );
698 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
699 // Note translate twice in case of page boundaries. Maybe worth
700 // adding a page-boundary check to skip the second translation
701 } else {
702 load_reg( R_EAX, Rm );
703 check_ralign16( R_EAX );
704 MEM_READ_WORD( R_EAX, R_EAX );
705 MOV_r32_esp8( R_EAX, 0 );
706 load_reg( R_EAX, Rn );
707 check_ralign16( R_EAX );
708 MEM_READ_WORD( R_EAX, R_EAX );
709 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
710 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
711 }
712 IMUL_esp8( 0 );
713 load_spreg( R_ECX, R_S );
714 TEST_r32_r32( R_ECX, R_ECX );
715 JE_rel8( nosat );
717 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
718 JNO_rel8( end ); // 2
719 load_imm32( R_EDX, 1 ); // 5
720 store_spreg( R_EDX, R_MACH ); // 6
721 JS_rel8( positive ); // 2
722 load_imm32( R_EAX, 0x80000000 );// 5
723 store_spreg( R_EAX, R_MACL ); // 6
724 JMP_rel8(end2); // 2
726 JMP_TARGET(positive);
727 load_imm32( R_EAX, 0x7FFFFFFF );// 5
728 store_spreg( R_EAX, R_MACL ); // 6
729 JMP_rel8(end3); // 2
731 JMP_TARGET(nosat);
732 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
733 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
734 JMP_TARGET(end);
735 JMP_TARGET(end2);
736 JMP_TARGET(end3);
737 sh4_x86.tstate = TSTATE_NONE;
738 :}
739 MOVT Rn {:
740 COUNT_INST(I_MOVT);
741 load_spreg( R_EAX, R_T );
742 store_reg( R_EAX, Rn );
743 :}
744 MUL.L Rm, Rn {:
745 COUNT_INST(I_MULL);
746 load_reg( R_EAX, Rm );
747 load_reg( R_ECX, Rn );
748 MUL_r32( R_ECX );
749 store_spreg( R_EAX, R_MACL );
750 sh4_x86.tstate = TSTATE_NONE;
751 :}
752 MULS.W Rm, Rn {:
753 COUNT_INST(I_MULSW);
754 load_reg16s( R_EAX, Rm );
755 load_reg16s( R_ECX, Rn );
756 MUL_r32( R_ECX );
757 store_spreg( R_EAX, R_MACL );
758 sh4_x86.tstate = TSTATE_NONE;
759 :}
760 MULU.W Rm, Rn {:
761 COUNT_INST(I_MULUW);
762 load_reg16u( R_EAX, Rm );
763 load_reg16u( R_ECX, Rn );
764 MUL_r32( R_ECX );
765 store_spreg( R_EAX, R_MACL );
766 sh4_x86.tstate = TSTATE_NONE;
767 :}
768 NEG Rm, Rn {:
769 COUNT_INST(I_NEG);
770 load_reg( R_EAX, Rm );
771 NEG_r32( R_EAX );
772 store_reg( R_EAX, Rn );
773 sh4_x86.tstate = TSTATE_NONE;
774 :}
775 NEGC Rm, Rn {:
776 COUNT_INST(I_NEGC);
777 load_reg( R_EAX, Rm );
778 XOR_r32_r32( R_ECX, R_ECX );
779 LDC_t();
780 SBB_r32_r32( R_EAX, R_ECX );
781 store_reg( R_ECX, Rn );
782 SETC_t();
783 sh4_x86.tstate = TSTATE_C;
784 :}
785 NOT Rm, Rn {:
786 COUNT_INST(I_NOT);
787 load_reg( R_EAX, Rm );
788 NOT_r32( R_EAX );
789 store_reg( R_EAX, Rn );
790 sh4_x86.tstate = TSTATE_NONE;
791 :}
792 OR Rm, Rn {:
793 COUNT_INST(I_OR);
794 load_reg( R_EAX, Rm );
795 load_reg( R_ECX, Rn );
796 OR_r32_r32( R_EAX, R_ECX );
797 store_reg( R_ECX, Rn );
798 sh4_x86.tstate = TSTATE_NONE;
799 :}
800 OR #imm, R0 {:
801 COUNT_INST(I_ORI);
802 load_reg( R_EAX, 0 );
803 OR_imm32_r32(imm, R_EAX);
804 store_reg( R_EAX, 0 );
805 sh4_x86.tstate = TSTATE_NONE;
806 :}
807 OR.B #imm, @(R0, GBR) {:
808 COUNT_INST(I_ORB);
809 load_reg( R_EAX, 0 );
810 ADD_sh4r_r32( R_GBR, R_EAX );
811 MOV_r32_esp8( R_EAX, 0 );
812 MEM_READ_BYTE_FOR_WRITE( R_EAX, R_EDX );
813 MOV_esp8_r32( 0, R_EAX );
814 OR_imm32_r32(imm, R_EDX );
815 MEM_WRITE_BYTE( R_EAX, R_EDX );
816 sh4_x86.tstate = TSTATE_NONE;
817 :}
818 ROTCL Rn {:
819 COUNT_INST(I_ROTCL);
820 load_reg( R_EAX, Rn );
821 if( sh4_x86.tstate != TSTATE_C ) {
822 LDC_t();
823 }
824 RCL1_r32( R_EAX );
825 store_reg( R_EAX, Rn );
826 SETC_t();
827 sh4_x86.tstate = TSTATE_C;
828 :}
829 ROTCR Rn {:
830 COUNT_INST(I_ROTCR);
831 load_reg( R_EAX, Rn );
832 if( sh4_x86.tstate != TSTATE_C ) {
833 LDC_t();
834 }
835 RCR1_r32( R_EAX );
836 store_reg( R_EAX, Rn );
837 SETC_t();
838 sh4_x86.tstate = TSTATE_C;
839 :}
840 ROTL Rn {:
841 COUNT_INST(I_ROTL);
842 load_reg( R_EAX, Rn );
843 ROL1_r32( R_EAX );
844 store_reg( R_EAX, Rn );
845 SETC_t();
846 sh4_x86.tstate = TSTATE_C;
847 :}
848 ROTR Rn {:
849 COUNT_INST(I_ROTR);
850 load_reg( R_EAX, Rn );
851 ROR1_r32( R_EAX );
852 store_reg( R_EAX, Rn );
853 SETC_t();
854 sh4_x86.tstate = TSTATE_C;
855 :}
856 SHAD Rm, Rn {:
857 COUNT_INST(I_SHAD);
858 /* Annoyingly enough, not directly convertible */
859 load_reg( R_EAX, Rn );
860 load_reg( R_ECX, Rm );
861 CMP_imm32_r32( 0, R_ECX );
862 JGE_rel8(doshl);
864 NEG_r32( R_ECX ); // 2
865 AND_imm8_r8( 0x1F, R_CL ); // 3
866 JE_rel8(emptysar); // 2
867 SAR_r32_CL( R_EAX ); // 2
868 JMP_rel8(end); // 2
870 JMP_TARGET(emptysar);
871 SAR_imm8_r32(31, R_EAX ); // 3
872 JMP_rel8(end2);
874 JMP_TARGET(doshl);
875 AND_imm8_r8( 0x1F, R_CL ); // 3
876 SHL_r32_CL( R_EAX ); // 2
877 JMP_TARGET(end);
878 JMP_TARGET(end2);
879 store_reg( R_EAX, Rn );
880 sh4_x86.tstate = TSTATE_NONE;
881 :}
882 SHLD Rm, Rn {:
883 COUNT_INST(I_SHLD);
884 load_reg( R_EAX, Rn );
885 load_reg( R_ECX, Rm );
886 CMP_imm32_r32( 0, R_ECX );
887 JGE_rel8(doshl);
889 NEG_r32( R_ECX ); // 2
890 AND_imm8_r8( 0x1F, R_CL ); // 3
891 JE_rel8(emptyshr );
892 SHR_r32_CL( R_EAX ); // 2
893 JMP_rel8(end); // 2
895 JMP_TARGET(emptyshr);
896 XOR_r32_r32( R_EAX, R_EAX );
897 JMP_rel8(end2);
899 JMP_TARGET(doshl);
900 AND_imm8_r8( 0x1F, R_CL ); // 3
901 SHL_r32_CL( R_EAX ); // 2
902 JMP_TARGET(end);
903 JMP_TARGET(end2);
904 store_reg( R_EAX, Rn );
905 sh4_x86.tstate = TSTATE_NONE;
906 :}
907 SHAL Rn {:
908 COUNT_INST(I_SHAL);
909 load_reg( R_EAX, Rn );
910 SHL1_r32( R_EAX );
911 SETC_t();
912 store_reg( R_EAX, Rn );
913 sh4_x86.tstate = TSTATE_C;
914 :}
915 SHAR Rn {:
916 COUNT_INST(I_SHAR);
917 load_reg( R_EAX, Rn );
918 SAR1_r32( R_EAX );
919 SETC_t();
920 store_reg( R_EAX, Rn );
921 sh4_x86.tstate = TSTATE_C;
922 :}
923 SHLL Rn {:
924 COUNT_INST(I_SHLL);
925 load_reg( R_EAX, Rn );
926 SHL1_r32( R_EAX );
927 SETC_t();
928 store_reg( R_EAX, Rn );
929 sh4_x86.tstate = TSTATE_C;
930 :}
931 SHLL2 Rn {:
932 COUNT_INST(I_SHLL);
933 load_reg( R_EAX, Rn );
934 SHL_imm8_r32( 2, R_EAX );
935 store_reg( R_EAX, Rn );
936 sh4_x86.tstate = TSTATE_NONE;
937 :}
938 SHLL8 Rn {:
939 COUNT_INST(I_SHLL);
940 load_reg( R_EAX, Rn );
941 SHL_imm8_r32( 8, R_EAX );
942 store_reg( R_EAX, Rn );
943 sh4_x86.tstate = TSTATE_NONE;
944 :}
945 SHLL16 Rn {:
946 COUNT_INST(I_SHLL);
947 load_reg( R_EAX, Rn );
948 SHL_imm8_r32( 16, R_EAX );
949 store_reg( R_EAX, Rn );
950 sh4_x86.tstate = TSTATE_NONE;
951 :}
952 SHLR Rn {:
953 COUNT_INST(I_SHLR);
954 load_reg( R_EAX, Rn );
955 SHR1_r32( R_EAX );
956 SETC_t();
957 store_reg( R_EAX, Rn );
958 sh4_x86.tstate = TSTATE_C;
959 :}
960 SHLR2 Rn {:
961 COUNT_INST(I_SHLR);
962 load_reg( R_EAX, Rn );
963 SHR_imm8_r32( 2, R_EAX );
964 store_reg( R_EAX, Rn );
965 sh4_x86.tstate = TSTATE_NONE;
966 :}
967 SHLR8 Rn {:
968 COUNT_INST(I_SHLR);
969 load_reg( R_EAX, Rn );
970 SHR_imm8_r32( 8, R_EAX );
971 store_reg( R_EAX, Rn );
972 sh4_x86.tstate = TSTATE_NONE;
973 :}
974 SHLR16 Rn {:
975 COUNT_INST(I_SHLR);
976 load_reg( R_EAX, Rn );
977 SHR_imm8_r32( 16, R_EAX );
978 store_reg( R_EAX, Rn );
979 sh4_x86.tstate = TSTATE_NONE;
980 :}
981 SUB Rm, Rn {:
982 COUNT_INST(I_SUB);
983 load_reg( R_EAX, Rm );
984 load_reg( R_ECX, Rn );
985 SUB_r32_r32( R_EAX, R_ECX );
986 store_reg( R_ECX, Rn );
987 sh4_x86.tstate = TSTATE_NONE;
988 :}
989 SUBC Rm, Rn {:
990 COUNT_INST(I_SUBC);
991 load_reg( R_EAX, Rm );
992 load_reg( R_ECX, Rn );
993 if( sh4_x86.tstate != TSTATE_C ) {
994 LDC_t();
995 }
996 SBB_r32_r32( R_EAX, R_ECX );
997 store_reg( R_ECX, Rn );
998 SETC_t();
999 sh4_x86.tstate = TSTATE_C;
1000 :}
1001 SUBV Rm, Rn {:
1002 COUNT_INST(I_SUBV);
1003 load_reg( R_EAX, Rm );
1004 load_reg( R_ECX, Rn );
1005 SUB_r32_r32( R_EAX, R_ECX );
1006 store_reg( R_ECX, Rn );
1007 SETO_t();
1008 sh4_x86.tstate = TSTATE_O;
1009 :}
1010 SWAP.B Rm, Rn {:
1011 COUNT_INST(I_SWAPB);
1012 load_reg( R_EAX, Rm );
1013 XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
1014 store_reg( R_EAX, Rn );
1015 :}
1016 SWAP.W Rm, Rn {:
1017 COUNT_INST(I_SWAPB);
1018 load_reg( R_EAX, Rm );
1019 MOV_r32_r32( R_EAX, R_ECX );
1020 SHL_imm8_r32( 16, R_ECX );
1021 SHR_imm8_r32( 16, R_EAX );
1022 OR_r32_r32( R_EAX, R_ECX );
1023 store_reg( R_ECX, Rn );
1024 sh4_x86.tstate = TSTATE_NONE;
1025 :}
1026 TAS.B @Rn {:
1027 COUNT_INST(I_TASB);
1028 load_reg( R_EAX, Rn );
1029 MOV_r32_esp8( R_EAX, 0 );
1030 MEM_READ_BYTE_FOR_WRITE( R_EAX, R_EDX );
1031 TEST_r8_r8( R_DL, R_DL );
1032 SETE_t();
1033 OR_imm8_r8( 0x80, R_DL );
1034 MOV_esp8_r32( 0, R_EAX );
1035 MEM_WRITE_BYTE( R_EAX, R_EDX );
1036 sh4_x86.tstate = TSTATE_NONE;
1037 :}
1038 TST Rm, Rn {:
1039 COUNT_INST(I_TST);
1040 load_reg( R_EAX, Rm );
1041 load_reg( R_ECX, Rn );
1042 TEST_r32_r32( R_EAX, R_ECX );
1043 SETE_t();
1044 sh4_x86.tstate = TSTATE_E;
1045 :}
1046 TST #imm, R0 {:
1047 COUNT_INST(I_TSTI);
1048 load_reg( R_EAX, 0 );
1049 TEST_imm32_r32( imm, R_EAX );
1050 SETE_t();
1051 sh4_x86.tstate = TSTATE_E;
1052 :}
1053 TST.B #imm, @(R0, GBR) {:
1054 COUNT_INST(I_TSTB);
1055 load_reg( R_EAX, 0);
1056 ADD_sh4r_r32( R_GBR, R_EAX );
1057 MEM_READ_BYTE( R_EAX, R_EAX );
1058 TEST_imm8_r8( imm, R_AL );
1059 SETE_t();
1060 sh4_x86.tstate = TSTATE_E;
1061 :}
1062 XOR Rm, Rn {:
1063 COUNT_INST(I_XOR);
1064 load_reg( R_EAX, Rm );
1065 load_reg( R_ECX, Rn );
1066 XOR_r32_r32( R_EAX, R_ECX );
1067 store_reg( R_ECX, Rn );
1068 sh4_x86.tstate = TSTATE_NONE;
1069 :}
1070 XOR #imm, R0 {:
1071 COUNT_INST(I_XORI);
1072 load_reg( R_EAX, 0 );
1073 XOR_imm32_r32( imm, R_EAX );
1074 store_reg( R_EAX, 0 );
1075 sh4_x86.tstate = TSTATE_NONE;
1076 :}
1077 XOR.B #imm, @(R0, GBR) {:
1078 COUNT_INST(I_XORB);
1079 load_reg( R_EAX, 0 );
1080 ADD_sh4r_r32( R_GBR, R_EAX );
1081 MOV_r32_esp8( R_EAX, 0 );
1082 MEM_READ_BYTE_FOR_WRITE(R_EAX, R_EDX);
1083 MOV_esp8_r32( 0, R_EAX );
1084 XOR_imm32_r32( imm, R_EDX );
1085 MEM_WRITE_BYTE( R_EAX, R_EDX );
1086 sh4_x86.tstate = TSTATE_NONE;
1087 :}
1088 XTRCT Rm, Rn {:
1089 COUNT_INST(I_XTRCT);
1090 load_reg( R_EAX, Rm );
1091 load_reg( R_ECX, Rn );
1092 SHL_imm8_r32( 16, R_EAX );
1093 SHR_imm8_r32( 16, R_ECX );
1094 OR_r32_r32( R_EAX, R_ECX );
1095 store_reg( R_ECX, Rn );
1096 sh4_x86.tstate = TSTATE_NONE;
1097 :}
1099 /* Data move instructions */
1100 MOV Rm, Rn {:
1101 COUNT_INST(I_MOV);
1102 load_reg( R_EAX, Rm );
1103 store_reg( R_EAX, Rn );
1104 :}
1105 MOV #imm, Rn {:
1106 COUNT_INST(I_MOVI);
1107 load_imm32( R_EAX, imm );
1108 store_reg( R_EAX, Rn );
1109 :}
1110 MOV.B Rm, @Rn {:
1111 COUNT_INST(I_MOVB);
1112 load_reg( R_EAX, Rn );
1113 load_reg( R_EDX, Rm );
1114 MEM_WRITE_BYTE( R_EAX, R_EDX );
1115 sh4_x86.tstate = TSTATE_NONE;
1116 :}
1117 MOV.B Rm, @-Rn {:
1118 COUNT_INST(I_MOVB);
1119 load_reg( R_EAX, Rn );
1120 LEA_r32disp8_r32( R_EAX, -1, R_EAX );
1121 load_reg( R_EDX, Rm );
1122 MEM_WRITE_BYTE( R_EAX, R_EDX );
1123 ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
1124 sh4_x86.tstate = TSTATE_NONE;
1125 :}
1126 MOV.B Rm, @(R0, Rn) {:
1127 COUNT_INST(I_MOVB);
1128 load_reg( R_EAX, 0 );
1129 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1130 load_reg( R_EDX, Rm );
1131 MEM_WRITE_BYTE( R_EAX, R_EDX );
1132 sh4_x86.tstate = TSTATE_NONE;
1133 :}
1134 MOV.B R0, @(disp, GBR) {:
1135 COUNT_INST(I_MOVB);
1136 load_spreg( R_EAX, R_GBR );
1137 ADD_imm32_r32( disp, R_EAX );
1138 load_reg( R_EDX, 0 );
1139 MEM_WRITE_BYTE( R_EAX, R_EDX );
1140 sh4_x86.tstate = TSTATE_NONE;
1141 :}
1142 MOV.B R0, @(disp, Rn) {:
1143 COUNT_INST(I_MOVB);
1144 load_reg( R_EAX, Rn );
1145 ADD_imm32_r32( disp, R_EAX );
1146 load_reg( R_EDX, 0 );
1147 MEM_WRITE_BYTE( R_EAX, R_EDX );
1148 sh4_x86.tstate = TSTATE_NONE;
1149 :}
1150 MOV.B @Rm, Rn {:
1151 COUNT_INST(I_MOVB);
1152 load_reg( R_EAX, Rm );
1153 MEM_READ_BYTE( R_EAX, R_EAX );
1154 store_reg( R_EAX, Rn );
1155 sh4_x86.tstate = TSTATE_NONE;
1156 :}
1157 MOV.B @Rm+, Rn {:
1158 COUNT_INST(I_MOVB);
1159 load_reg( R_EAX, Rm );
1160 MEM_READ_BYTE( R_EAX, R_EAX );
1161 if( Rm != Rn ) {
1162 ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
1163 }
1164 store_reg( R_EAX, Rn );
1165 sh4_x86.tstate = TSTATE_NONE;
1166 :}
1167 MOV.B @(R0, Rm), Rn {:
1168 COUNT_INST(I_MOVB);
1169 load_reg( R_EAX, 0 );
1170 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1171 MEM_READ_BYTE( R_EAX, R_EAX );
1172 store_reg( R_EAX, Rn );
1173 sh4_x86.tstate = TSTATE_NONE;
1174 :}
1175 MOV.B @(disp, GBR), R0 {:
1176 COUNT_INST(I_MOVB);
1177 load_spreg( R_EAX, R_GBR );
1178 ADD_imm32_r32( disp, R_EAX );
1179 MEM_READ_BYTE( R_EAX, R_EAX );
1180 store_reg( R_EAX, 0 );
1181 sh4_x86.tstate = TSTATE_NONE;
1182 :}
1183 MOV.B @(disp, Rm), R0 {:
1184 COUNT_INST(I_MOVB);
1185 load_reg( R_EAX, Rm );
1186 ADD_imm32_r32( disp, R_EAX );
1187 MEM_READ_BYTE( R_EAX, R_EAX );
1188 store_reg( R_EAX, 0 );
1189 sh4_x86.tstate = TSTATE_NONE;
1190 :}
1191 MOV.L Rm, @Rn {:
1192 COUNT_INST(I_MOVL);
1193 load_reg( R_EAX, Rn );
1194 check_walign32(R_EAX);
1195 MOV_r32_r32( R_EAX, R_ECX );
1196 AND_imm32_r32( 0xFC000000, R_ECX );
1197 CMP_imm32_r32( 0xE0000000, R_ECX );
1198 JNE_rel8( notsq );
1199 AND_imm8s_r32( 0x3C, R_EAX );
1200 load_reg( R_EDX, Rm );
1201 MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
1202 JMP_rel8(end);
1203 JMP_TARGET(notsq);
1204 load_reg( R_EDX, Rm );
1205 MEM_WRITE_LONG( R_EAX, R_EDX );
1206 JMP_TARGET(end);
1207 sh4_x86.tstate = TSTATE_NONE;
1208 :}
1209 MOV.L Rm, @-Rn {:
1210 COUNT_INST(I_MOVL);
1211 load_reg( R_EAX, Rn );
1212 ADD_imm8s_r32( -4, R_EAX );
1213 check_walign32( R_EAX );
1214 load_reg( R_EDX, Rm );
1215 MEM_WRITE_LONG( R_EAX, R_EDX );
1216 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
1217 sh4_x86.tstate = TSTATE_NONE;
1218 :}
1219 MOV.L Rm, @(R0, Rn) {:
1220 COUNT_INST(I_MOVL);
1221 load_reg( R_EAX, 0 );
1222 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1223 check_walign32( R_EAX );
1224 load_reg( R_EDX, Rm );
1225 MEM_WRITE_LONG( R_EAX, R_EDX );
1226 sh4_x86.tstate = TSTATE_NONE;
1227 :}
1228 MOV.L R0, @(disp, GBR) {:
1229 COUNT_INST(I_MOVL);
1230 load_spreg( R_EAX, R_GBR );
1231 ADD_imm32_r32( disp, R_EAX );
1232 check_walign32( R_EAX );
1233 load_reg( R_EDX, 0 );
1234 MEM_WRITE_LONG( R_EAX, R_EDX );
1235 sh4_x86.tstate = TSTATE_NONE;
1236 :}
1237 MOV.L Rm, @(disp, Rn) {:
1238 COUNT_INST(I_MOVL);
1239 load_reg( R_EAX, Rn );
1240 ADD_imm32_r32( disp, R_EAX );
1241 check_walign32( R_EAX );
1242 MOV_r32_r32( R_EAX, R_ECX );
1243 AND_imm32_r32( 0xFC000000, R_ECX );
1244 CMP_imm32_r32( 0xE0000000, R_ECX );
1245 JNE_rel8( notsq );
1246 AND_imm8s_r32( 0x3C, R_EAX );
1247 load_reg( R_EDX, Rm );
1248 MOV_r32_ebpr32disp32( R_EDX, R_EAX, REG_OFFSET(store_queue) );
1249 JMP_rel8(end);
1250 JMP_TARGET(notsq);
1251 load_reg( R_EDX, Rm );
1252 MEM_WRITE_LONG( R_EAX, R_EDX );
1253 JMP_TARGET(end);
1254 sh4_x86.tstate = TSTATE_NONE;
1255 :}
1256 MOV.L @Rm, Rn {:
1257 COUNT_INST(I_MOVL);
1258 load_reg( R_EAX, Rm );
1259 check_ralign32( R_EAX );
1260 MEM_READ_LONG( R_EAX, R_EAX );
1261 store_reg( R_EAX, Rn );
1262 sh4_x86.tstate = TSTATE_NONE;
1263 :}
1264 MOV.L @Rm+, Rn {:
1265 COUNT_INST(I_MOVL);
1266 load_reg( R_EAX, Rm );
1267 check_ralign32( R_EAX );
1268 MEM_READ_LONG( R_EAX, R_EAX );
1269 if( Rm != Rn ) {
1270 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1271 }
1272 store_reg( R_EAX, Rn );
1273 sh4_x86.tstate = TSTATE_NONE;
1274 :}
1275 MOV.L @(R0, Rm), Rn {:
1276 COUNT_INST(I_MOVL);
1277 load_reg( R_EAX, 0 );
1278 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1279 check_ralign32( R_EAX );
1280 MEM_READ_LONG( R_EAX, R_EAX );
1281 store_reg( R_EAX, Rn );
1282 sh4_x86.tstate = TSTATE_NONE;
1283 :}
1284 MOV.L @(disp, GBR), R0 {:
1285 COUNT_INST(I_MOVL);
1286 load_spreg( R_EAX, R_GBR );
1287 ADD_imm32_r32( disp, R_EAX );
1288 check_ralign32( R_EAX );
1289 MEM_READ_LONG( R_EAX, R_EAX );
1290 store_reg( R_EAX, 0 );
1291 sh4_x86.tstate = TSTATE_NONE;
1292 :}
1293 MOV.L @(disp, PC), Rn {:
1294 COUNT_INST(I_MOVLPC);
1295 if( sh4_x86.in_delay_slot ) {
1296 SLOTILLEGAL();
1297 } else {
1298 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1299 if( IS_IN_ICACHE(target) ) {
1300 // If the target address is in the same page as the code, it's
1301 // pretty safe to just ref it directly and circumvent the whole
1302 // memory subsystem. (this is a big performance win)
1304 // FIXME: There's a corner-case that's not handled here when
1305 // the current code-page is in the ITLB but not in the UTLB.
1306 // (should generate a TLB miss although need to test SH4
1307 // behaviour to confirm) Unlikely to be anyone depending on this
1308 // behaviour though.
1309 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1310 MOV_moff32_EAX( ptr );
1311 } else {
1312 // Note: we use sh4r.pc for the calc as we could be running at a
1313 // different virtual address than the translation was done with,
1314 // but we can safely assume that the low bits are the same.
1315 load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1316 ADD_sh4r_r32( R_PC, R_EAX );
1317 MEM_READ_LONG( R_EAX, R_EAX );
1318 sh4_x86.tstate = TSTATE_NONE;
1319 }
1320 store_reg( R_EAX, Rn );
1321 }
1322 :}
1323 MOV.L @(disp, Rm), Rn {:
1324 COUNT_INST(I_MOVL);
1325 load_reg( R_EAX, Rm );
1326 ADD_imm8s_r32( disp, R_EAX );
1327 check_ralign32( R_EAX );
1328 MEM_READ_LONG( R_EAX, R_EAX );
1329 store_reg( R_EAX, Rn );
1330 sh4_x86.tstate = TSTATE_NONE;
1331 :}
1332 MOV.W Rm, @Rn {:
1333 COUNT_INST(I_MOVW);
1334 load_reg( R_EAX, Rn );
1335 check_walign16( R_EAX );
1336 load_reg( R_EDX, Rm );
1337 MEM_WRITE_WORD( R_EAX, R_EDX );
1338 sh4_x86.tstate = TSTATE_NONE;
1339 :}
1340 MOV.W Rm, @-Rn {:
1341 COUNT_INST(I_MOVW);
1342 load_reg( R_EAX, Rn );
1343 check_walign16( R_EAX );
1344 LEA_r32disp8_r32( R_EAX, -2, R_EAX );
1345 load_reg( R_EDX, Rm );
1346 MEM_WRITE_WORD( R_EAX, R_EDX );
1347 ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
1348 sh4_x86.tstate = TSTATE_NONE;
1349 :}
1350 MOV.W Rm, @(R0, Rn) {:
1351 COUNT_INST(I_MOVW);
1352 load_reg( R_EAX, 0 );
1353 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1354 check_walign16( R_EAX );
1355 load_reg( R_EDX, Rm );
1356 MEM_WRITE_WORD( R_EAX, R_EDX );
1357 sh4_x86.tstate = TSTATE_NONE;
1358 :}
1359 MOV.W R0, @(disp, GBR) {:
1360 COUNT_INST(I_MOVW);
1361 load_spreg( R_EAX, R_GBR );
1362 ADD_imm32_r32( disp, R_EAX );
1363 check_walign16( R_EAX );
1364 load_reg( R_EDX, 0 );
1365 MEM_WRITE_WORD( R_EAX, R_EDX );
1366 sh4_x86.tstate = TSTATE_NONE;
1367 :}
1368 MOV.W R0, @(disp, Rn) {:
1369 COUNT_INST(I_MOVW);
1370 load_reg( R_EAX, Rn );
1371 ADD_imm32_r32( disp, R_EAX );
1372 check_walign16( R_EAX );
1373 load_reg( R_EDX, 0 );
1374 MEM_WRITE_WORD( R_EAX, R_EDX );
1375 sh4_x86.tstate = TSTATE_NONE;
1376 :}
1377 MOV.W @Rm, Rn {:
1378 COUNT_INST(I_MOVW);
1379 load_reg( R_EAX, Rm );
1380 check_ralign16( R_EAX );
1381 MEM_READ_WORD( R_EAX, R_EAX );
1382 store_reg( R_EAX, Rn );
1383 sh4_x86.tstate = TSTATE_NONE;
1384 :}
1385 MOV.W @Rm+, Rn {:
1386 COUNT_INST(I_MOVW);
1387 load_reg( R_EAX, Rm );
1388 check_ralign16( R_EAX );
1389 MEM_READ_WORD( R_EAX, R_EAX );
1390 if( Rm != Rn ) {
1391 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1392 }
1393 store_reg( R_EAX, Rn );
1394 sh4_x86.tstate = TSTATE_NONE;
1395 :}
1396 MOV.W @(R0, Rm), Rn {:
1397 COUNT_INST(I_MOVW);
1398 load_reg( R_EAX, 0 );
1399 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
1400 check_ralign16( R_EAX );
1401 MEM_READ_WORD( R_EAX, R_EAX );
1402 store_reg( R_EAX, Rn );
1403 sh4_x86.tstate = TSTATE_NONE;
1404 :}
1405 MOV.W @(disp, GBR), R0 {:
1406 COUNT_INST(I_MOVW);
1407 load_spreg( R_EAX, R_GBR );
1408 ADD_imm32_r32( disp, R_EAX );
1409 check_ralign16( R_EAX );
1410 MEM_READ_WORD( R_EAX, R_EAX );
1411 store_reg( R_EAX, 0 );
1412 sh4_x86.tstate = TSTATE_NONE;
1413 :}
1414 MOV.W @(disp, PC), Rn {:
1415 COUNT_INST(I_MOVW);
1416 if( sh4_x86.in_delay_slot ) {
1417 SLOTILLEGAL();
1418 } else {
1419 // See comments for MOV.L @(disp, PC), Rn
1420 uint32_t target = pc + disp + 4;
1421 if( IS_IN_ICACHE(target) ) {
1422 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1423 MOV_moff32_EAX( ptr );
1424 MOVSX_r16_r32( R_EAX, R_EAX );
1425 } else {
1426 load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1427 ADD_sh4r_r32( R_PC, R_EAX );
1428 MEM_READ_WORD( R_EAX, R_EAX );
1429 sh4_x86.tstate = TSTATE_NONE;
1430 }
1431 store_reg( R_EAX, Rn );
1432 }
1433 :}
1434 MOV.W @(disp, Rm), R0 {:
1435 COUNT_INST(I_MOVW);
1436 load_reg( R_EAX, Rm );
1437 ADD_imm32_r32( disp, R_EAX );
1438 check_ralign16( R_EAX );
1439 MEM_READ_WORD( R_EAX, R_EAX );
1440 store_reg( R_EAX, 0 );
1441 sh4_x86.tstate = TSTATE_NONE;
1442 :}
1443 MOVA @(disp, PC), R0 {:
1444 COUNT_INST(I_MOVA);
1445 if( sh4_x86.in_delay_slot ) {
1446 SLOTILLEGAL();
1447 } else {
1448 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1449 ADD_sh4r_r32( R_PC, R_ECX );
1450 store_reg( R_ECX, 0 );
1451 sh4_x86.tstate = TSTATE_NONE;
1452 }
1453 :}
1454 MOVCA.L R0, @Rn {:
1455 COUNT_INST(I_MOVCA);
1456 load_reg( R_EAX, Rn );
1457 check_walign32( R_EAX );
1458 load_reg( R_EDX, 0 );
1459 MEM_WRITE_LONG( R_EAX, R_EDX );
1460 sh4_x86.tstate = TSTATE_NONE;
1461 :}
1463 /* Control transfer instructions */
1464 BF disp {:
1465 COUNT_INST(I_BF);
1466 if( sh4_x86.in_delay_slot ) {
1467 SLOTILLEGAL();
1468 } else {
1469 sh4vma_t target = disp + pc + 4;
1470 JT_rel8( nottaken );
1471 exit_block_rel(target, pc+2 );
1472 JMP_TARGET(nottaken);
1473 return 2;
1474 }
1475 :}
1476 BF/S disp {:
1477 COUNT_INST(I_BFS);
1478 if( sh4_x86.in_delay_slot ) {
1479 SLOTILLEGAL();
1480 } else {
1481 sh4_x86.in_delay_slot = DELAY_PC;
1482 if( UNTRANSLATABLE(pc+2) ) {
1483 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1484 JT_rel8(nottaken);
1485 ADD_imm32_r32( disp, R_EAX );
1486 JMP_TARGET(nottaken);
1487 ADD_sh4r_r32( R_PC, R_EAX );
1488 store_spreg( R_EAX, R_NEW_PC );
1489 exit_block_emu(pc+2);
1490 sh4_x86.branch_taken = TRUE;
1491 return 2;
1492 } else {
1493 if( sh4_x86.tstate == TSTATE_NONE ) {
1494 CMP_imm8s_sh4r( 1, R_T );
1495 sh4_x86.tstate = TSTATE_E;
1496 }
1497 sh4vma_t target = disp + pc + 4;
1498 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
1499 int save_tstate = sh4_x86.tstate;
1500 sh4_translate_instruction(pc+2);
1501 exit_block_rel( target, pc+4 );
1503 // not taken
1504 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1505 sh4_x86.tstate = save_tstate;
1506 sh4_translate_instruction(pc+2);
1507 return 4;
1508 }
1509 }
1510 :}
1511 BRA disp {:
1512 COUNT_INST(I_BRA);
1513 if( sh4_x86.in_delay_slot ) {
1514 SLOTILLEGAL();
1515 } else {
1516 sh4_x86.in_delay_slot = DELAY_PC;
1517 sh4_x86.branch_taken = TRUE;
1518 if( UNTRANSLATABLE(pc+2) ) {
1519 load_spreg( R_EAX, R_PC );
1520 ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
1521 store_spreg( R_EAX, R_NEW_PC );
1522 exit_block_emu(pc+2);
1523 return 2;
1524 } else {
1525 sh4_translate_instruction( pc + 2 );
1526 exit_block_rel( disp + pc + 4, pc+4 );
1527 return 4;
1528 }
1529 }
1530 :}
1531 BRAF Rn {:
1532 COUNT_INST(I_BRAF);
1533 if( sh4_x86.in_delay_slot ) {
1534 SLOTILLEGAL();
1535 } else {
1536 load_spreg( R_EAX, R_PC );
1537 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1538 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1539 store_spreg( R_EAX, R_NEW_PC );
1540 sh4_x86.in_delay_slot = DELAY_PC;
1541 sh4_x86.tstate = TSTATE_NONE;
1542 sh4_x86.branch_taken = TRUE;
1543 if( UNTRANSLATABLE(pc+2) ) {
1544 exit_block_emu(pc+2);
1545 return 2;
1546 } else {
1547 sh4_translate_instruction( pc + 2 );
1548 exit_block_newpcset(pc+4);
1549 return 4;
1550 }
1551 }
1552 :}
1553 BSR disp {:
1554 COUNT_INST(I_BSR);
1555 if( sh4_x86.in_delay_slot ) {
1556 SLOTILLEGAL();
1557 } else {
1558 load_spreg( R_EAX, R_PC );
1559 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1560 store_spreg( R_EAX, R_PR );
1561 sh4_x86.in_delay_slot = DELAY_PC;
1562 sh4_x86.branch_taken = TRUE;
1563 sh4_x86.tstate = TSTATE_NONE;
1564 if( UNTRANSLATABLE(pc+2) ) {
1565 ADD_imm32_r32( disp, R_EAX );
1566 store_spreg( R_EAX, R_NEW_PC );
1567 exit_block_emu(pc+2);
1568 return 2;
1569 } else {
1570 sh4_translate_instruction( pc + 2 );
1571 exit_block_rel( disp + pc + 4, pc+4 );
1572 return 4;
1573 }
1574 }
1575 :}
1576 BSRF Rn {:
1577 COUNT_INST(I_BSRF);
1578 if( sh4_x86.in_delay_slot ) {
1579 SLOTILLEGAL();
1580 } else {
1581 load_spreg( R_EAX, R_PC );
1582 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1583 store_spreg( R_EAX, R_PR );
1584 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1585 store_spreg( R_EAX, R_NEW_PC );
1587 sh4_x86.in_delay_slot = DELAY_PC;
1588 sh4_x86.tstate = TSTATE_NONE;
1589 sh4_x86.branch_taken = TRUE;
1590 if( UNTRANSLATABLE(pc+2) ) {
1591 exit_block_emu(pc+2);
1592 return 2;
1593 } else {
1594 sh4_translate_instruction( pc + 2 );
1595 exit_block_newpcset(pc+4);
1596 return 4;
1597 }
1598 }
1599 :}
1600 BT disp {:
1601 COUNT_INST(I_BT);
1602 if( sh4_x86.in_delay_slot ) {
1603 SLOTILLEGAL();
1604 } else {
1605 sh4vma_t target = disp + pc + 4;
1606 JF_rel8( nottaken );
1607 exit_block_rel(target, pc+2 );
1608 JMP_TARGET(nottaken);
1609 return 2;
1610 }
1611 :}
1612 BT/S disp {:
1613 COUNT_INST(I_BTS);
1614 if( sh4_x86.in_delay_slot ) {
1615 SLOTILLEGAL();
1616 } else {
1617 sh4_x86.in_delay_slot = DELAY_PC;
1618 if( UNTRANSLATABLE(pc+2) ) {
1619 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1620 JF_rel8(nottaken);
1621 ADD_imm32_r32( disp, R_EAX );
1622 JMP_TARGET(nottaken);
1623 ADD_sh4r_r32( R_PC, R_EAX );
1624 store_spreg( R_EAX, R_NEW_PC );
1625 exit_block_emu(pc+2);
1626 sh4_x86.branch_taken = TRUE;
1627 return 2;
1628 } else {
1629 if( sh4_x86.tstate == TSTATE_NONE ) {
1630 CMP_imm8s_sh4r( 1, R_T );
1631 sh4_x86.tstate = TSTATE_E;
1632 }
1633 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
1634 int save_tstate = sh4_x86.tstate;
1635 sh4_translate_instruction(pc+2);
1636 exit_block_rel( disp + pc + 4, pc+4 );
1637 // not taken
1638 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1639 sh4_x86.tstate = save_tstate;
1640 sh4_translate_instruction(pc+2);
1641 return 4;
1642 }
1643 }
1644 :}
1645 JMP @Rn {:
1646 COUNT_INST(I_JMP);
1647 if( sh4_x86.in_delay_slot ) {
1648 SLOTILLEGAL();
1649 } else {
1650 load_reg( R_ECX, Rn );
1651 store_spreg( R_ECX, R_NEW_PC );
1652 sh4_x86.in_delay_slot = DELAY_PC;
1653 sh4_x86.branch_taken = TRUE;
1654 if( UNTRANSLATABLE(pc+2) ) {
1655 exit_block_emu(pc+2);
1656 return 2;
1657 } else {
1658 sh4_translate_instruction(pc+2);
1659 exit_block_newpcset(pc+4);
1660 return 4;
1661 }
1662 }
1663 :}
1664 JSR @Rn {:
1665 COUNT_INST(I_JSR);
1666 if( sh4_x86.in_delay_slot ) {
1667 SLOTILLEGAL();
1668 } else {
1669 load_spreg( R_EAX, R_PC );
1670 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1671 store_spreg( R_EAX, R_PR );
1672 load_reg( R_ECX, Rn );
1673 store_spreg( R_ECX, R_NEW_PC );
1674 sh4_x86.in_delay_slot = DELAY_PC;
1675 sh4_x86.branch_taken = TRUE;
1676 sh4_x86.tstate = TSTATE_NONE;
1677 if( UNTRANSLATABLE(pc+2) ) {
1678 exit_block_emu(pc+2);
1679 return 2;
1680 } else {
1681 sh4_translate_instruction(pc+2);
1682 exit_block_newpcset(pc+4);
1683 return 4;
1684 }
1685 }
1686 :}
1687 RTE {:
1688 COUNT_INST(I_RTE);
1689 if( sh4_x86.in_delay_slot ) {
1690 SLOTILLEGAL();
1691 } else {
1692 check_priv();
1693 load_spreg( R_ECX, R_SPC );
1694 store_spreg( R_ECX, R_NEW_PC );
1695 load_spreg( R_EAX, R_SSR );
1696 call_func1( sh4_write_sr, R_EAX );
1697 sh4_x86.in_delay_slot = DELAY_PC;
1698 sh4_x86.fpuen_checked = FALSE;
1699 sh4_x86.tstate = TSTATE_NONE;
1700 sh4_x86.branch_taken = TRUE;
1701 if( UNTRANSLATABLE(pc+2) ) {
1702 exit_block_emu(pc+2);
1703 return 2;
1704 } else {
1705 sh4_translate_instruction(pc+2);
1706 exit_block_newpcset(pc+4);
1707 return 4;
1708 }
1709 }
1710 :}
1711 RTS {:
1712 COUNT_INST(I_RTS);
1713 if( sh4_x86.in_delay_slot ) {
1714 SLOTILLEGAL();
1715 } else {
1716 load_spreg( R_ECX, R_PR );
1717 store_spreg( R_ECX, R_NEW_PC );
1718 sh4_x86.in_delay_slot = DELAY_PC;
1719 sh4_x86.branch_taken = TRUE;
1720 if( UNTRANSLATABLE(pc+2) ) {
1721 exit_block_emu(pc+2);
1722 return 2;
1723 } else {
1724 sh4_translate_instruction(pc+2);
1725 exit_block_newpcset(pc+4);
1726 return 4;
1727 }
1728 }
1729 :}
1730 TRAPA #imm {:
1731 COUNT_INST(I_TRAPA);
1732 if( sh4_x86.in_delay_slot ) {
1733 SLOTILLEGAL();
1734 } else {
1735 load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1736 ADD_r32_sh4r( R_ECX, R_PC );
1737 load_imm32( R_EAX, imm );
1738 call_func1( sh4_raise_trap, R_EAX );
1739 sh4_x86.tstate = TSTATE_NONE;
1740 exit_block_pcset(pc+2);
1741 sh4_x86.branch_taken = TRUE;
1742 return 2;
1743 }
1744 :}
1745 UNDEF {:
1746 COUNT_INST(I_UNDEF);
1747 if( sh4_x86.in_delay_slot ) {
1748 exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);
1749 } else {
1750 exit_block_exc(EXC_ILLEGAL, pc);
1751 return 2;
1752 }
1753 :}
1755 CLRMAC {:
1756 COUNT_INST(I_CLRMAC);
1757 XOR_r32_r32(R_EAX, R_EAX);
1758 store_spreg( R_EAX, R_MACL );
1759 store_spreg( R_EAX, R_MACH );
1760 sh4_x86.tstate = TSTATE_NONE;
1761 :}
1762 CLRS {:
1763 COUNT_INST(I_CLRS);
1764 CLC();
1765 SETC_sh4r(R_S);
1766 sh4_x86.tstate = TSTATE_NONE;
1767 :}
1768 CLRT {:
1769 COUNT_INST(I_CLRT);
1770 CLC();
1771 SETC_t();
1772 sh4_x86.tstate = TSTATE_C;
1773 :}
1774 SETS {:
1775 COUNT_INST(I_SETS);
1776 STC();
1777 SETC_sh4r(R_S);
1778 sh4_x86.tstate = TSTATE_NONE;
1779 :}
1780 SETT {:
1781 COUNT_INST(I_SETT);
1782 STC();
1783 SETC_t();
1784 sh4_x86.tstate = TSTATE_C;
1785 :}
1787 /* Floating point moves */
1788 FMOV FRm, FRn {:
1789 COUNT_INST(I_FMOV1);
1790 check_fpuen();
1791 if( sh4_x86.double_size ) {
1792 load_dr0( R_EAX, FRm );
1793 load_dr1( R_ECX, FRm );
1794 store_dr0( R_EAX, FRn );
1795 store_dr1( R_ECX, FRn );
1796 } else {
1797 load_fr( R_EAX, FRm ); // SZ=0 branch
1798 store_fr( R_EAX, FRn );
1799 }
1800 :}
1801 FMOV FRm, @Rn {:
1802 COUNT_INST(I_FMOV2);
1803 check_fpuen();
1804 load_reg( R_EAX, Rn );
1805 if( sh4_x86.double_size ) {
1806 check_walign64( R_EAX );
1807 load_dr0( R_EDX, FRm );
1808 MEM_WRITE_LONG( R_EAX, R_EDX );
1809 load_reg( R_EAX, Rn );
1810 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1811 load_dr1( R_EDX, FRm );
1812 MEM_WRITE_LONG( R_EAX, R_EDX );
1813 } else {
1814 check_walign32( R_EAX );
1815 load_fr( R_EDX, FRm );
1816 MEM_WRITE_LONG( R_EAX, R_EDX );
1817 }
1818 sh4_x86.tstate = TSTATE_NONE;
1819 :}
1820 FMOV @Rm, FRn {:
1821 COUNT_INST(I_FMOV5);
1822 check_fpuen();
1823 load_reg( R_EAX, Rm );
1824 if( sh4_x86.double_size ) {
1825 check_ralign64( R_EAX );
1826 MEM_READ_LONG( R_EAX, R_EAX );
1827 store_dr0( R_EAX, FRn );
1828 load_reg( R_EAX, Rm );
1829 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1830 MEM_READ_LONG( R_EAX, R_EAX );
1831 store_dr1( R_EAX, FRn );
1832 } else {
1833 check_ralign32( R_EAX );
1834 MEM_READ_LONG( R_EAX, R_EAX );
1835 store_fr( R_EAX, FRn );
1836 }
1837 sh4_x86.tstate = TSTATE_NONE;
1838 :}
1839 FMOV FRm, @-Rn {:
1840 COUNT_INST(I_FMOV3);
1841 check_fpuen();
1842 load_reg( R_EAX, Rn );
1843 if( sh4_x86.double_size ) {
1844 check_walign64( R_EAX );
1845 LEA_r32disp8_r32( R_EAX, -8, R_EAX );
1846 load_dr0( R_EDX, FRm );
1847 MEM_WRITE_LONG( R_EAX, R_EDX );
1848 load_reg( R_EAX, Rn );
1849 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
1850 load_dr1( R_EDX, FRm );
1851 MEM_WRITE_LONG( R_EAX, R_EDX );
1852 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1853 } else {
1854 check_walign32( R_EAX );
1855 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
1856 load_fr( R_EDX, FRm );
1857 MEM_WRITE_LONG( R_EAX, R_EDX );
1858 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1859 }
1860 sh4_x86.tstate = TSTATE_NONE;
1861 :}
1862 FMOV @Rm+, FRn {:
1863 COUNT_INST(I_FMOV6);
1864 check_fpuen();
1865 load_reg( R_EAX, Rm );
1866 if( sh4_x86.double_size ) {
1867 check_ralign64( R_EAX );
1868 MEM_READ_LONG( R_EAX, R_EAX );
1869 store_dr0( R_EAX, FRn );
1870 load_reg( R_EAX, Rm );
1871 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1872 MEM_READ_LONG( R_EAX, R_EAX );
1873 store_dr1( R_EAX, FRn );
1874 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1875 } else {
1876 check_ralign32( R_EAX );
1877 MEM_READ_LONG( R_EAX, R_EAX );
1878 store_fr( R_EAX, FRn );
1879 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1880 }
1881 sh4_x86.tstate = TSTATE_NONE;
1882 :}
1883 FMOV FRm, @(R0, Rn) {:
1884 COUNT_INST(I_FMOV4);
1885 check_fpuen();
1886 load_reg( R_EAX, Rn );
1887 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1888 if( sh4_x86.double_size ) {
1889 check_walign64( R_EAX );
1890 load_dr0( R_EDX, FRm );
1891 MEM_WRITE_LONG( R_EAX, R_EDX );
1892 load_reg( R_EAX, Rn );
1893 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1894 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1895 load_dr1( R_EDX, FRm );
1896 MEM_WRITE_LONG( R_EAX, R_EDX );
1897 } else {
1898 check_walign32( R_EAX );
1899 load_fr( R_EDX, FRm );
1900 MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
1901 }
1902 sh4_x86.tstate = TSTATE_NONE;
1903 :}
1904 FMOV @(R0, Rm), FRn {:
1905 COUNT_INST(I_FMOV7);
1906 check_fpuen();
1907 load_reg( R_EAX, Rm );
1908 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1909 if( sh4_x86.double_size ) {
1910 check_ralign64( R_EAX );
1911 MEM_READ_LONG( R_EAX, R_EAX );
1912 store_dr0( R_EAX, FRn );
1913 load_reg( R_EAX, Rm );
1914 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1915 LEA_r32disp8_r32( R_EAX, 4, R_EAX );
1916 MEM_READ_LONG( R_EAX, R_EAX );
1917 store_dr1( R_EAX, FRn );
1918 } else {
1919 check_ralign32( R_EAX );
1920 MEM_READ_LONG( R_EAX, R_EAX );
1921 store_fr( R_EAX, FRn );
1922 }
1923 sh4_x86.tstate = TSTATE_NONE;
1924 :}
1925 FLDI0 FRn {: /* IFF PR=0 */
1926 COUNT_INST(I_FLDI0);
1927 check_fpuen();
1928 if( sh4_x86.double_prec == 0 ) {
1929 XOR_r32_r32( R_EAX, R_EAX );
1930 store_fr( R_EAX, FRn );
1931 }
1932 sh4_x86.tstate = TSTATE_NONE;
1933 :}
1934 FLDI1 FRn {: /* IFF PR=0 */
1935 COUNT_INST(I_FLDI1);
1936 check_fpuen();
1937 if( sh4_x86.double_prec == 0 ) {
1938 load_imm32(R_EAX, 0x3F800000);
1939 store_fr( R_EAX, FRn );
1940 }
1941 :}
1943 FLOAT FPUL, FRn {:
1944 COUNT_INST(I_FLOAT);
1945 check_fpuen();
1946 FILD_sh4r(R_FPUL);
1947 if( sh4_x86.double_prec ) {
1948 pop_dr( FRn );
1949 } else {
1950 pop_fr( FRn );
1951 }
1952 :}
1953 FTRC FRm, FPUL {:
1954 COUNT_INST(I_FTRC);
1955 check_fpuen();
1956 if( sh4_x86.double_prec ) {
1957 push_dr( FRm );
1958 } else {
1959 push_fr( FRm );
1960 }
1961 load_ptr( R_ECX, &max_int );
1962 FILD_r32ind( R_ECX );
1963 FCOMIP_st(1);
1964 JNA_rel8( sat );
1965 load_ptr( R_ECX, &min_int ); // 5
1966 FILD_r32ind( R_ECX ); // 2
1967 FCOMIP_st(1); // 2
1968 JAE_rel8( sat2 ); // 2
1969 load_ptr( R_EAX, &save_fcw );
1970 FNSTCW_r32ind( R_EAX );
1971 load_ptr( R_EDX, &trunc_fcw );
1972 FLDCW_r32ind( R_EDX );
1973 FISTP_sh4r(R_FPUL); // 3
1974 FLDCW_r32ind( R_EAX );
1975 JMP_rel8(end); // 2
1977 JMP_TARGET(sat);
1978 JMP_TARGET(sat2);
1979 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
1980 store_spreg( R_ECX, R_FPUL );
1981 FPOP_st();
1982 JMP_TARGET(end);
1983 sh4_x86.tstate = TSTATE_NONE;
1984 :}
1985 FLDS FRm, FPUL {:
1986 COUNT_INST(I_FLDS);
1987 check_fpuen();
1988 load_fr( R_EAX, FRm );
1989 store_spreg( R_EAX, R_FPUL );
1990 :}
1991 FSTS FPUL, FRn {:
1992 COUNT_INST(I_FSTS);
1993 check_fpuen();
1994 load_spreg( R_EAX, R_FPUL );
1995 store_fr( R_EAX, FRn );
1996 :}
1997 FCNVDS FRm, FPUL {:
1998 COUNT_INST(I_FCNVDS);
1999 check_fpuen();
2000 if( sh4_x86.double_prec ) {
2001 push_dr( FRm );
2002 pop_fpul();
2003 }
2004 :}
2005 FCNVSD FPUL, FRn {:
2006 COUNT_INST(I_FCNVSD);
2007 check_fpuen();
2008 if( sh4_x86.double_prec ) {
2009 push_fpul();
2010 pop_dr( FRn );
2011 }
2012 :}
2014 /* Floating point instructions */
2015 FABS FRn {:
2016 COUNT_INST(I_FABS);
2017 check_fpuen();
2018 if( sh4_x86.double_prec ) {
2019 push_dr(FRn);
2020 FABS_st0();
2021 pop_dr(FRn);
2022 } else {
2023 push_fr(FRn);
2024 FABS_st0();
2025 pop_fr(FRn);
2026 }
2027 :}
2028 FADD FRm, FRn {:
2029 COUNT_INST(I_FADD);
2030 check_fpuen();
2031 if( sh4_x86.double_prec ) {
2032 push_dr(FRm);
2033 push_dr(FRn);
2034 FADDP_st(1);
2035 pop_dr(FRn);
2036 } else {
2037 push_fr(FRm);
2038 push_fr(FRn);
2039 FADDP_st(1);
2040 pop_fr(FRn);
2041 }
2042 :}
2043 FDIV FRm, FRn {:
2044 COUNT_INST(I_FDIV);
2045 check_fpuen();
2046 if( sh4_x86.double_prec ) {
2047 push_dr(FRn);
2048 push_dr(FRm);
2049 FDIVP_st(1);
2050 pop_dr(FRn);
2051 } else {
2052 push_fr(FRn);
2053 push_fr(FRm);
2054 FDIVP_st(1);
2055 pop_fr(FRn);
2056 }
2057 :}
2058 FMAC FR0, FRm, FRn {:
2059 COUNT_INST(I_FMAC);
2060 check_fpuen();
2061 if( sh4_x86.double_prec ) {
2062 push_dr( 0 );
2063 push_dr( FRm );
2064 FMULP_st(1);
2065 push_dr( FRn );
2066 FADDP_st(1);
2067 pop_dr( FRn );
2068 } else {
2069 push_fr( 0 );
2070 push_fr( FRm );
2071 FMULP_st(1);
2072 push_fr( FRn );
2073 FADDP_st(1);
2074 pop_fr( FRn );
2075 }
2076 :}
2078 FMUL FRm, FRn {:
2079 COUNT_INST(I_FMUL);
2080 check_fpuen();
2081 if( sh4_x86.double_prec ) {
2082 push_dr(FRm);
2083 push_dr(FRn);
2084 FMULP_st(1);
2085 pop_dr(FRn);
2086 } else {
2087 push_fr(FRm);
2088 push_fr(FRn);
2089 FMULP_st(1);
2090 pop_fr(FRn);
2091 }
2092 :}
2093 FNEG FRn {:
2094 COUNT_INST(I_FNEG);
2095 check_fpuen();
2096 if( sh4_x86.double_prec ) {
2097 push_dr(FRn);
2098 FCHS_st0();
2099 pop_dr(FRn);
2100 } else {
2101 push_fr(FRn);
2102 FCHS_st0();
2103 pop_fr(FRn);
2104 }
2105 :}
2106 FSRRA FRn {:
2107 COUNT_INST(I_FSRRA);
2108 check_fpuen();
2109 if( sh4_x86.double_prec == 0 ) {
2110 FLD1_st0();
2111 push_fr(FRn);
2112 FSQRT_st0();
2113 FDIVP_st(1);
2114 pop_fr(FRn);
2115 }
2116 :}
2117 FSQRT FRn {:
2118 COUNT_INST(I_FSQRT);
2119 check_fpuen();
2120 if( sh4_x86.double_prec ) {
2121 push_dr(FRn);
2122 FSQRT_st0();
2123 pop_dr(FRn);
2124 } else {
2125 push_fr(FRn);
2126 FSQRT_st0();
2127 pop_fr(FRn);
2128 }
2129 :}
2130 FSUB FRm, FRn {:
2131 COUNT_INST(I_FSUB);
2132 check_fpuen();
2133 if( sh4_x86.double_prec ) {
2134 push_dr(FRn);
2135 push_dr(FRm);
2136 FSUBP_st(1);
2137 pop_dr(FRn);
2138 } else {
2139 push_fr(FRn);
2140 push_fr(FRm);
2141 FSUBP_st(1);
2142 pop_fr(FRn);
2143 }
2144 :}
2146 FCMP/EQ FRm, FRn {:
2147 COUNT_INST(I_FCMPEQ);
2148 check_fpuen();
2149 if( sh4_x86.double_prec ) {
2150 push_dr(FRm);
2151 push_dr(FRn);
2152 } else {
2153 push_fr(FRm);
2154 push_fr(FRn);
2155 }
2156 FCOMIP_st(1);
2157 SETE_t();
2158 FPOP_st();
2159 sh4_x86.tstate = TSTATE_E;
2160 :}
2161 FCMP/GT FRm, FRn {:
2162 COUNT_INST(I_FCMPGT);
2163 check_fpuen();
2164 if( sh4_x86.double_prec ) {
2165 push_dr(FRm);
2166 push_dr(FRn);
2167 } else {
2168 push_fr(FRm);
2169 push_fr(FRn);
2170 }
2171 FCOMIP_st(1);
2172 SETA_t();
2173 FPOP_st();
2174 sh4_x86.tstate = TSTATE_A;
2175 :}
2177 FSCA FPUL, FRn {:
2178 COUNT_INST(I_FSCA);
2179 check_fpuen();
2180 if( sh4_x86.double_prec == 0 ) {
2181 LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
2182 load_spreg( R_EAX, R_FPUL );
2183 call_func2( sh4_fsca, R_EAX, R_EDX );
2184 }
2185 sh4_x86.tstate = TSTATE_NONE;
2186 :}
2187 FIPR FVm, FVn {:
2188 COUNT_INST(I_FIPR);
2189 check_fpuen();
2190 if( sh4_x86.double_prec == 0 ) {
2191 if( sh4_x86.sse3_enabled ) {
2192 MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
2193 MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
2194 HADDPS_xmm_xmm( 4, 4 );
2195 HADDPS_xmm_xmm( 4, 4 );
2196 MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
2197 } else {
2198 push_fr( FVm<<2 );
2199 push_fr( FVn<<2 );
2200 FMULP_st(1);
2201 push_fr( (FVm<<2)+1);
2202 push_fr( (FVn<<2)+1);
2203 FMULP_st(1);
2204 FADDP_st(1);
2205 push_fr( (FVm<<2)+2);
2206 push_fr( (FVn<<2)+2);
2207 FMULP_st(1);
2208 FADDP_st(1);
2209 push_fr( (FVm<<2)+3);
2210 push_fr( (FVn<<2)+3);
2211 FMULP_st(1);
2212 FADDP_st(1);
2213 pop_fr( (FVn<<2)+3);
2214 }
2215 }
2216 :}
2217 FTRV XMTRX, FVn {:
2218 COUNT_INST(I_FTRV);
2219 check_fpuen();
2220 if( sh4_x86.double_prec == 0 ) {
2221 if( sh4_x86.sse3_enabled ) {
2222 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1 M0 M3 M2
2223 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5 M4 M7 M6
2224 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9 M8 M11 M10
2225 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
2227 MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
2228 MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
2229 MOVAPS_xmm_xmm( 4, 6 );
2230 MOVAPS_xmm_xmm( 5, 7 );
2231 MOVLHPS_xmm_xmm( 4, 4 ); // V1 V1 V1 V1
2232 MOVHLPS_xmm_xmm( 6, 6 ); // V3 V3 V3 V3
2233 MOVLHPS_xmm_xmm( 5, 5 ); // V0 V0 V0 V0
2234 MOVHLPS_xmm_xmm( 7, 7 ); // V2 V2 V2 V2
2235 MULPS_xmm_xmm( 0, 4 );
2236 MULPS_xmm_xmm( 1, 5 );
2237 MULPS_xmm_xmm( 2, 6 );
2238 MULPS_xmm_xmm( 3, 7 );
2239 ADDPS_xmm_xmm( 5, 4 );
2240 ADDPS_xmm_xmm( 7, 6 );
2241 ADDPS_xmm_xmm( 6, 4 );
2242 MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
2243 } else {
2244 LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
2245 call_func1( sh4_ftrv, R_EAX );
2246 }
2247 }
2248 sh4_x86.tstate = TSTATE_NONE;
2249 :}
2251 FRCHG {:
2252 COUNT_INST(I_FRCHG);
2253 check_fpuen();
2254 XOR_imm32_sh4r( FPSCR_FR, R_FPSCR );
2255 call_func0( sh4_switch_fr_banks );
2256 sh4_x86.tstate = TSTATE_NONE;
2257 :}
2258 FSCHG {:
2259 COUNT_INST(I_FSCHG);
2260 check_fpuen();
2261 XOR_imm32_sh4r( FPSCR_SZ, R_FPSCR);
2262 XOR_imm32_sh4r( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
2263 sh4_x86.tstate = TSTATE_NONE;
2264 sh4_x86.double_size = !sh4_x86.double_size;
2265 :}
2267 /* Processor control instructions */
2268 LDC Rm, SR {:
2269 COUNT_INST(I_LDCSR);
2270 if( sh4_x86.in_delay_slot ) {
2271 SLOTILLEGAL();
2272 } else {
2273 check_priv();
2274 load_reg( R_EAX, Rm );
2275 call_func1( sh4_write_sr, R_EAX );
2276 sh4_x86.fpuen_checked = FALSE;
2277 sh4_x86.tstate = TSTATE_NONE;
2278 return 2;
2279 }
2280 :}
2281 LDC Rm, GBR {:
2282 COUNT_INST(I_LDC);
2283 load_reg( R_EAX, Rm );
2284 store_spreg( R_EAX, R_GBR );
2285 :}
2286 LDC Rm, VBR {:
2287 COUNT_INST(I_LDC);
2288 check_priv();
2289 load_reg( R_EAX, Rm );
2290 store_spreg( R_EAX, R_VBR );
2291 sh4_x86.tstate = TSTATE_NONE;
2292 :}
2293 LDC Rm, SSR {:
2294 COUNT_INST(I_LDC);
2295 check_priv();
2296 load_reg( R_EAX, Rm );
2297 store_spreg( R_EAX, R_SSR );
2298 sh4_x86.tstate = TSTATE_NONE;
2299 :}
2300 LDC Rm, SGR {:
2301 COUNT_INST(I_LDC);
2302 check_priv();
2303 load_reg( R_EAX, Rm );
2304 store_spreg( R_EAX, R_SGR );
2305 sh4_x86.tstate = TSTATE_NONE;
2306 :}
2307 LDC Rm, SPC {:
2308 COUNT_INST(I_LDC);
2309 check_priv();
2310 load_reg( R_EAX, Rm );
2311 store_spreg( R_EAX, R_SPC );
2312 sh4_x86.tstate = TSTATE_NONE;
2313 :}
2314 LDC Rm, DBR {:
2315 COUNT_INST(I_LDC);
2316 check_priv();
2317 load_reg( R_EAX, Rm );
2318 store_spreg( R_EAX, R_DBR );
2319 sh4_x86.tstate = TSTATE_NONE;
2320 :}
2321 LDC Rm, Rn_BANK {:
2322 COUNT_INST(I_LDC);
2323 check_priv();
2324 load_reg( R_EAX, Rm );
2325 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2326 sh4_x86.tstate = TSTATE_NONE;
2327 :}
/* LDC.L @Rm+, reg: load a control register from memory with post-increment.
 * Common shape: check 4-byte alignment, emit the read, and only then emit
 * the increment of Rm -- presumably so Rm is still unmodified if the read
 * raises an MMU exception and the instruction is restarted (TODO: confirm
 * against the exception/backpatch machinery). */
2328 LDC.L @Rm+, GBR {:      /* GBR is user-accessible: no check_priv() */
2329 COUNT_INST(I_LDCM);
2330 load_reg( R_EAX, Rm );
2331 check_ralign32( R_EAX );
2332 MEM_READ_LONG( R_EAX, R_EAX );
2333 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2334 store_spreg( R_EAX, R_GBR );
2335 sh4_x86.tstate = TSTATE_NONE;
2336 :}
/* LDC.L @Rm+, SR: as with LDC Rm, SR -- privileged, slot-illegal, goes
 * through sh4_write_sr() and ends the translation block (return 2). */
2337 LDC.L @Rm+, SR {:
2338 COUNT_INST(I_LDCSRM);
2339 if( sh4_x86.in_delay_slot ) {
2340 SLOTILLEGAL();
2341 } else {
2342 check_priv();
2343 load_reg( R_EAX, Rm );
2344 check_ralign32( R_EAX );
2345 MEM_READ_LONG( R_EAX, R_EAX );
2346 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2347 call_func1( sh4_write_sr, R_EAX );
2348 sh4_x86.fpuen_checked = FALSE;   /* SR.FD may have changed */
2349 sh4_x86.tstate = TSTATE_NONE;
2350 return 2;
2351 }
2352 :}
/* Remaining LDC.L targets are privileged. */
2353 LDC.L @Rm+, VBR {:
2354 COUNT_INST(I_LDCM);
2355 check_priv();
2356 load_reg( R_EAX, Rm );
2357 check_ralign32( R_EAX );
2358 MEM_READ_LONG( R_EAX, R_EAX );
2359 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2360 store_spreg( R_EAX, R_VBR );
2361 sh4_x86.tstate = TSTATE_NONE;
2362 :}
2363 LDC.L @Rm+, SSR {:
2364 COUNT_INST(I_LDCM);
2365 check_priv();
2366 load_reg( R_EAX, Rm );
2367 check_ralign32( R_EAX );
2368 MEM_READ_LONG( R_EAX, R_EAX );
2369 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2370 store_spreg( R_EAX, R_SSR );
2371 sh4_x86.tstate = TSTATE_NONE;
2372 :}
2373 LDC.L @Rm+, SGR {:
2374 COUNT_INST(I_LDCM);
2375 check_priv();
2376 load_reg( R_EAX, Rm );
2377 check_ralign32( R_EAX );
2378 MEM_READ_LONG( R_EAX, R_EAX );
2379 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2380 store_spreg( R_EAX, R_SGR );
2381 sh4_x86.tstate = TSTATE_NONE;
2382 :}
2383 LDC.L @Rm+, SPC {:
2384 COUNT_INST(I_LDCM);
2385 check_priv();
2386 load_reg( R_EAX, Rm );
2387 check_ralign32( R_EAX );
2388 MEM_READ_LONG( R_EAX, R_EAX );
2389 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2390 store_spreg( R_EAX, R_SPC );
2391 sh4_x86.tstate = TSTATE_NONE;
2392 :}
2393 LDC.L @Rm+, DBR {:
2394 COUNT_INST(I_LDCM);
2395 check_priv();
2396 load_reg( R_EAX, Rm );
2397 check_ralign32( R_EAX );
2398 MEM_READ_LONG( R_EAX, R_EAX );
2399 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2400 store_spreg( R_EAX, R_DBR );
2401 sh4_x86.tstate = TSTATE_NONE;
2402 :}
2403 LDC.L @Rm+, Rn_BANK {:
2404 COUNT_INST(I_LDCM);
2405 check_priv();
2406 load_reg( R_EAX, Rm );
2407 check_ralign32( R_EAX );
2408 MEM_READ_LONG( R_EAX, R_EAX );
2409 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2410 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2411 sh4_x86.tstate = TSTATE_NONE;
2412 :}
/* LDS Rm, FPSCR: FPSCR writes go through sh4_write_fpscr() (bank/size
 * state may change), and the translation block is terminated (return 2)
 * since subsequent FP instructions may need retranslation. */
2413 LDS Rm, FPSCR {:
2414 COUNT_INST(I_LDSFPSCR);
2415 check_fpuen();
2416 load_reg( R_EAX, Rm );
2417 call_func1( sh4_write_fpscr, R_EAX );
2418 sh4_x86.tstate = TSTATE_NONE;
2419 return 2;
2420 :}
/* LDS.L @Rm+, FPSCR: memory-sourced variant; Rm is incremented only
 * after the read is emitted (see LDC.L note). */
2421 LDS.L @Rm+, FPSCR {:
2422 COUNT_INST(I_LDSFPSCRM);
2423 check_fpuen();
2424 load_reg( R_EAX, Rm );
2425 check_ralign32( R_EAX );
2426 MEM_READ_LONG( R_EAX, R_EAX );
2427 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2428 call_func1( sh4_write_fpscr, R_EAX );
2429 sh4_x86.tstate = TSTATE_NONE;
2430 return 2;
2431 :}
/* LDS Rm, FPUL: simple MOV into FPUL.
 * NOTE(review): no tstate reset here although check_fpuen() may emit a
 * flag-clobbering test on its first use in a block -- confirm that
 * check_fpuen() resets sh4_x86.tstate internally when it emits code. */
2432 LDS Rm, FPUL {:
2433 COUNT_INST(I_LDS);
2434 check_fpuen();
2435 load_reg( R_EAX, Rm );
2436 store_spreg( R_EAX, R_FPUL );
2437 :}
2438 LDS.L @Rm+, FPUL {:
2439 COUNT_INST(I_LDSM);
2440 check_fpuen();
2441 load_reg( R_EAX, Rm );
2442 check_ralign32( R_EAX );
2443 MEM_READ_LONG( R_EAX, R_EAX );
2444 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2445 store_spreg( R_EAX, R_FPUL );
2446 sh4_x86.tstate = TSTATE_NONE;
2447 :}
/* LDS Rm, {MACH,MACL,PR}: unprivileged register-to-register moves.
 * Pure MOVs preserve host flags, so the register forms keep tstate;
 * the @Rm+ forms emit memory access + ADD and must reset it. */
2448 LDS Rm, MACH {:
2449 COUNT_INST(I_LDS);
2450 load_reg( R_EAX, Rm );
2451 store_spreg( R_EAX, R_MACH );
2452 :}
2453 LDS.L @Rm+, MACH {:
2454 COUNT_INST(I_LDSM);
2455 load_reg( R_EAX, Rm );
2456 check_ralign32( R_EAX );
2457 MEM_READ_LONG( R_EAX, R_EAX );
2458 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2459 store_spreg( R_EAX, R_MACH );
2460 sh4_x86.tstate = TSTATE_NONE;
2461 :}
2462 LDS Rm, MACL {:
2463 COUNT_INST(I_LDS);
2464 load_reg( R_EAX, Rm );
2465 store_spreg( R_EAX, R_MACL );
2466 :}
2467 LDS.L @Rm+, MACL {:
2468 COUNT_INST(I_LDSM);
2469 load_reg( R_EAX, Rm );
2470 check_ralign32( R_EAX );
2471 MEM_READ_LONG( R_EAX, R_EAX );
2472 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2473 store_spreg( R_EAX, R_MACL );
2474 sh4_x86.tstate = TSTATE_NONE;
2475 :}
2476 LDS Rm, PR {:
2477 COUNT_INST(I_LDS);
2478 load_reg( R_EAX, Rm );
2479 store_spreg( R_EAX, R_PR );
2480 :}
2481 LDS.L @Rm+, PR {:
2482 COUNT_INST(I_LDSM);
2483 load_reg( R_EAX, Rm );
2484 check_ralign32( R_EAX );
2485 MEM_READ_LONG( R_EAX, R_EAX );
2486 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2487 store_spreg( R_EAX, R_PR );
2488 sh4_x86.tstate = TSTATE_NONE;
2489 :}
/* LDTLB: load the UTLB entry selected by MMUCR.URC; delegated to MMU_ldtlb(). */
2490 LDTLB {:
2491 COUNT_INST(I_LDTLB);
2492 call_func0( MMU_ldtlb );
2493 sh4_x86.tstate = TSTATE_NONE;
2494 :}
/* Cache-block operations: the operand cache is not modelled, so
 * OCBI/OCBP/OCBWB emit no code beyond instruction counting. */
2495 OCBI @Rn {:
2496 COUNT_INST(I_OCBI);
2497 :}
2498 OCBP @Rn {:
2499 COUNT_INST(I_OCBP);
2500 :}
2501 OCBWB @Rn {:
2502 COUNT_INST(I_OCBWB);
2503 :}
/* PREF @Rn: emitted as a real prefetch -- needed because PREF to the
 * store-queue area (0xE0000000-0xE3FFFFFF) has architectural effects. */
2504 PREF @Rn {:
2505 COUNT_INST(I_PREF);
2506 load_reg( R_EAX, Rn );
2507 MEM_PREFETCH( R_EAX );
2508 sh4_x86.tstate = TSTATE_NONE;
2509 :}
/* SLEEP: privileged; halts the core via sh4_sleep() and ends the
 * translation block (return 2). */
2510 SLEEP {:
2511 COUNT_INST(I_SLEEP);
2512 check_priv();
2513 call_func0( sh4_sleep );
2514 sh4_x86.tstate = TSTATE_NONE;
2515 sh4_x86.in_delay_slot = DELAY_NONE;
2516 return 2;
2517 :}
/* STC SR, Rn: SR is assembled from its cached components by
 * sh4_read_sr(), which returns the value in EAX. Privileged. */
2518 STC SR, Rn {:
2519 COUNT_INST(I_STCSR);
2520 check_priv();
2521 call_func0(sh4_read_sr);
2522 store_reg( R_EAX, Rn );
2523 sh4_x86.tstate = TSTATE_NONE;
2524 :}
/* STC GBR, Rn: GBR is user-accessible -- no check_priv(), and the
 * plain MOVs preserve host flags so tstate is kept. */
2525 STC GBR, Rn {:
2526 COUNT_INST(I_STC);
2527 load_spreg( R_EAX, R_GBR );
2528 store_reg( R_EAX, Rn );
2529 :}
/* Remaining STC sources are privileged registers. */
2530 STC VBR, Rn {:
2531 COUNT_INST(I_STC);
2532 check_priv();
2533 load_spreg( R_EAX, R_VBR );
2534 store_reg( R_EAX, Rn );
2535 sh4_x86.tstate = TSTATE_NONE;
2536 :}
2537 STC SSR, Rn {:
2538 COUNT_INST(I_STC);
2539 check_priv();
2540 load_spreg( R_EAX, R_SSR );
2541 store_reg( R_EAX, Rn );
2542 sh4_x86.tstate = TSTATE_NONE;
2543 :}
2544 STC SPC, Rn {:
2545 COUNT_INST(I_STC);
2546 check_priv();
2547 load_spreg( R_EAX, R_SPC );
2548 store_reg( R_EAX, Rn );
2549 sh4_x86.tstate = TSTATE_NONE;
2550 :}
2551 STC SGR, Rn {:
2552 COUNT_INST(I_STC);
2553 check_priv();
2554 load_spreg( R_EAX, R_SGR );
2555 store_reg( R_EAX, Rn );
2556 sh4_x86.tstate = TSTATE_NONE;
2557 :}
2558 STC DBR, Rn {:
2559 COUNT_INST(I_STC);
2560 check_priv();
2561 load_spreg( R_EAX, R_DBR );
2562 store_reg( R_EAX, Rn );
2563 sh4_x86.tstate = TSTATE_NONE;
2564 :}
/* STC Rm_BANK, Rn: read one of the inactive bank registers. */
2565 STC Rm_BANK, Rn {:
2566 COUNT_INST(I_STC);
2567 check_priv();
2568 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2569 store_reg( R_EAX, Rn );
2570 sh4_x86.tstate = TSTATE_NONE;
2571 :}
/* STC.L reg, @-Rn: push a control register with pre-decrement.
 * Common shape: compute Rn-4 in a scratch register, emit the write, and
 * only then emit the decrement of the architectural Rn -- presumably so
 * Rn is unmodified if the write faults and the instruction restarts
 * (TODO: confirm against the exception/backpatch machinery). */
/* STC.L SR: differs from the others because sh4_read_sr() returns the
 * value in EAX, so it is saved to EDX first, then the address is built
 * in EAX (via LEA rather than ADD). */
2572 STC.L SR, @-Rn {:
2573 COUNT_INST(I_STCSRM);
2574 check_priv();
2575 call_func0( sh4_read_sr );
2576 MOV_r32_r32( R_EAX, R_EDX );
2577 load_reg( R_EAX, Rn );
2578 check_walign32( R_EAX );    /* long store must be 4-byte aligned */
2579 LEA_r32disp8_r32( R_EAX, -4, R_EAX );
2580 MEM_WRITE_LONG( R_EAX, R_EDX );
2581 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2582 sh4_x86.tstate = TSTATE_NONE;
2583 :}
2584 STC.L VBR, @-Rn {:
2585 COUNT_INST(I_STCM);
2586 check_priv();
2587 load_reg( R_EAX, Rn );
2588 check_walign32( R_EAX );
2589 ADD_imm8s_r32( -4, R_EAX );
2590 load_spreg( R_EDX, R_VBR );
2591 MEM_WRITE_LONG( R_EAX, R_EDX );
2592 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2593 sh4_x86.tstate = TSTATE_NONE;
2594 :}
2595 STC.L SSR, @-Rn {:
2596 COUNT_INST(I_STCM);
2597 check_priv();
2598 load_reg( R_EAX, Rn );
2599 check_walign32( R_EAX );
2600 ADD_imm8s_r32( -4, R_EAX );
2601 load_spreg( R_EDX, R_SSR );
2602 MEM_WRITE_LONG( R_EAX, R_EDX );
2603 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2604 sh4_x86.tstate = TSTATE_NONE;
2605 :}
2606 STC.L SPC, @-Rn {:
2607 COUNT_INST(I_STCM);
2608 check_priv();
2609 load_reg( R_EAX, Rn );
2610 check_walign32( R_EAX );
2611 ADD_imm8s_r32( -4, R_EAX );
2612 load_spreg( R_EDX, R_SPC );
2613 MEM_WRITE_LONG( R_EAX, R_EDX );
2614 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2615 sh4_x86.tstate = TSTATE_NONE;
2616 :}
2617 STC.L SGR, @-Rn {:
2618 COUNT_INST(I_STCM);
2619 check_priv();
2620 load_reg( R_EAX, Rn );
2621 check_walign32( R_EAX );
2622 ADD_imm8s_r32( -4, R_EAX );
2623 load_spreg( R_EDX, R_SGR );
2624 MEM_WRITE_LONG( R_EAX, R_EDX );
2625 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2626 sh4_x86.tstate = TSTATE_NONE;
2627 :}
2628 STC.L DBR, @-Rn {:
2629 COUNT_INST(I_STCM);
2630 check_priv();
2631 load_reg( R_EAX, Rn );
2632 check_walign32( R_EAX );
2633 ADD_imm8s_r32( -4, R_EAX );
2634 load_spreg( R_EDX, R_DBR );
2635 MEM_WRITE_LONG( R_EAX, R_EDX );
2636 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2637 sh4_x86.tstate = TSTATE_NONE;
2638 :}
2639 STC.L Rm_BANK, @-Rn {:
2640 COUNT_INST(I_STCM);
2641 check_priv();
2642 load_reg( R_EAX, Rn );
2643 check_walign32( R_EAX );
2644 ADD_imm8s_r32( -4, R_EAX );
2645 load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
2646 MEM_WRITE_LONG( R_EAX, R_EDX );
2647 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2648 sh4_x86.tstate = TSTATE_NONE;
2649 :}
/* STC.L GBR is the only user-mode member of this group: no check_priv(). */
2650 STC.L GBR, @-Rn {:
2651 COUNT_INST(I_STCM);
2652 load_reg( R_EAX, Rn );
2653 check_walign32( R_EAX );
2654 ADD_imm8s_r32( -4, R_EAX );
2655 load_spreg( R_EDX, R_GBR );
2656 MEM_WRITE_LONG( R_EAX, R_EDX );
2657 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2658 sh4_x86.tstate = TSTATE_NONE;
2659 :}
/* STS FPSCR, Rn: read FPSCR into a general register. Unlike writes to
 * FPSCR, reads need no helper call.
 * NOTE(review): no tstate reset although check_fpuen() may emit a
 * flag-clobbering test on its first use in a block -- confirm that
 * check_fpuen() resets sh4_x86.tstate internally when it emits code. */
2660 STS FPSCR, Rn {:
2661 COUNT_INST(I_STSFPSCR);
2662 check_fpuen();
2663 load_spreg( R_EAX, R_FPSCR );
2664 store_reg( R_EAX, Rn );
2665 :}
/* STS.L FPSCR, @-Rn: pre-decrement store; Rn is only decremented after
 * the write is emitted (see STC.L note). */
2666 STS.L FPSCR, @-Rn {:
2667 COUNT_INST(I_STSFPSCRM);
2668 check_fpuen();
2669 load_reg( R_EAX, Rn );
2670 check_walign32( R_EAX );
2671 ADD_imm8s_r32( -4, R_EAX );
2672 load_spreg( R_EDX, R_FPSCR );
2673 MEM_WRITE_LONG( R_EAX, R_EDX );
2674 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2675 sh4_x86.tstate = TSTATE_NONE;
2676 :}
2677 STS FPUL, Rn {:
2678 COUNT_INST(I_STS);
2679 check_fpuen();
2680 load_spreg( R_EAX, R_FPUL );
2681 store_reg( R_EAX, Rn );
2682 :}
2683 STS.L FPUL, @-Rn {:
2684 COUNT_INST(I_STSM);
2685 check_fpuen();
2686 load_reg( R_EAX, Rn );
2687 check_walign32( R_EAX );
2688 ADD_imm8s_r32( -4, R_EAX );
2689 load_spreg( R_EDX, R_FPUL );
2690 MEM_WRITE_LONG( R_EAX, R_EDX );
2691 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2692 sh4_x86.tstate = TSTATE_NONE;
2693 :}
/* STS/STS.L {MACH,MACL,PR}: unprivileged system-register stores.
 * Register forms are pure MOVs (host flags preserved, tstate kept);
 * the @-Rn forms follow the usual pre-decrement store shape with the
 * architectural Rn decremented only after the write is emitted. */
2694 STS MACH, Rn {:
2695 COUNT_INST(I_STS);
2696 load_spreg( R_EAX, R_MACH );
2697 store_reg( R_EAX, Rn );
2698 :}
2699 STS.L MACH, @-Rn {:
2700 COUNT_INST(I_STSM);
2701 load_reg( R_EAX, Rn );
2702 check_walign32( R_EAX );
2703 ADD_imm8s_r32( -4, R_EAX );
2704 load_spreg( R_EDX, R_MACH );
2705 MEM_WRITE_LONG( R_EAX, R_EDX );
2706 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2707 sh4_x86.tstate = TSTATE_NONE;
2708 :}
2709 STS MACL, Rn {:
2710 COUNT_INST(I_STS);
2711 load_spreg( R_EAX, R_MACL );
2712 store_reg( R_EAX, Rn );
2713 :}
2714 STS.L MACL, @-Rn {:
2715 COUNT_INST(I_STSM);
2716 load_reg( R_EAX, Rn );
2717 check_walign32( R_EAX );
2718 ADD_imm8s_r32( -4, R_EAX );
2719 load_spreg( R_EDX, R_MACL );
2720 MEM_WRITE_LONG( R_EAX, R_EDX );
2721 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2722 sh4_x86.tstate = TSTATE_NONE;
2723 :}
2724 STS PR, Rn {:
2725 COUNT_INST(I_STS);
2726 load_spreg( R_EAX, R_PR );
2727 store_reg( R_EAX, Rn );
2728 :}
2729 STS.L PR, @-Rn {:
2730 COUNT_INST(I_STSM);
2731 load_reg( R_EAX, Rn );
2732 check_walign32( R_EAX );
2733 ADD_imm8s_r32( -4, R_EAX );
2734 load_spreg( R_EDX, R_PR );
2735 MEM_WRITE_LONG( R_EAX, R_EDX );
2736 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2737 sh4_x86.tstate = TSTATE_NONE;
2738 :}
/* NOP: counted for statistics but emits no host code. */
2740 NOP {:
2741 COUNT_INST(I_NOP);
2742 /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
2743 :}
2744 %%
/* Generator trailer: emitted after the generated decoder. Reached when
 * translation of the instruction completes without an explicit return
 * from a rule body above; clears any delay-slot state and returns 0
 * (NOTE(review): 0 appears to mean "continue translating this block",
 * vs. the 2 returned by block-terminating rules -- confirm against the
 * translator driver). */
2745 sh4_x86.in_delay_slot = DELAY_NONE;
2746 return 0;
2747 }
.