filename | src/sh4/sh4x86.in |
changeset | 926:68f3e0fe02f1 |
author | nkeynes |
date | Sun Dec 14 07:50:48 2008 +0000 (15 years ago) |
last change | Setup a 'proper' stackframe in translated blocks. This doesn't affect performance noticeably, but does ensure that a) The stack is aligned correctly on OS X with no extra effort, and b) We can't mess up the stack and crash that way anymore. Replace all PUSH/POP instructions (outside of prologue/epilogue) with ESP-rel moves to stack local variables. Finally merge ia32mac and ia32abi together, since they're pretty much the same now anyway (and thereby simplifying maintenance a good deal) |
1 /**
2 * $Id$
3 *
4 * SH4 => x86 translation. This version does no real optimization, it just
5 * outputs straight-line x86 code - it mainly exists to provide a baseline
6 * to test the optimizing versions against.
7 *
8 * Copyright (c) 2007 Nathan Keynes.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 */
21 #include <assert.h>
22 #include <math.h>
24 #ifndef NDEBUG
25 #define DEBUG_JUMPS 1
26 #endif
28 #include "lxdream.h"
29 #include "sh4/xltcache.h"
30 #include "sh4/sh4core.h"
31 #include "sh4/sh4trans.h"
32 #include "sh4/sh4stat.h"
33 #include "sh4/sh4mmio.h"
34 #include "sh4/x86op.h"
35 #include "clock.h"
37 #define DEFAULT_BACKPATCH_SIZE 4096
39 struct backpatch_record {
40 uint32_t fixup_offset;
41 uint32_t fixup_icount;
42 int32_t exc_code;
43 };
45 #define DELAY_NONE 0
46 #define DELAY_PC 1
47 #define DELAY_PC_PR 2
49 /**
50 * Struct to manage internal translation state. This state is not saved -
51 * it is only valid between calls to sh4_translate_begin_block() and
52 * sh4_translate_end_block()
53 */
54 struct sh4_x86_state {
55 int in_delay_slot;
56 gboolean priv_checked; /* true if we've already checked the cpu mode. */
57 gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
58 gboolean branch_taken; /* true if we branched unconditionally */
59 gboolean double_prec; /* true if FPU is in double-precision mode */
60 gboolean double_size; /* true if FPU is in double-size mode */
61 gboolean sse3_enabled; /* true if host supports SSE3 instructions */
62 uint32_t block_start_pc;
63 uint32_t stack_posn; /* Trace stack height for alignment purposes */
64 int tstate;
66 /* mode flags */
67 gboolean tlb_on; /* True if tlb translation is active */
69 /* Allocated memory for the (block-wide) back-patch list */
70 struct backpatch_record *backpatch_list;
71 uint32_t backpatch_posn;
72 uint32_t backpatch_size;
73 };
75 #define TSTATE_NONE -1
76 #define TSTATE_O 0
77 #define TSTATE_C 2
78 #define TSTATE_E 4
79 #define TSTATE_NE 5
80 #define TSTATE_G 0xF
81 #define TSTATE_GE 0xD
82 #define TSTATE_A 7
83 #define TSTATE_AE 3
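/* These values are chosen to match the x86 condition-code encoding, so that
 * OP(0x70+tstate) emits the corresponding Jcc instruction directly and
 * (tstate^1) yields the inverted condition. A sketch of the mapping (for
 * reference, not emitted code):
 *
 *   TSTATE_E  == 4   =>  0x70+4  = 0x74 = JE    0x70+(4^1)  = 0x75 = JNE
 *   TSTATE_G  == 0xF =>  0x70+15 = 0x7F = JG    0x70+(15^1) = 0x7E = JLE
 *   TSTATE_A  == 7   =>  0x70+7  = 0x77 = JA    0x70+(7^1)  = 0x76 = JBE
 */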
85 #ifdef ENABLE_SH4STATS
86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
87 #else
88 #define COUNT_INST(id)
89 #endif
91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
93 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
94 OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
98 CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
99 OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
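/* Illustrative use of the tstate cache: immediately after CMP/EQ has been
 * translated, sh4_x86.tstate == TSTATE_E and the comparison result is still
 * live in EFLAGS, so JT_rel8(label) emits a bare 2-byte "jz" with no memory
 * access. When tstate is TSTATE_NONE, the macros first re-materialise T
 * with CMP_imm8s_sh4r( 1, R_T ) and then branch on (in)equality. */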
101 static struct sh4_x86_state sh4_x86;
103 static uint32_t max_int = 0x7FFFFFFF;
104 static uint32_t min_int = 0x80000000;
105 static uint32_t save_fcw; /* save value for fpu control word */
106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
108 gboolean is_sse3_supported()
109 {
110 uint32_t features;
112 __asm__ __volatile__(
113 "mov $0x01, %%eax\n\t"
114 "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
115 return (features & 1) ? TRUE : FALSE;
116 }
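/* This queries CPUID leaf 1 (EAX=1); bit 0 of the returned ECX is the SSE3
 * ("PNI") feature flag, which is all we test here. EAX, EBX and EDX are
 * listed as clobbers because CPUID overwrites all four registers. */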
118 void sh4_translate_init(void)
119 {
120 sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
121 sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
122 sh4_x86.sse3_enabled = is_sse3_supported();
123 }
126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
127 {
128 if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
129 sh4_x86.backpatch_size <<= 1;
130 sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
131 sh4_x86.backpatch_size * sizeof(struct backpatch_record));
132 assert( sh4_x86.backpatch_list != NULL );
133 }
134 if( sh4_x86.in_delay_slot ) {
135 fixup_pc -= 2;
136 }
137 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
138 ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
139 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
140 sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
141 sh4_x86.backpatch_posn++;
142 }
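/* A sketch of how the record fields are meant to be read (naming per the
 * struct above): fixup_offset is the byte offset of the 32-bit branch
 * operand within the translated block; fixup_icount recovers the SH4 PC of
 * the faulting instruction as block_start_pc + 2*fixup_icount; exc_code
 * selects the exception to raise, with -1 presumably meaning "the callee
 * has already raised it, just exit". The fixup_pc -= 2 adjustment makes a
 * fault in a delay slot report the PC of the owning branch. */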
144 /**
145 * Emit an instruction to load an SH4 reg into a real register
146 */
147 static inline void load_reg( int x86reg, int sh4reg )
148 {
149 /* mov [bp+n], reg */
150 OP(0x8B);
151 OP(0x45 + (x86reg<<3));
152 OP(REG_OFFSET(r[sh4reg]));
153 }
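/* Worked encoding example (a sketch, not emitted here): load_reg( R_EAX, 1 )
 * produces the bytes 8B 45 xx, ie "mov eax, [ebp+disp8]" - ModRM 0x45 is
 * mod=01 (disp8), reg=EAX, rm=EBP, and xx = REG_OFFSET(r[1]). This assumes
 * EBP holds the address of the sh4r register block for the lifetime of the
 * translated block, and that the offset fits in a signed byte. */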
155 static inline void load_reg16s( int x86reg, int sh4reg )
156 {
157 OP(0x0F);
158 OP(0xBF);
159 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
160 }
162 static inline void load_reg16u( int x86reg, int sh4reg )
163 {
164 OP(0x0F);
165 OP(0xB7);
166 MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
168 }
170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
172 /**
173 * Emit an instruction to load an immediate value into a register
174 */
175 static inline void load_imm32( int x86reg, uint32_t value ) {
176 /* mov #value, reg */
177 OP(0xB8 + x86reg);
178 OP32(value);
179 }
181 /**
182 * Load an immediate 64-bit quantity (note: x86-64 only)
183 */
184 static inline void load_imm64( int x86reg, uint64_t value ) {
185 /* mov #value, reg */
186 REXW();
187 OP(0xB8 + x86reg);
188 OP64(value);
189 }
191 /**
192 * Emit an instruction to store an SH4 reg (RN)
193 */
194 static inline void store_reg( int x86reg, int sh4reg ) {
195 /* mov reg, [bp+n] */
196 OP(0x89);
197 OP(0x45 + (x86reg<<3));
198 OP(REG_OFFSET(r[sh4reg]));
199 }
201 /**
202 * Load an FR register (single-precision floating point) into an integer x86
203 * register (eg for register-to-register moves)
204 */
205 #define load_fr(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
206 #define load_xf(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
208 /**
209 * Load the low half of a DR register (DR or XD) into an integer x86 register
210 */
211 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
212 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
214 /**
215 * Store an FR register (single-precision floating point) from an integer x86
216 * register (eg for register-to-register moves)
217 */
218 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
219 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
221 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
222 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
225 #define push_fpul() FLDF_sh4r(R_FPUL)
226 #define pop_fpul() FSTPF_sh4r(R_FPUL)
227 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
228 #define pop_fr(frm) FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
229 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
230 #define pop_xf(frm) FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
231 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
232 #define pop_dr(frm) FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
233 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
234 #define pop_xdr(frm) FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
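/* A note on the (frm)^1 indexing above, as suggested by the DR macros: each
 * aligned pair of singles appears to be kept in host little-endian 64-bit
 * word order so that a double can be loaded in one access, which leaves the
 * two singles of a pair swapped relative to their SH4 numbering - FRn lives
 * at fr[bank][n^1], while DRn's halves are fr[bank][n|0x01] (low) and
 * fr[bank][n&0x0E] (high). */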
238 /* Exception checks - Note that all exception checks will clobber EAX */
240 #define check_priv( ) \
241 if( !sh4_x86.priv_checked ) { \
242 sh4_x86.priv_checked = TRUE;\
243 load_spreg( R_EAX, R_SR );\
244 AND_imm32_r32( SR_MD, R_EAX );\
245 if( sh4_x86.in_delay_slot ) {\
246 JE_exc( EXC_SLOT_ILLEGAL );\
247 } else {\
248 JE_exc( EXC_ILLEGAL );\
249 }\
250 sh4_x86.tstate = TSTATE_NONE; \
251 }\
253 #define check_fpuen( ) \
254 if( !sh4_x86.fpuen_checked ) {\
255 sh4_x86.fpuen_checked = TRUE;\
256 load_spreg( R_EAX, R_SR );\
257 AND_imm32_r32( SR_FD, R_EAX );\
258 if( sh4_x86.in_delay_slot ) {\
259 JNE_exc(EXC_SLOT_FPU_DISABLED);\
260 } else {\
261 JNE_exc(EXC_FPU_DISABLED);\
262 }\
263 sh4_x86.tstate = TSTATE_NONE; \
264 }
266 #define check_ralign16( x86reg ) \
267 TEST_imm32_r32( 0x00000001, x86reg ); \
268 JNE_exc(EXC_DATA_ADDR_READ)
270 #define check_walign16( x86reg ) \
271 TEST_imm32_r32( 0x00000001, x86reg ); \
272 JNE_exc(EXC_DATA_ADDR_WRITE);
274 #define check_ralign32( x86reg ) \
275 TEST_imm32_r32( 0x00000003, x86reg ); \
276 JNE_exc(EXC_DATA_ADDR_READ)
278 #define check_walign32( x86reg ) \
279 TEST_imm32_r32( 0x00000003, x86reg ); \
280 JNE_exc(EXC_DATA_ADDR_WRITE);
282 #define check_ralign64( x86reg ) \
283 TEST_imm32_r32( 0x00000007, x86reg ); \
284 JNE_exc(EXC_DATA_ADDR_READ)
286 #define check_walign64( x86reg ) \
287 TEST_imm32_r32( 0x00000007, x86reg ); \
288 JNE_exc(EXC_DATA_ADDR_WRITE);
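/* Worked example (sketch): translating MOV.L @Rm, Rn with r[Rm] = 0x8C000002
 * emits TEST $3, %eax via check_ralign32; 0x8C000002 & 3 == 2, so at runtime
 * the JNE is taken and control leaves through the EXC_DATA_ADDR_READ
 * backpatch stub. An aligned address falls straight through, costing only
 * the test itself. */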
290 #define UNDEF(ir)
291 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
292 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
293 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
294 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
295 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
296 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
297 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
299 /**
300 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
301 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
302 */
303 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
305 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
306 /**
307 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
308 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
309 */
310 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
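/* Typical emission sequence for a store (mirrors the MOV.L handlers below):
 *
 *   load_reg( R_EAX, Rn );           // virtual address
 *   check_walign32( R_EAX );         // alignment check first
 *   MMU_TRANSLATE_WRITE( R_EAX );    // no-op when the TLB is off
 *   load_reg( R_EDX, Rm );
 *   MEM_WRITE_LONG( R_EAX, R_EDX );
 *
 * On failure mmu_vma_to_phys_write returns MMU_VMA_ERROR in EAX, and the
 * JE_exc(-1) exits through the backpatch list - the -1 exception code
 * presumably signalling that the MMU routine has already raised it. */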
312 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
314 /****** Import appropriate calling conventions ******/
315 #if SIZEOF_VOID_P == 8
316 #include "sh4/ia64abi.h"
317 #else /* 32-bit system */
318 #include "sh4/ia32abi.h"
319 #endif
321 void sh4_translate_begin_block( sh4addr_t pc )
322 {
323 enter_block();
324 sh4_x86.in_delay_slot = FALSE;
325 sh4_x86.priv_checked = FALSE;
326 sh4_x86.fpuen_checked = FALSE;
327 sh4_x86.branch_taken = FALSE;
328 sh4_x86.backpatch_posn = 0;
329 sh4_x86.block_start_pc = pc;
330 sh4_x86.tlb_on = IS_MMU_ENABLED();
331 sh4_x86.tstate = TSTATE_NONE;
332 sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
333 sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
334 }
337 uint32_t sh4_translate_end_block_size()
338 {
339 if( sh4_x86.backpatch_posn <= 3 ) {
340 return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
341 } else {
342 return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
343 }
344 }
347 /**
348 * Embed a breakpoint into the generated code
349 */
350 void sh4_translate_emit_breakpoint( sh4vma_t pc )
351 {
352 load_imm32( R_EAX, pc );
353 call_func1( sh4_translate_breakpoint_hit, R_EAX );
354 sh4_x86.tstate = TSTATE_NONE;
355 }
358 #define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
360 /**
361 * Embed a call to sh4_execute_instruction for situations that we
362 * can't translate (just page-crossing delay slots at the moment).
363 * Caller is responsible for setting new_pc before calling this function.
364 *
365 * Performs:
366 * Set PC = endpc
367 * Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
368 * Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
369 * Call sh4_execute_instruction
370 * Call xlat_get_code_by_vma / xlat_get_code as for normal exit
371 */
372 void exit_block_emu( sh4vma_t endpc )
373 {
374 load_imm32( R_ECX, endpc - sh4_x86.block_start_pc ); // 5
375 ADD_r32_sh4r( R_ECX, R_PC );
377 load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
378 ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) ); // 6
379 load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
380 store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
382 call_func0( sh4_execute_instruction );
383 load_spreg( R_EAX, R_PC );
384 if( sh4_x86.tlb_on ) {
385 call_func1(xlat_get_code_by_vma,R_EAX);
386 } else {
387 call_func1(xlat_get_code,R_EAX);
388 }
389 exit_block();
390 }
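/* The cycle adjustment above charges for every 16-bit instruction from the
 * start of the block up to and including the one at endpc, ie
 * (((endpc - block_start_pc)>>1)+1) instructions at sh4_cpu_period cycles
 * each, since sh4_execute_instruction itself leaves slice_cycle untouched. */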
392 /**
393 * Translate a single instruction. Delayed branches are handled specially
394 * by translating both the branch and the delay-slot instruction as a single unit
395 * (since the delay-slot instruction executes before the branch takes effect).
396 * The instruction MUST be in the icache (assert check)
397 *
398 * @return true if the instruction marks the end of a basic block
399 * (eg a branch or other control transfer)
400 */
401 uint32_t sh4_translate_instruction( sh4vma_t pc )
402 {
403 uint32_t ir;
404 /* Read instruction from icache */
405 assert( IS_IN_ICACHE(pc) );
406 ir = *(uint16_t *)GET_ICACHE_PTR(pc);
408 if( !sh4_x86.in_delay_slot ) {
409 sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
410 }
411 %%
412 /* ALU operations */
413 ADD Rm, Rn {:
414 COUNT_INST(I_ADD);
415 load_reg( R_EAX, Rm );
416 load_reg( R_ECX, Rn );
417 ADD_r32_r32( R_EAX, R_ECX );
418 store_reg( R_ECX, Rn );
419 sh4_x86.tstate = TSTATE_NONE;
420 :}
421 ADD #imm, Rn {:
422 COUNT_INST(I_ADDI);
423 load_reg( R_EAX, Rn );
424 ADD_imm8s_r32( imm, R_EAX );
425 store_reg( R_EAX, Rn );
426 sh4_x86.tstate = TSTATE_NONE;
427 :}
428 ADDC Rm, Rn {:
429 COUNT_INST(I_ADDC);
430 if( sh4_x86.tstate != TSTATE_C ) {
431 LDC_t();
432 }
433 load_reg( R_EAX, Rm );
434 load_reg( R_ECX, Rn );
435 ADC_r32_r32( R_EAX, R_ECX );
436 store_reg( R_ECX, Rn );
437 SETC_t();
438 sh4_x86.tstate = TSTATE_C;
439 :}
440 ADDV Rm, Rn {:
441 COUNT_INST(I_ADDV);
442 load_reg( R_EAX, Rm );
443 load_reg( R_ECX, Rn );
444 ADD_r32_r32( R_EAX, R_ECX );
445 store_reg( R_ECX, Rn );
446 SETO_t();
447 sh4_x86.tstate = TSTATE_O;
448 :}
449 AND Rm, Rn {:
450 COUNT_INST(I_AND);
451 load_reg( R_EAX, Rm );
452 load_reg( R_ECX, Rn );
453 AND_r32_r32( R_EAX, R_ECX );
454 store_reg( R_ECX, Rn );
455 sh4_x86.tstate = TSTATE_NONE;
456 :}
457 AND #imm, R0 {:
458 COUNT_INST(I_ANDI);
459 load_reg( R_EAX, 0 );
460 AND_imm32_r32(imm, R_EAX);
461 store_reg( R_EAX, 0 );
462 sh4_x86.tstate = TSTATE_NONE;
463 :}
464 AND.B #imm, @(R0, GBR) {:
465 COUNT_INST(I_ANDB);
466 load_reg( R_EAX, 0 );
467 load_spreg( R_ECX, R_GBR );
468 ADD_r32_r32( R_ECX, R_EAX );
469 MMU_TRANSLATE_WRITE( R_EAX );
470 MOV_r32_esp8(R_EAX, 0);
471 MEM_READ_BYTE( R_EAX, R_EDX );
472 MOV_esp8_r32(0, R_EAX);
473 AND_imm32_r32(imm, R_EDX );
474 MEM_WRITE_BYTE( R_EAX, R_EDX );
475 sh4_x86.tstate = TSTATE_NONE;
476 :}
477 CMP/EQ Rm, Rn {:
478 COUNT_INST(I_CMPEQ);
479 load_reg( R_EAX, Rm );
480 load_reg( R_ECX, Rn );
481 CMP_r32_r32( R_EAX, R_ECX );
482 SETE_t();
483 sh4_x86.tstate = TSTATE_E;
484 :}
485 CMP/EQ #imm, R0 {:
486 COUNT_INST(I_CMPEQI);
487 load_reg( R_EAX, 0 );
488 CMP_imm8s_r32(imm, R_EAX);
489 SETE_t();
490 sh4_x86.tstate = TSTATE_E;
491 :}
492 CMP/GE Rm, Rn {:
493 COUNT_INST(I_CMPGE);
494 load_reg( R_EAX, Rm );
495 load_reg( R_ECX, Rn );
496 CMP_r32_r32( R_EAX, R_ECX );
497 SETGE_t();
498 sh4_x86.tstate = TSTATE_GE;
499 :}
500 CMP/GT Rm, Rn {:
501 COUNT_INST(I_CMPGT);
502 load_reg( R_EAX, Rm );
503 load_reg( R_ECX, Rn );
504 CMP_r32_r32( R_EAX, R_ECX );
505 SETG_t();
506 sh4_x86.tstate = TSTATE_G;
507 :}
508 CMP/HI Rm, Rn {:
509 COUNT_INST(I_CMPHI);
510 load_reg( R_EAX, Rm );
511 load_reg( R_ECX, Rn );
512 CMP_r32_r32( R_EAX, R_ECX );
513 SETA_t();
514 sh4_x86.tstate = TSTATE_A;
515 :}
516 CMP/HS Rm, Rn {:
517 COUNT_INST(I_CMPHS);
518 load_reg( R_EAX, Rm );
519 load_reg( R_ECX, Rn );
520 CMP_r32_r32( R_EAX, R_ECX );
521 SETAE_t();
522 sh4_x86.tstate = TSTATE_AE;
523 :}
524 CMP/PL Rn {:
525 COUNT_INST(I_CMPPL);
526 load_reg( R_EAX, Rn );
527 CMP_imm8s_r32( 0, R_EAX );
528 SETG_t();
529 sh4_x86.tstate = TSTATE_G;
530 :}
531 CMP/PZ Rn {:
532 COUNT_INST(I_CMPPZ);
533 load_reg( R_EAX, Rn );
534 CMP_imm8s_r32( 0, R_EAX );
535 SETGE_t();
536 sh4_x86.tstate = TSTATE_GE;
537 :}
538 CMP/STR Rm, Rn {:
539 COUNT_INST(I_CMPSTR);
540 load_reg( R_EAX, Rm );
541 load_reg( R_ECX, Rn );
542 XOR_r32_r32( R_ECX, R_EAX );
543 TEST_r8_r8( R_AL, R_AL );
544 JE_rel8(target1);
545 TEST_r8_r8( R_AH, R_AH );
546 JE_rel8(target2);
547 SHR_imm8_r32( 16, R_EAX );
548 TEST_r8_r8( R_AL, R_AL );
549 JE_rel8(target3);
550 TEST_r8_r8( R_AH, R_AH );
551 JMP_TARGET(target1);
552 JMP_TARGET(target2);
553 JMP_TARGET(target3);
554 SETE_t();
555 sh4_x86.tstate = TSTATE_E;
556 :}
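/* CMP/STR sets T if any of the four corresponding bytes of Rm and Rn are
 * equal. After the XOR, an equal byte pair becomes a zero byte in EAX, so
 * the code tests AL and AH, shifts the upper half down, and tests again.
 * All three early jumps land on the same SETE: a taken JE arrives with
 * ZF=1 (a match), while falling through leaves ZF from the final TEST. */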
557 DIV0S Rm, Rn {:
558 COUNT_INST(I_DIV0S);
559 load_reg( R_EAX, Rm );
560 load_reg( R_ECX, Rn );
561 SHR_imm8_r32( 31, R_EAX );
562 SHR_imm8_r32( 31, R_ECX );
563 store_spreg( R_EAX, R_M );
564 store_spreg( R_ECX, R_Q );
565 CMP_r32_r32( R_EAX, R_ECX );
566 SETNE_t();
567 sh4_x86.tstate = TSTATE_NE;
568 :}
569 DIV0U {:
570 COUNT_INST(I_DIV0U);
571 XOR_r32_r32( R_EAX, R_EAX );
572 store_spreg( R_EAX, R_Q );
573 store_spreg( R_EAX, R_M );
574 store_spreg( R_EAX, R_T );
575 sh4_x86.tstate = TSTATE_C; // works for DIV1
576 :}
577 DIV1 Rm, Rn {:
578 COUNT_INST(I_DIV1);
579 load_spreg( R_ECX, R_M );
580 load_reg( R_EAX, Rn );
581 if( sh4_x86.tstate != TSTATE_C ) {
582 LDC_t();
583 }
584 RCL1_r32( R_EAX );
585 SETC_r8( R_DL ); // Q'
586 CMP_sh4r_r32( R_Q, R_ECX );
587 JE_rel8(mqequal);
588 ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
589 JMP_rel8(end);
590 JMP_TARGET(mqequal);
591 SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
592 JMP_TARGET(end);
593 store_reg( R_EAX, Rn ); // Done with Rn now
594 SETC_r8(R_AL); // tmp1
595 XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
596 XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
597 store_spreg( R_ECX, R_Q );
598 XOR_imm8s_r32( 1, R_AL ); // T = !Q'
599 MOVZX_r8_r32( R_AL, R_EAX );
600 store_spreg( R_EAX, R_T );
601 sh4_x86.tstate = TSTATE_NONE;
602 :}
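/* DIV1 is one step of 1-bit non-restoring division: Rn is rotated left
 * through T, Rm is added or subtracted depending on whether Q == M, and Q
 * and T are rebuilt from the carry out - hence the Q'/Q'' bookkeeping in
 * DL/AL/CL above. DIV0S/DIV0U seed M, Q and T before the division loop. */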
603 DMULS.L Rm, Rn {:
604 COUNT_INST(I_DMULS);
605 load_reg( R_EAX, Rm );
606 load_reg( R_ECX, Rn );
607 IMUL_r32(R_ECX);
608 store_spreg( R_EDX, R_MACH );
609 store_spreg( R_EAX, R_MACL );
610 sh4_x86.tstate = TSTATE_NONE;
611 :}
612 DMULU.L Rm, Rn {:
613 COUNT_INST(I_DMULU);
614 load_reg( R_EAX, Rm );
615 load_reg( R_ECX, Rn );
616 MUL_r32(R_ECX);
617 store_spreg( R_EDX, R_MACH );
618 store_spreg( R_EAX, R_MACL );
619 sh4_x86.tstate = TSTATE_NONE;
620 :}
621 DT Rn {:
622 COUNT_INST(I_DT);
623 load_reg( R_EAX, Rn );
624 ADD_imm8s_r32( -1, R_EAX );
625 store_reg( R_EAX, Rn );
626 SETE_t();
627 sh4_x86.tstate = TSTATE_E;
628 :}
629 EXTS.B Rm, Rn {:
630 COUNT_INST(I_EXTSB);
631 load_reg( R_EAX, Rm );
632 MOVSX_r8_r32( R_EAX, R_EAX );
633 store_reg( R_EAX, Rn );
634 :}
635 EXTS.W Rm, Rn {:
636 COUNT_INST(I_EXTSW);
637 load_reg( R_EAX, Rm );
638 MOVSX_r16_r32( R_EAX, R_EAX );
639 store_reg( R_EAX, Rn );
640 :}
641 EXTU.B Rm, Rn {:
642 COUNT_INST(I_EXTUB);
643 load_reg( R_EAX, Rm );
644 MOVZX_r8_r32( R_EAX, R_EAX );
645 store_reg( R_EAX, Rn );
646 :}
647 EXTU.W Rm, Rn {:
648 COUNT_INST(I_EXTUW);
649 load_reg( R_EAX, Rm );
650 MOVZX_r16_r32( R_EAX, R_EAX );
651 store_reg( R_EAX, Rn );
652 :}
653 MAC.L @Rm+, @Rn+ {:
654 COUNT_INST(I_MACL);
655 if( Rm == Rn ) {
656 load_reg( R_EAX, Rm );
657 check_ralign32( R_EAX );
658 MMU_TRANSLATE_READ( R_EAX );
659 MOV_r32_esp8(R_EAX, 0);
660 load_reg( R_EAX, Rn );
661 ADD_imm8s_r32( 4, R_EAX );
662 MMU_TRANSLATE_READ( R_EAX );
663 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
664 // Note: we translate twice, in case the two addresses are on different pages.
665 // It may be worth adding a page-boundary check to skip the second translation.
666 } else {
667 load_reg( R_EAX, Rm );
668 check_ralign32( R_EAX );
669 MMU_TRANSLATE_READ( R_EAX );
670 MOV_r32_esp8( R_EAX, 0 );
671 load_reg( R_EAX, Rn );
672 check_ralign32( R_EAX );
673 MMU_TRANSLATE_READ( R_EAX );
674 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
675 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
676 }
677 MEM_READ_LONG( R_EAX, R_EAX );
678 MOV_r32_esp8( R_EAX, 4 );
679 MOV_esp8_r32( 0, R_EAX );
680 MEM_READ_LONG( R_EAX, R_EAX );
681 MOV_esp8_r32( 4, R_ECX );
683 IMUL_r32( R_ECX );
684 ADD_r32_sh4r( R_EAX, R_MACL );
685 ADC_r32_sh4r( R_EDX, R_MACH );
687 load_spreg( R_ECX, R_S );
688 TEST_r32_r32(R_ECX, R_ECX);
689 JE_rel8( nosat );
690 call_func0( signsat48 );
691 JMP_TARGET( nosat );
692 sh4_x86.tstate = TSTATE_NONE;
693 :}
694 MAC.W @Rm+, @Rn+ {:
695 COUNT_INST(I_MACW);
696 if( Rm == Rn ) {
697 load_reg( R_EAX, Rm );
698 check_ralign16( R_EAX );
699 MMU_TRANSLATE_READ( R_EAX );
700 MOV_r32_esp8( R_EAX, 0 );
701 load_reg( R_EAX, Rn );
702 ADD_imm8s_r32( 2, R_EAX );
703 MMU_TRANSLATE_READ( R_EAX );
704 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
705 // Note: we translate twice, in case the two addresses are on different pages.
706 // It may be worth adding a page-boundary check to skip the second translation.
707 } else {
708 load_reg( R_EAX, Rm );
709 check_ralign16( R_EAX );
710 MMU_TRANSLATE_READ( R_EAX );
711 MOV_r32_esp8( R_EAX, 0 );
712 load_reg( R_EAX, Rn );
713 check_ralign16( R_EAX );
714 MMU_TRANSLATE_READ( R_EAX );
715 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
716 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
717 }
718 MEM_READ_WORD( R_EAX, R_EAX );
719 MOV_r32_esp8( R_EAX, 4 );
720 MOV_esp8_r32( 0, R_EAX );
721 MEM_READ_WORD( R_EAX, R_EAX );
722 MOV_esp8_r32( 4, R_ECX );
724 IMUL_r32( R_ECX );
725 load_spreg( R_ECX, R_S );
726 TEST_r32_r32( R_ECX, R_ECX );
727 JE_rel8( nosat );
729 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
730 JNO_rel8( end ); // 2
731 load_imm32( R_EDX, 1 ); // 5
732 store_spreg( R_EDX, R_MACH ); // 6
733 JS_rel8( positive ); // 2
734 load_imm32( R_EAX, 0x80000000 );// 5
735 store_spreg( R_EAX, R_MACL ); // 6
736 JMP_rel8(end2); // 2
738 JMP_TARGET(positive);
739 load_imm32( R_EAX, 0x7FFFFFFF );// 5
740 store_spreg( R_EAX, R_MACL ); // 6
741 JMP_rel8(end3); // 2
743 JMP_TARGET(nosat);
744 ADD_r32_sh4r( R_EAX, R_MACL ); // 6
745 ADC_r32_sh4r( R_EDX, R_MACH ); // 6
746 JMP_TARGET(end);
747 JMP_TARGET(end2);
748 JMP_TARGET(end3);
749 sh4_x86.tstate = TSTATE_NONE;
750 :}
751 MOVT Rn {:
752 COUNT_INST(I_MOVT);
753 load_spreg( R_EAX, R_T );
754 store_reg( R_EAX, Rn );
755 :}
756 MUL.L Rm, Rn {:
757 COUNT_INST(I_MULL);
758 load_reg( R_EAX, Rm );
759 load_reg( R_ECX, Rn );
760 MUL_r32( R_ECX );
761 store_spreg( R_EAX, R_MACL );
762 sh4_x86.tstate = TSTATE_NONE;
763 :}
764 MULS.W Rm, Rn {:
765 COUNT_INST(I_MULSW);
766 load_reg16s( R_EAX, Rm );
767 load_reg16s( R_ECX, Rn );
768 MUL_r32( R_ECX );
769 store_spreg( R_EAX, R_MACL );
770 sh4_x86.tstate = TSTATE_NONE;
771 :}
772 MULU.W Rm, Rn {:
773 COUNT_INST(I_MULUW);
774 load_reg16u( R_EAX, Rm );
775 load_reg16u( R_ECX, Rn );
776 MUL_r32( R_ECX );
777 store_spreg( R_EAX, R_MACL );
778 sh4_x86.tstate = TSTATE_NONE;
779 :}
780 NEG Rm, Rn {:
781 COUNT_INST(I_NEG);
782 load_reg( R_EAX, Rm );
783 NEG_r32( R_EAX );
784 store_reg( R_EAX, Rn );
785 sh4_x86.tstate = TSTATE_NONE;
786 :}
787 NEGC Rm, Rn {:
788 COUNT_INST(I_NEGC);
789 load_reg( R_EAX, Rm );
790 XOR_r32_r32( R_ECX, R_ECX );
791 LDC_t();
792 SBB_r32_r32( R_EAX, R_ECX );
793 store_reg( R_ECX, Rn );
794 SETC_t();
795 sh4_x86.tstate = TSTATE_C;
796 :}
797 NOT Rm, Rn {:
798 COUNT_INST(I_NOT);
799 load_reg( R_EAX, Rm );
800 NOT_r32( R_EAX );
801 store_reg( R_EAX, Rn );
802 sh4_x86.tstate = TSTATE_NONE;
803 :}
804 OR Rm, Rn {:
805 COUNT_INST(I_OR);
806 load_reg( R_EAX, Rm );
807 load_reg( R_ECX, Rn );
808 OR_r32_r32( R_EAX, R_ECX );
809 store_reg( R_ECX, Rn );
810 sh4_x86.tstate = TSTATE_NONE;
811 :}
812 OR #imm, R0 {:
813 COUNT_INST(I_ORI);
814 load_reg( R_EAX, 0 );
815 OR_imm32_r32(imm, R_EAX);
816 store_reg( R_EAX, 0 );
817 sh4_x86.tstate = TSTATE_NONE;
818 :}
819 OR.B #imm, @(R0, GBR) {:
820 COUNT_INST(I_ORB);
821 load_reg( R_EAX, 0 );
822 load_spreg( R_ECX, R_GBR );
823 ADD_r32_r32( R_ECX, R_EAX );
824 MMU_TRANSLATE_WRITE( R_EAX );
825 MOV_r32_esp8( R_EAX, 0 );
826 MEM_READ_BYTE( R_EAX, R_EDX );
827 MOV_esp8_r32( 0, R_EAX );
828 OR_imm32_r32(imm, R_EDX );
829 MEM_WRITE_BYTE( R_EAX, R_EDX );
830 sh4_x86.tstate = TSTATE_NONE;
831 :}
832 ROTCL Rn {:
833 COUNT_INST(I_ROTCL);
834 load_reg( R_EAX, Rn );
835 if( sh4_x86.tstate != TSTATE_C ) {
836 LDC_t();
837 }
838 RCL1_r32( R_EAX );
839 store_reg( R_EAX, Rn );
840 SETC_t();
841 sh4_x86.tstate = TSTATE_C;
842 :}
843 ROTCR Rn {:
844 COUNT_INST(I_ROTCR);
845 load_reg( R_EAX, Rn );
846 if( sh4_x86.tstate != TSTATE_C ) {
847 LDC_t();
848 }
849 RCR1_r32( R_EAX );
850 store_reg( R_EAX, Rn );
851 SETC_t();
852 sh4_x86.tstate = TSTATE_C;
853 :}
854 ROTL Rn {:
855 COUNT_INST(I_ROTL);
856 load_reg( R_EAX, Rn );
857 ROL1_r32( R_EAX );
858 store_reg( R_EAX, Rn );
859 SETC_t();
860 sh4_x86.tstate = TSTATE_C;
861 :}
862 ROTR Rn {:
863 COUNT_INST(I_ROTR);
864 load_reg( R_EAX, Rn );
865 ROR1_r32( R_EAX );
866 store_reg( R_EAX, Rn );
867 SETC_t();
868 sh4_x86.tstate = TSTATE_C;
869 :}
870 SHAD Rm, Rn {:
871 COUNT_INST(I_SHAD);
872 /* Annoyingly enough, not directly convertible */
873 load_reg( R_EAX, Rn );
874 load_reg( R_ECX, Rm );
875 CMP_imm32_r32( 0, R_ECX );
876 JGE_rel8(doshl);
878 NEG_r32( R_ECX ); // 2
879 AND_imm8_r8( 0x1F, R_CL ); // 3
880 JE_rel8(emptysar); // 2
881 SAR_r32_CL( R_EAX ); // 2
882 JMP_rel8(end); // 2
884 JMP_TARGET(emptysar);
885 SAR_imm8_r32(31, R_EAX ); // 3
886 JMP_rel8(end2);
888 JMP_TARGET(doshl);
889 AND_imm8_r8( 0x1F, R_CL ); // 3
890 SHL_r32_CL( R_EAX ); // 2
891 JMP_TARGET(end);
892 JMP_TARGET(end2);
893 store_reg( R_EAX, Rn );
894 sh4_x86.tstate = TSTATE_NONE;
895 :}
896 SHLD Rm, Rn {:
897 COUNT_INST(I_SHLD);
898 load_reg( R_EAX, Rn );
899 load_reg( R_ECX, Rm );
900 CMP_imm32_r32( 0, R_ECX );
901 JGE_rel8(doshl);
903 NEG_r32( R_ECX ); // 2
904 AND_imm8_r8( 0x1F, R_CL ); // 3
905 JE_rel8(emptyshr );
906 SHR_r32_CL( R_EAX ); // 2
907 JMP_rel8(end); // 2
909 JMP_TARGET(emptyshr);
910 XOR_r32_r32( R_EAX, R_EAX );
911 JMP_rel8(end2);
913 JMP_TARGET(doshl);
914 AND_imm8_r8( 0x1F, R_CL ); // 3
915 SHL_r32_CL( R_EAX ); // 2
916 JMP_TARGET(end);
917 JMP_TARGET(end2);
918 store_reg( R_EAX, Rn );
919 sh4_x86.tstate = TSTATE_NONE;
920 :}
921 SHAL Rn {:
922 COUNT_INST(I_SHAL);
923 load_reg( R_EAX, Rn );
924 SHL1_r32( R_EAX );
925 SETC_t();
926 store_reg( R_EAX, Rn );
927 sh4_x86.tstate = TSTATE_C;
928 :}
929 SHAR Rn {:
930 COUNT_INST(I_SHAR);
931 load_reg( R_EAX, Rn );
932 SAR1_r32( R_EAX );
933 SETC_t();
934 store_reg( R_EAX, Rn );
935 sh4_x86.tstate = TSTATE_C;
936 :}
937 SHLL Rn {:
938 COUNT_INST(I_SHLL);
939 load_reg( R_EAX, Rn );
940 SHL1_r32( R_EAX );
941 SETC_t();
942 store_reg( R_EAX, Rn );
943 sh4_x86.tstate = TSTATE_C;
944 :}
945 SHLL2 Rn {:
946 COUNT_INST(I_SHLL);
947 load_reg( R_EAX, Rn );
948 SHL_imm8_r32( 2, R_EAX );
949 store_reg( R_EAX, Rn );
950 sh4_x86.tstate = TSTATE_NONE;
951 :}
952 SHLL8 Rn {:
953 COUNT_INST(I_SHLL);
954 load_reg( R_EAX, Rn );
955 SHL_imm8_r32( 8, R_EAX );
956 store_reg( R_EAX, Rn );
957 sh4_x86.tstate = TSTATE_NONE;
958 :}
959 SHLL16 Rn {:
960 COUNT_INST(I_SHLL);
961 load_reg( R_EAX, Rn );
962 SHL_imm8_r32( 16, R_EAX );
963 store_reg( R_EAX, Rn );
964 sh4_x86.tstate = TSTATE_NONE;
965 :}
966 SHLR Rn {:
967 COUNT_INST(I_SHLR);
968 load_reg( R_EAX, Rn );
969 SHR1_r32( R_EAX );
970 SETC_t();
971 store_reg( R_EAX, Rn );
972 sh4_x86.tstate = TSTATE_C;
973 :}
974 SHLR2 Rn {:
975 COUNT_INST(I_SHLR);
976 load_reg( R_EAX, Rn );
977 SHR_imm8_r32( 2, R_EAX );
978 store_reg( R_EAX, Rn );
979 sh4_x86.tstate = TSTATE_NONE;
980 :}
981 SHLR8 Rn {:
982 COUNT_INST(I_SHLR);
983 load_reg( R_EAX, Rn );
984 SHR_imm8_r32( 8, R_EAX );
985 store_reg( R_EAX, Rn );
986 sh4_x86.tstate = TSTATE_NONE;
987 :}
988 SHLR16 Rn {:
989 COUNT_INST(I_SHLR);
990 load_reg( R_EAX, Rn );
991 SHR_imm8_r32( 16, R_EAX );
992 store_reg( R_EAX, Rn );
993 sh4_x86.tstate = TSTATE_NONE;
994 :}
995 SUB Rm, Rn {:
996 COUNT_INST(I_SUB);
997 load_reg( R_EAX, Rm );
998 load_reg( R_ECX, Rn );
999 SUB_r32_r32( R_EAX, R_ECX );
1000 store_reg( R_ECX, Rn );
1001 sh4_x86.tstate = TSTATE_NONE;
1002 :}
1003 SUBC Rm, Rn {:
1004 COUNT_INST(I_SUBC);
1005 load_reg( R_EAX, Rm );
1006 load_reg( R_ECX, Rn );
1007 if( sh4_x86.tstate != TSTATE_C ) {
1008 LDC_t();
1009 }
1010 SBB_r32_r32( R_EAX, R_ECX );
1011 store_reg( R_ECX, Rn );
1012 SETC_t();
1013 sh4_x86.tstate = TSTATE_C;
1014 :}
1015 SUBV Rm, Rn {:
1016 COUNT_INST(I_SUBV);
1017 load_reg( R_EAX, Rm );
1018 load_reg( R_ECX, Rn );
1019 SUB_r32_r32( R_EAX, R_ECX );
1020 store_reg( R_ECX, Rn );
1021 SETO_t();
1022 sh4_x86.tstate = TSTATE_O;
1023 :}
1024 SWAP.B Rm, Rn {:
1025 COUNT_INST(I_SWAPB);
1026 load_reg( R_EAX, Rm );
1027 XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
1028 store_reg( R_EAX, Rn );
1029 :}
1030 SWAP.W Rm, Rn {:
1031 COUNT_INST(I_SWAPW);
1032 load_reg( R_EAX, Rm );
1033 MOV_r32_r32( R_EAX, R_ECX );
1034 SHL_imm8_r32( 16, R_ECX );
1035 SHR_imm8_r32( 16, R_EAX );
1036 OR_r32_r32( R_EAX, R_ECX );
1037 store_reg( R_ECX, Rn );
1038 sh4_x86.tstate = TSTATE_NONE;
1039 :}
1040 TAS.B @Rn {:
1041 COUNT_INST(I_TASB);
1042 load_reg( R_EAX, Rn );
1043 MMU_TRANSLATE_WRITE( R_EAX );
1044 MOV_r32_esp8( R_EAX, 0 );
1045 MEM_READ_BYTE( R_EAX, R_EDX );
1046 TEST_r8_r8( R_DL, R_DL );
1047 SETE_t();
1048 OR_imm8_r8( 0x80, R_DL );
1049 MOV_esp8_r32( 0, R_EAX );
1050 MEM_WRITE_BYTE( R_EAX, R_EDX );
1051 sh4_x86.tstate = TSTATE_NONE;
1052 :}
1053 TST Rm, Rn {:
1054 COUNT_INST(I_TST);
1055 load_reg( R_EAX, Rm );
1056 load_reg( R_ECX, Rn );
1057 TEST_r32_r32( R_EAX, R_ECX );
1058 SETE_t();
1059 sh4_x86.tstate = TSTATE_E;
1060 :}
1061 TST #imm, R0 {:
1062 COUNT_INST(I_TSTI);
1063 load_reg( R_EAX, 0 );
1064 TEST_imm32_r32( imm, R_EAX );
1065 SETE_t();
1066 sh4_x86.tstate = TSTATE_E;
1067 :}
1068 TST.B #imm, @(R0, GBR) {:
1069 COUNT_INST(I_TSTB);
1070 load_reg( R_EAX, 0);
1071 load_spreg( R_ECX, R_GBR );
1072 ADD_r32_r32( R_ECX, R_EAX );
1073 MMU_TRANSLATE_READ( R_EAX );
1074 MEM_READ_BYTE( R_EAX, R_EAX );
1075 TEST_imm8_r8( imm, R_AL );
1076 SETE_t();
1077 sh4_x86.tstate = TSTATE_E;
1078 :}
1079 XOR Rm, Rn {:
1080 COUNT_INST(I_XOR);
1081 load_reg( R_EAX, Rm );
1082 load_reg( R_ECX, Rn );
1083 XOR_r32_r32( R_EAX, R_ECX );
1084 store_reg( R_ECX, Rn );
1085 sh4_x86.tstate = TSTATE_NONE;
1086 :}
1087 XOR #imm, R0 {:
1088 COUNT_INST(I_XORI);
1089 load_reg( R_EAX, 0 );
1090 XOR_imm32_r32( imm, R_EAX );
1091 store_reg( R_EAX, 0 );
1092 sh4_x86.tstate = TSTATE_NONE;
1093 :}
1094 XOR.B #imm, @(R0, GBR) {:
1095 COUNT_INST(I_XORB);
1096 load_reg( R_EAX, 0 );
1097 load_spreg( R_ECX, R_GBR );
1098 ADD_r32_r32( R_ECX, R_EAX );
1099 MMU_TRANSLATE_WRITE( R_EAX );
1100 MOV_r32_esp8( R_EAX, 0 );
1101 MEM_READ_BYTE(R_EAX, R_EDX);
1102 MOV_esp8_r32( 0, R_EAX );
1103 XOR_imm32_r32( imm, R_EDX );
1104 MEM_WRITE_BYTE( R_EAX, R_EDX );
1105 sh4_x86.tstate = TSTATE_NONE;
1106 :}
1107 XTRCT Rm, Rn {:
1108 COUNT_INST(I_XTRCT);
1109 load_reg( R_EAX, Rm );
1110 load_reg( R_ECX, Rn );
1111 SHL_imm8_r32( 16, R_EAX );
1112 SHR_imm8_r32( 16, R_ECX );
1113 OR_r32_r32( R_EAX, R_ECX );
1114 store_reg( R_ECX, Rn );
1115 sh4_x86.tstate = TSTATE_NONE;
1116 :}
1118 /* Data move instructions */
1119 MOV Rm, Rn {:
1120 COUNT_INST(I_MOV);
1121 load_reg( R_EAX, Rm );
1122 store_reg( R_EAX, Rn );
1123 :}
1124 MOV #imm, Rn {:
1125 COUNT_INST(I_MOVI);
1126 load_imm32( R_EAX, imm );
1127 store_reg( R_EAX, Rn );
1128 :}
1129 MOV.B Rm, @Rn {:
1130 COUNT_INST(I_MOVB);
1131 load_reg( R_EAX, Rn );
1132 MMU_TRANSLATE_WRITE( R_EAX );
1133 load_reg( R_EDX, Rm );
1134 MEM_WRITE_BYTE( R_EAX, R_EDX );
1135 sh4_x86.tstate = TSTATE_NONE;
1136 :}
1137 MOV.B Rm, @-Rn {:
1138 COUNT_INST(I_MOVB);
1139 load_reg( R_EAX, Rn );
1140 ADD_imm8s_r32( -1, R_EAX );
1141 MMU_TRANSLATE_WRITE( R_EAX );
1142 load_reg( R_EDX, Rm );
1143 ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
1144 MEM_WRITE_BYTE( R_EAX, R_EDX );
1145 sh4_x86.tstate = TSTATE_NONE;
1146 :}
1147 MOV.B Rm, @(R0, Rn) {:
1148 COUNT_INST(I_MOVB);
1149 load_reg( R_EAX, 0 );
1150 load_reg( R_ECX, Rn );
1151 ADD_r32_r32( R_ECX, R_EAX );
1152 MMU_TRANSLATE_WRITE( R_EAX );
1153 load_reg( R_EDX, Rm );
1154 MEM_WRITE_BYTE( R_EAX, R_EDX );
1155 sh4_x86.tstate = TSTATE_NONE;
1156 :}
1157 MOV.B R0, @(disp, GBR) {:
1158 COUNT_INST(I_MOVB);
1159 load_spreg( R_EAX, R_GBR );
1160 ADD_imm32_r32( disp, R_EAX );
1161 MMU_TRANSLATE_WRITE( R_EAX );
1162 load_reg( R_EDX, 0 );
1163 MEM_WRITE_BYTE( R_EAX, R_EDX );
1164 sh4_x86.tstate = TSTATE_NONE;
1165 :}
1166 MOV.B R0, @(disp, Rn) {:
1167 COUNT_INST(I_MOVB);
1168 load_reg( R_EAX, Rn );
1169 ADD_imm32_r32( disp, R_EAX );
1170 MMU_TRANSLATE_WRITE( R_EAX );
1171 load_reg( R_EDX, 0 );
1172 MEM_WRITE_BYTE( R_EAX, R_EDX );
1173 sh4_x86.tstate = TSTATE_NONE;
1174 :}
1175 MOV.B @Rm, Rn {:
1176 COUNT_INST(I_MOVB);
1177 load_reg( R_EAX, Rm );
1178 MMU_TRANSLATE_READ( R_EAX );
1179 MEM_READ_BYTE( R_EAX, R_EAX );
1180 store_reg( R_EAX, Rn );
1181 sh4_x86.tstate = TSTATE_NONE;
1182 :}
1183 MOV.B @Rm+, Rn {:
1184 COUNT_INST(I_MOVB);
1185 load_reg( R_EAX, Rm );
1186 MMU_TRANSLATE_READ( R_EAX );
1187 ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
1188 MEM_READ_BYTE( R_EAX, R_EAX );
1189 store_reg( R_EAX, Rn );
1190 sh4_x86.tstate = TSTATE_NONE;
1191 :}
1192 MOV.B @(R0, Rm), Rn {:
1193 COUNT_INST(I_MOVB);
1194 load_reg( R_EAX, 0 );
1195 load_reg( R_ECX, Rm );
1196 ADD_r32_r32( R_ECX, R_EAX );
1197 MMU_TRANSLATE_READ( R_EAX );
1198 MEM_READ_BYTE( R_EAX, R_EAX );
1199 store_reg( R_EAX, Rn );
1200 sh4_x86.tstate = TSTATE_NONE;
1201 :}
1202 MOV.B @(disp, GBR), R0 {:
1203 COUNT_INST(I_MOVB);
1204 load_spreg( R_EAX, R_GBR );
1205 ADD_imm32_r32( disp, R_EAX );
1206 MMU_TRANSLATE_READ( R_EAX );
1207 MEM_READ_BYTE( R_EAX, R_EAX );
1208 store_reg( R_EAX, 0 );
1209 sh4_x86.tstate = TSTATE_NONE;
1210 :}
1211 MOV.B @(disp, Rm), R0 {:
1212 COUNT_INST(I_MOVB);
1213 load_reg( R_EAX, Rm );
1214 ADD_imm32_r32( disp, R_EAX );
1215 MMU_TRANSLATE_READ( R_EAX );
1216 MEM_READ_BYTE( R_EAX, R_EAX );
1217 store_reg( R_EAX, 0 );
1218 sh4_x86.tstate = TSTATE_NONE;
1219 :}
1220 MOV.L Rm, @Rn {:
1221 COUNT_INST(I_MOVL);
1222 load_reg( R_EAX, Rn );
1223 check_walign32(R_EAX);
1224 MMU_TRANSLATE_WRITE( R_EAX );
1225 load_reg( R_EDX, Rm );
1226 MEM_WRITE_LONG( R_EAX, R_EDX );
1227 sh4_x86.tstate = TSTATE_NONE;
1228 :}
1229 MOV.L Rm, @-Rn {:
1230 COUNT_INST(I_MOVL);
1231 load_reg( R_EAX, Rn );
1232 ADD_imm8s_r32( -4, R_EAX );
1233 check_walign32( R_EAX );
1234 MMU_TRANSLATE_WRITE( R_EAX );
1235 load_reg( R_EDX, Rm );
1236 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
1237 MEM_WRITE_LONG( R_EAX, R_EDX );
1238 sh4_x86.tstate = TSTATE_NONE;
1239 :}
1240 MOV.L Rm, @(R0, Rn) {:
1241 COUNT_INST(I_MOVL);
1242 load_reg( R_EAX, 0 );
1243 load_reg( R_ECX, Rn );
1244 ADD_r32_r32( R_ECX, R_EAX );
1245 check_walign32( R_EAX );
1246 MMU_TRANSLATE_WRITE( R_EAX );
1247 load_reg( R_EDX, Rm );
1248 MEM_WRITE_LONG( R_EAX, R_EDX );
1249 sh4_x86.tstate = TSTATE_NONE;
1250 :}
1251 MOV.L R0, @(disp, GBR) {:
1252 COUNT_INST(I_MOVL);
1253 load_spreg( R_EAX, R_GBR );
1254 ADD_imm32_r32( disp, R_EAX );
1255 check_walign32( R_EAX );
1256 MMU_TRANSLATE_WRITE( R_EAX );
1257 load_reg( R_EDX, 0 );
1258 MEM_WRITE_LONG( R_EAX, R_EDX );
1259 sh4_x86.tstate = TSTATE_NONE;
1260 :}
1261 MOV.L Rm, @(disp, Rn) {:
1262 COUNT_INST(I_MOVL);
1263 load_reg( R_EAX, Rn );
1264 ADD_imm32_r32( disp, R_EAX );
1265 check_walign32( R_EAX );
1266 MMU_TRANSLATE_WRITE( R_EAX );
1267 load_reg( R_EDX, Rm );
1268 MEM_WRITE_LONG( R_EAX, R_EDX );
1269 sh4_x86.tstate = TSTATE_NONE;
1270 :}
1271 MOV.L @Rm, Rn {:
1272 COUNT_INST(I_MOVL);
1273 load_reg( R_EAX, Rm );
1274 check_ralign32( R_EAX );
1275 MMU_TRANSLATE_READ( R_EAX );
1276 MEM_READ_LONG( R_EAX, R_EAX );
1277 store_reg( R_EAX, Rn );
1278 sh4_x86.tstate = TSTATE_NONE;
1279 :}
1280 MOV.L @Rm+, Rn {:
1281 COUNT_INST(I_MOVL);
1282 load_reg( R_EAX, Rm );
1283 check_ralign32( R_EAX );
1284 MMU_TRANSLATE_READ( R_EAX );
1285 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1286 MEM_READ_LONG( R_EAX, R_EAX );
1287 store_reg( R_EAX, Rn );
1288 sh4_x86.tstate = TSTATE_NONE;
1289 :}
1290 MOV.L @(R0, Rm), Rn {:
1291 COUNT_INST(I_MOVL);
1292 load_reg( R_EAX, 0 );
1293 load_reg( R_ECX, Rm );
1294 ADD_r32_r32( R_ECX, R_EAX );
1295 check_ralign32( R_EAX );
1296 MMU_TRANSLATE_READ( R_EAX );
1297 MEM_READ_LONG( R_EAX, R_EAX );
1298 store_reg( R_EAX, Rn );
1299 sh4_x86.tstate = TSTATE_NONE;
1300 :}
1301 MOV.L @(disp, GBR), R0 {:
1302 COUNT_INST(I_MOVL);
1303 load_spreg( R_EAX, R_GBR );
1304 ADD_imm32_r32( disp, R_EAX );
1305 check_ralign32( R_EAX );
1306 MMU_TRANSLATE_READ( R_EAX );
1307 MEM_READ_LONG( R_EAX, R_EAX );
1308 store_reg( R_EAX, 0 );
1309 sh4_x86.tstate = TSTATE_NONE;
1310 :}
1311 MOV.L @(disp, PC), Rn {:
1312 COUNT_INST(I_MOVLPC);
1313 if( sh4_x86.in_delay_slot ) {
1314 SLOTILLEGAL();
1315 } else {
1316 uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
1317 if( IS_IN_ICACHE(target) ) {
1318 // If the target address is in the same page as the code, it's
1319 // pretty safe to just ref it directly and circumvent the whole
1320 // memory subsystem. (this is a big performance win)
1322 // FIXME: There's a corner-case that's not handled here when
1323 // the current code-page is in the ITLB but not in the UTLB.
1324 // (should generate a TLB miss although need to test SH4
1325 // behaviour to confirm) Unlikely to be anyone depending on this
1326 // behaviour though.
1327 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1328 MOV_moff32_EAX( ptr );
1329 } else {
1330 // Note: we use sh4r.pc for the calc as we could be running at a
1331 // different virtual address than the translation was done with,
1332 // but we can safely assume that the low bits are the same.
1333 load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1334 ADD_sh4r_r32( R_PC, R_EAX );
1335 MMU_TRANSLATE_READ( R_EAX );
1336 MEM_READ_LONG( R_EAX, R_EAX );
1337 sh4_x86.tstate = TSTATE_NONE;
1338 }
1339 store_reg( R_EAX, Rn );
1340 }
1341 :}
1342 MOV.L @(disp, Rm), Rn {:
1343 COUNT_INST(I_MOVL);
1344 load_reg( R_EAX, Rm );
1345 ADD_imm8s_r32( disp, R_EAX );
1346 check_ralign32( R_EAX );
1347 MMU_TRANSLATE_READ( R_EAX );
1348 MEM_READ_LONG( R_EAX, R_EAX );
1349 store_reg( R_EAX, Rn );
1350 sh4_x86.tstate = TSTATE_NONE;
1351 :}
1352 MOV.W Rm, @Rn {:
1353 COUNT_INST(I_MOVW);
1354 load_reg( R_EAX, Rn );
1355 check_walign16( R_EAX );
1356 MMU_TRANSLATE_WRITE( R_EAX );
1357 load_reg( R_EDX, Rm );
1358 MEM_WRITE_WORD( R_EAX, R_EDX );
1359 sh4_x86.tstate = TSTATE_NONE;
1360 :}
1361 MOV.W Rm, @-Rn {:
1362 COUNT_INST(I_MOVW);
1363 load_reg( R_EAX, Rn );
1364 ADD_imm8s_r32( -2, R_EAX );
1365 check_walign16( R_EAX );
1366 MMU_TRANSLATE_WRITE( R_EAX );
1367 load_reg( R_EDX, Rm );
1368 ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
1369 MEM_WRITE_WORD( R_EAX, R_EDX );
1370 sh4_x86.tstate = TSTATE_NONE;
1371 :}
1372 MOV.W Rm, @(R0, Rn) {:
1373 COUNT_INST(I_MOVW);
1374 load_reg( R_EAX, 0 );
1375 load_reg( R_ECX, Rn );
1376 ADD_r32_r32( R_ECX, R_EAX );
1377 check_walign16( R_EAX );
1378 MMU_TRANSLATE_WRITE( R_EAX );
1379 load_reg( R_EDX, Rm );
1380 MEM_WRITE_WORD( R_EAX, R_EDX );
1381 sh4_x86.tstate = TSTATE_NONE;
1382 :}
1383 MOV.W R0, @(disp, GBR) {:
1384 COUNT_INST(I_MOVW);
1385 load_spreg( R_EAX, R_GBR );
1386 ADD_imm32_r32( disp, R_EAX );
1387 check_walign16( R_EAX );
1388 MMU_TRANSLATE_WRITE( R_EAX );
1389 load_reg( R_EDX, 0 );
1390 MEM_WRITE_WORD( R_EAX, R_EDX );
1391 sh4_x86.tstate = TSTATE_NONE;
1392 :}
1393 MOV.W R0, @(disp, Rn) {:
1394 COUNT_INST(I_MOVW);
1395 load_reg( R_EAX, Rn );
1396 ADD_imm32_r32( disp, R_EAX );
1397 check_walign16( R_EAX );
1398 MMU_TRANSLATE_WRITE( R_EAX );
1399 load_reg( R_EDX, 0 );
1400 MEM_WRITE_WORD( R_EAX, R_EDX );
1401 sh4_x86.tstate = TSTATE_NONE;
1402 :}
1403 MOV.W @Rm, Rn {:
1404 COUNT_INST(I_MOVW);
1405 load_reg( R_EAX, Rm );
1406 check_ralign16( R_EAX );
1407 MMU_TRANSLATE_READ( R_EAX );
1408 MEM_READ_WORD( R_EAX, R_EAX );
1409 store_reg( R_EAX, Rn );
1410 sh4_x86.tstate = TSTATE_NONE;
1411 :}
1412 MOV.W @Rm+, Rn {:
1413 COUNT_INST(I_MOVW);
1414 load_reg( R_EAX, Rm );
1415 check_ralign16( R_EAX );
1416 MMU_TRANSLATE_READ( R_EAX );
1417 ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
1418 MEM_READ_WORD( R_EAX, R_EAX );
1419 store_reg( R_EAX, Rn );
1420 sh4_x86.tstate = TSTATE_NONE;
1421 :}
1422 MOV.W @(R0, Rm), Rn {:
1423 COUNT_INST(I_MOVW);
1424 load_reg( R_EAX, 0 );
1425 load_reg( R_ECX, Rm );
1426 ADD_r32_r32( R_ECX, R_EAX );
1427 check_ralign16( R_EAX );
1428 MMU_TRANSLATE_READ( R_EAX );
1429 MEM_READ_WORD( R_EAX, R_EAX );
1430 store_reg( R_EAX, Rn );
1431 sh4_x86.tstate = TSTATE_NONE;
1432 :}
1433 MOV.W @(disp, GBR), R0 {:
1434 COUNT_INST(I_MOVW);
1435 load_spreg( R_EAX, R_GBR );
1436 ADD_imm32_r32( disp, R_EAX );
1437 check_ralign16( R_EAX );
1438 MMU_TRANSLATE_READ( R_EAX );
1439 MEM_READ_WORD( R_EAX, R_EAX );
1440 store_reg( R_EAX, 0 );
1441 sh4_x86.tstate = TSTATE_NONE;
1442 :}
1443 MOV.W @(disp, PC), Rn {:
1444 COUNT_INST(I_MOVW);
1445 if( sh4_x86.in_delay_slot ) {
1446 SLOTILLEGAL();
1447 } else {
1448 // See comments for MOV.L @(disp, PC), Rn
1449 uint32_t target = pc + disp + 4;
1450 if( IS_IN_ICACHE(target) ) {
1451 sh4ptr_t ptr = GET_ICACHE_PTR(target);
1452 MOV_moff32_EAX( ptr );
1453 MOVSX_r16_r32( R_EAX, R_EAX );
1454 } else {
1455 load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
1456 ADD_sh4r_r32( R_PC, R_EAX );
1457 MMU_TRANSLATE_READ( R_EAX );
1458 MEM_READ_WORD( R_EAX, R_EAX );
1459 sh4_x86.tstate = TSTATE_NONE;
1460 }
1461 store_reg( R_EAX, Rn );
1462 }
1463 :}
1464 MOV.W @(disp, Rm), R0 {:
1465 COUNT_INST(I_MOVW);
1466 load_reg( R_EAX, Rm );
1467 ADD_imm32_r32( disp, R_EAX );
1468 check_ralign16( R_EAX );
1469 MMU_TRANSLATE_READ( R_EAX );
1470 MEM_READ_WORD( R_EAX, R_EAX );
1471 store_reg( R_EAX, 0 );
1472 sh4_x86.tstate = TSTATE_NONE;
1473 :}
1474 MOVA @(disp, PC), R0 {:
1475 COUNT_INST(I_MOVA);
1476 if( sh4_x86.in_delay_slot ) {
1477 SLOTILLEGAL();
1478 } else {
1479 load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
1480 ADD_sh4r_r32( R_PC, R_ECX );
1481 store_reg( R_ECX, 0 );
1482 sh4_x86.tstate = TSTATE_NONE;
1483 }
1484 :}
1485 MOVCA.L R0, @Rn {:
1486 COUNT_INST(I_MOVCA);
1487 load_reg( R_EAX, Rn );
1488 check_walign32( R_EAX );
1489 MMU_TRANSLATE_WRITE( R_EAX );
1490 load_reg( R_EDX, 0 );
1491 MEM_WRITE_LONG( R_EAX, R_EDX );
1492 sh4_x86.tstate = TSTATE_NONE;
1493 :}
1495 /* Control transfer instructions */
1496 BF disp {:
1497 COUNT_INST(I_BF);
1498 if( sh4_x86.in_delay_slot ) {
1499 SLOTILLEGAL();
1500 } else {
1501 sh4vma_t target = disp + pc + 4;
1502 JT_rel8( nottaken );
1503 exit_block_rel(target, pc+2 );
1504 JMP_TARGET(nottaken);
1505 return 2;
1506 }
1507 :}
1508 BF/S disp {:
1509 COUNT_INST(I_BFS);
1510 if( sh4_x86.in_delay_slot ) {
1511 SLOTILLEGAL();
1512 } else {
1513 sh4_x86.in_delay_slot = DELAY_PC;
1514 if( UNTRANSLATABLE(pc+2) ) {
1515 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1516 JT_rel8(nottaken);
1517 ADD_imm32_r32( disp, R_EAX );
1518 JMP_TARGET(nottaken);
1519 ADD_sh4r_r32( R_PC, R_EAX );
1520 store_spreg( R_EAX, R_NEW_PC );
1521 exit_block_emu(pc+2);
1522 sh4_x86.branch_taken = TRUE;
1523 return 2;
1524 } else {
1525 if( sh4_x86.tstate == TSTATE_NONE ) {
1526 CMP_imm8s_sh4r( 1, R_T );
1527 sh4_x86.tstate = TSTATE_E;
1528 }
1529 sh4vma_t target = disp + pc + 4;
1530 OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
1531 int save_tstate = sh4_x86.tstate;
1532 sh4_translate_instruction(pc+2);
1533 exit_block_rel( target, pc+4 );
1535 // not taken
1536 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1537 sh4_x86.tstate = save_tstate;
1538 sh4_translate_instruction(pc+2);
1539 return 4;
1540 }
1541 }
1542 :}
1543 BRA disp {:
1544 COUNT_INST(I_BRA);
1545 if( sh4_x86.in_delay_slot ) {
1546 SLOTILLEGAL();
1547 } else {
1548 sh4_x86.in_delay_slot = DELAY_PC;
1549 sh4_x86.branch_taken = TRUE;
1550 if( UNTRANSLATABLE(pc+2) ) {
1551 load_spreg( R_EAX, R_PC );
1552 ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
1553 store_spreg( R_EAX, R_NEW_PC );
1554 exit_block_emu(pc+2);
1555 return 2;
1556 } else {
1557 sh4_translate_instruction( pc + 2 );
1558 exit_block_rel( disp + pc + 4, pc+4 );
1559 return 4;
1560 }
1561 }
1562 :}
1563 BRAF Rn {:
1564 COUNT_INST(I_BRAF);
1565 if( sh4_x86.in_delay_slot ) {
1566 SLOTILLEGAL();
1567 } else {
1568 load_spreg( R_EAX, R_PC );
1569 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1570 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1571 store_spreg( R_EAX, R_NEW_PC );
1572 sh4_x86.in_delay_slot = DELAY_PC;
1573 sh4_x86.tstate = TSTATE_NONE;
1574 sh4_x86.branch_taken = TRUE;
1575 if( UNTRANSLATABLE(pc+2) ) {
1576 exit_block_emu(pc+2);
1577 return 2;
1578 } else {
1579 sh4_translate_instruction( pc + 2 );
1580 exit_block_newpcset(pc+2);
1581 return 4;
1582 }
1583 }
1584 :}
1585 BSR disp {:
1586 COUNT_INST(I_BSR);
1587 if( sh4_x86.in_delay_slot ) {
1588 SLOTILLEGAL();
1589 } else {
1590 load_spreg( R_EAX, R_PC );
1591 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1592 store_spreg( R_EAX, R_PR );
1593 sh4_x86.in_delay_slot = DELAY_PC;
1594 sh4_x86.branch_taken = TRUE;
1595 sh4_x86.tstate = TSTATE_NONE;
1596 if( UNTRANSLATABLE(pc+2) ) {
1597 ADD_imm32_r32( disp, R_EAX );
1598 store_spreg( R_EAX, R_NEW_PC );
1599 exit_block_emu(pc+2);
1600 return 2;
1601 } else {
1602 sh4_translate_instruction( pc + 2 );
1603 exit_block_rel( disp + pc + 4, pc+4 );
1604 return 4;
1605 }
1606 }
1607 :}
1608 BSRF Rn {:
1609 COUNT_INST(I_BSRF);
1610 if( sh4_x86.in_delay_slot ) {
1611 SLOTILLEGAL();
1612 } else {
1613 load_spreg( R_EAX, R_PC );
1614 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1615 store_spreg( R_EAX, R_PR );
1616 ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
1617 store_spreg( R_EAX, R_NEW_PC );
1619 sh4_x86.in_delay_slot = DELAY_PC;
1620 sh4_x86.tstate = TSTATE_NONE;
1621 sh4_x86.branch_taken = TRUE;
1622 if( UNTRANSLATABLE(pc+2) ) {
1623 exit_block_emu(pc+2);
1624 return 2;
1625 } else {
1626 sh4_translate_instruction( pc + 2 );
1627 exit_block_newpcset(pc+2);
1628 return 4;
1629 }
1630 }
1631 :}
1632 BT disp {:
1633 COUNT_INST(I_BT);
1634 if( sh4_x86.in_delay_slot ) {
1635 SLOTILLEGAL();
1636 } else {
1637 sh4vma_t target = disp + pc + 4;
1638 JF_rel8( nottaken );
1639 exit_block_rel(target, pc+2 );
1640 JMP_TARGET(nottaken);
1641 return 2;
1642 }
1643 :}
1644 BT/S disp {:
1645 COUNT_INST(I_BTS);
1646 if( sh4_x86.in_delay_slot ) {
1647 SLOTILLEGAL();
1648 } else {
1649 sh4_x86.in_delay_slot = DELAY_PC;
1650 if( UNTRANSLATABLE(pc+2) ) {
1651 load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
1652 JF_rel8(nottaken);
1653 ADD_imm32_r32( disp, R_EAX );
1654 JMP_TARGET(nottaken);
1655 ADD_sh4r_r32( R_PC, R_EAX );
1656 store_spreg( R_EAX, R_NEW_PC );
1657 exit_block_emu(pc+2);
1658 sh4_x86.branch_taken = TRUE;
1659 return 2;
1660 } else {
1661 if( sh4_x86.tstate == TSTATE_NONE ) {
1662 CMP_imm8s_sh4r( 1, R_T );
1663 sh4_x86.tstate = TSTATE_E;
1664 }
1665 OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
1666 int save_tstate = sh4_x86.tstate;
1667 sh4_translate_instruction(pc+2);
1668 exit_block_rel( disp + pc + 4, pc+4 );
1669 // not taken
1670 *patch = (xlat_output - ((uint8_t *)patch)) - 4;
1671 sh4_x86.tstate = save_tstate;
1672 sh4_translate_instruction(pc+2);
1673 return 4;
1674 }
1675 }
1676 :}
1677 JMP @Rn {:
1678 COUNT_INST(I_JMP);
1679 if( sh4_x86.in_delay_slot ) {
1680 SLOTILLEGAL();
1681 } else {
1682 load_reg( R_ECX, Rn );
1683 store_spreg( R_ECX, R_NEW_PC );
1684 sh4_x86.in_delay_slot = DELAY_PC;
1685 sh4_x86.branch_taken = TRUE;
1686 if( UNTRANSLATABLE(pc+2) ) {
1687 exit_block_emu(pc+2);
1688 return 2;
1689 } else {
1690 sh4_translate_instruction(pc+2);
1691 exit_block_newpcset(pc+2);
1692 return 4;
1693 }
1694 }
1695 :}
1696 JSR @Rn {:
1697 COUNT_INST(I_JSR);
1698 if( sh4_x86.in_delay_slot ) {
1699 SLOTILLEGAL();
1700 } else {
1701 load_spreg( R_EAX, R_PC );
1702 ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
1703 store_spreg( R_EAX, R_PR );
1704 load_reg( R_ECX, Rn );
1705 store_spreg( R_ECX, R_NEW_PC );
1706 sh4_x86.in_delay_slot = DELAY_PC;
1707 sh4_x86.branch_taken = TRUE;
1708 sh4_x86.tstate = TSTATE_NONE;
1709 if( UNTRANSLATABLE(pc+2) ) {
1710 exit_block_emu(pc+2);
1711 return 2;
1712 } else {
1713 sh4_translate_instruction(pc+2);
1714 exit_block_newpcset(pc+2);
1715 return 4;
1716 }
1717 }
1718 :}
1719 RTE {:
1720 COUNT_INST(I_RTE);
1721 if( sh4_x86.in_delay_slot ) {
1722 SLOTILLEGAL();
1723 } else {
1724 check_priv();
1725 load_spreg( R_ECX, R_SPC );
1726 store_spreg( R_ECX, R_NEW_PC );
1727 load_spreg( R_EAX, R_SSR );
1728 call_func1( sh4_write_sr, R_EAX );
1729 sh4_x86.in_delay_slot = DELAY_PC;
1730 sh4_x86.priv_checked = FALSE;
1731 sh4_x86.fpuen_checked = FALSE;
1732 sh4_x86.tstate = TSTATE_NONE;
1733 sh4_x86.branch_taken = TRUE;
1734 if( UNTRANSLATABLE(pc+2) ) {
1735 exit_block_emu(pc+2);
1736 return 2;
1737 } else {
1738 sh4_translate_instruction(pc+2);
1739 exit_block_newpcset(pc+2);
1740 return 4;
1741 }
1742 }
1743 :}
1744 RTS {:
1745 COUNT_INST(I_RTS);
1746 if( sh4_x86.in_delay_slot ) {
1747 SLOTILLEGAL();
1748 } else {
1749 load_spreg( R_ECX, R_PR );
1750 store_spreg( R_ECX, R_NEW_PC );
1751 sh4_x86.in_delay_slot = DELAY_PC;
1752 sh4_x86.branch_taken = TRUE;
1753 if( UNTRANSLATABLE(pc+2) ) {
1754 exit_block_emu(pc+2);
1755 return 2;
1756 } else {
1757 sh4_translate_instruction(pc+2);
1758 exit_block_newpcset(pc+2);
1759 return 4;
1760 }
1761 }
1762 :}
1763 TRAPA #imm {:
1764 COUNT_INST(I_TRAPA);
1765 if( sh4_x86.in_delay_slot ) {
1766 SLOTILLEGAL();
1767 } else {
1768 load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc ); // 5
1769 ADD_r32_sh4r( R_ECX, R_PC );
1770 load_imm32( R_EAX, imm );
1771 call_func1( sh4_raise_trap, R_EAX );
1772 sh4_x86.tstate = TSTATE_NONE;
1773 exit_block_pcset(pc);
1774 sh4_x86.branch_taken = TRUE;
1775 return 2;
1776 }
1777 :}
1778 UNDEF {:
1779 COUNT_INST(I_UNDEF);
1780 if( sh4_x86.in_delay_slot ) {
1781 SLOTILLEGAL();
1782 } else {
1783 JMP_exc(EXC_ILLEGAL);
1784 return 2;
1785 }
1786 :}
1788 CLRMAC {:
1789 COUNT_INST(I_CLRMAC);
1790 XOR_r32_r32(R_EAX, R_EAX);
1791 store_spreg( R_EAX, R_MACL );
1792 store_spreg( R_EAX, R_MACH );
1793 sh4_x86.tstate = TSTATE_NONE;
1794 :}
1795 CLRS {:
1796 COUNT_INST(I_CLRS);
1797 CLC();
1798 SETC_sh4r(R_S);
1799 sh4_x86.tstate = TSTATE_NONE;
1800 :}
1801 CLRT {:
1802 COUNT_INST(I_CLRT);
1803 CLC();
1804 SETC_t();
1805 sh4_x86.tstate = TSTATE_C;
1806 :}
1807 SETS {:
1808 COUNT_INST(I_SETS);
1809 STC();
1810 SETC_sh4r(R_S);
1811 sh4_x86.tstate = TSTATE_NONE;
1812 :}
1813 SETT {:
1814 COUNT_INST(I_SETT);
1815 STC();
1816 SETC_t();
1817 sh4_x86.tstate = TSTATE_C;
1818 :}
1820 /* Floating point moves */
1821 FMOV FRm, FRn {:
1822 COUNT_INST(I_FMOV1);
1823 check_fpuen();
1824 if( sh4_x86.double_size ) {
1825 load_dr0( R_EAX, FRm );
1826 load_dr1( R_ECX, FRm );
1827 store_dr0( R_EAX, FRn );
1828 store_dr1( R_ECX, FRn );
1829 } else {
1830 load_fr( R_EAX, FRm ); // SZ=0 branch
1831 store_fr( R_EAX, FRn );
1832 }
1833 :}
1834 FMOV FRm, @Rn {:
1835 COUNT_INST(I_FMOV2);
1836 check_fpuen();
1837 load_reg( R_EAX, Rn );
1838 if( sh4_x86.double_size ) {
1839 check_walign64( R_EAX );
1840 MMU_TRANSLATE_WRITE( R_EAX );
1841 load_dr0( R_EDX, FRm );
1842 load_dr1( R_ECX, FRm );
1843 MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
1844 } else {
1845 check_walign32( R_EAX );
1846 MMU_TRANSLATE_WRITE( R_EAX );
1847 load_fr( R_EDX, FRm );
1848 MEM_WRITE_LONG( R_EAX, R_EDX );
1849 }
1850 sh4_x86.tstate = TSTATE_NONE;
1851 :}
1852 FMOV @Rm, FRn {:
1853 COUNT_INST(I_FMOV5);
1854 check_fpuen();
1855 load_reg( R_EAX, Rm );
1856 if( sh4_x86.double_size ) {
1857 check_ralign64( R_EAX );
1858 MMU_TRANSLATE_READ( R_EAX );
1859 MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
1860 store_dr0( R_EDX, FRn );
1861 store_dr1( R_EAX, FRn );
1862 } else {
1863 check_ralign32( R_EAX );
1864 MMU_TRANSLATE_READ( R_EAX );
1865 MEM_READ_LONG( R_EAX, R_EAX );
1866 store_fr( R_EAX, FRn );
1867 }
1868 sh4_x86.tstate = TSTATE_NONE;
1869 :}
1870 FMOV FRm, @-Rn {:
1871 COUNT_INST(I_FMOV3);
1872 check_fpuen();
1873 load_reg( R_EAX, Rn );
1874 if( sh4_x86.double_size ) {
1875 check_walign64( R_EAX );
1876 ADD_imm8s_r32(-8,R_EAX);
1877 MMU_TRANSLATE_WRITE( R_EAX );
1878 load_dr0( R_EDX, FRm );
1879 load_dr1( R_ECX, FRm );
1880 ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
1881 MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
1882 } else {
1883 check_walign32( R_EAX );
1884 ADD_imm8s_r32( -4, R_EAX );
1885 MMU_TRANSLATE_WRITE( R_EAX );
1886 load_fr( R_EDX, FRm );
1887 ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
1888 MEM_WRITE_LONG( R_EAX, R_EDX );
1889 }
1890 sh4_x86.tstate = TSTATE_NONE;
1891 :}
1892 FMOV @Rm+, FRn {:
1893 COUNT_INST(I_FMOV6);
1894 check_fpuen();
1895 load_reg( R_EAX, Rm );
1896 if( sh4_x86.double_size ) {
1897 check_ralign64( R_EAX );
1898 MMU_TRANSLATE_READ( R_EAX );
1899 ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
1900 MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
1901 store_dr0( R_EDX, FRn );
1902 store_dr1( R_EAX, FRn );
1903 } else {
1904 check_ralign32( R_EAX );
1905 MMU_TRANSLATE_READ( R_EAX );
1906 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
1907 MEM_READ_LONG( R_EAX, R_EAX );
1908 store_fr( R_EAX, FRn );
1909 }
1910 sh4_x86.tstate = TSTATE_NONE;
1911 :}
1912 FMOV FRm, @(R0, Rn) {:
1913 COUNT_INST(I_FMOV4);
1914 check_fpuen();
1915 load_reg( R_EAX, Rn );
1916 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1917 if( sh4_x86.double_size ) {
1918 check_walign64( R_EAX );
1919 MMU_TRANSLATE_WRITE( R_EAX );
1920 load_dr0( R_EDX, FRm );
1921 load_dr1( R_ECX, FRm );
1922 MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
1923 } else {
1924 check_walign32( R_EAX );
1925 MMU_TRANSLATE_WRITE( R_EAX );
1926 load_fr( R_EDX, FRm );
1927 MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
1928 }
1929 sh4_x86.tstate = TSTATE_NONE;
1930 :}
1931 FMOV @(R0, Rm), FRn {:
1932 COUNT_INST(I_FMOV7);
1933 check_fpuen();
1934 load_reg( R_EAX, Rm );
1935 ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
1936 if( sh4_x86.double_size ) {
1937 check_ralign64( R_EAX );
1938 MMU_TRANSLATE_READ( R_EAX );
1939 MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
1940 store_dr0( R_ECX, FRn );
1941 store_dr1( R_EAX, FRn );
1942 } else {
1943 check_ralign32( R_EAX );
1944 MMU_TRANSLATE_READ( R_EAX );
1945 MEM_READ_LONG( R_EAX, R_EAX );
1946 store_fr( R_EAX, FRn );
1947 }
1948 sh4_x86.tstate = TSTATE_NONE;
1949 :}
1950 FLDI0 FRn {: /* IFF PR=0 */
1951 COUNT_INST(I_FLDI0);
1952 check_fpuen();
1953 if( sh4_x86.double_prec == 0 ) {
1954 XOR_r32_r32( R_EAX, R_EAX );
1955 store_fr( R_EAX, FRn );
1956 }
1957 sh4_x86.tstate = TSTATE_NONE;
1958 :}
1959 FLDI1 FRn {: /* IFF PR=0 */
1960 COUNT_INST(I_FLDI1);
1961 check_fpuen();
1962 if( sh4_x86.double_prec == 0 ) {
1963 load_imm32(R_EAX, 0x3F800000);
1964 store_fr( R_EAX, FRn );
1965 }
1966 :}
1968 FLOAT FPUL, FRn {:
1969 COUNT_INST(I_FLOAT);
1970 check_fpuen();
1971 FILD_sh4r(R_FPUL);
1972 if( sh4_x86.double_prec ) {
1973 pop_dr( FRn );
1974 } else {
1975 pop_fr( FRn );
1976 }
1977 :}
1978 FTRC FRm, FPUL {:
1979 COUNT_INST(I_FTRC);
1980 check_fpuen();
1981 if( sh4_x86.double_prec ) {
1982 push_dr( FRm );
1983 } else {
1984 push_fr( FRm );
1985 }
1986 load_ptr( R_ECX, &max_int );
1987 FILD_r32ind( R_ECX );
1988 FCOMIP_st(1);
1989 JNA_rel8( sat );
1990 load_ptr( R_ECX, &min_int ); // 5
1991 FILD_r32ind( R_ECX ); // 2
1992 FCOMIP_st(1); // 2
1993 JAE_rel8( sat2 ); // 2
1994 load_ptr( R_EAX, &save_fcw );
1995 FNSTCW_r32ind( R_EAX );
1996 load_ptr( R_EDX, &trunc_fcw );
1997 FLDCW_r32ind( R_EDX );
1998 FISTP_sh4r(R_FPUL); // 3
1999 FLDCW_r32ind( R_EAX );
2000 JMP_rel8(end); // 2
2002 JMP_TARGET(sat);
2003 JMP_TARGET(sat2);
2004 MOV_r32ind_r32( R_ECX, R_ECX ); // 2
2005 store_spreg( R_ECX, R_FPUL );
2006 FPOP_st();
2007 JMP_TARGET(end);
2008 sh4_x86.tstate = TSTATE_NONE;
2009 :}
2010 FLDS FRm, FPUL {:
2011 COUNT_INST(I_FLDS);
2012 check_fpuen();
2013 load_fr( R_EAX, FRm );
2014 store_spreg( R_EAX, R_FPUL );
2015 :}
2016 FSTS FPUL, FRn {:
2017 COUNT_INST(I_FSTS);
2018 check_fpuen();
2019 load_spreg( R_EAX, R_FPUL );
2020 store_fr( R_EAX, FRn );
2021 :}
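/* FCNVDS (double to single, into FPUL) and FCNVSD (single FPUL to double)
 * are defined only for PR=1, so nothing is emitted when the translator
 * knows the FPU is in single-precision mode. */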
2022 FCNVDS FRm, FPUL {:
2023 COUNT_INST(I_FCNVDS);
2024 check_fpuen();
2025 if( sh4_x86.double_prec ) {
2026 push_dr( FRm );
2027 pop_fpul();
2028 }
2029 :}
2030 FCNVSD FPUL, FRn {:
2031 COUNT_INST(I_FCNVSD);
2032 check_fpuen();
2033 if( sh4_x86.double_prec ) {
2034 push_fpul();
2035 pop_dr( FRn );
2036 }
2037 :}
2039 /* Floating point instructions */
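/* The arithmetic ops below all share one x87 pattern, selected by
 * sh4_x86.double_prec: push the operand(s) onto the FP stack, apply the
 * operation, and pop the result back into the register file. For FADD
 * this is
 *
 *     push_fr(FRm); push_fr(FRn); FADDP_st(1); pop_fr(FRn);
 *
 * with push_dr/pop_dr substituted in double-precision mode. */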
2040 FABS FRn {:
2041 COUNT_INST(I_FABS);
2042 check_fpuen();
2043 if( sh4_x86.double_prec ) {
2044 push_dr(FRn);
2045 FABS_st0();
2046 pop_dr(FRn);
2047 } else {
2048 push_fr(FRn);
2049 FABS_st0();
2050 pop_fr(FRn);
2051 }
2052 :}
2053 FADD FRm, FRn {:
2054 COUNT_INST(I_FADD);
2055 check_fpuen();
2056 if( sh4_x86.double_prec ) {
2057 push_dr(FRm);
2058 push_dr(FRn);
2059 FADDP_st(1);
2060 pop_dr(FRn);
2061 } else {
2062 push_fr(FRm);
2063 push_fr(FRn);
2064 FADDP_st(1);
2065 pop_fr(FRn);
2066 }
2067 :}
2068 FDIV FRm, FRn {:
2069 COUNT_INST(I_FDIV);
2070 check_fpuen();
2071 if( sh4_x86.double_prec ) {
2072 push_dr(FRn);
2073 push_dr(FRm);
2074 FDIVP_st(1);
2075 pop_dr(FRn);
2076 } else {
2077 push_fr(FRn);
2078 push_fr(FRm);
2079 FDIVP_st(1);
2080 pop_fr(FRn);
2081 }
2082 :}
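/* FMAC computes FRn := FR0 * FRm + FRn; the literal 0 operand selects FR0
 * (DR0 in the double-precision path, although the hardware instruction is
 * documented only for PR=0). */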
2083 FMAC FR0, FRm, FRn {:
2084 COUNT_INST(I_FMAC);
2085 check_fpuen();
2086 if( sh4_x86.double_prec ) {
2087 push_dr( 0 );
2088 push_dr( FRm );
2089 FMULP_st(1);
2090 push_dr( FRn );
2091 FADDP_st(1);
2092 pop_dr( FRn );
2093 } else {
2094 push_fr( 0 );
2095 push_fr( FRm );
2096 FMULP_st(1);
2097 push_fr( FRn );
2098 FADDP_st(1);
2099 pop_fr( FRn );
2100 }
2101 :}
2103 FMUL FRm, FRn {:
2104 COUNT_INST(I_FMUL);
2105 check_fpuen();
2106 if( sh4_x86.double_prec ) {
2107 push_dr(FRm);
2108 push_dr(FRn);
2109 FMULP_st(1);
2110 pop_dr(FRn);
2111 } else {
2112 push_fr(FRm);
2113 push_fr(FRn);
2114 FMULP_st(1);
2115 pop_fr(FRn);
2116 }
2117 :}
2118 FNEG FRn {:
2119 COUNT_INST(I_FNEG);
2120 check_fpuen();
2121 if( sh4_x86.double_prec ) {
2122 push_dr(FRn);
2123 FCHS_st0();
2124 pop_dr(FRn);
2125 } else {
2126 push_fr(FRn);
2127 FCHS_st0();
2128 pop_fr(FRn);
2129 }
2130 :}
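/* FSRRA approximates 1/sqrt(FRn); this translation computes it exactly as
 * FLD1 followed by FSQRT and FDIVP (the hardware is permitted to return a
 * lower-precision approximation). Defined only for PR=0, hence the guard. */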
2131 FSRRA FRn {:
2132 COUNT_INST(I_FSRRA);
2133 check_fpuen();
2134 if( sh4_x86.double_prec == 0 ) {
2135 FLD1_st0();
2136 push_fr(FRn);
2137 FSQRT_st0();
2138 FDIVP_st(1);
2139 pop_fr(FRn);
2140 }
2141 :}
2142 FSQRT FRn {:
2143 COUNT_INST(I_FSQRT);
2144 check_fpuen();
2145 if( sh4_x86.double_prec ) {
2146 push_dr(FRn);
2147 FSQRT_st0();
2148 pop_dr(FRn);
2149 } else {
2150 push_fr(FRn);
2151 FSQRT_st0();
2152 pop_fr(FRn);
2153 }
2154 :}
2155 FSUB FRm, FRn {:
2156 COUNT_INST(I_FSUB);
2157 check_fpuen();
2158 if( sh4_x86.double_prec ) {
2159 push_dr(FRn);
2160 push_dr(FRm);
2161 FSUBP_st(1);
2162 pop_dr(FRn);
2163 } else {
2164 push_fr(FRn);
2165 push_fr(FRm);
2166 FSUBP_st(1);
2167 pop_fr(FRn);
2168 }
2169 :}
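/* The FP comparisons pop both operands with FCOMIP/FPOP and latch the
 * result into T via SETcc. sh4_x86.tstate records which host condition
 * (E or A) currently mirrors T, so a following conditional branch can test
 * the live flags instead of reloading sh4r.t. */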
2171 FCMP/EQ FRm, FRn {:
2172 COUNT_INST(I_FCMPEQ);
2173 check_fpuen();
2174 if( sh4_x86.double_prec ) {
2175 push_dr(FRm);
2176 push_dr(FRn);
2177 } else {
2178 push_fr(FRm);
2179 push_fr(FRn);
2180 }
2181 FCOMIP_st(1);
2182 SETE_t();
2183 FPOP_st();
2184 sh4_x86.tstate = TSTATE_E;
2185 :}
2186 FCMP/GT FRm, FRn {:
2187 COUNT_INST(I_FCMPGT);
2188 check_fpuen();
2189 if( sh4_x86.double_prec ) {
2190 push_dr(FRm);
2191 push_dr(FRn);
2192 } else {
2193 push_fr(FRm);
2194 push_fr(FRn);
2195 }
2196 FCOMIP_st(1);
2197 SETA_t();
2198 FPOP_st();
2199 sh4_x86.tstate = TSTATE_A;
2200 :}
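/* FSCA: FPUL holds a fixed-point angle with 2^16 units per full rotation;
 * per the SH4 spec the results are sine into FRn and cosine into FRn+1,
 * computed here by the sh4_fsca helper. FRn&0x0E forces the even base
 * register of the pair. Defined only for PR=0. */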
2202 FSCA FPUL, FRn {:
2203 COUNT_INST(I_FSCA);
2204 check_fpuen();
2205 if( sh4_x86.double_prec == 0 ) {
2206 LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
2207 load_spreg( R_EAX, R_FPUL );
2208 call_func2( sh4_fsca, R_EAX, R_EDX );
2209 }
2210 sh4_x86.tstate = TSTATE_NONE;
2211 :}
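/* FIPR computes the 4-element dot product FVn.FVm into the last element of
 * FVn. The SSE3 path multiplies with MULPS and reduces with two HADDPS
 * passes; the scalar fallback accumulates the four products on the x87
 * stack. The +2 (rather than +3) result offset reflects the word-swapped
 * storage of each fr[] pair, so architectural FR[4n+3] lives at index
 * (4n+3)^1 = 4n+2. */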
2212 FIPR FVm, FVn {:
2213 COUNT_INST(I_FIPR);
2214 check_fpuen();
2215 if( sh4_x86.double_prec == 0 ) {
2216 if( sh4_x86.sse3_enabled ) {
2217 MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
2218 MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
2219 HADDPS_xmm_xmm( 4, 4 );
2220 HADDPS_xmm_xmm( 4, 4 );
2221 MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
2222 } else {
2223 push_fr( FVm<<2 );
2224 push_fr( FVn<<2 );
2225 FMULP_st(1);
2226 push_fr( (FVm<<2)+1);
2227 push_fr( (FVn<<2)+1);
2228 FMULP_st(1);
2229 FADDP_st(1);
2230 push_fr( (FVm<<2)+2);
2231 push_fr( (FVn<<2)+2);
2232 FMULP_st(1);
2233 FADDP_st(1);
2234 push_fr( (FVm<<2)+3);
2235 push_fr( (FVn<<2)+3);
2236 FMULP_st(1);
2237 FADDP_st(1);
2238 pop_fr( (FVn<<2)+3);
2239 }
2240 }
2241 :}
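/* FTRV multiplies the 4x4 back-bank matrix XMTRX (fr[1]) by vector FVn.
 * The SSE3 path loads the four matrix columns, splats each vector element
 * across an XMM register (MOVSLDUP/MOVSHDUP then MOVLHPS/MOVHLPS), and
 * sums the scaled columns; the odd element orderings in the comments are
 * again the word-swapped fr[] layout. Without SSE3 this falls back to the
 * sh4_ftrv helper. */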
2242 FTRV XMTRX, FVn {:
2243 COUNT_INST(I_FTRV);
2244 check_fpuen();
2245 if( sh4_x86.double_prec == 0 ) {
2246 if( sh4_x86.sse3_enabled ) {
2247 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1 M0 M3 M2
2248 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5 M4 M7 M6
2249 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9 M8 M11 M10
2250 MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 ); // M13 M12 M15 M14
2252 MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
2253 MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
2254 MOVAPS_xmm_xmm( 4, 6 );
2255 MOVAPS_xmm_xmm( 5, 7 );
2256 MOVLHPS_xmm_xmm( 4, 4 ); // V1 V1 V1 V1
2257 MOVHLPS_xmm_xmm( 6, 6 ); // V3 V3 V3 V3
2258 MOVLHPS_xmm_xmm( 5, 5 ); // V0 V0 V0 V0
2259 MOVHLPS_xmm_xmm( 7, 7 ); // V2 V2 V2 V2
2260 MULPS_xmm_xmm( 0, 4 );
2261 MULPS_xmm_xmm( 1, 5 );
2262 MULPS_xmm_xmm( 2, 6 );
2263 MULPS_xmm_xmm( 3, 7 );
2264 ADDPS_xmm_xmm( 5, 4 );
2265 ADDPS_xmm_xmm( 7, 6 );
2266 ADDPS_xmm_xmm( 6, 4 );
2267 MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
2268 } else {
2269 LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
2270 call_func1( sh4_ftrv, R_EAX );
2271 }
2272 }
2273 sh4_x86.tstate = TSTATE_NONE;
2274 :}
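/* FRCHG swaps the two FP register banks by toggling FPSCR.FR and letting
 * sh4_switch_fr_banks do the actual exchange; FSCHG only toggles FPSCR.SZ,
 * and the translator mirrors it in sh4_x86.double_size so that FMOVs later
 * in this block are compiled for the new transfer size. */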
2276 FRCHG {:
2277 COUNT_INST(I_FRCHG);
2278 check_fpuen();
2279 load_spreg( R_ECX, R_FPSCR );
2280 XOR_imm32_r32( FPSCR_FR, R_ECX );
2281 store_spreg( R_ECX, R_FPSCR );
2282 call_func0( sh4_switch_fr_banks );
2283 sh4_x86.tstate = TSTATE_NONE;
2284 :}
2285 FSCHG {:
2286 COUNT_INST(I_FSCHG);
2287 check_fpuen();
2288 load_spreg( R_ECX, R_FPSCR );
2289 XOR_imm32_r32( FPSCR_SZ, R_ECX );
2290 store_spreg( R_ECX, R_FPSCR );
2291 sh4_x86.tstate = TSTATE_NONE;
2292 sh4_x86.double_size = !sh4_x86.double_size;
2293 :}
2295 /* Processor control instructions */
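/* Writing SR can change MD, RB and FD, so the store goes through the
 * sh4_write_sr helper and the cached privilege/FPU-enable checks are
 * invalidated for the rest of the block. LDC Rm, SR is also illegal in a
 * delay slot, hence SLOTILLEGAL(). */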
2296 LDC Rm, SR {:
2297 COUNT_INST(I_LDCSR);
2298 if( sh4_x86.in_delay_slot ) {
2299 SLOTILLEGAL();
2300 } else {
2301 check_priv();
2302 load_reg( R_EAX, Rm );
2303 call_func1( sh4_write_sr, R_EAX );
2304 sh4_x86.priv_checked = FALSE;
2305 sh4_x86.fpuen_checked = FALSE;
2306 sh4_x86.tstate = TSTATE_NONE;
2307 }
2308 :}
2309 LDC Rm, GBR {:
2310 COUNT_INST(I_LDC);
2311 load_reg( R_EAX, Rm );
2312 store_spreg( R_EAX, R_GBR );
2313 :}
2314 LDC Rm, VBR {:
2315 COUNT_INST(I_LDC);
2316 check_priv();
2317 load_reg( R_EAX, Rm );
2318 store_spreg( R_EAX, R_VBR );
2319 sh4_x86.tstate = TSTATE_NONE;
2320 :}
2321 LDC Rm, SSR {:
2322 COUNT_INST(I_LDC);
2323 check_priv();
2324 load_reg( R_EAX, Rm );
2325 store_spreg( R_EAX, R_SSR );
2326 sh4_x86.tstate = TSTATE_NONE;
2327 :}
2328 LDC Rm, SGR {:
2329 COUNT_INST(I_LDC);
2330 check_priv();
2331 load_reg( R_EAX, Rm );
2332 store_spreg( R_EAX, R_SGR );
2333 sh4_x86.tstate = TSTATE_NONE;
2334 :}
2335 LDC Rm, SPC {:
2336 COUNT_INST(I_LDC);
2337 check_priv();
2338 load_reg( R_EAX, Rm );
2339 store_spreg( R_EAX, R_SPC );
2340 sh4_x86.tstate = TSTATE_NONE;
2341 :}
2342 LDC Rm, DBR {:
2343 COUNT_INST(I_LDC);
2344 check_priv();
2345 load_reg( R_EAX, Rm );
2346 store_spreg( R_EAX, R_DBR );
2347 sh4_x86.tstate = TSTATE_NONE;
2348 :}
2349 LDC Rm, Rn_BANK {:
2350 COUNT_INST(I_LDC);
2351 check_priv();
2352 load_reg( R_EAX, Rm );
2353 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2354 sh4_x86.tstate = TSTATE_NONE;
2355 :}
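/* The LDC.L forms all follow the same load pattern: alignment check, MMU
 * translate, then post-increment Rm only once translation has succeeded,
 * keeping the instruction restartable if the read faults. */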
2356 LDC.L @Rm+, GBR {:
2357 COUNT_INST(I_LDCM);
2358 load_reg( R_EAX, Rm );
2359 check_ralign32( R_EAX );
2360 MMU_TRANSLATE_READ( R_EAX );
2361 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2362 MEM_READ_LONG( R_EAX, R_EAX );
2363 store_spreg( R_EAX, R_GBR );
2364 sh4_x86.tstate = TSTATE_NONE;
2365 :}
2366 LDC.L @Rm+, SR {:
2367 COUNT_INST(I_LDCSRM);
2368 if( sh4_x86.in_delay_slot ) {
2369 SLOTILLEGAL();
2370 } else {
2371 check_priv();
2372 load_reg( R_EAX, Rm );
2373 check_ralign32( R_EAX );
2374 MMU_TRANSLATE_READ( R_EAX );
2375 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2376 MEM_READ_LONG( R_EAX, R_EAX );
2377 call_func1( sh4_write_sr, R_EAX );
2378 sh4_x86.priv_checked = FALSE;
2379 sh4_x86.fpuen_checked = FALSE;
2380 sh4_x86.tstate = TSTATE_NONE;
2381 }
2382 :}
2383 LDC.L @Rm+, VBR {:
2384 COUNT_INST(I_LDCM);
2385 check_priv();
2386 load_reg( R_EAX, Rm );
2387 check_ralign32( R_EAX );
2388 MMU_TRANSLATE_READ( R_EAX );
2389 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2390 MEM_READ_LONG( R_EAX, R_EAX );
2391 store_spreg( R_EAX, R_VBR );
2392 sh4_x86.tstate = TSTATE_NONE;
2393 :}
2394 LDC.L @Rm+, SSR {:
2395 COUNT_INST(I_LDCM);
2396 check_priv();
2397 load_reg( R_EAX, Rm );
2398 check_ralign32( R_EAX );
2399 MMU_TRANSLATE_READ( R_EAX );
2400 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2401 MEM_READ_LONG( R_EAX, R_EAX );
2402 store_spreg( R_EAX, R_SSR );
2403 sh4_x86.tstate = TSTATE_NONE;
2404 :}
2405 LDC.L @Rm+, SGR {:
2406 COUNT_INST(I_LDCM);
2407 check_priv();
2408 load_reg( R_EAX, Rm );
2409 check_ralign32( R_EAX );
2410 MMU_TRANSLATE_READ( R_EAX );
2411 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2412 MEM_READ_LONG( R_EAX, R_EAX );
2413 store_spreg( R_EAX, R_SGR );
2414 sh4_x86.tstate = TSTATE_NONE;
2415 :}
2416 LDC.L @Rm+, SPC {:
2417 COUNT_INST(I_LDCM);
2418 check_priv();
2419 load_reg( R_EAX, Rm );
2420 check_ralign32( R_EAX );
2421 MMU_TRANSLATE_READ( R_EAX );
2422 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2423 MEM_READ_LONG( R_EAX, R_EAX );
2424 store_spreg( R_EAX, R_SPC );
2425 sh4_x86.tstate = TSTATE_NONE;
2426 :}
2427 LDC.L @Rm+, DBR {:
2428 COUNT_INST(I_LDCM);
2429 check_priv();
2430 load_reg( R_EAX, Rm );
2431 check_ralign32( R_EAX );
2432 MMU_TRANSLATE_READ( R_EAX );
2433 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2434 MEM_READ_LONG( R_EAX, R_EAX );
2435 store_spreg( R_EAX, R_DBR );
2436 sh4_x86.tstate = TSTATE_NONE;
2437 :}
2438 LDC.L @Rm+, Rn_BANK {:
2439 COUNT_INST(I_LDCM);
2440 check_priv();
2441 load_reg( R_EAX, Rm );
2442 check_ralign32( R_EAX );
2443 MMU_TRANSLATE_READ( R_EAX );
2444 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2445 MEM_READ_LONG( R_EAX, R_EAX );
2446 store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
2447 sh4_x86.tstate = TSTATE_NONE;
2448 :}
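/* Both FPSCR loads return 2, which appears to end the translation block
 * here: the new FPSCR value may flip PR or SZ, invalidating the
 * double_prec/double_size flags the rest of the block would have been
 * compiled against. */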
2449 LDS Rm, FPSCR {:
2450 COUNT_INST(I_LDSFPSCR);
2451 check_fpuen();
2452 load_reg( R_EAX, Rm );
2453 call_func1( sh4_write_fpscr, R_EAX );
2454 sh4_x86.tstate = TSTATE_NONE;
2455 return 2;
2456 :}
2457 LDS.L @Rm+, FPSCR {:
2458 COUNT_INST(I_LDSFPSCRM);
2459 check_fpuen();
2460 load_reg( R_EAX, Rm );
2461 check_ralign32( R_EAX );
2462 MMU_TRANSLATE_READ( R_EAX );
2463 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2464 MEM_READ_LONG( R_EAX, R_EAX );
2465 call_func1( sh4_write_fpscr, R_EAX );
2466 sh4_x86.tstate = TSTATE_NONE;
2467 return 2;
2468 :}
2469 LDS Rm, FPUL {:
2470 COUNT_INST(I_LDS);
2471 check_fpuen();
2472 load_reg( R_EAX, Rm );
2473 store_spreg( R_EAX, R_FPUL );
2474 :}
2475 LDS.L @Rm+, FPUL {:
2476 COUNT_INST(I_LDSM);
2477 check_fpuen();
2478 load_reg( R_EAX, Rm );
2479 check_ralign32( R_EAX );
2480 MMU_TRANSLATE_READ( R_EAX );
2481 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2482 MEM_READ_LONG( R_EAX, R_EAX );
2483 store_spreg( R_EAX, R_FPUL );
2484 sh4_x86.tstate = TSTATE_NONE;
2485 :}
2486 LDS Rm, MACH {:
2487 COUNT_INST(I_LDS);
2488 load_reg( R_EAX, Rm );
2489 store_spreg( R_EAX, R_MACH );
2490 :}
2491 LDS.L @Rm+, MACH {:
2492 COUNT_INST(I_LDSM);
2493 load_reg( R_EAX, Rm );
2494 check_ralign32( R_EAX );
2495 MMU_TRANSLATE_READ( R_EAX );
2496 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2497 MEM_READ_LONG( R_EAX, R_EAX );
2498 store_spreg( R_EAX, R_MACH );
2499 sh4_x86.tstate = TSTATE_NONE;
2500 :}
2501 LDS Rm, MACL {:
2502 COUNT_INST(I_LDS);
2503 load_reg( R_EAX, Rm );
2504 store_spreg( R_EAX, R_MACL );
2505 :}
2506 LDS.L @Rm+, MACL {:
2507 COUNT_INST(I_LDSM);
2508 load_reg( R_EAX, Rm );
2509 check_ralign32( R_EAX );
2510 MMU_TRANSLATE_READ( R_EAX );
2511 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2512 MEM_READ_LONG( R_EAX, R_EAX );
2513 store_spreg( R_EAX, R_MACL );
2514 sh4_x86.tstate = TSTATE_NONE;
2515 :}
2516 LDS Rm, PR {:
2517 COUNT_INST(I_LDS);
2518 load_reg( R_EAX, Rm );
2519 store_spreg( R_EAX, R_PR );
2520 :}
2521 LDS.L @Rm+, PR {:
2522 COUNT_INST(I_LDSM);
2523 load_reg( R_EAX, Rm );
2524 check_ralign32( R_EAX );
2525 MMU_TRANSLATE_READ( R_EAX );
2526 ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
2527 MEM_READ_LONG( R_EAX, R_EAX );
2528 store_spreg( R_EAX, R_PR );
2529 sh4_x86.tstate = TSTATE_NONE;
2530 :}
2531 LDTLB {:
2532 COUNT_INST(I_LDTLB);
2533 call_func0( MMU_ldtlb );
2534 sh4_x86.tstate = TSTATE_NONE;
2535 :}
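/* The cache-control ops below compile to nothing beyond instruction
 * counting: evidently the operand cache contents aren't modelled, so
 * invalidate/purge/write-back have nothing to act on. */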
2536 OCBI @Rn {:
2537 COUNT_INST(I_OCBI);
2538 :}
2539 OCBP @Rn {:
2540 COUNT_INST(I_OCBP);
2541 :}
2542 OCBWB @Rn {:
2543 COUNT_INST(I_OCBWB);
2544 :}
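/* PREF only has a side effect for the store-queue region 0xE0000000 to
 * 0xE3FFFFFF; masking with 0xFC000000 isolates the top six address bits
 * for the range test. A hit flushes the queue to memory, and with the MMU
 * on the flush itself can fault, so a zero return raises the pending
 * exception (JE_exc). */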
2545 PREF @Rn {:
2546 COUNT_INST(I_PREF);
2547 load_reg( R_EAX, Rn );
2548 MOV_r32_r32( R_EAX, R_ECX );
2549 AND_imm32_r32( 0xFC000000, R_ECX );
2550 CMP_imm32_r32( 0xE0000000, R_ECX );
2551 JNE_rel8(end);
2552 if( sh4_x86.tlb_on ) {
2553 call_func1( sh4_flush_store_queue_mmu, R_EAX );
2554 TEST_r32_r32( R_EAX, R_EAX );
2555 JE_exc(-1);
2556 } else {
2557 call_func1( sh4_flush_store_queue, R_EAX );
2558 }
2559 JMP_TARGET(end);
2560 sh4_x86.tstate = TSTATE_NONE;
2561 :}
2562 SLEEP {:
2563 COUNT_INST(I_SLEEP);
2564 check_priv();
2565 call_func0( sh4_sleep );
2566 sh4_x86.tstate = TSTATE_NONE;
2567 sh4_x86.in_delay_slot = DELAY_NONE;
2568 return 2;
2569 :}
2570 STC SR, Rn {:
2571 COUNT_INST(I_STCSR);
2572 check_priv();
2573 call_func0(sh4_read_sr);
2574 store_reg( R_EAX, Rn );
2575 sh4_x86.tstate = TSTATE_NONE;
2576 :}
2577 STC GBR, Rn {:
2578 COUNT_INST(I_STC);
2579 load_spreg( R_EAX, R_GBR );
2580 store_reg( R_EAX, Rn );
2581 :}
2582 STC VBR, Rn {:
2583 COUNT_INST(I_STC);
2584 check_priv();
2585 load_spreg( R_EAX, R_VBR );
2586 store_reg( R_EAX, Rn );
2587 sh4_x86.tstate = TSTATE_NONE;
2588 :}
2589 STC SSR, Rn {:
2590 COUNT_INST(I_STC);
2591 check_priv();
2592 load_spreg( R_EAX, R_SSR );
2593 store_reg( R_EAX, Rn );
2594 sh4_x86.tstate = TSTATE_NONE;
2595 :}
2596 STC SPC, Rn {:
2597 COUNT_INST(I_STC);
2598 check_priv();
2599 load_spreg( R_EAX, R_SPC );
2600 store_reg( R_EAX, Rn );
2601 sh4_x86.tstate = TSTATE_NONE;
2602 :}
2603 STC SGR, Rn {:
2604 COUNT_INST(I_STC);
2605 check_priv();
2606 load_spreg( R_EAX, R_SGR );
2607 store_reg( R_EAX, Rn );
2608 sh4_x86.tstate = TSTATE_NONE;
2609 :}
2610 STC DBR, Rn {:
2611 COUNT_INST(I_STC);
2612 check_priv();
2613 load_spreg( R_EAX, R_DBR );
2614 store_reg( R_EAX, Rn );
2615 sh4_x86.tstate = TSTATE_NONE;
2616 :}
2617 STC Rm_BANK, Rn {:
2618 COUNT_INST(I_STC);
2619 check_priv();
2620 load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
2621 store_reg( R_EAX, Rn );
2622 sh4_x86.tstate = TSTATE_NONE;
2623 :}
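/* sh4_read_sr returns the composed SR value in EAX, which would clobber
 * the translated store address, so the address is parked in ESP-relative
 * stack-local slot 0 across the call. */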
2624 STC.L SR, @-Rn {:
2625 COUNT_INST(I_STCSRM);
2626 check_priv();
2627 load_reg( R_EAX, Rn );
2628 check_walign32( R_EAX );
2629 ADD_imm8s_r32( -4, R_EAX );
2630 MMU_TRANSLATE_WRITE( R_EAX );
2631 MOV_r32_esp8( R_EAX, 0 );
2632 call_func0( sh4_read_sr );
2633 MOV_r32_r32( R_EAX, R_EDX );
2634 MOV_esp8_r32( 0, R_EAX );
2635 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2636 MEM_WRITE_LONG( R_EAX, R_EDX );
2637 sh4_x86.tstate = TSTATE_NONE;
2638 :}
2639 STC.L VBR, @-Rn {:
2640 COUNT_INST(I_STCM);
2641 check_priv();
2642 load_reg( R_EAX, Rn );
2643 check_walign32( R_EAX );
2644 ADD_imm8s_r32( -4, R_EAX );
2645 MMU_TRANSLATE_WRITE( R_EAX );
2646 load_spreg( R_EDX, R_VBR );
2647 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2648 MEM_WRITE_LONG( R_EAX, R_EDX );
2649 sh4_x86.tstate = TSTATE_NONE;
2650 :}
2651 STC.L SSR, @-Rn {:
2652 COUNT_INST(I_STCM);
2653 check_priv();
2654 load_reg( R_EAX, Rn );
2655 check_walign32( R_EAX );
2656 ADD_imm8s_r32( -4, R_EAX );
2657 MMU_TRANSLATE_WRITE( R_EAX );
2658 load_spreg( R_EDX, R_SSR );
2659 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2660 MEM_WRITE_LONG( R_EAX, R_EDX );
2661 sh4_x86.tstate = TSTATE_NONE;
2662 :}
2663 STC.L SPC, @-Rn {:
2664 COUNT_INST(I_STCM);
2665 check_priv();
2666 load_reg( R_EAX, Rn );
2667 check_walign32( R_EAX );
2668 ADD_imm8s_r32( -4, R_EAX );
2669 MMU_TRANSLATE_WRITE( R_EAX );
2670 load_spreg( R_EDX, R_SPC );
2671 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2672 MEM_WRITE_LONG( R_EAX, R_EDX );
2673 sh4_x86.tstate = TSTATE_NONE;
2674 :}
2675 STC.L SGR, @-Rn {:
2676 COUNT_INST(I_STCM);
2677 check_priv();
2678 load_reg( R_EAX, Rn );
2679 check_walign32( R_EAX );
2680 ADD_imm8s_r32( -4, R_EAX );
2681 MMU_TRANSLATE_WRITE( R_EAX );
2682 load_spreg( R_EDX, R_SGR );
2683 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2684 MEM_WRITE_LONG( R_EAX, R_EDX );
2685 sh4_x86.tstate = TSTATE_NONE;
2686 :}
2687 STC.L DBR, @-Rn {:
2688 COUNT_INST(I_STCM);
2689 check_priv();
2690 load_reg( R_EAX, Rn );
2691 check_walign32( R_EAX );
2692 ADD_imm8s_r32( -4, R_EAX );
2693 MMU_TRANSLATE_WRITE( R_EAX );
2694 load_spreg( R_EDX, R_DBR );
2695 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2696 MEM_WRITE_LONG( R_EAX, R_EDX );
2697 sh4_x86.tstate = TSTATE_NONE;
2698 :}
2699 STC.L Rm_BANK, @-Rn {:
2700 COUNT_INST(I_STCM);
2701 check_priv();
2702 load_reg( R_EAX, Rn );
2703 check_walign32( R_EAX );
2704 ADD_imm8s_r32( -4, R_EAX );
2705 MMU_TRANSLATE_WRITE( R_EAX );
2706 load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
2707 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2708 MEM_WRITE_LONG( R_EAX, R_EDX );
2709 sh4_x86.tstate = TSTATE_NONE;
2710 :}
2711 STC.L GBR, @-Rn {:
2712 COUNT_INST(I_STCM);
2713 load_reg( R_EAX, Rn );
2714 check_walign32( R_EAX );
2715 ADD_imm8s_r32( -4, R_EAX );
2716 MMU_TRANSLATE_WRITE( R_EAX );
2717 load_spreg( R_EDX, R_GBR );
2718 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2719 MEM_WRITE_LONG( R_EAX, R_EDX );
2720 sh4_x86.tstate = TSTATE_NONE;
2721 :}
2722 STS FPSCR, Rn {:
2723 COUNT_INST(I_STSFPSCR);
2724 check_fpuen();
2725 load_spreg( R_EAX, R_FPSCR );
2726 store_reg( R_EAX, Rn );
2727 :}
2728 STS.L FPSCR, @-Rn {:
2729 COUNT_INST(I_STSFPSCRM);
2730 check_fpuen();
2731 load_reg( R_EAX, Rn );
2732 check_walign32( R_EAX );
2733 ADD_imm8s_r32( -4, R_EAX );
2734 MMU_TRANSLATE_WRITE( R_EAX );
2735 load_spreg( R_EDX, R_FPSCR );
2736 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2737 MEM_WRITE_LONG( R_EAX, R_EDX );
2738 sh4_x86.tstate = TSTATE_NONE;
2739 :}
2740 STS FPUL, Rn {:
2741 COUNT_INST(I_STS);
2742 check_fpuen();
2743 load_spreg( R_EAX, R_FPUL );
2744 store_reg( R_EAX, Rn );
2745 :}
2746 STS.L FPUL, @-Rn {:
2747 COUNT_INST(I_STSM);
2748 check_fpuen();
2749 load_reg( R_EAX, Rn );
2750 check_walign32( R_EAX );
2751 ADD_imm8s_r32( -4, R_EAX );
2752 MMU_TRANSLATE_WRITE( R_EAX );
2753 load_spreg( R_EDX, R_FPUL );
2754 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2755 MEM_WRITE_LONG( R_EAX, R_EDX );
2756 sh4_x86.tstate = TSTATE_NONE;
2757 :}
2758 STS MACH, Rn {:
2759 COUNT_INST(I_STS);
2760 load_spreg( R_EAX, R_MACH );
2761 store_reg( R_EAX, Rn );
2762 :}
2763 STS.L MACH, @-Rn {:
2764 COUNT_INST(I_STSM);
2765 load_reg( R_EAX, Rn );
2766 check_walign32( R_EAX );
2767 ADD_imm8s_r32( -4, R_EAX );
2768 MMU_TRANSLATE_WRITE( R_EAX );
2769 load_spreg( R_EDX, R_MACH );
2770 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2771 MEM_WRITE_LONG( R_EAX, R_EDX );
2772 sh4_x86.tstate = TSTATE_NONE;
2773 :}
2774 STS MACL, Rn {:
2775 COUNT_INST(I_STS);
2776 load_spreg( R_EAX, R_MACL );
2777 store_reg( R_EAX, Rn );
2778 :}
2779 STS.L MACL, @-Rn {:
2780 COUNT_INST(I_STSM);
2781 load_reg( R_EAX, Rn );
2782 check_walign32( R_EAX );
2783 ADD_imm8s_r32( -4, R_EAX );
2784 MMU_TRANSLATE_WRITE( R_EAX );
2785 load_spreg( R_EDX, R_MACL );
2786 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2787 MEM_WRITE_LONG( R_EAX, R_EDX );
2788 sh4_x86.tstate = TSTATE_NONE;
2789 :}
2790 STS PR, Rn {:
2791 COUNT_INST(I_STS);
2792 load_spreg( R_EAX, R_PR );
2793 store_reg( R_EAX, Rn );
2794 :}
2795 STS.L PR, @-Rn {:
2796 COUNT_INST(I_STSM);
2797 load_reg( R_EAX, Rn );
2798 check_walign32( R_EAX );
2799 ADD_imm8s_r32( -4, R_EAX );
2800 MMU_TRANSLATE_WRITE( R_EAX );
2801 load_spreg( R_EDX, R_PR );
2802 ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
2803 MEM_WRITE_LONG( R_EAX, R_EDX );
2804 sh4_x86.tstate = TSTATE_NONE;
2805 :}
2807 NOP {:
2808 COUNT_INST(I_NOP);
2809 /* Do nothing. Well, we could emit a 0x90 (x86 NOP), but what would really be the point? */
2810 :}
2811 %%
2812 sh4_x86.in_delay_slot = DELAY_NONE;
2813 return 0;
2814 }