lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 879:a07af43e03c4
prev 875:2147174fb320
next 901:32c5cf5e206f
author nkeynes
date Sat Oct 25 12:36:42 2008 +0000
permissions -rw-r--r--
last change Add --enable-optimized flag (on by default).
When optimized, add -fomit-frame-pointer -fexceptions on non-Mac x86 (Unwinding doesn't seem to work correctly on Mac)
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4stat.h"
    32 #include "sh4/sh4mmio.h"
    33 #include "sh4/x86op.h"
    34 #include "clock.h"
    36 #define DEFAULT_BACKPATCH_SIZE 4096
    38 struct backpatch_record {
    39     uint32_t fixup_offset;
    40     uint32_t fixup_icount;
    41     int32_t exc_code;
    42 };
    44 #define DELAY_NONE 0
    45 #define DELAY_PC 1
    46 #define DELAY_PC_PR 2
    48 /** 
    49  * Struct to manage internal translation state. This state is not saved -
    50  * it is only valid between calls to sh4_translate_begin_block() and
    51  * sh4_translate_end_block()
    52  */
    53 struct sh4_x86_state {
    54     int in_delay_slot;
    55     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    56     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    57     gboolean branch_taken; /* true if we branched unconditionally */
    58     uint32_t block_start_pc;
    59     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    60     int tstate;
    62     /* mode flags */
    63     gboolean tlb_on; /* True if tlb translation is active */
    65     /* Allocated memory for the (block-wide) back-patch list */
    66     struct backpatch_record *backpatch_list;
    67     uint32_t backpatch_posn;
    68     uint32_t backpatch_size;
    69 };
    71 #define TSTATE_NONE -1
    72 #define TSTATE_O    0
    73 #define TSTATE_C    2
    74 #define TSTATE_E    4
    75 #define TSTATE_NE   5
    76 #define TSTATE_G    0xF
    77 #define TSTATE_GE   0xD
    78 #define TSTATE_A    7
    79 #define TSTATE_AE   3
    81 #ifdef ENABLE_SH4STATS
    82 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    83 #else
    84 #define COUNT_INST(id)
    85 #endif
    87 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    88 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    89 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    90     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    92 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    93 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    94 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    95     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
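/* Illustration: after a CMP/EQ the cached state is TSTATE_E (4), so
 * JT_rel8 emits OP(0x70+4) = 0x74 (JE rel8) directly on the live flags,
 * and JF_rel8 emits OP(0x70+(4^1)) = 0x75 (JNE rel8). The ^1 works
 * because x86 encodes each condition code and its complement as an
 * adjacent pair. With TSTATE_NONE the macros first re-materialize the
 * flags from sh4r.t via CMP_imm8s_sh4r before branching.
 */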
    97 static struct sh4_x86_state sh4_x86;
    99 static uint32_t max_int = 0x7FFFFFFF;
   100 static uint32_t min_int = 0x80000000;
   101 static uint32_t save_fcw; /* save value for fpu control word */
   102 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   104 void sh4_translate_init(void)
   105 {
   106     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   107     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   108 }
   111 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   112 {
   113     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   114 	sh4_x86.backpatch_size <<= 1;
   115 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   116 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   117 	assert( sh4_x86.backpatch_list != NULL );
   118     }
   119     if( sh4_x86.in_delay_slot ) {
   120 	fixup_pc -= 2;
   121     }
   122     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   123 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   124     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   125     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   126     sh4_x86.backpatch_posn++;
   127 }
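/* Usage sketch: an exception branch such as JE_exc(EXC_SLOT_ILLEGAL)
 * emits a jcc rel32 with a placeholder displacement and records its
 * location here. When the block is finalized, the end-of-block code
 * (sized by sh4_translate_end_block_size() below) presumably emits one
 * exception-exit stub per record and patches each rel32 to reach it;
 * fixup_icount locates the faulting instruction within the block so
 * sh4r.pc and slice_cycle can be corrected before raising exc_code.
 */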
   129 /**
   130  * Emit an instruction to load an SH4 reg into a real register
   131  */
   132 static inline void load_reg( int x86reg, int sh4reg ) 
   133 {
   134     /* mov [bp+n], reg */
   135     OP(0x8B);
   136     OP(0x45 + (x86reg<<3));
   137     OP(REG_OFFSET(r[sh4reg]));
   138 }
   140 static inline void load_reg16s( int x86reg, int sh4reg )
   141 {
   142     OP(0x0F);
   143     OP(0xBF);
   144     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   145 }
   147 static inline void load_reg16u( int x86reg, int sh4reg )
   148 {
   149     OP(0x0F);
   150     OP(0xB7);
   151     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   153 }
   155 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   156 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   157 /**
   158  * Emit an instruction to load an immediate value into a register
   159  */
   160 static inline void load_imm32( int x86reg, uint32_t value ) {
   161     /* mov #value, reg */
   162     OP(0xB8 + x86reg);
   163     OP32(value);
   164 }
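/* Example: load_imm32(R_EAX, 0x8C0000A0) emits B8 A0 00 00 8C
 * (mov eax, 0x8C0000A0): opcode 0xB8+reg followed by the immediate,
 * assuming OP32 writes its bytes in little-endian order.
 */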
   166 /**
   167  * Load an immediate 64-bit quantity (note: x86-64 only)
   168  */
   169 static inline void load_imm64( int x86reg, uint64_t value ) {
   170     /* mov #value, reg */
   171     REXW();
   172     OP(0xB8 + x86reg);
   173     OP64(value);
   174 }
   176 /**
    177  * Emit an instruction to store an x86 register into an SH4 reg (Rn)
    178  */
    179 static inline void store_reg( int x86reg, int sh4reg ) {
   180     /* mov reg, [bp+n] */
   181     OP(0x89);
   182     OP(0x45 + (x86reg<<3));
   183     OP(REG_OFFSET(r[sh4reg]));
   184 }
   186 /**
   187  * Load an FR register (single-precision floating point) into an integer x86
   188  * register (eg for register-to-register moves)
   189  */
   190 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   191 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   193 /**
    194  * Load the low or high half of a DR register (DR or XD) into an integer x86 register
   195  */
   196 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   197 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   199 /**
    200  * Store an FR register (single-precision floating point) from an integer x86
   201  * register (eg for register-to-register moves)
   202  */
   203 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   204 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   206 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   207 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   210 #define push_fpul()  FLDF_sh4r(R_FPUL)
   211 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   212 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   213 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   214 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   215 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   216 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   217 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   218 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   219 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
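/* The (frm)^1 in the single-precision accessors reflects the layout of
 * sh4r.fr: each register pair is evidently stored word-swapped so that
 * the two halves of a double (DR/XD) land in host little-endian order.
 * That is why load_dr0 reads fr[bank][frm|0x01] and load_dr1 reads
 * fr[bank][frm&0x0E], while FLDD/FSTPD can address the pair directly
 * at the even index.
 */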
   223 /* Exception checks - Note that all exception checks will clobber EAX */
   225 #define check_priv( ) \
   226     if( !sh4_x86.priv_checked ) { \
   227 	sh4_x86.priv_checked = TRUE;\
   228 	load_spreg( R_EAX, R_SR );\
   229 	AND_imm32_r32( SR_MD, R_EAX );\
   230 	if( sh4_x86.in_delay_slot ) {\
   231 	    JE_exc( EXC_SLOT_ILLEGAL );\
   232 	} else {\
   233 	    JE_exc( EXC_ILLEGAL );\
   234 	}\
   235 	sh4_x86.tstate = TSTATE_NONE; \
    236     }
   238 #define check_fpuen( ) \
   239     if( !sh4_x86.fpuen_checked ) {\
   240 	sh4_x86.fpuen_checked = TRUE;\
   241 	load_spreg( R_EAX, R_SR );\
   242 	AND_imm32_r32( SR_FD, R_EAX );\
   243 	if( sh4_x86.in_delay_slot ) {\
   244 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   245 	} else {\
   246 	    JNE_exc(EXC_FPU_DISABLED);\
   247 	}\
   248 	sh4_x86.tstate = TSTATE_NONE; \
   249     }
    251 #define check_ralign16( x86reg ) \
    252     TEST_imm32_r32( 0x00000001, x86reg ); \
    253     JNE_exc(EXC_DATA_ADDR_READ)
    255 #define check_walign16( x86reg ) \
    256     TEST_imm32_r32( 0x00000001, x86reg ); \
    257     JNE_exc(EXC_DATA_ADDR_WRITE)
    259 #define check_ralign32( x86reg ) \
    260     TEST_imm32_r32( 0x00000003, x86reg ); \
    261     JNE_exc(EXC_DATA_ADDR_READ)
    263 #define check_walign32( x86reg ) \
    264     TEST_imm32_r32( 0x00000003, x86reg ); \
    265     JNE_exc(EXC_DATA_ADDR_WRITE)
    267 #define check_ralign64( x86reg ) \
    268     TEST_imm32_r32( 0x00000007, x86reg ); \
    269     JNE_exc(EXC_DATA_ADDR_READ)
    271 #define check_walign64( x86reg ) \
    272     TEST_imm32_r32( 0x00000007, x86reg ); \
    273     JNE_exc(EXC_DATA_ADDR_WRITE)
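/* Example: a MOV.L read from address 0x8C000002 fails check_ralign32:
 * TEST 0x3 leaves ZF clear, so JNE_exc(EXC_DATA_ADDR_READ) backpatches
 * a branch to an address-error exit. Note the check runs on the
 * virtual address, before any MMU translation.
 */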
   275 #define UNDEF(ir)
   276 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   277 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   278 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   279 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   280 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   281 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   282 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   284 /**
   285  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   286  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   287  */
   288 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
    290 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg); }
   291 /**
   292  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   293  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   294  */
   295 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   297 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
   298 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
   299 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
   301 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   303 /****** Import appropriate calling conventions ******/
   304 #if SIZEOF_VOID_P == 8
   305 #include "sh4/ia64abi.h"
   306 #else /* 32-bit system */
   307 #ifdef APPLE_BUILD
   308 #include "sh4/ia32mac.h"
   309 #else
   310 #include "sh4/ia32abi.h"
   311 #endif
   312 #endif
   314 uint32_t sh4_translate_end_block_size()
   315 {
   316     if( sh4_x86.backpatch_posn <= 3 ) {
   317 	return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   318     } else {
   319 	return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   320     }
   321 }
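/* Size accounting sketch: this is an upper bound on the code emitted
 * after the block body -- the epilogue plus the exception-exit stubs for
 * the backpatch list, budgeted at 12 bytes per record for up to three
 * records, and with more than three, 48 bytes for the first three plus
 * 15 for each additional one. Under-estimating here would overrun the
 * translation cache, so it must track what end_block actually emits.
 */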
   324 /**
   325  * Embed a breakpoint into the generated code
   326  */
   327 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   328 {
   329     load_imm32( R_EAX, pc );
   330     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   331     sh4_x86.tstate = TSTATE_NONE;
   332 }
    335 #define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
   337 /**
   338  * Embed a call to sh4_execute_instruction for situations that we
   339  * can't translate (just page-crossing delay slots at the moment).
   340  * Caller is responsible for setting new_pc before calling this function.
   341  *
   342  * Performs:
   343  *   Set PC = endpc
   344  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   345  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   346  *   Call sh4_execute_instruction
   347  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   348  */
   349 void exit_block_emu( sh4vma_t endpc )
   350 {
   351     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   352     ADD_r32_sh4r( R_ECX, R_PC );
   354     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   355     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   356     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   357     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   359     call_func0( sh4_execute_instruction );    
   360     load_spreg( R_EAX, R_PC );
   361     if( sh4_x86.tlb_on ) {
   362 	call_func1(xlat_get_code_by_vma,R_EAX);
   363     } else {
   364 	call_func1(xlat_get_code,R_EAX);
   365     }
   366     AND_imm8s_rptr( 0xFC, R_EAX );
   367     POP_r32(R_EBP);
   368     RET();
   369 } 
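/* Cycle accounting example: if endpc is three instructions past
 * block_start_pc (endpc - start == 6), ((6>>1)+1)*sh4_cpu_period
 * charges four instruction periods: the three already translated plus
 * the one about to be emulated, since (per the comment above)
 * sh4_execute_instruction does not update slice_cycle itself.
 */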
   371 /**
   372  * Translate a single instruction. Delayed branches are handled specially
    373  * by translating both branch and delayed instruction as a single unit (as
    374  * the delay slot instruction is executed before the branch takes effect).
   375  * The instruction MUST be in the icache (assert check)
   376  *
   377  * @return true if the instruction marks the end of a basic block
    378  * (eg a branch or a slot-illegal instruction)
   379  */
   380 uint32_t sh4_translate_instruction( sh4vma_t pc )
   381 {
   382     uint32_t ir;
   383     /* Read instruction from icache */
   384     assert( IS_IN_ICACHE(pc) );
   385     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   387 	/* PC is not in the current icache - this usually means we're running
   388 	 * with MMU on, and we've gone past the end of the page. And since 
   389 	 * sh4_translate_block is pretty careful about this, it means we're
   390 	 * almost certainly in a delay slot.
   391 	 *
   392 	 * Since we can't assume the page is present (and we can't fault it in
    393 	 * at this point), inline a call to sh4_execute_instruction (with a few
   394 	 * small repairs to cope with the different environment).
   395 	 */
   397     if( !sh4_x86.in_delay_slot ) {
   398 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   399     }
   400 %%
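/* Everything below is in the lxdream decoder-template syntax: each
 * "MNEMONIC operands {: ... :}" block is C code spliced by the decoder
 * generator (gendec) into the translator built from this .in file, with
 * the operand fields (Rm, Rn, imm, disp) already extracted from the
 * 16-bit opcode ir. Each action emits x86 code into the current
 * translation block rather than performing the operation directly.
 */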
   401 /* ALU operations */
   402 ADD Rm, Rn {:
   403     COUNT_INST(I_ADD);
   404     load_reg( R_EAX, Rm );
   405     load_reg( R_ECX, Rn );
   406     ADD_r32_r32( R_EAX, R_ECX );
   407     store_reg( R_ECX, Rn );
   408     sh4_x86.tstate = TSTATE_NONE;
   409 :}
   410 ADD #imm, Rn {:  
   411     COUNT_INST(I_ADDI);
   412     load_reg( R_EAX, Rn );
   413     ADD_imm8s_r32( imm, R_EAX );
   414     store_reg( R_EAX, Rn );
   415     sh4_x86.tstate = TSTATE_NONE;
   416 :}
   417 ADDC Rm, Rn {:
   418     COUNT_INST(I_ADDC);
   419     if( sh4_x86.tstate != TSTATE_C ) {
   420 	LDC_t();
   421     }
   422     load_reg( R_EAX, Rm );
   423     load_reg( R_ECX, Rn );
   424     ADC_r32_r32( R_EAX, R_ECX );
   425     store_reg( R_ECX, Rn );
   426     SETC_t();
   427     sh4_x86.tstate = TSTATE_C;
   428 :}
   429 ADDV Rm, Rn {:
   430     COUNT_INST(I_ADDV);
   431     load_reg( R_EAX, Rm );
   432     load_reg( R_ECX, Rn );
   433     ADD_r32_r32( R_EAX, R_ECX );
   434     store_reg( R_ECX, Rn );
   435     SETO_t();
   436     sh4_x86.tstate = TSTATE_O;
   437 :}
   438 AND Rm, Rn {:
   439     COUNT_INST(I_AND);
   440     load_reg( R_EAX, Rm );
   441     load_reg( R_ECX, Rn );
   442     AND_r32_r32( R_EAX, R_ECX );
   443     store_reg( R_ECX, Rn );
   444     sh4_x86.tstate = TSTATE_NONE;
   445 :}
   446 AND #imm, R0 {:  
   447     COUNT_INST(I_ANDI);
   448     load_reg( R_EAX, 0 );
   449     AND_imm32_r32(imm, R_EAX); 
   450     store_reg( R_EAX, 0 );
   451     sh4_x86.tstate = TSTATE_NONE;
   452 :}
   453 AND.B #imm, @(R0, GBR) {: 
   454     COUNT_INST(I_ANDB);
   455     load_reg( R_EAX, 0 );
   456     load_spreg( R_ECX, R_GBR );
   457     ADD_r32_r32( R_ECX, R_EAX );
   458     MMU_TRANSLATE_WRITE( R_EAX );
   459     PUSH_realigned_r32(R_EAX);
   460     MEM_READ_BYTE( R_EAX, R_EAX );
   461     POP_realigned_r32(R_ECX);
   462     AND_imm32_r32(imm, R_EAX );
   463     MEM_WRITE_BYTE( R_ECX, R_EAX );
   464     sh4_x86.tstate = TSTATE_NONE;
   465 :}
   466 CMP/EQ Rm, Rn {:  
   467     COUNT_INST(I_CMPEQ);
   468     load_reg( R_EAX, Rm );
   469     load_reg( R_ECX, Rn );
   470     CMP_r32_r32( R_EAX, R_ECX );
   471     SETE_t();
   472     sh4_x86.tstate = TSTATE_E;
   473 :}
   474 CMP/EQ #imm, R0 {:  
   475     COUNT_INST(I_CMPEQI);
   476     load_reg( R_EAX, 0 );
   477     CMP_imm8s_r32(imm, R_EAX);
   478     SETE_t();
   479     sh4_x86.tstate = TSTATE_E;
   480 :}
   481 CMP/GE Rm, Rn {:  
   482     COUNT_INST(I_CMPGE);
   483     load_reg( R_EAX, Rm );
   484     load_reg( R_ECX, Rn );
   485     CMP_r32_r32( R_EAX, R_ECX );
   486     SETGE_t();
   487     sh4_x86.tstate = TSTATE_GE;
   488 :}
   489 CMP/GT Rm, Rn {: 
   490     COUNT_INST(I_CMPGT);
   491     load_reg( R_EAX, Rm );
   492     load_reg( R_ECX, Rn );
   493     CMP_r32_r32( R_EAX, R_ECX );
   494     SETG_t();
   495     sh4_x86.tstate = TSTATE_G;
   496 :}
   497 CMP/HI Rm, Rn {:  
   498     COUNT_INST(I_CMPHI);
   499     load_reg( R_EAX, Rm );
   500     load_reg( R_ECX, Rn );
   501     CMP_r32_r32( R_EAX, R_ECX );
   502     SETA_t();
   503     sh4_x86.tstate = TSTATE_A;
   504 :}
   505 CMP/HS Rm, Rn {: 
   506     COUNT_INST(I_CMPHS);
   507     load_reg( R_EAX, Rm );
   508     load_reg( R_ECX, Rn );
   509     CMP_r32_r32( R_EAX, R_ECX );
   510     SETAE_t();
   511     sh4_x86.tstate = TSTATE_AE;
   512  :}
   513 CMP/PL Rn {: 
   514     COUNT_INST(I_CMPPL);
   515     load_reg( R_EAX, Rn );
   516     CMP_imm8s_r32( 0, R_EAX );
   517     SETG_t();
   518     sh4_x86.tstate = TSTATE_G;
   519 :}
   520 CMP/PZ Rn {:  
   521     COUNT_INST(I_CMPPZ);
   522     load_reg( R_EAX, Rn );
   523     CMP_imm8s_r32( 0, R_EAX );
   524     SETGE_t();
   525     sh4_x86.tstate = TSTATE_GE;
   526 :}
   527 CMP/STR Rm, Rn {:  
   528     COUNT_INST(I_CMPSTR);
   529     load_reg( R_EAX, Rm );
   530     load_reg( R_ECX, Rn );
   531     XOR_r32_r32( R_ECX, R_EAX );
   532     TEST_r8_r8( R_AL, R_AL );
   533     JE_rel8(target1);
   534     TEST_r8_r8( R_AH, R_AH );
   535     JE_rel8(target2);
   536     SHR_imm8_r32( 16, R_EAX );
   537     TEST_r8_r8( R_AL, R_AL );
   538     JE_rel8(target3);
   539     TEST_r8_r8( R_AH, R_AH );
   540     JMP_TARGET(target1);
   541     JMP_TARGET(target2);
   542     JMP_TARGET(target3);
   543     SETE_t();
   544     sh4_x86.tstate = TSTATE_E;
   545 :}
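/* Worked example: Rm=0x12345678, Rn=0x12AA5678 gives EAX=0x009E0000
 * after the XOR; AL==0 (the low bytes matched), so the first JE jumps
 * straight to SETE_t() with ZF still set and T becomes 1. CMP/STR thus
 * sets T iff at least one of the four byte lanes compares equal.
 */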
   546 DIV0S Rm, Rn {:
   547     COUNT_INST(I_DIV0S);
   548     load_reg( R_EAX, Rm );
   549     load_reg( R_ECX, Rn );
   550     SHR_imm8_r32( 31, R_EAX );
   551     SHR_imm8_r32( 31, R_ECX );
   552     store_spreg( R_EAX, R_M );
   553     store_spreg( R_ECX, R_Q );
   554     CMP_r32_r32( R_EAX, R_ECX );
   555     SETNE_t();
   556     sh4_x86.tstate = TSTATE_NE;
   557 :}
   558 DIV0U {:  
   559     COUNT_INST(I_DIV0U);
   560     XOR_r32_r32( R_EAX, R_EAX );
   561     store_spreg( R_EAX, R_Q );
   562     store_spreg( R_EAX, R_M );
   563     store_spreg( R_EAX, R_T );
   564     sh4_x86.tstate = TSTATE_C; // works for DIV1
   565 :}
   566 DIV1 Rm, Rn {:
   567     COUNT_INST(I_DIV1);
   568     load_spreg( R_ECX, R_M );
   569     load_reg( R_EAX, Rn );
   570     if( sh4_x86.tstate != TSTATE_C ) {
   571 	LDC_t();
   572     }
   573     RCL1_r32( R_EAX );
   574     SETC_r8( R_DL ); // Q'
   575     CMP_sh4r_r32( R_Q, R_ECX );
   576     JE_rel8(mqequal);
   577     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   578     JMP_rel8(end);
   579     JMP_TARGET(mqequal);
   580     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   581     JMP_TARGET(end);
   582     store_reg( R_EAX, Rn ); // Done with Rn now
   583     SETC_r8(R_AL); // tmp1
   584     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   585     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   586     store_spreg( R_ECX, R_Q );
   587     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   588     MOVZX_r8_r32( R_AL, R_EAX );
   589     store_spreg( R_EAX, R_T );
   590     sh4_x86.tstate = TSTATE_NONE;
   591 :}
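/* DIV1 is one step of the SH4's non-restoring division: shift the next
 * quotient bit in from T, add or subtract Rm depending on whether the
 * working sign Q matches the divisor sign M, then fold the carry back
 * into Q and T. Repeating it 32 times (conventionally interleaved with
 * ROTCL of the quotient register) yields a 32-bit quotient; the Q'/Q''
 * algebra above follows the DIV1 description in the SH4 manual.
 */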
   592 DMULS.L Rm, Rn {:  
   593     COUNT_INST(I_DMULS);
   594     load_reg( R_EAX, Rm );
   595     load_reg( R_ECX, Rn );
   596     IMUL_r32(R_ECX);
   597     store_spreg( R_EDX, R_MACH );
   598     store_spreg( R_EAX, R_MACL );
   599     sh4_x86.tstate = TSTATE_NONE;
   600 :}
   601 DMULU.L Rm, Rn {:  
   602     COUNT_INST(I_DMULU);
   603     load_reg( R_EAX, Rm );
   604     load_reg( R_ECX, Rn );
   605     MUL_r32(R_ECX);
   606     store_spreg( R_EDX, R_MACH );
   607     store_spreg( R_EAX, R_MACL );    
   608     sh4_x86.tstate = TSTATE_NONE;
   609 :}
   610 DT Rn {:  
   611     COUNT_INST(I_DT);
   612     load_reg( R_EAX, Rn );
   613     ADD_imm8s_r32( -1, R_EAX );
   614     store_reg( R_EAX, Rn );
   615     SETE_t();
   616     sh4_x86.tstate = TSTATE_E;
   617 :}
   618 EXTS.B Rm, Rn {:  
   619     COUNT_INST(I_EXTSB);
   620     load_reg( R_EAX, Rm );
   621     MOVSX_r8_r32( R_EAX, R_EAX );
   622     store_reg( R_EAX, Rn );
   623 :}
   624 EXTS.W Rm, Rn {:  
   625     COUNT_INST(I_EXTSW);
   626     load_reg( R_EAX, Rm );
   627     MOVSX_r16_r32( R_EAX, R_EAX );
   628     store_reg( R_EAX, Rn );
   629 :}
   630 EXTU.B Rm, Rn {:  
   631     COUNT_INST(I_EXTUB);
   632     load_reg( R_EAX, Rm );
   633     MOVZX_r8_r32( R_EAX, R_EAX );
   634     store_reg( R_EAX, Rn );
   635 :}
   636 EXTU.W Rm, Rn {:  
   637     COUNT_INST(I_EXTUW);
   638     load_reg( R_EAX, Rm );
   639     MOVZX_r16_r32( R_EAX, R_EAX );
   640     store_reg( R_EAX, Rn );
   641 :}
   642 MAC.L @Rm+, @Rn+ {:
   643     COUNT_INST(I_MACL);
   644     if( Rm == Rn ) {
   645 	load_reg( R_EAX, Rm );
   646 	check_ralign32( R_EAX );
   647 	MMU_TRANSLATE_READ( R_EAX );
   648 	PUSH_realigned_r32( R_EAX );
   649 	load_reg( R_EAX, Rn );
   650 	ADD_imm8s_r32( 4, R_EAX );
   651 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   652 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   653 	// Note translate twice in case of page boundaries. Maybe worth
   654 	// adding a page-boundary check to skip the second translation
   655     } else {
   656 	load_reg( R_EAX, Rm );
   657 	check_ralign32( R_EAX );
   658 	MMU_TRANSLATE_READ( R_EAX );
   659 	load_reg( R_ECX, Rn );
   660 	check_ralign32( R_ECX );
   661 	PUSH_realigned_r32( R_EAX );
   662 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   663 	MOV_r32_r32( R_ECX, R_EAX );
   664 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   665 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   666     }
   667     MEM_READ_LONG( R_EAX, R_EAX );
   668     POP_r32( R_ECX );
   669     PUSH_r32( R_EAX );
   670     MEM_READ_LONG( R_ECX, R_EAX );
   671     POP_realigned_r32( R_ECX );
   673     IMUL_r32( R_ECX );
   674     ADD_r32_sh4r( R_EAX, R_MACL );
   675     ADC_r32_sh4r( R_EDX, R_MACH );
   677     load_spreg( R_ECX, R_S );
   678     TEST_r32_r32(R_ECX, R_ECX);
   679     JE_rel8( nosat );
   680     call_func0( signsat48 );
   681     JMP_TARGET( nosat );
   682     sh4_x86.tstate = TSTATE_NONE;
   683 :}
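/* The PUSH/POP pairs keep the first translated address live across the
 * second translation and read, both of which may clobber EAX/ECX/EDX;
 * the _realigned variants also maintain the stack-alignment contract
 * tracked in sh4_x86.stack_posn. signsat48 saturates the MACH:MACL
 * accumulator to 48 bits when the S flag is set, as MAC.L requires.
 */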
   684 MAC.W @Rm+, @Rn+ {:  
   685     COUNT_INST(I_MACW);
   686     if( Rm == Rn ) {
   687 	load_reg( R_EAX, Rm );
   688 	check_ralign16( R_EAX );
   689 	MMU_TRANSLATE_READ( R_EAX );
   690 	PUSH_realigned_r32( R_EAX );
   691 	load_reg( R_EAX, Rn );
   692 	ADD_imm8s_r32( 2, R_EAX );
   693 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   694 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   695 	// Note translate twice in case of page boundaries. Maybe worth
   696 	// adding a page-boundary check to skip the second translation
   697     } else {
   698 	load_reg( R_EAX, Rm );
   699 	check_ralign16( R_EAX );
   700 	MMU_TRANSLATE_READ( R_EAX );
   701 	load_reg( R_ECX, Rn );
   702 	check_ralign16( R_ECX );
   703 	PUSH_realigned_r32( R_EAX );
   704 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   705 	MOV_r32_r32( R_ECX, R_EAX );
   706 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   707 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   708     }
   709     MEM_READ_WORD( R_EAX, R_EAX );
   710     POP_r32( R_ECX );
   711     PUSH_r32( R_EAX );
   712     MEM_READ_WORD( R_ECX, R_EAX );
   713     POP_realigned_r32( R_ECX );
   714     IMUL_r32( R_ECX );
   716     load_spreg( R_ECX, R_S );
   717     TEST_r32_r32( R_ECX, R_ECX );
   718     JE_rel8( nosat );
   720     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   721     JNO_rel8( end );            // 2
   722     load_imm32( R_EDX, 1 );         // 5
   723     store_spreg( R_EDX, R_MACH );   // 6
   724     JS_rel8( positive );        // 2
   725     load_imm32( R_EAX, 0x80000000 );// 5
   726     store_spreg( R_EAX, R_MACL );   // 6
   727     JMP_rel8(end2);           // 2
   729     JMP_TARGET(positive);
   730     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   731     store_spreg( R_EAX, R_MACL );   // 6
   732     JMP_rel8(end3);            // 2
   734     JMP_TARGET(nosat);
   735     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   736     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   737     JMP_TARGET(end);
   738     JMP_TARGET(end2);
   739     JMP_TARGET(end3);
   740     sh4_x86.tstate = TSTATE_NONE;
   741 :}
   742 MOVT Rn {:  
   743     COUNT_INST(I_MOVT);
   744     load_spreg( R_EAX, R_T );
   745     store_reg( R_EAX, Rn );
   746 :}
   747 MUL.L Rm, Rn {:  
   748     COUNT_INST(I_MULL);
   749     load_reg( R_EAX, Rm );
   750     load_reg( R_ECX, Rn );
   751     MUL_r32( R_ECX );
   752     store_spreg( R_EAX, R_MACL );
   753     sh4_x86.tstate = TSTATE_NONE;
   754 :}
   755 MULS.W Rm, Rn {:
   756     COUNT_INST(I_MULSW);
   757     load_reg16s( R_EAX, Rm );
   758     load_reg16s( R_ECX, Rn );
   759     MUL_r32( R_ECX );
   760     store_spreg( R_EAX, R_MACL );
   761     sh4_x86.tstate = TSTATE_NONE;
   762 :}
   763 MULU.W Rm, Rn {:  
   764     COUNT_INST(I_MULUW);
   765     load_reg16u( R_EAX, Rm );
   766     load_reg16u( R_ECX, Rn );
   767     MUL_r32( R_ECX );
   768     store_spreg( R_EAX, R_MACL );
   769     sh4_x86.tstate = TSTATE_NONE;
   770 :}
   771 NEG Rm, Rn {:
   772     COUNT_INST(I_NEG);
   773     load_reg( R_EAX, Rm );
   774     NEG_r32( R_EAX );
   775     store_reg( R_EAX, Rn );
   776     sh4_x86.tstate = TSTATE_NONE;
   777 :}
   778 NEGC Rm, Rn {:  
   779     COUNT_INST(I_NEGC);
   780     load_reg( R_EAX, Rm );
   781     XOR_r32_r32( R_ECX, R_ECX );
   782     LDC_t();
   783     SBB_r32_r32( R_EAX, R_ECX );
   784     store_reg( R_ECX, Rn );
   785     SETC_t();
   786     sh4_x86.tstate = TSTATE_C;
   787 :}
   788 NOT Rm, Rn {:  
   789     COUNT_INST(I_NOT);
   790     load_reg( R_EAX, Rm );
   791     NOT_r32( R_EAX );
   792     store_reg( R_EAX, Rn );
   793     sh4_x86.tstate = TSTATE_NONE;
   794 :}
   795 OR Rm, Rn {:  
   796     COUNT_INST(I_OR);
   797     load_reg( R_EAX, Rm );
   798     load_reg( R_ECX, Rn );
   799     OR_r32_r32( R_EAX, R_ECX );
   800     store_reg( R_ECX, Rn );
   801     sh4_x86.tstate = TSTATE_NONE;
   802 :}
   803 OR #imm, R0 {:
   804     COUNT_INST(I_ORI);
   805     load_reg( R_EAX, 0 );
   806     OR_imm32_r32(imm, R_EAX);
   807     store_reg( R_EAX, 0 );
   808     sh4_x86.tstate = TSTATE_NONE;
   809 :}
   810 OR.B #imm, @(R0, GBR) {:  
   811     COUNT_INST(I_ORB);
   812     load_reg( R_EAX, 0 );
   813     load_spreg( R_ECX, R_GBR );
   814     ADD_r32_r32( R_ECX, R_EAX );
   815     MMU_TRANSLATE_WRITE( R_EAX );
   816     PUSH_realigned_r32(R_EAX);
   817     MEM_READ_BYTE( R_EAX, R_EAX );
   818     POP_realigned_r32(R_ECX);
   819     OR_imm32_r32(imm, R_EAX );
   820     MEM_WRITE_BYTE( R_ECX, R_EAX );
   821     sh4_x86.tstate = TSTATE_NONE;
   822 :}
   823 ROTCL Rn {:
   824     COUNT_INST(I_ROTCL);
   825     load_reg( R_EAX, Rn );
   826     if( sh4_x86.tstate != TSTATE_C ) {
   827 	LDC_t();
   828     }
   829     RCL1_r32( R_EAX );
   830     store_reg( R_EAX, Rn );
   831     SETC_t();
   832     sh4_x86.tstate = TSTATE_C;
   833 :}
   834 ROTCR Rn {:  
   835     COUNT_INST(I_ROTCR);
   836     load_reg( R_EAX, Rn );
   837     if( sh4_x86.tstate != TSTATE_C ) {
   838 	LDC_t();
   839     }
   840     RCR1_r32( R_EAX );
   841     store_reg( R_EAX, Rn );
   842     SETC_t();
   843     sh4_x86.tstate = TSTATE_C;
   844 :}
   845 ROTL Rn {:  
   846     COUNT_INST(I_ROTL);
   847     load_reg( R_EAX, Rn );
   848     ROL1_r32( R_EAX );
   849     store_reg( R_EAX, Rn );
   850     SETC_t();
   851     sh4_x86.tstate = TSTATE_C;
   852 :}
   853 ROTR Rn {:  
   854     COUNT_INST(I_ROTR);
   855     load_reg( R_EAX, Rn );
   856     ROR1_r32( R_EAX );
   857     store_reg( R_EAX, Rn );
   858     SETC_t();
   859     sh4_x86.tstate = TSTATE_C;
   860 :}
   861 SHAD Rm, Rn {:
   862     COUNT_INST(I_SHAD);
   863     /* Annoyingly enough, not directly convertible */
   864     load_reg( R_EAX, Rn );
   865     load_reg( R_ECX, Rm );
   866     CMP_imm32_r32( 0, R_ECX );
   867     JGE_rel8(doshl);
   869     NEG_r32( R_ECX );      // 2
   870     AND_imm8_r8( 0x1F, R_CL ); // 3
   871     JE_rel8(emptysar);     // 2
   872     SAR_r32_CL( R_EAX );       // 2
   873     JMP_rel8(end);          // 2
   875     JMP_TARGET(emptysar);
   876     SAR_imm8_r32(31, R_EAX );  // 3
   877     JMP_rel8(end2);
   879     JMP_TARGET(doshl);
   880     AND_imm8_r8( 0x1F, R_CL ); // 3
   881     SHL_r32_CL( R_EAX );       // 2
   882     JMP_TARGET(end);
   883     JMP_TARGET(end2);
   884     store_reg( R_EAX, Rn );
   885     sh4_x86.tstate = TSTATE_NONE;
   886 :}
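/* SHAD semantics: Rm >= 0 shifts Rn left by Rm&0x1F; Rm < 0 shifts
 * right arithmetically by (-Rm)&0x1F, where a count of zero on the
 * negative path means "shift right by 32". The emptysar case handles
 * the latter with SAR_imm8_r32(31), which is exact since an arithmetic
 * shift by 32 leaves only copies of the sign bit.
 */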
   887 SHLD Rm, Rn {:  
   888     COUNT_INST(I_SHLD);
   889     load_reg( R_EAX, Rn );
   890     load_reg( R_ECX, Rm );
   891     CMP_imm32_r32( 0, R_ECX );
   892     JGE_rel8(doshl);
   894     NEG_r32( R_ECX );      // 2
   895     AND_imm8_r8( 0x1F, R_CL ); // 3
   896     JE_rel8(emptyshr );
   897     SHR_r32_CL( R_EAX );       // 2
   898     JMP_rel8(end);          // 2
   900     JMP_TARGET(emptyshr);
   901     XOR_r32_r32( R_EAX, R_EAX );
   902     JMP_rel8(end2);
   904     JMP_TARGET(doshl);
   905     AND_imm8_r8( 0x1F, R_CL ); // 3
   906     SHL_r32_CL( R_EAX );       // 2
   907     JMP_TARGET(end);
   908     JMP_TARGET(end2);
   909     store_reg( R_EAX, Rn );
   910     sh4_x86.tstate = TSTATE_NONE;
   911 :}
   912 SHAL Rn {: 
   913     COUNT_INST(I_SHAL);
   914     load_reg( R_EAX, Rn );
   915     SHL1_r32( R_EAX );
   916     SETC_t();
   917     store_reg( R_EAX, Rn );
   918     sh4_x86.tstate = TSTATE_C;
   919 :}
   920 SHAR Rn {:  
   921     COUNT_INST(I_SHAR);
   922     load_reg( R_EAX, Rn );
   923     SAR1_r32( R_EAX );
   924     SETC_t();
   925     store_reg( R_EAX, Rn );
   926     sh4_x86.tstate = TSTATE_C;
   927 :}
   928 SHLL Rn {:  
   929     COUNT_INST(I_SHLL);
   930     load_reg( R_EAX, Rn );
   931     SHL1_r32( R_EAX );
   932     SETC_t();
   933     store_reg( R_EAX, Rn );
   934     sh4_x86.tstate = TSTATE_C;
   935 :}
   936 SHLL2 Rn {:
   937     COUNT_INST(I_SHLL);
   938     load_reg( R_EAX, Rn );
   939     SHL_imm8_r32( 2, R_EAX );
   940     store_reg( R_EAX, Rn );
   941     sh4_x86.tstate = TSTATE_NONE;
   942 :}
   943 SHLL8 Rn {:  
   944     COUNT_INST(I_SHLL);
   945     load_reg( R_EAX, Rn );
   946     SHL_imm8_r32( 8, R_EAX );
   947     store_reg( R_EAX, Rn );
   948     sh4_x86.tstate = TSTATE_NONE;
   949 :}
   950 SHLL16 Rn {:  
   951     COUNT_INST(I_SHLL);
   952     load_reg( R_EAX, Rn );
   953     SHL_imm8_r32( 16, R_EAX );
   954     store_reg( R_EAX, Rn );
   955     sh4_x86.tstate = TSTATE_NONE;
   956 :}
   957 SHLR Rn {:  
   958     COUNT_INST(I_SHLR);
   959     load_reg( R_EAX, Rn );
   960     SHR1_r32( R_EAX );
   961     SETC_t();
   962     store_reg( R_EAX, Rn );
   963     sh4_x86.tstate = TSTATE_C;
   964 :}
   965 SHLR2 Rn {:  
   966     COUNT_INST(I_SHLR);
   967     load_reg( R_EAX, Rn );
   968     SHR_imm8_r32( 2, R_EAX );
   969     store_reg( R_EAX, Rn );
   970     sh4_x86.tstate = TSTATE_NONE;
   971 :}
   972 SHLR8 Rn {:  
   973     COUNT_INST(I_SHLR);
   974     load_reg( R_EAX, Rn );
   975     SHR_imm8_r32( 8, R_EAX );
   976     store_reg( R_EAX, Rn );
   977     sh4_x86.tstate = TSTATE_NONE;
   978 :}
   979 SHLR16 Rn {:  
   980     COUNT_INST(I_SHLR);
   981     load_reg( R_EAX, Rn );
   982     SHR_imm8_r32( 16, R_EAX );
   983     store_reg( R_EAX, Rn );
   984     sh4_x86.tstate = TSTATE_NONE;
   985 :}
   986 SUB Rm, Rn {:  
   987     COUNT_INST(I_SUB);
   988     load_reg( R_EAX, Rm );
   989     load_reg( R_ECX, Rn );
   990     SUB_r32_r32( R_EAX, R_ECX );
   991     store_reg( R_ECX, Rn );
   992     sh4_x86.tstate = TSTATE_NONE;
   993 :}
   994 SUBC Rm, Rn {:  
   995     COUNT_INST(I_SUBC);
   996     load_reg( R_EAX, Rm );
   997     load_reg( R_ECX, Rn );
   998     if( sh4_x86.tstate != TSTATE_C ) {
    999 	LDC_t();
   1000     }
   1001     SBB_r32_r32( R_EAX, R_ECX );
  1002     store_reg( R_ECX, Rn );
  1003     SETC_t();
  1004     sh4_x86.tstate = TSTATE_C;
  1005 :}
  1006 SUBV Rm, Rn {:  
  1007     COUNT_INST(I_SUBV);
  1008     load_reg( R_EAX, Rm );
  1009     load_reg( R_ECX, Rn );
  1010     SUB_r32_r32( R_EAX, R_ECX );
  1011     store_reg( R_ECX, Rn );
  1012     SETO_t();
  1013     sh4_x86.tstate = TSTATE_O;
  1014 :}
  1015 SWAP.B Rm, Rn {:  
  1016     COUNT_INST(I_SWAPB);
  1017     load_reg( R_EAX, Rm );
  1018     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1019     store_reg( R_EAX, Rn );
  1020 :}
  1021 SWAP.W Rm, Rn {:  
   1022     COUNT_INST(I_SWAPW);
  1023     load_reg( R_EAX, Rm );
  1024     MOV_r32_r32( R_EAX, R_ECX );
  1025     SHL_imm8_r32( 16, R_ECX );
  1026     SHR_imm8_r32( 16, R_EAX );
  1027     OR_r32_r32( R_EAX, R_ECX );
  1028     store_reg( R_ECX, Rn );
  1029     sh4_x86.tstate = TSTATE_NONE;
  1030 :}
  1031 TAS.B @Rn {:  
  1032     COUNT_INST(I_TASB);
  1033     load_reg( R_EAX, Rn );
  1034     MMU_TRANSLATE_WRITE( R_EAX );
  1035     PUSH_realigned_r32( R_EAX );
  1036     MEM_READ_BYTE( R_EAX, R_EAX );
  1037     TEST_r8_r8( R_AL, R_AL );
  1038     SETE_t();
  1039     OR_imm8_r8( 0x80, R_AL );
  1040     POP_realigned_r32( R_ECX );
  1041     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1042     sh4_x86.tstate = TSTATE_NONE;
  1043 :}
  1044 TST Rm, Rn {:  
  1045     COUNT_INST(I_TST);
  1046     load_reg( R_EAX, Rm );
  1047     load_reg( R_ECX, Rn );
  1048     TEST_r32_r32( R_EAX, R_ECX );
  1049     SETE_t();
  1050     sh4_x86.tstate = TSTATE_E;
  1051 :}
  1052 TST #imm, R0 {:  
  1053     COUNT_INST(I_TSTI);
  1054     load_reg( R_EAX, 0 );
  1055     TEST_imm32_r32( imm, R_EAX );
  1056     SETE_t();
  1057     sh4_x86.tstate = TSTATE_E;
  1058 :}
  1059 TST.B #imm, @(R0, GBR) {:  
  1060     COUNT_INST(I_TSTB);
  1061     load_reg( R_EAX, 0);
   1062     load_spreg( R_ECX, R_GBR );
  1063     ADD_r32_r32( R_ECX, R_EAX );
  1064     MMU_TRANSLATE_READ( R_EAX );
  1065     MEM_READ_BYTE( R_EAX, R_EAX );
  1066     TEST_imm8_r8( imm, R_AL );
  1067     SETE_t();
  1068     sh4_x86.tstate = TSTATE_E;
  1069 :}
  1070 XOR Rm, Rn {:  
  1071     COUNT_INST(I_XOR);
  1072     load_reg( R_EAX, Rm );
  1073     load_reg( R_ECX, Rn );
  1074     XOR_r32_r32( R_EAX, R_ECX );
  1075     store_reg( R_ECX, Rn );
  1076     sh4_x86.tstate = TSTATE_NONE;
  1077 :}
  1078 XOR #imm, R0 {:  
  1079     COUNT_INST(I_XORI);
  1080     load_reg( R_EAX, 0 );
  1081     XOR_imm32_r32( imm, R_EAX );
  1082     store_reg( R_EAX, 0 );
  1083     sh4_x86.tstate = TSTATE_NONE;
  1084 :}
  1085 XOR.B #imm, @(R0, GBR) {:  
  1086     COUNT_INST(I_XORB);
  1087     load_reg( R_EAX, 0 );
  1088     load_spreg( R_ECX, R_GBR );
  1089     ADD_r32_r32( R_ECX, R_EAX );
  1090     MMU_TRANSLATE_WRITE( R_EAX );
  1091     PUSH_realigned_r32(R_EAX);
  1092     MEM_READ_BYTE(R_EAX, R_EAX);
  1093     POP_realigned_r32(R_ECX);
  1094     XOR_imm32_r32( imm, R_EAX );
  1095     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1096     sh4_x86.tstate = TSTATE_NONE;
  1097 :}
  1098 XTRCT Rm, Rn {:
  1099     COUNT_INST(I_XTRCT);
  1100     load_reg( R_EAX, Rm );
  1101     load_reg( R_ECX, Rn );
  1102     SHL_imm8_r32( 16, R_EAX );
  1103     SHR_imm8_r32( 16, R_ECX );
  1104     OR_r32_r32( R_EAX, R_ECX );
  1105     store_reg( R_ECX, Rn );
  1106     sh4_x86.tstate = TSTATE_NONE;
  1107 :}
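/* Worked example: Rm=0xAAAABBBB, Rn=0xCCCCDDDD yields Rn=0xBBBBCCCC --
 * the middle 32 bits of the 64-bit concatenation Rm:Rn, i.e. the low
 * half of Rm joined to the high half of Rn, per the XTRCT definition.
 */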
  1109 /* Data move instructions */
  1110 MOV Rm, Rn {:  
  1111     COUNT_INST(I_MOV);
  1112     load_reg( R_EAX, Rm );
  1113     store_reg( R_EAX, Rn );
  1114 :}
  1115 MOV #imm, Rn {:  
  1116     COUNT_INST(I_MOVI);
  1117     load_imm32( R_EAX, imm );
  1118     store_reg( R_EAX, Rn );
  1119 :}
  1120 MOV.B Rm, @Rn {:  
  1121     COUNT_INST(I_MOVB);
  1122     load_reg( R_EAX, Rn );
  1123     MMU_TRANSLATE_WRITE( R_EAX );
  1124     load_reg( R_EDX, Rm );
  1125     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1126     sh4_x86.tstate = TSTATE_NONE;
  1127 :}
  1128 MOV.B Rm, @-Rn {:  
  1129     COUNT_INST(I_MOVB);
  1130     load_reg( R_EAX, Rn );
  1131     ADD_imm8s_r32( -1, R_EAX );
  1132     MMU_TRANSLATE_WRITE( R_EAX );
  1133     load_reg( R_EDX, Rm );
  1134     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1135     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1136     sh4_x86.tstate = TSTATE_NONE;
  1137 :}
  1138 MOV.B Rm, @(R0, Rn) {:  
  1139     COUNT_INST(I_MOVB);
  1140     load_reg( R_EAX, 0 );
  1141     load_reg( R_ECX, Rn );
  1142     ADD_r32_r32( R_ECX, R_EAX );
  1143     MMU_TRANSLATE_WRITE( R_EAX );
  1144     load_reg( R_EDX, Rm );
  1145     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1146     sh4_x86.tstate = TSTATE_NONE;
  1147 :}
  1148 MOV.B R0, @(disp, GBR) {:  
  1149     COUNT_INST(I_MOVB);
  1150     load_spreg( R_EAX, R_GBR );
  1151     ADD_imm32_r32( disp, R_EAX );
  1152     MMU_TRANSLATE_WRITE( R_EAX );
  1153     load_reg( R_EDX, 0 );
  1154     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1155     sh4_x86.tstate = TSTATE_NONE;
  1156 :}
  1157 MOV.B R0, @(disp, Rn) {:  
  1158     COUNT_INST(I_MOVB);
  1159     load_reg( R_EAX, Rn );
  1160     ADD_imm32_r32( disp, R_EAX );
  1161     MMU_TRANSLATE_WRITE( R_EAX );
  1162     load_reg( R_EDX, 0 );
  1163     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1164     sh4_x86.tstate = TSTATE_NONE;
  1165 :}
  1166 MOV.B @Rm, Rn {:  
  1167     COUNT_INST(I_MOVB);
  1168     load_reg( R_EAX, Rm );
  1169     MMU_TRANSLATE_READ( R_EAX );
  1170     MEM_READ_BYTE( R_EAX, R_EAX );
  1171     store_reg( R_EAX, Rn );
  1172     sh4_x86.tstate = TSTATE_NONE;
  1173 :}
  1174 MOV.B @Rm+, Rn {:  
  1175     COUNT_INST(I_MOVB);
  1176     load_reg( R_EAX, Rm );
  1177     MMU_TRANSLATE_READ( R_EAX );
  1178     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1179     MEM_READ_BYTE( R_EAX, R_EAX );
  1180     store_reg( R_EAX, Rn );
  1181     sh4_x86.tstate = TSTATE_NONE;
  1182 :}
  1183 MOV.B @(R0, Rm), Rn {:  
  1184     COUNT_INST(I_MOVB);
  1185     load_reg( R_EAX, 0 );
  1186     load_reg( R_ECX, Rm );
  1187     ADD_r32_r32( R_ECX, R_EAX );
   1188     MMU_TRANSLATE_READ( R_EAX );
  1189     MEM_READ_BYTE( R_EAX, R_EAX );
  1190     store_reg( R_EAX, Rn );
  1191     sh4_x86.tstate = TSTATE_NONE;
  1192 :}
  1193 MOV.B @(disp, GBR), R0 {:  
  1194     COUNT_INST(I_MOVB);
  1195     load_spreg( R_EAX, R_GBR );
  1196     ADD_imm32_r32( disp, R_EAX );
  1197     MMU_TRANSLATE_READ( R_EAX );
  1198     MEM_READ_BYTE( R_EAX, R_EAX );
  1199     store_reg( R_EAX, 0 );
  1200     sh4_x86.tstate = TSTATE_NONE;
  1201 :}
  1202 MOV.B @(disp, Rm), R0 {:  
  1203     COUNT_INST(I_MOVB);
  1204     load_reg( R_EAX, Rm );
  1205     ADD_imm32_r32( disp, R_EAX );
  1206     MMU_TRANSLATE_READ( R_EAX );
  1207     MEM_READ_BYTE( R_EAX, R_EAX );
  1208     store_reg( R_EAX, 0 );
  1209     sh4_x86.tstate = TSTATE_NONE;
  1210 :}
  1211 MOV.L Rm, @Rn {:
  1212     COUNT_INST(I_MOVL);
  1213     load_reg( R_EAX, Rn );
  1214     check_walign32(R_EAX);
  1215     MMU_TRANSLATE_WRITE( R_EAX );
  1216     load_reg( R_EDX, Rm );
  1217     MEM_WRITE_LONG( R_EAX, R_EDX );
  1218     sh4_x86.tstate = TSTATE_NONE;
  1219 :}
  1220 MOV.L Rm, @-Rn {:  
  1221     COUNT_INST(I_MOVL);
  1222     load_reg( R_EAX, Rn );
  1223     ADD_imm8s_r32( -4, R_EAX );
  1224     check_walign32( R_EAX );
  1225     MMU_TRANSLATE_WRITE( R_EAX );
  1226     load_reg( R_EDX, Rm );
  1227     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1228     MEM_WRITE_LONG( R_EAX, R_EDX );
  1229     sh4_x86.tstate = TSTATE_NONE;
  1230 :}
  1231 MOV.L Rm, @(R0, Rn) {:  
  1232     COUNT_INST(I_MOVL);
  1233     load_reg( R_EAX, 0 );
  1234     load_reg( R_ECX, Rn );
  1235     ADD_r32_r32( R_ECX, R_EAX );
  1236     check_walign32( R_EAX );
  1237     MMU_TRANSLATE_WRITE( R_EAX );
  1238     load_reg( R_EDX, Rm );
  1239     MEM_WRITE_LONG( R_EAX, R_EDX );
  1240     sh4_x86.tstate = TSTATE_NONE;
  1241 :}
  1242 MOV.L R0, @(disp, GBR) {:  
  1243     COUNT_INST(I_MOVL);
  1244     load_spreg( R_EAX, R_GBR );
  1245     ADD_imm32_r32( disp, R_EAX );
  1246     check_walign32( R_EAX );
  1247     MMU_TRANSLATE_WRITE( R_EAX );
  1248     load_reg( R_EDX, 0 );
  1249     MEM_WRITE_LONG( R_EAX, R_EDX );
  1250     sh4_x86.tstate = TSTATE_NONE;
  1251 :}
  1252 MOV.L Rm, @(disp, Rn) {:  
  1253     COUNT_INST(I_MOVL);
  1254     load_reg( R_EAX, Rn );
  1255     ADD_imm32_r32( disp, R_EAX );
  1256     check_walign32( R_EAX );
  1257     MMU_TRANSLATE_WRITE( R_EAX );
  1258     load_reg( R_EDX, Rm );
  1259     MEM_WRITE_LONG( R_EAX, R_EDX );
  1260     sh4_x86.tstate = TSTATE_NONE;
  1261 :}
  1262 MOV.L @Rm, Rn {:  
  1263     COUNT_INST(I_MOVL);
  1264     load_reg( R_EAX, Rm );
  1265     check_ralign32( R_EAX );
  1266     MMU_TRANSLATE_READ( R_EAX );
  1267     MEM_READ_LONG( R_EAX, R_EAX );
  1268     store_reg( R_EAX, Rn );
  1269     sh4_x86.tstate = TSTATE_NONE;
  1270 :}
  1271 MOV.L @Rm+, Rn {:  
  1272     COUNT_INST(I_MOVL);
  1273     load_reg( R_EAX, Rm );
  1274     check_ralign32( R_EAX );
  1275     MMU_TRANSLATE_READ( R_EAX );
  1276     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1277     MEM_READ_LONG( R_EAX, R_EAX );
  1278     store_reg( R_EAX, Rn );
  1279     sh4_x86.tstate = TSTATE_NONE;
  1280 :}
  1281 MOV.L @(R0, Rm), Rn {:  
  1282     COUNT_INST(I_MOVL);
  1283     load_reg( R_EAX, 0 );
  1284     load_reg( R_ECX, Rm );
  1285     ADD_r32_r32( R_ECX, R_EAX );
  1286     check_ralign32( R_EAX );
  1287     MMU_TRANSLATE_READ( R_EAX );
  1288     MEM_READ_LONG( R_EAX, R_EAX );
  1289     store_reg( R_EAX, Rn );
  1290     sh4_x86.tstate = TSTATE_NONE;
  1291 :}
  1292 MOV.L @(disp, GBR), R0 {:
  1293     COUNT_INST(I_MOVL);
  1294     load_spreg( R_EAX, R_GBR );
  1295     ADD_imm32_r32( disp, R_EAX );
  1296     check_ralign32( R_EAX );
  1297     MMU_TRANSLATE_READ( R_EAX );
  1298     MEM_READ_LONG( R_EAX, R_EAX );
  1299     store_reg( R_EAX, 0 );
  1300     sh4_x86.tstate = TSTATE_NONE;
  1301 :}
  1302 MOV.L @(disp, PC), Rn {:  
  1303     COUNT_INST(I_MOVLPC);
  1304     if( sh4_x86.in_delay_slot ) {
  1305 	SLOTILLEGAL();
  1306     } else {
  1307 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1308 	if( IS_IN_ICACHE(target) ) {
  1309 	    // If the target address is in the same page as the code, it's
  1310 	    // pretty safe to just ref it directly and circumvent the whole
  1311 	    // memory subsystem. (this is a big performance win)
  1313 	    // FIXME: There's a corner-case that's not handled here when
  1314 	    // the current code-page is in the ITLB but not in the UTLB.
   1315 	    // (should generate a TLB miss, although this needs testing
   1316 	    // against real SH4 behaviour to confirm). Unlikely that anyone
   1317 	    // depends on this behaviour though.
  1318 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1319 	    MOV_moff32_EAX( ptr );
  1320 	} else {
  1321 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1322 	    // different virtual address than the translation was done with,
  1323 	    // but we can safely assume that the low bits are the same.
  1324 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1325 	    ADD_sh4r_r32( R_PC, R_EAX );
  1326 	    MMU_TRANSLATE_READ( R_EAX );
  1327 	    MEM_READ_LONG( R_EAX, R_EAX );
  1328 	    sh4_x86.tstate = TSTATE_NONE;
   1329 	}
   1330 	store_reg( R_EAX, Rn );
   1331     }
   1332 :}
  1333 MOV.L @(disp, Rm), Rn {:  
  1334     COUNT_INST(I_MOVL);
  1335     load_reg( R_EAX, Rm );
  1336     ADD_imm8s_r32( disp, R_EAX );
  1337     check_ralign32( R_EAX );
  1338     MMU_TRANSLATE_READ( R_EAX );
  1339     MEM_READ_LONG( R_EAX, R_EAX );
  1340     store_reg( R_EAX, Rn );
  1341     sh4_x86.tstate = TSTATE_NONE;
  1342 :}
  1343 MOV.W Rm, @Rn {:  
  1344     COUNT_INST(I_MOVW);
  1345     load_reg( R_EAX, Rn );
  1346     check_walign16( R_EAX );
   1347     MMU_TRANSLATE_WRITE( R_EAX );
  1348     load_reg( R_EDX, Rm );
  1349     MEM_WRITE_WORD( R_EAX, R_EDX );
  1350     sh4_x86.tstate = TSTATE_NONE;
  1351 :}
  1352 MOV.W Rm, @-Rn {:  
  1353     COUNT_INST(I_MOVW);
  1354     load_reg( R_EAX, Rn );
  1355     ADD_imm8s_r32( -2, R_EAX );
  1356     check_walign16( R_EAX );
  1357     MMU_TRANSLATE_WRITE( R_EAX );
  1358     load_reg( R_EDX, Rm );
  1359     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1360     MEM_WRITE_WORD( R_EAX, R_EDX );
  1361     sh4_x86.tstate = TSTATE_NONE;
  1362 :}
  1363 MOV.W Rm, @(R0, Rn) {:  
  1364     COUNT_INST(I_MOVW);
  1365     load_reg( R_EAX, 0 );
  1366     load_reg( R_ECX, Rn );
  1367     ADD_r32_r32( R_ECX, R_EAX );
  1368     check_walign16( R_EAX );
  1369     MMU_TRANSLATE_WRITE( R_EAX );
  1370     load_reg( R_EDX, Rm );
  1371     MEM_WRITE_WORD( R_EAX, R_EDX );
  1372     sh4_x86.tstate = TSTATE_NONE;
  1373 :}
  1374 MOV.W R0, @(disp, GBR) {:  
  1375     COUNT_INST(I_MOVW);
  1376     load_spreg( R_EAX, R_GBR );
  1377     ADD_imm32_r32( disp, R_EAX );
  1378     check_walign16( R_EAX );
  1379     MMU_TRANSLATE_WRITE( R_EAX );
  1380     load_reg( R_EDX, 0 );
  1381     MEM_WRITE_WORD( R_EAX, R_EDX );
  1382     sh4_x86.tstate = TSTATE_NONE;
  1383 :}
  1384 MOV.W R0, @(disp, Rn) {:  
  1385     COUNT_INST(I_MOVW);
  1386     load_reg( R_EAX, Rn );
  1387     ADD_imm32_r32( disp, R_EAX );
  1388     check_walign16( R_EAX );
  1389     MMU_TRANSLATE_WRITE( R_EAX );
  1390     load_reg( R_EDX, 0 );
  1391     MEM_WRITE_WORD( R_EAX, R_EDX );
  1392     sh4_x86.tstate = TSTATE_NONE;
  1393 :}
  1394 MOV.W @Rm, Rn {:  
  1395     COUNT_INST(I_MOVW);
  1396     load_reg( R_EAX, Rm );
  1397     check_ralign16( R_EAX );
  1398     MMU_TRANSLATE_READ( R_EAX );
  1399     MEM_READ_WORD( R_EAX, R_EAX );
  1400     store_reg( R_EAX, Rn );
  1401     sh4_x86.tstate = TSTATE_NONE;
  1402 :}
  1403 MOV.W @Rm+, Rn {:  
  1404     COUNT_INST(I_MOVW);
  1405     load_reg( R_EAX, Rm );
  1406     check_ralign16( R_EAX );
  1407     MMU_TRANSLATE_READ( R_EAX );
  1408     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1409     MEM_READ_WORD( R_EAX, R_EAX );
  1410     store_reg( R_EAX, Rn );
  1411     sh4_x86.tstate = TSTATE_NONE;
  1412 :}
  1413 MOV.W @(R0, Rm), Rn {:  
  1414     COUNT_INST(I_MOVW);
  1415     load_reg( R_EAX, 0 );
  1416     load_reg( R_ECX, Rm );
  1417     ADD_r32_r32( R_ECX, R_EAX );
  1418     check_ralign16( R_EAX );
  1419     MMU_TRANSLATE_READ( R_EAX );
  1420     MEM_READ_WORD( R_EAX, R_EAX );
  1421     store_reg( R_EAX, Rn );
  1422     sh4_x86.tstate = TSTATE_NONE;
  1423 :}
  1424 MOV.W @(disp, GBR), R0 {:  
  1425     COUNT_INST(I_MOVW);
  1426     load_spreg( R_EAX, R_GBR );
  1427     ADD_imm32_r32( disp, R_EAX );
  1428     check_ralign16( R_EAX );
  1429     MMU_TRANSLATE_READ( R_EAX );
  1430     MEM_READ_WORD( R_EAX, R_EAX );
  1431     store_reg( R_EAX, 0 );
  1432     sh4_x86.tstate = TSTATE_NONE;
  1433 :}
  1434 MOV.W @(disp, PC), Rn {:  
  1435     COUNT_INST(I_MOVW);
  1436     if( sh4_x86.in_delay_slot ) {
  1437 	SLOTILLEGAL();
  1438     } else {
  1439 	// See comments for MOV.L @(disp, PC), Rn
  1440 	uint32_t target = pc + disp + 4;
  1441 	if( IS_IN_ICACHE(target) ) {
  1442 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1443 	    MOV_moff32_EAX( ptr );
  1444 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1445 	} else {
  1446 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1447 	    ADD_sh4r_r32( R_PC, R_EAX );
  1448 	    MMU_TRANSLATE_READ( R_EAX );
  1449 	    MEM_READ_WORD( R_EAX, R_EAX );
  1450 	    sh4_x86.tstate = TSTATE_NONE;
   1451 	}
   1452 	store_reg( R_EAX, Rn );
   1453     }
   1454 :}
  1455 MOV.W @(disp, Rm), R0 {:  
  1456     COUNT_INST(I_MOVW);
  1457     load_reg( R_EAX, Rm );
  1458     ADD_imm32_r32( disp, R_EAX );
  1459     check_ralign16( R_EAX );
  1460     MMU_TRANSLATE_READ( R_EAX );
  1461     MEM_READ_WORD( R_EAX, R_EAX );
  1462     store_reg( R_EAX, 0 );
  1463     sh4_x86.tstate = TSTATE_NONE;
  1464 :}
  1465 MOVA @(disp, PC), R0 {:  
  1466     COUNT_INST(I_MOVA);
  1467     if( sh4_x86.in_delay_slot ) {
  1468 	SLOTILLEGAL();
  1469     } else {
  1470 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1471 	ADD_sh4r_r32( R_PC, R_ECX );
  1472 	store_reg( R_ECX, 0 );
  1473 	sh4_x86.tstate = TSTATE_NONE;
   1474     }
   1475 :}
  1476 MOVCA.L R0, @Rn {:  
  1477     COUNT_INST(I_MOVCA);
  1478     load_reg( R_EAX, Rn );
  1479     check_walign32( R_EAX );
  1480     MMU_TRANSLATE_WRITE( R_EAX );
  1481     load_reg( R_EDX, 0 );
  1482     MEM_WRITE_LONG( R_EAX, R_EDX );
  1483     sh4_x86.tstate = TSTATE_NONE;
  1484 :}
  1486 /* Control transfer instructions */
  1487 BF disp {:
  1488     COUNT_INST(I_BF);
  1489     if( sh4_x86.in_delay_slot ) {
  1490 	SLOTILLEGAL();
  1491     } else {
  1492 	sh4vma_t target = disp + pc + 4;
  1493 	JT_rel8( nottaken );
  1494 	exit_block_rel(target, pc+2 );
  1495 	JMP_TARGET(nottaken);
  1496 	return 2;
   1497     }
   1498 :}
  1499 BF/S disp {:
  1500     COUNT_INST(I_BFS);
  1501     if( sh4_x86.in_delay_slot ) {
  1502 	SLOTILLEGAL();
  1503     } else {
  1504 	sh4_x86.in_delay_slot = DELAY_PC;
  1505 	if( UNTRANSLATABLE(pc+2) ) {
  1506 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1507 	    JT_rel8(nottaken);
  1508 	    ADD_imm32_r32( disp, R_EAX );
  1509 	    JMP_TARGET(nottaken);
  1510 	    ADD_sh4r_r32( R_PC, R_EAX );
  1511 	    store_spreg( R_EAX, R_NEW_PC );
  1512 	    exit_block_emu(pc+2);
  1513 	    sh4_x86.branch_taken = TRUE;
  1514 	    return 2;
  1515 	} else {
  1516 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1517 		CMP_imm8s_sh4r( 1, R_T );
  1518 		sh4_x86.tstate = TSTATE_E;
   1519 	    }
   1520 	    sh4vma_t target = disp + pc + 4;
  1521 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1522 	    int save_tstate = sh4_x86.tstate;
  1523 	    sh4_translate_instruction(pc+2);
  1524 	    exit_block_rel( target, pc+4 );
  1526 	    // not taken
  1527 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1528 	    sh4_x86.tstate = save_tstate;
  1529 	    sh4_translate_instruction(pc+2);
  1530 	    return 4;
   1531 	}
   1532     }
   1533 :}
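/* Conditional delayed branches translate the delay slot twice: once on
 * the taken path (before exit_block_rel) and once on the fall-through
 * path reached via the patched rel32 jump. Both paths stay straight-line
 * at the cost of duplicating the slot; tstate is saved and restored so
 * the second copy sees the same cached-flags state as the first.
 */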
  1534 BRA disp {:  
  1535     COUNT_INST(I_BRA);
  1536     if( sh4_x86.in_delay_slot ) {
  1537 	SLOTILLEGAL();
  1538     } else {
  1539 	sh4_x86.in_delay_slot = DELAY_PC;
  1540 	sh4_x86.branch_taken = TRUE;
  1541 	if( UNTRANSLATABLE(pc+2) ) {
  1542 	    load_spreg( R_EAX, R_PC );
  1543 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1544 	    store_spreg( R_EAX, R_NEW_PC );
  1545 	    exit_block_emu(pc+2);
  1546 	    return 2;
  1547 	} else {
  1548 	    sh4_translate_instruction( pc + 2 );
  1549 	    exit_block_rel( disp + pc + 4, pc+4 );
  1550 	    return 4;
   1551 	}
   1552     }
   1553 :}
  1554 BRAF Rn {:  
  1555     COUNT_INST(I_BRAF);
  1556     if( sh4_x86.in_delay_slot ) {
  1557 	SLOTILLEGAL();
  1558     } else {
  1559 	load_spreg( R_EAX, R_PC );
  1560 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1561 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1562 	store_spreg( R_EAX, R_NEW_PC );
  1563 	sh4_x86.in_delay_slot = DELAY_PC;
  1564 	sh4_x86.tstate = TSTATE_NONE;
  1565 	sh4_x86.branch_taken = TRUE;
  1566 	if( UNTRANSLATABLE(pc+2) ) {
  1567 	    exit_block_emu(pc+2);
  1568 	    return 2;
  1569 	} else {
  1570 	    sh4_translate_instruction( pc + 2 );
  1571 	    exit_block_newpcset(pc+2);
  1572 	    return 4;
   1573 	}
   1574     }
   1575 :}
  1576 BSR disp {:  
  1577     COUNT_INST(I_BSR);
  1578     if( sh4_x86.in_delay_slot ) {
  1579 	SLOTILLEGAL();
  1580     } else {
  1581 	load_spreg( R_EAX, R_PC );
  1582 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1583 	store_spreg( R_EAX, R_PR );
  1584 	sh4_x86.in_delay_slot = DELAY_PC;
  1585 	sh4_x86.branch_taken = TRUE;
  1586 	sh4_x86.tstate = TSTATE_NONE;
  1587 	if( UNTRANSLATABLE(pc+2) ) {
  1588 	    ADD_imm32_r32( disp, R_EAX );
  1589 	    store_spreg( R_EAX, R_NEW_PC );
  1590 	    exit_block_emu(pc+2);
  1591 	    return 2;
  1592 	} else {
  1593 	    sh4_translate_instruction( pc + 2 );
  1594 	    exit_block_rel( disp + pc + 4, pc+4 );
  1595 	    return 4;
   1596 	}
   1597     }
   1598 :}
  1599 BSRF Rn {:  
  1600     COUNT_INST(I_BSRF);
  1601     if( sh4_x86.in_delay_slot ) {
  1602 	SLOTILLEGAL();
  1603     } else {
  1604 	load_spreg( R_EAX, R_PC );
  1605 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1606 	store_spreg( R_EAX, R_PR );
  1607 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1608 	store_spreg( R_EAX, R_NEW_PC );
  1610 	sh4_x86.in_delay_slot = DELAY_PC;
  1611 	sh4_x86.tstate = TSTATE_NONE;
  1612 	sh4_x86.branch_taken = TRUE;
  1613 	if( UNTRANSLATABLE(pc+2) ) {
  1614 	    exit_block_emu(pc+2);
  1615 	    return 2;
  1616 	} else {
  1617 	    sh4_translate_instruction( pc + 2 );
  1618 	    exit_block_newpcset(pc+2);
  1619 	    return 4;
   1620 	}
   1621     }
   1622 :}
  1623 BT disp {:
  1624     COUNT_INST(I_BT);
  1625     if( sh4_x86.in_delay_slot ) {
  1626 	SLOTILLEGAL();
  1627     } else {
  1628 	sh4vma_t target = disp + pc + 4;
  1629 	JF_rel8( nottaken );
  1630 	exit_block_rel(target, pc+2 );
  1631 	JMP_TARGET(nottaken);
  1632 	return 2;
   1633     }
   1634 :}
  1635 BT/S disp {:
  1636     COUNT_INST(I_BTS);
  1637     if( sh4_x86.in_delay_slot ) {
  1638 	SLOTILLEGAL();
  1639     } else {
  1640 	sh4_x86.in_delay_slot = DELAY_PC;
  1641 	if( UNTRANSLATABLE(pc+2) ) {
  1642 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1643 	    JF_rel8(nottaken);
  1644 	    ADD_imm32_r32( disp, R_EAX );
  1645 	    JMP_TARGET(nottaken);
  1646 	    ADD_sh4r_r32( R_PC, R_EAX );
  1647 	    store_spreg( R_EAX, R_NEW_PC );
  1648 	    exit_block_emu(pc+2);
  1649 	    sh4_x86.branch_taken = TRUE;
  1650 	    return 2;
  1651 	} else {
  1652 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1653 		CMP_imm8s_sh4r( 1, R_T );
  1654 		sh4_x86.tstate = TSTATE_E;
   1655 	    }
   1656 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1657 	    int save_tstate = sh4_x86.tstate;
  1658 	    sh4_translate_instruction(pc+2);
  1659 	    exit_block_rel( disp + pc + 4, pc+4 );
  1660 	    // not taken
  1661 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1662 	    sh4_x86.tstate = save_tstate;
  1663 	    sh4_translate_instruction(pc+2);
  1664 	    return 4;
  1667 :}
JMP @Rn {:
    COUNT_INST(I_JMP);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
JSR @Rn {:
    COUNT_INST(I_JSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
RTE {:
    COUNT_INST(I_RTE);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_spreg( R_ECX, R_SPC );
        store_spreg( R_ECX, R_NEW_PC );
        load_spreg( R_EAX, R_SSR );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
RTS {:
    COUNT_INST(I_RTS);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_ECX, R_PR );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction(pc+2);
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
TRAPA #imm {:
    COUNT_INST(I_TRAPA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
        ADD_r32_sh4r( R_ECX, R_PC );
        load_imm32( R_EAX, imm );
        call_func1( sh4_raise_trap, R_EAX );
        sh4_x86.tstate = TSTATE_NONE;
        exit_block_pcset(pc);
        sh4_x86.branch_taken = TRUE;
        return 2;
    }
:}
UNDEF {:
    COUNT_INST(I_UNDEF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        JMP_exc(EXC_ILLEGAL);
        return 2;
    }
:}

CLRMAC {:
    COUNT_INST(I_CLRMAC);
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
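/* CLRS/CLRT/SETS/SETT force the host carry flag with CLC/STC and then use
 * SETC to materialize the 0/1 value directly into the S or T flag. */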
CLRS {:
    COUNT_INST(I_CLRS);
    CLC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRT {:
    COUNT_INST(I_CLRT);
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SETS {:
    COUNT_INST(I_SETS);
    STC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
SETT {:
    COUNT_INST(I_SETT);
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}

/* Floating point moves */
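/*
 * FPSCR.SZ selects between single 32-bit moves and paired 64-bit moves, and
 * is only known at runtime, so each FMOV tests FPSCR and emits both code
 * paths (the "doublesize" branch handles SZ=1).
 */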
FMOV FRm, FRn {:
    COUNT_INST(I_FMOV1);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_ECX );
    JNE_rel8(doublesize);
    load_fr( R_EAX, FRm ); // SZ=0 branch
    store_fr( R_EAX, FRn );
    JMP_rel8(end);
    JMP_TARGET(doublesize);
    load_dr0( R_EAX, FRm );
    load_dr1( R_ECX, FRm );
    store_dr0( R_EAX, FRn );
    store_dr1( R_ECX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @Rn {:
    COUNT_INST(I_FMOV2);
    check_fpuen();
    load_reg( R_EAX, Rn );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(doublesize);

    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_fr( R_ECX, FRm );
    MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
    JMP_rel8(end);

    JMP_TARGET(doublesize);
    check_walign64( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_dr0( R_ECX, FRm );
    load_dr1( R_EDX, FRm );
    MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @Rm, FRn {:
    COUNT_INST(I_FMOV5);
    check_fpuen();
    load_reg( R_EAX, Rm );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(doublesize);

    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_fr( R_EAX, FRn );
    JMP_rel8(end);

    JMP_TARGET(doublesize);
    check_ralign64( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
    store_dr0( R_ECX, FRn );
    store_dr1( R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @-Rn {:
    COUNT_INST(I_FMOV3);
    check_fpuen();
    load_reg( R_EAX, Rn );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(doublesize);

    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_fr( R_ECX, FRm );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_ECX );
    JMP_rel8(end);

    JMP_TARGET(doublesize);
    check_walign64( R_EAX );
    ADD_imm8s_r32( -8, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_dr0( R_ECX, FRm );
    load_dr1( R_EDX, FRm );
    ADD_imm8s_sh4r( -8, REG_OFFSET(r[Rn]) );
    MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
    JMP_TARGET(end);

    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @Rm+, FRn {:
    COUNT_INST(I_FMOV6);
    check_fpuen();
    load_reg( R_EAX, Rm );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(doublesize);

    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_fr( R_EAX, FRn );
    JMP_rel8(end);

    JMP_TARGET(doublesize);
    check_ralign64( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
    MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
    store_dr0( R_ECX, FRn );
    store_dr1( R_EAX, FRn );
    JMP_TARGET(end);

    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @(R0, Rn) {:
    COUNT_INST(I_FMOV4);
    check_fpuen();
    load_reg( R_EAX, Rn );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(doublesize);

    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_fr( R_ECX, FRm );
    MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
    JMP_rel8(end);

    JMP_TARGET(doublesize);
    check_walign64( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_dr0( R_ECX, FRm );
    load_dr1( R_EDX, FRm );
    MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
    JMP_TARGET(end);

    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @(R0, Rm), FRn {:
    COUNT_INST(I_FMOV7);
    check_fpuen();
    load_reg( R_EAX, Rm );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(doublesize);

    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_fr( R_EAX, FRn );
    JMP_rel8(end);

    JMP_TARGET(doublesize);
    check_ralign64( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
    store_dr0( R_ECX, FRn );
    store_dr1( R_EAX, FRn );
    JMP_TARGET(end);

    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI0 FRn {:  /* IFF PR=0 */
    COUNT_INST(I_FLDI0);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(end);
    XOR_r32_r32( R_EAX, R_EAX );
    store_fr( R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI1 FRn {:  /* IFF PR=0 */
    COUNT_INST(I_FLDI1);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(end);
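    /* 0x3F800000 is the IEEE-754 single-precision encoding of 1.0f */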
    load_imm32( R_EAX, 0x3F800000 );
    store_fr( R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}

FLOAT FPUL, FRn {:
    COUNT_INST(I_FLOAT);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    FILD_sh4r(R_FPUL);
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    pop_fr( FRn );
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    pop_dr( FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FTRC FRm, FPUL {:
    COUNT_INST(I_FTRC);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr( FRm );
    JMP_rel8(doop);
    JMP_TARGET(doubleprec);
    push_dr( FRm );
    JMP_TARGET(doop);
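    /* SH4 FTRC saturates on overflow, but x87 FISTP does not: compare the
     * value against max_int/min_int first and clamp, and temporarily switch
     * the x87 control word to round-toward-zero for the conversion itself. */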
    load_ptr( R_ECX, &max_int );
    FILD_r32ind( R_ECX );
    FCOMIP_st(1);
    JNA_rel8( sat );
    load_ptr( R_ECX, &min_int );    // 5
    FILD_r32ind( R_ECX );           // 2
    FCOMIP_st(1);                   // 2
    JAE_rel8( sat2 );               // 2
    load_ptr( R_EAX, &save_fcw );
    FNSTCW_r32ind( R_EAX );
    load_ptr( R_EDX, &trunc_fcw );
    FLDCW_r32ind( R_EDX );
    FISTP_sh4r(R_FPUL);             // 3
    FLDCW_r32ind( R_EAX );
    JMP_rel8(end);                  // 2

    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    MOV_r32ind_r32( R_ECX, R_ECX ); // 2
    store_spreg( R_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDS FRm, FPUL {:
    COUNT_INST(I_FLDS);
    check_fpuen();
    load_fr( R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSTS FPUL, FRn {:
    COUNT_INST(I_FSTS);
    check_fpuen();
    load_spreg( R_EAX, R_FPUL );
    store_fr( R_EAX, FRn );
    sh4_x86.tstate = TSTATE_NONE;
:}
FCNVDS FRm, FPUL {:
    COUNT_INST(I_FCNVDS);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(end); // only when PR=1
    push_dr( FRm );
    pop_fpul();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FCNVSD FPUL, FRn {:
    COUNT_INST(I_FCNVSD);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(end); // only when PR=1
    push_fpul();
    pop_dr( FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Floating point instructions */
FABS FRn {:
    COUNT_INST(I_FABS);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn);  // 6
    FABS_st0();    // 2
    pop_fr(FRn);   // 6
    JMP_rel8(end); // 2
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    FABS_st0();
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FADD FRm, FRn {:
    COUNT_INST(I_FADD);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRm);
    push_fr(FRn);
    FADDP_st(1);
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRm);
    push_dr(FRn);
    FADDP_st(1);
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FDIV FRm, FRn {:
    COUNT_INST(I_FDIV);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn);
    push_fr(FRm);
    FDIVP_st(1);
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    push_dr(FRm);
    FDIVP_st(1);
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMAC FR0, FRm, FRn {:
    COUNT_INST(I_FMAC);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr( 0 );
    push_fr( FRm );
    FMULP_st(1);
    push_fr( FRn );
    FADDP_st(1);
    pop_fr( FRn );
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr( 0 );
    push_dr( FRm );
    FMULP_st(1);
    push_dr( FRn );
    FADDP_st(1);
    pop_dr( FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}

FMUL FRm, FRn {:
    COUNT_INST(I_FMUL);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRm);
    push_fr(FRn);
    FMULP_st(1);
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRm);
    push_dr(FRn);
    FMULP_st(1);
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FNEG FRn {:
    COUNT_INST(I_FNEG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn);
    FCHS_st0();
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    FCHS_st0();
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSRRA FRn {:
    COUNT_INST(I_FSRRA);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(end); // PR=0 only
    FLD1_st0();
    push_fr(FRn);
    FSQRT_st0();
    FDIVP_st(1);
    pop_fr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSQRT FRn {:
    COUNT_INST(I_FSQRT);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn);
    FSQRT_st0();
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    FSQRT_st0();
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSUB FRm, FRn {:
    COUNT_INST(I_FSUB);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn);
    push_fr(FRm);
    FSUBP_st(1);
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    push_dr(FRm);
    FSUBP_st(1);
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
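/* Both comparison forms funnel into a shared FCOMIP/SETcc tail after the
 * "end" label, so the two precision branches differ only in the loads. */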
FCMP/EQ FRm, FRn {:
    COUNT_INST(I_FCMPEQ);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRm);
    push_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRm);
    push_dr(FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETE_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
FCMP/GT FRm, FRn {:
    COUNT_INST(I_FCMPGT);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRm);
    push_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRm);
    push_dr(FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}

FSCA FPUL, FRn {:
    COUNT_INST(I_FSCA);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
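    /* FRn&0x0E addresses the even-numbered register pair; sh4_fsca() writes
     * the sine and cosine of the fixed-point angle in FPUL into it. */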
    LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
    load_spreg( R_EDX, R_FPUL );
    call_func2( sh4_fsca, R_EDX, R_ECX );
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FIPR FVm, FVn {:
    COUNT_INST(I_FIPR);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
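    /* Accumulate the 4-element dot product of FVm and FVn on the x87 stack;
     * the result lands in the last element of FVn. */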
    push_fr( FVm<<2 );
    push_fr( FVn<<2 );
    FMULP_st(1);
    push_fr( (FVm<<2)+1 );
    push_fr( (FVn<<2)+1 );
    FMULP_st(1);
    FADDP_st(1);
    push_fr( (FVm<<2)+2 );
    push_fr( (FVn<<2)+2 );
    FMULP_st(1);
    FADDP_st(1);
    push_fr( (FVm<<2)+3 );
    push_fr( (FVn<<2)+3 );
    FMULP_st(1);
    FADDP_st(1);
    pop_fr( (FVn<<2)+3 );
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FTRV XMTRX, FVn {:
    COUNT_INST(I_FTRV);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
    call_func1( sh4_ftrv, R_EDX );  // 12
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}

FRCHG {:
    COUNT_INST(I_FRCHG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_FR, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    call_func0( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:
    COUNT_INST(I_FSCHG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_SZ, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Processor control instructions */
LDC Rm, SR {:
    COUNT_INST(I_LDCSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_reg( R_EAX, Rm );
        call_func1( sh4_write_sr, R_EAX );
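        /* Writing SR may change the processor mode and the FPU-disable bit,
         * so any privilege/FPU checks already emitted for this block are
         * invalidated. */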
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
LDC Rm, GBR {:
    COUNT_INST(I_LDC);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );
:}
LDC Rm, VBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
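/* The LDC.L @Rm+ forms translate the address before bumping Rm, so a TLB
 * miss or address error leaves the register file unmodified. */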
LDC.L @Rm+, GBR {:
    COUNT_INST(I_LDCM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SR {:
    COUNT_INST(I_LDCSRM);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
        MEM_READ_LONG( R_EAX, R_EAX );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
LDC.L @Rm+, VBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPSCR {:
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    load_reg( R_EAX, Rm );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS.L @Rm+, FPSCR {:
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPUL {:
    COUNT_INST(I_LDS);
    check_fpuen();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:
    COUNT_INST(I_LDSM);
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {:
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );
:}
LDS.L @Rm+, PR {:
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:
    COUNT_INST(I_LDTLB);
    call_func0( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
:}
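/* The cache-control ops (OCBI/OCBP/OCBWB) are no-ops here, presumably
 * because the emulated memory model has no operand cache to maintain. */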
OCBI @Rn {:
    COUNT_INST(I_OCBI);
:}
OCBP @Rn {:
    COUNT_INST(I_OCBP);
:}
OCBWB @Rn {:
    COUNT_INST(I_OCBWB);
:}
PREF @Rn {:
    COUNT_INST(I_PREF);
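    /* Only addresses in the store-queue region (0xE0000000-0xE3FFFFFF) have
     * an architectural effect: PREF there triggers the queue flush. */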
    load_reg( R_EAX, Rn );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_EAX );
    CMP_imm32_r32( 0xE0000000, R_EAX );
    JNE_rel8(end);
    call_func1( sh4_flush_store_queue, R_ECX );
    TEST_r32_r32( R_EAX, R_EAX );
    JE_exc(-1);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {:
    COUNT_INST(I_SLEEP);
    check_priv();
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
STC SR, Rn {:
    COUNT_INST(I_STCSR);
    check_priv();
    call_func0( sh4_read_sr );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:
    COUNT_INST(I_STC);
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );
:}
STC VBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
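/* The STC.L/STS.L @-Rn forms compute the decremented address and translate
 * it first; Rn itself is only decremented once translation has succeeded. */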
STC.L SR, @-Rn {:
    COUNT_INST(I_STCSRM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    call_func0( sh4_read_sr );
    POP_realigned_r32( R_ECX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_VBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SSR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SPC );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SGR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_DBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:
    COUNT_INST(I_STCM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_GBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPSCR, Rn {:
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:
    COUNT_INST(I_STS);
    check_fpuen();
    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );
:}
STS.L FPUL, @-Rn {:
    COUNT_INST(I_STSM);
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPUL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );
:}
STS.L MACH, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACH );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );
:}
STS.L MACL, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );
:}
STS.L PR, @-Rn {:
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_PR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}

NOP {:
    COUNT_INST(I_NOP);
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */
:}
%%
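/* Code after the %% marker is appended to the end of the generated decode
 * function, presumably reached when no pattern above returns earlier. */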
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 0;