Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 733:633ee022f52e
prev 732:f05753bbe723
next 789:7e7ec23217fc
author nkeynes
date Fri Jul 11 04:02:25 2008 +0000 (12 years ago)
permissions -rw-r--r--
last change Bug 60: Fix off-by-one in recovery list size
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
#include <assert.h>
#include <math.h>
#include <stdlib.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4stat.h"
    32 #include "sh4/sh4mmio.h"
    33 #include "sh4/x86op.h"
    34 #include "clock.h"
    36 #define DEFAULT_BACKPATCH_SIZE 4096
    38 struct backpatch_record {
    39     uint32_t fixup_offset;
    40     uint32_t fixup_icount;
    41     int32_t exc_code;
    42 };
    44 #define DELAY_NONE 0
    45 #define DELAY_PC 1
    46 #define DELAY_PC_PR 2
    48 /** 
    49  * Struct to manage internal translation state. This state is not saved -
    50  * it is only valid between calls to sh4_translate_begin_block() and
    51  * sh4_translate_end_block()
    52  */
    53 struct sh4_x86_state {
    54     int in_delay_slot;
    55     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    56     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    57     gboolean branch_taken; /* true if we branched unconditionally */
    58     uint32_t block_start_pc;
    59     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    60     int tstate;
    62     /* mode flags */
    63     gboolean tlb_on; /* True if tlb translation is active */
    65     /* Allocated memory for the (block-wide) back-patch list */
    66     struct backpatch_record *backpatch_list;
    67     uint32_t backpatch_posn;
    68     uint32_t backpatch_size;
    69 };
    71 #define TSTATE_NONE -1
    72 #define TSTATE_O    0
    73 #define TSTATE_C    2
    74 #define TSTATE_E    4
    75 #define TSTATE_NE   5
    76 #define TSTATE_G    0xF
    77 #define TSTATE_GE   0xD
    78 #define TSTATE_A    7
    79 #define TSTATE_AE   3
    81 #ifdef ENABLE_SH4STATS
    82 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    83 #else
    84 #define COUNT_INST(id)
    85 #endif
    87 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    88 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    89 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    90     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    92 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    93 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    94 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    95     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
    97 static struct sh4_x86_state sh4_x86;
    99 static uint32_t max_int = 0x7FFFFFFF;
   100 static uint32_t min_int = 0x80000000;
   101 static uint32_t save_fcw; /* save value for fpu control word */
   102 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   104 void sh4_translate_init(void)
   105 {
   106     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   107     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   108 }
   111 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   112 {
   113     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   114 	sh4_x86.backpatch_size <<= 1;
   115 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   116 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   117 	assert( sh4_x86.backpatch_list != NULL );
   118     }
   119     if( sh4_x86.in_delay_slot ) {
   120 	fixup_pc -= 2;
   121     }
   122     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   123 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   124     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   125     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   126     sh4_x86.backpatch_posn++;
   127 }
   129 /**
   130  * Emit an instruction to load an SH4 reg into a real register
   131  */
   132 static inline void load_reg( int x86reg, int sh4reg ) 
   133 {
   134     /* mov [bp+n], reg */
   135     OP(0x8B);
   136     OP(0x45 + (x86reg<<3));
   137     OP(REG_OFFSET(r[sh4reg]));
   138 }
   140 static inline void load_reg16s( int x86reg, int sh4reg )
   141 {
   142     OP(0x0F);
   143     OP(0xBF);
   144     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   145 }
   147 static inline void load_reg16u( int x86reg, int sh4reg )
   148 {
   149     OP(0x0F);
   150     OP(0xB7);
   151     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   153 }
   155 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   156 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   157 /**
   158  * Emit an instruction to load an immediate value into a register
   159  */
   160 static inline void load_imm32( int x86reg, uint32_t value ) {
   161     /* mov #value, reg */
   162     OP(0xB8 + x86reg);
   163     OP32(value);
   164 }
   166 /**
   167  * Load an immediate 64-bit quantity (note: x86-64 only)
   168  */
   169 static inline void load_imm64( int x86reg, uint32_t value ) {
   170     /* mov #value, reg */
   171     REXW();
   172     OP(0xB8 + x86reg);
   173     OP64(value);
   174 }
   176 /**
   177  * Emit an instruction to store an SH4 reg (RN)
   178  */
   179 void static inline store_reg( int x86reg, int sh4reg ) {
   180     /* mov reg, [bp+n] */
   181     OP(0x89);
   182     OP(0x45 + (x86reg<<3));
   183     OP(REG_OFFSET(r[sh4reg]));
   184 }
   186 /**
   187  * Load an FR register (single-precision floating point) into an integer x86
   188  * register (eg for register-to-register moves)
   189  */
   190 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   191 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   193 /**
   194  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   195  */
   196 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   197 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   199 /**
   200  * Store an FR register (single-precision floating point) from an integer x86+
   201  * register (eg for register-to-register moves)
   202  */
   203 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   204 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   206 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   207 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   210 #define push_fpul()  FLDF_sh4r(R_FPUL)
   211 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   212 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   213 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   214 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   215 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   216 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   217 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   218 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   219 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   223 /* Exception checks - Note that all exception checks will clobber EAX */
   225 #define check_priv( ) \
   226     if( !sh4_x86.priv_checked ) { \
   227 	sh4_x86.priv_checked = TRUE;\
   228 	load_spreg( R_EAX, R_SR );\
   229 	AND_imm32_r32( SR_MD, R_EAX );\
   230 	if( sh4_x86.in_delay_slot ) {\
   231 	    JE_exc( EXC_SLOT_ILLEGAL );\
   232 	} else {\
   233 	    JE_exc( EXC_ILLEGAL );\
   234 	}\
   235     }\
   237 #define check_fpuen( ) \
   238     if( !sh4_x86.fpuen_checked ) {\
   239 	sh4_x86.fpuen_checked = TRUE;\
   240 	load_spreg( R_EAX, R_SR );\
   241 	AND_imm32_r32( SR_FD, R_EAX );\
   242 	if( sh4_x86.in_delay_slot ) {\
   243 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   244 	} else {\
   245 	    JNE_exc(EXC_FPU_DISABLED);\
   246 	}\
   247     }
   249 #define check_ralign16( x86reg ) \
   250     TEST_imm32_r32( 0x00000001, x86reg ); \
   251     JNE_exc(EXC_DATA_ADDR_READ)
   253 #define check_walign16( x86reg ) \
   254     TEST_imm32_r32( 0x00000001, x86reg ); \
   255     JNE_exc(EXC_DATA_ADDR_WRITE);
   257 #define check_ralign32( x86reg ) \
   258     TEST_imm32_r32( 0x00000003, x86reg ); \
   259     JNE_exc(EXC_DATA_ADDR_READ)
   261 #define check_walign32( x86reg ) \
   262     TEST_imm32_r32( 0x00000003, x86reg ); \
   263     JNE_exc(EXC_DATA_ADDR_WRITE);
   265 #define check_ralign64( x86reg ) \
   266     TEST_imm32_r32( 0x00000007, x86reg ); \
   267     JNE_exc(EXC_DATA_ADDR_READ)
   269 #define check_walign64( x86reg ) \
   270     TEST_imm32_r32( 0x00000007, x86reg ); \
   271     JNE_exc(EXC_DATA_ADDR_WRITE);
   273 #define UNDEF()
   274 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   275 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   276 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   277 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   278 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   279 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   280 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   282 /**
   283  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   284  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   285  */
   286 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   288 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
   289 /**
   290  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   291  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   292  */
   293 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   295 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
   296 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
   297 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
   299 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   301 /****** Import appropriate calling conventions ******/
   302 #if SIZEOF_VOID_P == 8
   303 #include "sh4/ia64abi.h"
   304 #else /* 32-bit system */
   305 #ifdef APPLE_BUILD
   306 #include "sh4/ia32mac.h"
   307 #else
   308 #include "sh4/ia32abi.h"
   309 #endif
   310 #endif
   312 uint32_t sh4_translate_end_block_size()
   313 {
   314     if( sh4_x86.backpatch_posn <= 3 ) {
   315 	return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   316     } else {
   317 	return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   318     }
   319 }
   322 /**
   323  * Embed a breakpoint into the generated code
   324  */
   325 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   326 {
   327     load_imm32( R_EAX, pc );
   328     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   329 }
   332 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   334 /**
   335  * Embed a call to sh4_execute_instruction for situations that we
   336  * can't translate (just page-crossing delay slots at the moment).
   337  * Caller is responsible for setting new_pc before calling this function.
   338  *
   339  * Performs:
   340  *   Set PC = endpc
   341  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   342  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   343  *   Call sh4_execute_instruction
   344  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   345  */
   346 void exit_block_emu( sh4vma_t endpc )
   347 {
   348     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   349     ADD_r32_sh4r( R_ECX, R_PC );
   351     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   352     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   353     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   354     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   356     call_func0( sh4_execute_instruction );    
   357     load_spreg( R_EAX, R_PC );
   358     if( sh4_x86.tlb_on ) {
   359 	call_func1(xlat_get_code_by_vma,R_EAX);
   360     } else {
   361 	call_func1(xlat_get_code,R_EAX);
   362     }
   363     AND_imm8s_rptr( 0xFC, R_EAX );
   364     POP_r32(R_EBP);
   365     RET();
   366 } 
   368 /**
   369  * Translate a single instruction. Delayed branches are handled specially
   370  * by translating both branch and delayed instruction as a single unit (as
   371  * 
   372  * The instruction MUST be in the icache (assert check)
   373  *
   374  * @return true if the instruction marks the end of a basic block
   375  * (eg a branch or 
   376  */
   377 uint32_t sh4_translate_instruction( sh4vma_t pc )
   378 {
   379     uint32_t ir;
   380     /* Read instruction from icache */
   381     assert( IS_IN_ICACHE(pc) );
   382     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   384 	/* PC is not in the current icache - this usually means we're running
   385 	 * with MMU on, and we've gone past the end of the page. And since 
   386 	 * sh4_translate_block is pretty careful about this, it means we're
   387 	 * almost certainly in a delay slot.
   388 	 *
   389 	 * Since we can't assume the page is present (and we can't fault it in
   390 	 * at this point, inline a call to sh4_execute_instruction (with a few
   391 	 * small repairs to cope with the different environment).
   392 	 */
   394     if( !sh4_x86.in_delay_slot ) {
   395 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   396     }
   397 %%
   398 /* ALU operations */
   399 ADD Rm, Rn {:
   400     COUNT_INST(I_ADD);
   401     load_reg( R_EAX, Rm );
   402     load_reg( R_ECX, Rn );
   403     ADD_r32_r32( R_EAX, R_ECX );
   404     store_reg( R_ECX, Rn );
   405     sh4_x86.tstate = TSTATE_NONE;
   406 :}
   407 ADD #imm, Rn {:  
   408     COUNT_INST(I_ADDI);
   409     load_reg( R_EAX, Rn );
   410     ADD_imm8s_r32( imm, R_EAX );
   411     store_reg( R_EAX, Rn );
   412     sh4_x86.tstate = TSTATE_NONE;
   413 :}
   414 ADDC Rm, Rn {:
   415     COUNT_INST(I_ADDC);
   416     if( sh4_x86.tstate != TSTATE_C ) {
   417 	LDC_t();
   418     }
   419     load_reg( R_EAX, Rm );
   420     load_reg( R_ECX, Rn );
   421     ADC_r32_r32( R_EAX, R_ECX );
   422     store_reg( R_ECX, Rn );
   423     SETC_t();
   424     sh4_x86.tstate = TSTATE_C;
   425 :}
   426 ADDV Rm, Rn {:
   427     COUNT_INST(I_ADDV);
   428     load_reg( R_EAX, Rm );
   429     load_reg( R_ECX, Rn );
   430     ADD_r32_r32( R_EAX, R_ECX );
   431     store_reg( R_ECX, Rn );
   432     SETO_t();
   433     sh4_x86.tstate = TSTATE_O;
   434 :}
   435 AND Rm, Rn {:
   436     COUNT_INST(I_AND);
   437     load_reg( R_EAX, Rm );
   438     load_reg( R_ECX, Rn );
   439     AND_r32_r32( R_EAX, R_ECX );
   440     store_reg( R_ECX, Rn );
   441     sh4_x86.tstate = TSTATE_NONE;
   442 :}
   443 AND #imm, R0 {:  
   444     COUNT_INST(I_ANDI);
   445     load_reg( R_EAX, 0 );
   446     AND_imm32_r32(imm, R_EAX); 
   447     store_reg( R_EAX, 0 );
   448     sh4_x86.tstate = TSTATE_NONE;
   449 :}
   450 AND.B #imm, @(R0, GBR) {: 
   451     COUNT_INST(I_ANDB);
   452     load_reg( R_EAX, 0 );
   453     load_spreg( R_ECX, R_GBR );
   454     ADD_r32_r32( R_ECX, R_EAX );
   455     MMU_TRANSLATE_WRITE( R_EAX );
   456     PUSH_realigned_r32(R_EAX);
   457     MEM_READ_BYTE( R_EAX, R_EAX );
   458     POP_realigned_r32(R_ECX);
   459     AND_imm32_r32(imm, R_EAX );
   460     MEM_WRITE_BYTE( R_ECX, R_EAX );
   461     sh4_x86.tstate = TSTATE_NONE;
   462 :}
   463 CMP/EQ Rm, Rn {:  
   464     COUNT_INST(I_CMPEQ);
   465     load_reg( R_EAX, Rm );
   466     load_reg( R_ECX, Rn );
   467     CMP_r32_r32( R_EAX, R_ECX );
   468     SETE_t();
   469     sh4_x86.tstate = TSTATE_E;
   470 :}
   471 CMP/EQ #imm, R0 {:  
   472     COUNT_INST(I_CMPEQI);
   473     load_reg( R_EAX, 0 );
   474     CMP_imm8s_r32(imm, R_EAX);
   475     SETE_t();
   476     sh4_x86.tstate = TSTATE_E;
   477 :}
   478 CMP/GE Rm, Rn {:  
   479     COUNT_INST(I_CMPGE);
   480     load_reg( R_EAX, Rm );
   481     load_reg( R_ECX, Rn );
   482     CMP_r32_r32( R_EAX, R_ECX );
   483     SETGE_t();
   484     sh4_x86.tstate = TSTATE_GE;
   485 :}
   486 CMP/GT Rm, Rn {: 
   487     COUNT_INST(I_CMPGT);
   488     load_reg( R_EAX, Rm );
   489     load_reg( R_ECX, Rn );
   490     CMP_r32_r32( R_EAX, R_ECX );
   491     SETG_t();
   492     sh4_x86.tstate = TSTATE_G;
   493 :}
   494 CMP/HI Rm, Rn {:  
   495     COUNT_INST(I_CMPHI);
   496     load_reg( R_EAX, Rm );
   497     load_reg( R_ECX, Rn );
   498     CMP_r32_r32( R_EAX, R_ECX );
   499     SETA_t();
   500     sh4_x86.tstate = TSTATE_A;
   501 :}
   502 CMP/HS Rm, Rn {: 
   503     COUNT_INST(I_CMPHS);
   504     load_reg( R_EAX, Rm );
   505     load_reg( R_ECX, Rn );
   506     CMP_r32_r32( R_EAX, R_ECX );
   507     SETAE_t();
   508     sh4_x86.tstate = TSTATE_AE;
   509  :}
   510 CMP/PL Rn {: 
   511     COUNT_INST(I_CMPPL);
   512     load_reg( R_EAX, Rn );
   513     CMP_imm8s_r32( 0, R_EAX );
   514     SETG_t();
   515     sh4_x86.tstate = TSTATE_G;
   516 :}
   517 CMP/PZ Rn {:  
   518     COUNT_INST(I_CMPPZ);
   519     load_reg( R_EAX, Rn );
   520     CMP_imm8s_r32( 0, R_EAX );
   521     SETGE_t();
   522     sh4_x86.tstate = TSTATE_GE;
   523 :}
   524 CMP/STR Rm, Rn {:  
   525     COUNT_INST(I_CMPSTR);
   526     load_reg( R_EAX, Rm );
   527     load_reg( R_ECX, Rn );
   528     XOR_r32_r32( R_ECX, R_EAX );
   529     TEST_r8_r8( R_AL, R_AL );
   530     JE_rel8(target1);
   531     TEST_r8_r8( R_AH, R_AH );
   532     JE_rel8(target2);
   533     SHR_imm8_r32( 16, R_EAX );
   534     TEST_r8_r8( R_AL, R_AL );
   535     JE_rel8(target3);
   536     TEST_r8_r8( R_AH, R_AH );
   537     JMP_TARGET(target1);
   538     JMP_TARGET(target2);
   539     JMP_TARGET(target3);
   540     SETE_t();
   541     sh4_x86.tstate = TSTATE_E;
   542 :}
   543 DIV0S Rm, Rn {:
   544     COUNT_INST(I_DIV0S);
   545     load_reg( R_EAX, Rm );
   546     load_reg( R_ECX, Rn );
   547     SHR_imm8_r32( 31, R_EAX );
   548     SHR_imm8_r32( 31, R_ECX );
   549     store_spreg( R_EAX, R_M );
   550     store_spreg( R_ECX, R_Q );
   551     CMP_r32_r32( R_EAX, R_ECX );
   552     SETNE_t();
   553     sh4_x86.tstate = TSTATE_NE;
   554 :}
   555 DIV0U {:  
   556     COUNT_INST(I_DIV0U);
   557     XOR_r32_r32( R_EAX, R_EAX );
   558     store_spreg( R_EAX, R_Q );
   559     store_spreg( R_EAX, R_M );
   560     store_spreg( R_EAX, R_T );
   561     sh4_x86.tstate = TSTATE_C; // works for DIV1
   562 :}
   563 DIV1 Rm, Rn {:
   564     COUNT_INST(I_DIV1);
   565     load_spreg( R_ECX, R_M );
   566     load_reg( R_EAX, Rn );
   567     if( sh4_x86.tstate != TSTATE_C ) {
   568 	LDC_t();
   569     }
   570     RCL1_r32( R_EAX );
   571     SETC_r8( R_DL ); // Q'
   572     CMP_sh4r_r32( R_Q, R_ECX );
   573     JE_rel8(mqequal);
   574     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   575     JMP_rel8(end);
   576     JMP_TARGET(mqequal);
   577     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   578     JMP_TARGET(end);
   579     store_reg( R_EAX, Rn ); // Done with Rn now
   580     SETC_r8(R_AL); // tmp1
   581     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   582     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   583     store_spreg( R_ECX, R_Q );
   584     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   585     MOVZX_r8_r32( R_AL, R_EAX );
   586     store_spreg( R_EAX, R_T );
   587     sh4_x86.tstate = TSTATE_NONE;
   588 :}
   589 DMULS.L Rm, Rn {:  
   590     COUNT_INST(I_DMULS);
   591     load_reg( R_EAX, Rm );
   592     load_reg( R_ECX, Rn );
   593     IMUL_r32(R_ECX);
   594     store_spreg( R_EDX, R_MACH );
   595     store_spreg( R_EAX, R_MACL );
   596     sh4_x86.tstate = TSTATE_NONE;
   597 :}
   598 DMULU.L Rm, Rn {:  
   599     COUNT_INST(I_DMULU);
   600     load_reg( R_EAX, Rm );
   601     load_reg( R_ECX, Rn );
   602     MUL_r32(R_ECX);
   603     store_spreg( R_EDX, R_MACH );
   604     store_spreg( R_EAX, R_MACL );    
   605     sh4_x86.tstate = TSTATE_NONE;
   606 :}
   607 DT Rn {:  
   608     COUNT_INST(I_DT);
   609     load_reg( R_EAX, Rn );
   610     ADD_imm8s_r32( -1, R_EAX );
   611     store_reg( R_EAX, Rn );
   612     SETE_t();
   613     sh4_x86.tstate = TSTATE_E;
   614 :}
   615 EXTS.B Rm, Rn {:  
   616     COUNT_INST(I_EXTSB);
   617     load_reg( R_EAX, Rm );
   618     MOVSX_r8_r32( R_EAX, R_EAX );
   619     store_reg( R_EAX, Rn );
   620 :}
   621 EXTS.W Rm, Rn {:  
   622     COUNT_INST(I_EXTSW);
   623     load_reg( R_EAX, Rm );
   624     MOVSX_r16_r32( R_EAX, R_EAX );
   625     store_reg( R_EAX, Rn );
   626 :}
   627 EXTU.B Rm, Rn {:  
   628     COUNT_INST(I_EXTUB);
   629     load_reg( R_EAX, Rm );
   630     MOVZX_r8_r32( R_EAX, R_EAX );
   631     store_reg( R_EAX, Rn );
   632 :}
   633 EXTU.W Rm, Rn {:  
   634     COUNT_INST(I_EXTUW);
   635     load_reg( R_EAX, Rm );
   636     MOVZX_r16_r32( R_EAX, R_EAX );
   637     store_reg( R_EAX, Rn );
   638 :}
   639 MAC.L @Rm+, @Rn+ {:
   640     COUNT_INST(I_MACL);
   641     if( Rm == Rn ) {
   642 	load_reg( R_EAX, Rm );
   643 	check_ralign32( R_EAX );
   644 	MMU_TRANSLATE_READ( R_EAX );
   645 	PUSH_realigned_r32( R_EAX );
   646 	load_reg( R_EAX, Rn );
   647 	ADD_imm8s_r32( 4, R_EAX );
   648 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   649 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   650 	// Note translate twice in case of page boundaries. Maybe worth
   651 	// adding a page-boundary check to skip the second translation
   652     } else {
   653 	load_reg( R_EAX, Rm );
   654 	check_ralign32( R_EAX );
   655 	MMU_TRANSLATE_READ( R_EAX );
   656 	load_reg( R_ECX, Rn );
   657 	check_ralign32( R_ECX );
   658 	PUSH_realigned_r32( R_EAX );
   659 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   660 	MOV_r32_r32( R_ECX, R_EAX );
   661 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   662 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   663     }
   664     MEM_READ_LONG( R_EAX, R_EAX );
   665     POP_r32( R_ECX );
   666     PUSH_r32( R_EAX );
   667     MEM_READ_LONG( R_ECX, R_EAX );
   668     POP_realigned_r32( R_ECX );
   670     IMUL_r32( R_ECX );
   671     ADD_r32_sh4r( R_EAX, R_MACL );
   672     ADC_r32_sh4r( R_EDX, R_MACH );
   674     load_spreg( R_ECX, R_S );
   675     TEST_r32_r32(R_ECX, R_ECX);
   676     JE_rel8( nosat );
   677     call_func0( signsat48 );
   678     JMP_TARGET( nosat );
   679     sh4_x86.tstate = TSTATE_NONE;
   680 :}
   681 MAC.W @Rm+, @Rn+ {:  
   682     COUNT_INST(I_MACW);
   683     if( Rm == Rn ) {
   684 	load_reg( R_EAX, Rm );
   685 	check_ralign16( R_EAX );
   686 	MMU_TRANSLATE_READ( R_EAX );
   687 	PUSH_realigned_r32( R_EAX );
   688 	load_reg( R_EAX, Rn );
   689 	ADD_imm8s_r32( 2, R_EAX );
   690 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   691 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   692 	// Note translate twice in case of page boundaries. Maybe worth
   693 	// adding a page-boundary check to skip the second translation
   694     } else {
   695 	load_reg( R_EAX, Rm );
   696 	check_ralign16( R_EAX );
   697 	MMU_TRANSLATE_READ( R_EAX );
   698 	load_reg( R_ECX, Rn );
   699 	check_ralign16( R_ECX );
   700 	PUSH_realigned_r32( R_EAX );
   701 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   702 	MOV_r32_r32( R_ECX, R_EAX );
   703 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   704 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   705     }
   706     MEM_READ_WORD( R_EAX, R_EAX );
   707     POP_r32( R_ECX );
   708     PUSH_r32( R_EAX );
   709     MEM_READ_WORD( R_ECX, R_EAX );
   710     POP_realigned_r32( R_ECX );
   711     IMUL_r32( R_ECX );
   713     load_spreg( R_ECX, R_S );
   714     TEST_r32_r32( R_ECX, R_ECX );
   715     JE_rel8( nosat );
   717     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   718     JNO_rel8( end );            // 2
   719     load_imm32( R_EDX, 1 );         // 5
   720     store_spreg( R_EDX, R_MACH );   // 6
   721     JS_rel8( positive );        // 2
   722     load_imm32( R_EAX, 0x80000000 );// 5
   723     store_spreg( R_EAX, R_MACL );   // 6
   724     JMP_rel8(end2);           // 2
   726     JMP_TARGET(positive);
   727     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   728     store_spreg( R_EAX, R_MACL );   // 6
   729     JMP_rel8(end3);            // 2
   731     JMP_TARGET(nosat);
   732     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   733     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   734     JMP_TARGET(end);
   735     JMP_TARGET(end2);
   736     JMP_TARGET(end3);
   737     sh4_x86.tstate = TSTATE_NONE;
   738 :}
   739 MOVT Rn {:  
   740     COUNT_INST(I_MOVT);
   741     load_spreg( R_EAX, R_T );
   742     store_reg( R_EAX, Rn );
   743 :}
   744 MUL.L Rm, Rn {:  
   745     COUNT_INST(I_MULL);
   746     load_reg( R_EAX, Rm );
   747     load_reg( R_ECX, Rn );
   748     MUL_r32( R_ECX );
   749     store_spreg( R_EAX, R_MACL );
   750     sh4_x86.tstate = TSTATE_NONE;
   751 :}
   752 MULS.W Rm, Rn {:
   753     COUNT_INST(I_MULSW);
   754     load_reg16s( R_EAX, Rm );
   755     load_reg16s( R_ECX, Rn );
   756     MUL_r32( R_ECX );
   757     store_spreg( R_EAX, R_MACL );
   758     sh4_x86.tstate = TSTATE_NONE;
   759 :}
   760 MULU.W Rm, Rn {:  
   761     COUNT_INST(I_MULUW);
   762     load_reg16u( R_EAX, Rm );
   763     load_reg16u( R_ECX, Rn );
   764     MUL_r32( R_ECX );
   765     store_spreg( R_EAX, R_MACL );
   766     sh4_x86.tstate = TSTATE_NONE;
   767 :}
   768 NEG Rm, Rn {:
   769     COUNT_INST(I_NEG);
   770     load_reg( R_EAX, Rm );
   771     NEG_r32( R_EAX );
   772     store_reg( R_EAX, Rn );
   773     sh4_x86.tstate = TSTATE_NONE;
   774 :}
   775 NEGC Rm, Rn {:  
   776     COUNT_INST(I_NEGC);
   777     load_reg( R_EAX, Rm );
   778     XOR_r32_r32( R_ECX, R_ECX );
   779     LDC_t();
   780     SBB_r32_r32( R_EAX, R_ECX );
   781     store_reg( R_ECX, Rn );
   782     SETC_t();
   783     sh4_x86.tstate = TSTATE_C;
   784 :}
   785 NOT Rm, Rn {:  
   786     COUNT_INST(I_NOT);
   787     load_reg( R_EAX, Rm );
   788     NOT_r32( R_EAX );
   789     store_reg( R_EAX, Rn );
   790     sh4_x86.tstate = TSTATE_NONE;
   791 :}
   792 OR Rm, Rn {:  
   793     COUNT_INST(I_OR);
   794     load_reg( R_EAX, Rm );
   795     load_reg( R_ECX, Rn );
   796     OR_r32_r32( R_EAX, R_ECX );
   797     store_reg( R_ECX, Rn );
   798     sh4_x86.tstate = TSTATE_NONE;
   799 :}
   800 OR #imm, R0 {:
   801     COUNT_INST(I_ORI);
   802     load_reg( R_EAX, 0 );
   803     OR_imm32_r32(imm, R_EAX);
   804     store_reg( R_EAX, 0 );
   805     sh4_x86.tstate = TSTATE_NONE;
   806 :}
   807 OR.B #imm, @(R0, GBR) {:  
   808     COUNT_INST(I_ORB);
   809     load_reg( R_EAX, 0 );
   810     load_spreg( R_ECX, R_GBR );
   811     ADD_r32_r32( R_ECX, R_EAX );
   812     MMU_TRANSLATE_WRITE( R_EAX );
   813     PUSH_realigned_r32(R_EAX);
   814     MEM_READ_BYTE( R_EAX, R_EAX );
   815     POP_realigned_r32(R_ECX);
   816     OR_imm32_r32(imm, R_EAX );
   817     MEM_WRITE_BYTE( R_ECX, R_EAX );
   818     sh4_x86.tstate = TSTATE_NONE;
   819 :}
   820 ROTCL Rn {:
   821     COUNT_INST(I_ROTCL);
   822     load_reg( R_EAX, Rn );
   823     if( sh4_x86.tstate != TSTATE_C ) {
   824 	LDC_t();
   825     }
   826     RCL1_r32( R_EAX );
   827     store_reg( R_EAX, Rn );
   828     SETC_t();
   829     sh4_x86.tstate = TSTATE_C;
   830 :}
   831 ROTCR Rn {:  
   832     COUNT_INST(I_ROTCR);
   833     load_reg( R_EAX, Rn );
   834     if( sh4_x86.tstate != TSTATE_C ) {
   835 	LDC_t();
   836     }
   837     RCR1_r32( R_EAX );
   838     store_reg( R_EAX, Rn );
   839     SETC_t();
   840     sh4_x86.tstate = TSTATE_C;
   841 :}
   842 ROTL Rn {:  
   843     COUNT_INST(I_ROTL);
   844     load_reg( R_EAX, Rn );
   845     ROL1_r32( R_EAX );
   846     store_reg( R_EAX, Rn );
   847     SETC_t();
   848     sh4_x86.tstate = TSTATE_C;
   849 :}
   850 ROTR Rn {:  
   851     COUNT_INST(I_ROTR);
   852     load_reg( R_EAX, Rn );
   853     ROR1_r32( R_EAX );
   854     store_reg( R_EAX, Rn );
   855     SETC_t();
   856     sh4_x86.tstate = TSTATE_C;
   857 :}
   858 SHAD Rm, Rn {:
   859     COUNT_INST(I_SHAD);
   860     /* Annoyingly enough, not directly convertible */
   861     load_reg( R_EAX, Rn );
   862     load_reg( R_ECX, Rm );
   863     CMP_imm32_r32( 0, R_ECX );
   864     JGE_rel8(doshl);
   866     NEG_r32( R_ECX );      // 2
   867     AND_imm8_r8( 0x1F, R_CL ); // 3
   868     JE_rel8(emptysar);     // 2
   869     SAR_r32_CL( R_EAX );       // 2
   870     JMP_rel8(end);          // 2
   872     JMP_TARGET(emptysar);
   873     SAR_imm8_r32(31, R_EAX );  // 3
   874     JMP_rel8(end2);
   876     JMP_TARGET(doshl);
   877     AND_imm8_r8( 0x1F, R_CL ); // 3
   878     SHL_r32_CL( R_EAX );       // 2
   879     JMP_TARGET(end);
   880     JMP_TARGET(end2);
   881     store_reg( R_EAX, Rn );
   882     sh4_x86.tstate = TSTATE_NONE;
   883 :}
   884 SHLD Rm, Rn {:  
   885     COUNT_INST(I_SHLD);
   886     load_reg( R_EAX, Rn );
   887     load_reg( R_ECX, Rm );
   888     CMP_imm32_r32( 0, R_ECX );
   889     JGE_rel8(doshl);
   891     NEG_r32( R_ECX );      // 2
   892     AND_imm8_r8( 0x1F, R_CL ); // 3
   893     JE_rel8(emptyshr );
   894     SHR_r32_CL( R_EAX );       // 2
   895     JMP_rel8(end);          // 2
   897     JMP_TARGET(emptyshr);
   898     XOR_r32_r32( R_EAX, R_EAX );
   899     JMP_rel8(end2);
   901     JMP_TARGET(doshl);
   902     AND_imm8_r8( 0x1F, R_CL ); // 3
   903     SHL_r32_CL( R_EAX );       // 2
   904     JMP_TARGET(end);
   905     JMP_TARGET(end2);
   906     store_reg( R_EAX, Rn );
   907     sh4_x86.tstate = TSTATE_NONE;
   908 :}
   909 SHAL Rn {: 
       /* Fixed-count shifts.  Single-bit forms (SHAL/SHAR/SHLL/SHLR) copy
        * the shifted-out bit into T and cache the carry flag; multi-bit
        * forms (SHLL2/8/16, SHLR2/8/16) do not affect T.
        * NOTE(review): SHAL emits the same code as SHLL (a 1-bit left
        * shift is identical either way), and SHLL2/8/16 all count under
        * I_SHLL — presumably deliberate aggregation; confirm vs sh4stat. */
   910     COUNT_INST(I_SHAL);
   911     load_reg( R_EAX, Rn );
   912     SHL1_r32( R_EAX );
   913     SETC_t();
   914     store_reg( R_EAX, Rn );
   915     sh4_x86.tstate = TSTATE_C;
   916 :}
   917 SHAR Rn {:  
   918     COUNT_INST(I_SHAR);
   919     load_reg( R_EAX, Rn );
   920     SAR1_r32( R_EAX );
   921     SETC_t();
   922     store_reg( R_EAX, Rn );
   923     sh4_x86.tstate = TSTATE_C;
   924 :}
   925 SHLL Rn {:  
   926     COUNT_INST(I_SHLL);
   927     load_reg( R_EAX, Rn );
   928     SHL1_r32( R_EAX );
   929     SETC_t();
   930     store_reg( R_EAX, Rn );
   931     sh4_x86.tstate = TSTATE_C;
   932 :}
   933 SHLL2 Rn {:
   934     COUNT_INST(I_SHLL);
   935     load_reg( R_EAX, Rn );
   936     SHL_imm8_r32( 2, R_EAX );
   937     store_reg( R_EAX, Rn );
   938     sh4_x86.tstate = TSTATE_NONE;
   939 :}
   940 SHLL8 Rn {:  
   941     COUNT_INST(I_SHLL);
   942     load_reg( R_EAX, Rn );
   943     SHL_imm8_r32( 8, R_EAX );
   944     store_reg( R_EAX, Rn );
   945     sh4_x86.tstate = TSTATE_NONE;
   946 :}
   947 SHLL16 Rn {:  
   948     COUNT_INST(I_SHLL);
   949     load_reg( R_EAX, Rn );
   950     SHL_imm8_r32( 16, R_EAX );
   951     store_reg( R_EAX, Rn );
   952     sh4_x86.tstate = TSTATE_NONE;
   953 :}
   954 SHLR Rn {:  
   955     COUNT_INST(I_SHLR);
   956     load_reg( R_EAX, Rn );
   957     SHR1_r32( R_EAX );
   958     SETC_t();
   959     store_reg( R_EAX, Rn );
   960     sh4_x86.tstate = TSTATE_C;
   961 :}
   962 SHLR2 Rn {:  
   963     COUNT_INST(I_SHLR);
   964     load_reg( R_EAX, Rn );
   965     SHR_imm8_r32( 2, R_EAX );
   966     store_reg( R_EAX, Rn );
   967     sh4_x86.tstate = TSTATE_NONE;
   968 :}
   969 SHLR8 Rn {:  
   970     COUNT_INST(I_SHLR);
   971     load_reg( R_EAX, Rn );
   972     SHR_imm8_r32( 8, R_EAX );
   973     store_reg( R_EAX, Rn );
   974     sh4_x86.tstate = TSTATE_NONE;
   975 :}
   976 SHLR16 Rn {:  
   977     COUNT_INST(I_SHLR);
   978     load_reg( R_EAX, Rn );
   979     SHR_imm8_r32( 16, R_EAX );
   980     store_reg( R_EAX, Rn );
   981     sh4_x86.tstate = TSTATE_NONE;
   982 :}
   983 SUB Rm, Rn {:  
       /* Rn -= Rm.  Plain SUB leaves T untouched; flag cache invalidated. */
   984     COUNT_INST(I_SUB);
   985     load_reg( R_EAX, Rm );
   986     load_reg( R_ECX, Rn );
   987     SUB_r32_r32( R_EAX, R_ECX );
   988     store_reg( R_ECX, Rn );
   989     sh4_x86.tstate = TSTATE_NONE;
   990 :}
   991 SUBC Rm, Rn {:  
       /* Rn = Rn - Rm - T; borrow out -> T.  If the x86 carry flag already
        * mirrors T (tstate == TSTATE_C) the LDC_t reload is skipped. */
   992     COUNT_INST(I_SUBC);
   993     load_reg( R_EAX, Rm );
   994     load_reg( R_ECX, Rn );
   995     if( sh4_x86.tstate != TSTATE_C ) {
   996 	LDC_t();
   997     }
   998     SBB_r32_r32( R_EAX, R_ECX );
   999     store_reg( R_ECX, Rn );
  1000     SETC_t();
  1001     sh4_x86.tstate = TSTATE_C;
  1002 :}
  1003 SUBV Rm, Rn {:  
       /* Rn -= Rm with signed-overflow detection: x86 OF -> T. */
  1004     COUNT_INST(I_SUBV);
  1005     load_reg( R_EAX, Rm );
  1006     load_reg( R_ECX, Rn );
  1007     SUB_r32_r32( R_EAX, R_ECX );
  1008     store_reg( R_ECX, Rn );
  1009     SETO_t();
  1010     sh4_x86.tstate = TSTATE_O;
  1011 :}
  1012 SWAP.B Rm, Rn {:  
       /* Swap the low two bytes of Rm into Rn (upper 16 bits copied as-is).
        * tstate is deliberately left alone: XCHG r8,r8 preserves EFLAGS. */
  1013     COUNT_INST(I_SWAPB);
  1014     load_reg( R_EAX, Rm );
  1015     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1016     store_reg( R_EAX, Rn );
  1017 :}
  1018 SWAP.W Rm, Rn {:  
       /* Swap the upper and lower 16-bit halves of Rm into Rn. */
  1019     COUNT_INST(I_SWAPW); // was I_SWAPB: SWAP.W was being credited to the SWAP.B counter
  1020     load_reg( R_EAX, Rm );
  1021     MOV_r32_r32( R_EAX, R_ECX );
  1022     SHL_imm8_r32( 16, R_ECX );
  1023     SHR_imm8_r32( 16, R_EAX );
  1024     OR_r32_r32( R_EAX, R_ECX );
  1025     store_reg( R_ECX, Rn );
  1026     sh4_x86.tstate = TSTATE_NONE;
  1027 :}
  1028 TAS.B @Rn {:  
       /* Test-and-set: T = (mem[Rn] == 0), then write the byte back with
        * bit 7 set.  The translated address is pushed around the read so
        * the write goes to the same physical location. */
  1029     COUNT_INST(I_TASB);
  1030     load_reg( R_EAX, Rn );
  1031     MMU_TRANSLATE_WRITE( R_EAX );
  1032     PUSH_realigned_r32( R_EAX );
  1033     MEM_READ_BYTE( R_EAX, R_EAX );
  1034     TEST_r8_r8( R_AL, R_AL );
  1035     SETE_t();
  1036     OR_imm8_r8( 0x80, R_AL );
  1037     POP_realigned_r32( R_ECX );
  1038     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1039     sh4_x86.tstate = TSTATE_NONE;
  1040 :}
  1041 TST Rm, Rn {:  
       /* T = ((Rm & Rn) == 0); ZF cached as the current flag state. */
  1042     COUNT_INST(I_TST);
  1043     load_reg( R_EAX, Rm );
  1044     load_reg( R_ECX, Rn );
  1045     TEST_r32_r32( R_EAX, R_ECX );
  1046     SETE_t();
  1047     sh4_x86.tstate = TSTATE_E;
  1048 :}
  1049 TST #imm, R0 {:  
       /* T = ((R0 & imm) == 0). */
  1050     COUNT_INST(I_TSTI);
  1051     load_reg( R_EAX, 0 );
  1052     TEST_imm32_r32( imm, R_EAX );
  1053     SETE_t();
  1054     sh4_x86.tstate = TSTATE_E;
  1055 :}
  1056 TST.B #imm, @(R0, GBR) {:  
       /* T = ((mem[R0 + GBR] & imm) == 0). */
  1057     COUNT_INST(I_TSTB);
  1058     load_reg( R_EAX, 0);
       /* Fix: GBR is a system register and must be fetched with load_spreg
        * (as AND.B/OR.B/XOR.B do); load_reg indexes the general-purpose
        * file r[] with the GBR offset, reading garbage. */
  1059     load_spreg( R_ECX, R_GBR );
  1060     ADD_r32_r32( R_ECX, R_EAX );
  1061     MMU_TRANSLATE_READ( R_EAX );
  1062     MEM_READ_BYTE( R_EAX, R_EAX );
  1063     TEST_imm8_r8( imm, R_AL );
  1064     SETE_t();
  1065     sh4_x86.tstate = TSTATE_E;
  1066 :}
  1067 XOR Rm, Rn {:  
       /* Rn ^= Rm. */
  1068     COUNT_INST(I_XOR);
  1069     load_reg( R_EAX, Rm );
  1070     load_reg( R_ECX, Rn );
  1071     XOR_r32_r32( R_EAX, R_ECX );
  1072     store_reg( R_ECX, Rn );
  1073     sh4_x86.tstate = TSTATE_NONE;
  1074 :}
  1075 XOR #imm, R0 {:  
       /* R0 ^= imm (zero-extended immediate). */
  1076     COUNT_INST(I_XORI);
  1077     load_reg( R_EAX, 0 );
  1078     XOR_imm32_r32( imm, R_EAX );
  1079     store_reg( R_EAX, 0 );
  1080     sh4_x86.tstate = TSTATE_NONE;
  1081 :}
  1082 XOR.B #imm, @(R0, GBR) {:  
       /* Read-modify-write byte at R0+GBR; address preserved across the
        * read via push/pop so the write hits the same location. */
  1083     COUNT_INST(I_XORB);
  1084     load_reg( R_EAX, 0 );
  1085     load_spreg( R_ECX, R_GBR );
  1086     ADD_r32_r32( R_ECX, R_EAX );
  1087     MMU_TRANSLATE_WRITE( R_EAX );
  1088     PUSH_realigned_r32(R_EAX);
  1089     MEM_READ_BYTE(R_EAX, R_EAX);
  1090     POP_realigned_r32(R_ECX);
  1091     XOR_imm32_r32( imm, R_EAX );
  1092     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1093     sh4_x86.tstate = TSTATE_NONE;
  1094 :}
  1095 XTRCT Rm, Rn {:
       /* Rn = (Rm << 16) | (Rn >> 16): middle 32 bits of the Rm:Rn pair. */
  1096     COUNT_INST(I_XTRCT);
  1097     load_reg( R_EAX, Rm );
  1098     load_reg( R_ECX, Rn );
  1099     SHL_imm8_r32( 16, R_EAX );
  1100     SHR_imm8_r32( 16, R_ECX );
  1101     OR_r32_r32( R_EAX, R_ECX );
  1102     store_reg( R_ECX, Rn );
  1103     sh4_x86.tstate = TSTATE_NONE;
  1104 :}
  1106 /* Data move instructions */
  1107 MOV Rm, Rn {:  
       /* Register-to-register copy; flags (and the tstate cache) untouched. */
  1108     COUNT_INST(I_MOV);
  1109     load_reg( R_EAX, Rm );
  1110     store_reg( R_EAX, Rn );
  1111 :}
  1112 MOV #imm, Rn {:  
       /* Load sign-extended 8-bit immediate into Rn. */
  1113     COUNT_INST(I_MOVI);
  1114     load_imm32( R_EAX, imm );
  1115     store_reg( R_EAX, Rn );
  1116 :}
  1117 MOV.B Rm, @Rn {:  
       /* Byte stores: translate the effective address, then write the low
        * byte of the source register.  In the pre-decrement form the SH4
        * register is only decremented (ADD_imm8s_sh4r) after translation
        * succeeds, so a TLB exception leaves Rn unmodified. */
  1118     COUNT_INST(I_MOVB);
  1119     load_reg( R_EAX, Rn );
  1120     MMU_TRANSLATE_WRITE( R_EAX );
  1121     load_reg( R_EDX, Rm );
  1122     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1123     sh4_x86.tstate = TSTATE_NONE;
  1124 :}
  1125 MOV.B Rm, @-Rn {:  
  1126     COUNT_INST(I_MOVB);
  1127     load_reg( R_EAX, Rn );
  1128     ADD_imm8s_r32( -1, R_EAX );
  1129     MMU_TRANSLATE_WRITE( R_EAX );
  1130     load_reg( R_EDX, Rm );
  1131     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1132     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1133     sh4_x86.tstate = TSTATE_NONE;
  1134 :}
  1135 MOV.B Rm, @(R0, Rn) {:  
  1136     COUNT_INST(I_MOVB);
  1137     load_reg( R_EAX, 0 );
  1138     load_reg( R_ECX, Rn );
  1139     ADD_r32_r32( R_ECX, R_EAX );
  1140     MMU_TRANSLATE_WRITE( R_EAX );
  1141     load_reg( R_EDX, Rm );
  1142     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1143     sh4_x86.tstate = TSTATE_NONE;
  1144 :}
  1145 MOV.B R0, @(disp, GBR) {:  
  1146     COUNT_INST(I_MOVB);
  1147     load_spreg( R_EAX, R_GBR );
  1148     ADD_imm32_r32( disp, R_EAX );
  1149     MMU_TRANSLATE_WRITE( R_EAX );
  1150     load_reg( R_EDX, 0 );
  1151     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1152     sh4_x86.tstate = TSTATE_NONE;
  1153 :}
  1154 MOV.B R0, @(disp, Rn) {:  
  1155     COUNT_INST(I_MOVB);
  1156     load_reg( R_EAX, Rn );
  1157     ADD_imm32_r32( disp, R_EAX );
  1158     MMU_TRANSLATE_WRITE( R_EAX );
  1159     load_reg( R_EDX, 0 );
  1160     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1161     sh4_x86.tstate = TSTATE_NONE;
  1162 :}
  1163 MOV.B @Rm, Rn {:  
       /* Byte loads: MEM_READ_BYTE sign-extends into the full register. */
  1164     COUNT_INST(I_MOVB);
  1165     load_reg( R_EAX, Rm );
  1166     MMU_TRANSLATE_READ( R_EAX );
  1167     MEM_READ_BYTE( R_EAX, R_EAX );
  1168     store_reg( R_EAX, Rn );
  1169     sh4_x86.tstate = TSTATE_NONE;
  1170 :}
  1171 MOV.B @Rm+, Rn {:  
       /* Post-increment happens after translation but before the read, so
        * a TLB miss leaves Rm unchanged. */
  1172     COUNT_INST(I_MOVB);
  1173     load_reg( R_EAX, Rm );
  1174     MMU_TRANSLATE_READ( R_EAX );
  1175     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1176     MEM_READ_BYTE( R_EAX, R_EAX );
  1177     store_reg( R_EAX, Rn );
  1178     sh4_x86.tstate = TSTATE_NONE;
  1179 :}
  1180 MOV.B @(R0, Rm), Rn {:  
       /* Load sign-extended byte from R0 + Rm into Rn. */
  1181     COUNT_INST(I_MOVB);
  1182     load_reg( R_EAX, 0 );
  1183     load_reg( R_ECX, Rm );
  1184     ADD_r32_r32( R_ECX, R_EAX );
  1185     MMU_TRANSLATE_READ( R_EAX );  // added missing ';' — every other call site terminates this macro with one
  1186     MEM_READ_BYTE( R_EAX, R_EAX );
  1187     store_reg( R_EAX, Rn );
  1188     sh4_x86.tstate = TSTATE_NONE;
  1189 :}
  1190 MOV.B @(disp, GBR), R0 {:  
       /* Load sign-extended byte from GBR + disp into R0. */
  1191     COUNT_INST(I_MOVB);
  1192     load_spreg( R_EAX, R_GBR );
  1193     ADD_imm32_r32( disp, R_EAX );
  1194     MMU_TRANSLATE_READ( R_EAX );
  1195     MEM_READ_BYTE( R_EAX, R_EAX );
  1196     store_reg( R_EAX, 0 );
  1197     sh4_x86.tstate = TSTATE_NONE;
  1198 :}
  1199 MOV.B @(disp, Rm), R0 {:  
       /* Load sign-extended byte from Rm + disp into R0. */
  1200     COUNT_INST(I_MOVB);
  1201     load_reg( R_EAX, Rm );
  1202     ADD_imm32_r32( disp, R_EAX );
  1203     MMU_TRANSLATE_READ( R_EAX );
  1204     MEM_READ_BYTE( R_EAX, R_EAX );
  1205     store_reg( R_EAX, 0 );
  1206     sh4_x86.tstate = TSTATE_NONE;
  1207 :}
  1208 MOV.L Rm, @Rn {:
       /* Longword moves: 4-byte alignment is checked (check_[rw]align32)
        * before address translation; pre-dec/post-inc register updates are
        * ordered after translation so a TLB exception is side-effect free. */
  1209     COUNT_INST(I_MOVL);
  1210     load_reg( R_EAX, Rn );
  1211     check_walign32(R_EAX);
  1212     MMU_TRANSLATE_WRITE( R_EAX );
  1213     load_reg( R_EDX, Rm );
  1214     MEM_WRITE_LONG( R_EAX, R_EDX );
  1215     sh4_x86.tstate = TSTATE_NONE;
  1216 :}
  1217 MOV.L Rm, @-Rn {:  
  1218     COUNT_INST(I_MOVL);
  1219     load_reg( R_EAX, Rn );
  1220     ADD_imm8s_r32( -4, R_EAX );
  1221     check_walign32( R_EAX );
  1222     MMU_TRANSLATE_WRITE( R_EAX );
  1223     load_reg( R_EDX, Rm );
  1224     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1225     MEM_WRITE_LONG( R_EAX, R_EDX );
  1226     sh4_x86.tstate = TSTATE_NONE;
  1227 :}
  1228 MOV.L Rm, @(R0, Rn) {:  
  1229     COUNT_INST(I_MOVL);
  1230     load_reg( R_EAX, 0 );
  1231     load_reg( R_ECX, Rn );
  1232     ADD_r32_r32( R_ECX, R_EAX );
  1233     check_walign32( R_EAX );
  1234     MMU_TRANSLATE_WRITE( R_EAX );
  1235     load_reg( R_EDX, Rm );
  1236     MEM_WRITE_LONG( R_EAX, R_EDX );
  1237     sh4_x86.tstate = TSTATE_NONE;
  1238 :}
  1239 MOV.L R0, @(disp, GBR) {:  
  1240     COUNT_INST(I_MOVL);
  1241     load_spreg( R_EAX, R_GBR );
  1242     ADD_imm32_r32( disp, R_EAX );
  1243     check_walign32( R_EAX );
  1244     MMU_TRANSLATE_WRITE( R_EAX );
  1245     load_reg( R_EDX, 0 );
  1246     MEM_WRITE_LONG( R_EAX, R_EDX );
  1247     sh4_x86.tstate = TSTATE_NONE;
  1248 :}
  1249 MOV.L Rm, @(disp, Rn) {:  
  1250     COUNT_INST(I_MOVL);
  1251     load_reg( R_EAX, Rn );
  1252     ADD_imm32_r32( disp, R_EAX );
  1253     check_walign32( R_EAX );
  1254     MMU_TRANSLATE_WRITE( R_EAX );
  1255     load_reg( R_EDX, Rm );
  1256     MEM_WRITE_LONG( R_EAX, R_EDX );
  1257     sh4_x86.tstate = TSTATE_NONE;
  1258 :}
  1259 MOV.L @Rm, Rn {:  
  1260     COUNT_INST(I_MOVL);
  1261     load_reg( R_EAX, Rm );
  1262     check_ralign32( R_EAX );
  1263     MMU_TRANSLATE_READ( R_EAX );
  1264     MEM_READ_LONG( R_EAX, R_EAX );
  1265     store_reg( R_EAX, Rn );
  1266     sh4_x86.tstate = TSTATE_NONE;
  1267 :}
  1268 MOV.L @Rm+, Rn {:  
  1269     COUNT_INST(I_MOVL);
  1270     load_reg( R_EAX, Rm );
  1271     check_ralign32( R_EAX );
  1272     MMU_TRANSLATE_READ( R_EAX );
  1273     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1274     MEM_READ_LONG( R_EAX, R_EAX );
  1275     store_reg( R_EAX, Rn );
  1276     sh4_x86.tstate = TSTATE_NONE;
  1277 :}
  1278 MOV.L @(R0, Rm), Rn {:  
  1279     COUNT_INST(I_MOVL);
  1280     load_reg( R_EAX, 0 );
  1281     load_reg( R_ECX, Rm );
  1282     ADD_r32_r32( R_ECX, R_EAX );
  1283     check_ralign32( R_EAX );
  1284     MMU_TRANSLATE_READ( R_EAX );
  1285     MEM_READ_LONG( R_EAX, R_EAX );
  1286     store_reg( R_EAX, Rn );
  1287     sh4_x86.tstate = TSTATE_NONE;
  1288 :}
  1289 MOV.L @(disp, GBR), R0 {:
  1290     COUNT_INST(I_MOVL);
  1291     load_spreg( R_EAX, R_GBR );
  1292     ADD_imm32_r32( disp, R_EAX );
  1293     check_ralign32( R_EAX );
  1294     MMU_TRANSLATE_READ( R_EAX );
  1295     MEM_READ_LONG( R_EAX, R_EAX );
  1296     store_reg( R_EAX, 0 );
  1297     sh4_x86.tstate = TSTATE_NONE;
  1298 :}
  1299 MOV.L @(disp, PC), Rn {:  
       /* PC-relative longword load.  If the target lies in the current
        * icache page the value is fetched directly from host memory;
        * otherwise the address is computed from sh4r.pc and goes through
        * the normal MMU/memory path. */
  1300     COUNT_INST(I_MOVLPC);
  1301     if( sh4_x86.in_delay_slot ) {
  1302 	SLOTILLEGAL();
  1303     } else {
  1304 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1305 	if( IS_IN_ICACHE(target) ) {
  1306 	    // If the target address is in the same page as the code, it's
  1307 	    // pretty safe to just ref it directly and circumvent the whole
  1308 	    // memory subsystem. (this is a big performance win)
  1310 	    // FIXME: There's a corner-case that's not handled here when
  1311 	    // the current code-page is in the ITLB but not in the UTLB.
  1312 	    // (should generate a TLB miss although need to test SH4 
  1313 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1314 	    // behaviour though.
  1315 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1316 	    MOV_moff32_EAX( ptr );
  1317 	} else {
  1318 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1319 	    // different virtual address than the translation was done with,
  1320 	    // but we can safely assume that the low bits are the same.
  1321 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1322 	    ADD_sh4r_r32( R_PC, R_EAX );
  1323 	    MMU_TRANSLATE_READ( R_EAX );
  1324 	    MEM_READ_LONG( R_EAX, R_EAX );
  1325 	    sh4_x86.tstate = TSTATE_NONE;
  1326 	} // restored: closing brace dropped from this extract
  1327 	store_reg( R_EAX, Rn );
  1328     } // restored: closing brace dropped from this extract
  1329 :}
  1330 MOV.L @(disp, Rm), Rn {:  
       /* Load longword from Rm + disp into Rn (disp fits in a signed 8-bit
        * x86 immediate here, hence ADD_imm8s). */
  1331     COUNT_INST(I_MOVL);
  1332     load_reg( R_EAX, Rm );
  1333     ADD_imm8s_r32( disp, R_EAX );
  1334     check_ralign32( R_EAX );
  1335     MMU_TRANSLATE_READ( R_EAX );
  1336     MEM_READ_LONG( R_EAX, R_EAX );
  1337     store_reg( R_EAX, Rn );
  1338     sh4_x86.tstate = TSTATE_NONE;
  1339 :}
  1340 MOV.W Rm, @Rn {:  
       /* Store low 16 bits of Rm at Rn (2-byte alignment checked first). */
  1341     COUNT_INST(I_MOVW);
  1342     load_reg( R_EAX, Rn );
  1343     check_walign16( R_EAX );
  1344     MMU_TRANSLATE_WRITE( R_EAX );  // added missing ';' — every other call site terminates this macro with one
  1345     load_reg( R_EDX, Rm );
  1346     MEM_WRITE_WORD( R_EAX, R_EDX );
  1347     sh4_x86.tstate = TSTATE_NONE;
  1348 :}
  1349 MOV.W Rm, @-Rn {:  
       /* Word moves: 2-byte alignment checked before translation; register
        * pre-dec/post-inc ordered after translation (TLB-exception safe).
        * Word loads sign-extend into the destination register. */
  1350     COUNT_INST(I_MOVW);
  1351     load_reg( R_EAX, Rn );
  1352     ADD_imm8s_r32( -2, R_EAX );
  1353     check_walign16( R_EAX );
  1354     MMU_TRANSLATE_WRITE( R_EAX );
  1355     load_reg( R_EDX, Rm );
  1356     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1357     MEM_WRITE_WORD( R_EAX, R_EDX );
  1358     sh4_x86.tstate = TSTATE_NONE;
  1359 :}
  1360 MOV.W Rm, @(R0, Rn) {:  
  1361     COUNT_INST(I_MOVW);
  1362     load_reg( R_EAX, 0 );
  1363     load_reg( R_ECX, Rn );
  1364     ADD_r32_r32( R_ECX, R_EAX );
  1365     check_walign16( R_EAX );
  1366     MMU_TRANSLATE_WRITE( R_EAX );
  1367     load_reg( R_EDX, Rm );
  1368     MEM_WRITE_WORD( R_EAX, R_EDX );
  1369     sh4_x86.tstate = TSTATE_NONE;
  1370 :}
  1371 MOV.W R0, @(disp, GBR) {:  
  1372     COUNT_INST(I_MOVW);
  1373     load_spreg( R_EAX, R_GBR );
  1374     ADD_imm32_r32( disp, R_EAX );
  1375     check_walign16( R_EAX );
  1376     MMU_TRANSLATE_WRITE( R_EAX );
  1377     load_reg( R_EDX, 0 );
  1378     MEM_WRITE_WORD( R_EAX, R_EDX );
  1379     sh4_x86.tstate = TSTATE_NONE;
  1380 :}
  1381 MOV.W R0, @(disp, Rn) {:  
  1382     COUNT_INST(I_MOVW);
  1383     load_reg( R_EAX, Rn );
  1384     ADD_imm32_r32( disp, R_EAX );
  1385     check_walign16( R_EAX );
  1386     MMU_TRANSLATE_WRITE( R_EAX );
  1387     load_reg( R_EDX, 0 );
  1388     MEM_WRITE_WORD( R_EAX, R_EDX );
  1389     sh4_x86.tstate = TSTATE_NONE;
  1390 :}
  1391 MOV.W @Rm, Rn {:  
  1392     COUNT_INST(I_MOVW);
  1393     load_reg( R_EAX, Rm );
  1394     check_ralign16( R_EAX );
  1395     MMU_TRANSLATE_READ( R_EAX );
  1396     MEM_READ_WORD( R_EAX, R_EAX );
  1397     store_reg( R_EAX, Rn );
  1398     sh4_x86.tstate = TSTATE_NONE;
  1399 :}
  1400 MOV.W @Rm+, Rn {:  
  1401     COUNT_INST(I_MOVW);
  1402     load_reg( R_EAX, Rm );
  1403     check_ralign16( R_EAX );
  1404     MMU_TRANSLATE_READ( R_EAX );
  1405     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1406     MEM_READ_WORD( R_EAX, R_EAX );
  1407     store_reg( R_EAX, Rn );
  1408     sh4_x86.tstate = TSTATE_NONE;
  1409 :}
  1410 MOV.W @(R0, Rm), Rn {:  
  1411     COUNT_INST(I_MOVW);
  1412     load_reg( R_EAX, 0 );
  1413     load_reg( R_ECX, Rm );
  1414     ADD_r32_r32( R_ECX, R_EAX );
  1415     check_ralign16( R_EAX );
  1416     MMU_TRANSLATE_READ( R_EAX );
  1417     MEM_READ_WORD( R_EAX, R_EAX );
  1418     store_reg( R_EAX, Rn );
  1419     sh4_x86.tstate = TSTATE_NONE;
  1420 :}
  1421 MOV.W @(disp, GBR), R0 {:  
  1422     COUNT_INST(I_MOVW);
  1423     load_spreg( R_EAX, R_GBR );
  1424     ADD_imm32_r32( disp, R_EAX );
  1425     check_ralign16( R_EAX );
  1426     MMU_TRANSLATE_READ( R_EAX );
  1427     MEM_READ_WORD( R_EAX, R_EAX );
  1428     store_reg( R_EAX, 0 );
  1429     sh4_x86.tstate = TSTATE_NONE;
  1430 :}
  1431 MOV.W @(disp, PC), Rn {:  
       /* PC-relative word load with sign-extension; same icache shortcut
        * as MOV.L @(disp, PC), Rn. */
  1432     COUNT_INST(I_MOVW);
  1433     if( sh4_x86.in_delay_slot ) {
  1434 	SLOTILLEGAL();
  1435     } else {
  1436 	// See comments for MOV.L @(disp, PC), Rn
  1437 	uint32_t target = pc + disp + 4;
  1438 	if( IS_IN_ICACHE(target) ) {
  1439 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1440 	    MOV_moff32_EAX( ptr );
  1441 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1442 	} else {
  1443 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1444 	    ADD_sh4r_r32( R_PC, R_EAX );
  1445 	    MMU_TRANSLATE_READ( R_EAX );
  1446 	    MEM_READ_WORD( R_EAX, R_EAX );
  1447 	    sh4_x86.tstate = TSTATE_NONE;
  1448 	} // restored: closing brace dropped from this extract
  1449 	store_reg( R_EAX, Rn );
  1450     } // restored: closing brace dropped from this extract
  1451 :}
  1452 MOV.W @(disp, Rm), R0 {:  
       /* Load sign-extended word from Rm + disp into R0. */
  1453     COUNT_INST(I_MOVW);
  1454     load_reg( R_EAX, Rm );
  1455     ADD_imm32_r32( disp, R_EAX );
  1456     check_ralign16( R_EAX );
  1457     MMU_TRANSLATE_READ( R_EAX );
  1458     MEM_READ_WORD( R_EAX, R_EAX );
  1459     store_reg( R_EAX, 0 );
  1460     sh4_x86.tstate = TSTATE_NONE;
  1461 :}
  1462 MOVA @(disp, PC), R0 {:  
       /* R0 = (PC & ~3) + disp + 4 — effective-address-only form, no
        * memory access.  Computed relative to sh4r.pc at runtime. */
  1463     COUNT_INST(I_MOVA);
  1464     if( sh4_x86.in_delay_slot ) {
  1465 	SLOTILLEGAL();
  1466     } else {
  1467 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1468 	ADD_sh4r_r32( R_PC, R_ECX );
  1469 	store_reg( R_ECX, 0 );
  1470 	sh4_x86.tstate = TSTATE_NONE;
  1471     } // restored: closing brace dropped from this extract
  1472 :}
  1473 MOVCA.L R0, @Rn {:  
       /* Emitted identically to MOV.L R0, @Rn — the cache-block allocation
        * hint has no effect in this emulation. */
  1474     COUNT_INST(I_MOVCA);
  1475     load_reg( R_EAX, Rn );
  1476     check_walign32( R_EAX );
  1477     MMU_TRANSLATE_WRITE( R_EAX );
  1478     load_reg( R_EDX, 0 );
  1479     MEM_WRITE_LONG( R_EAX, R_EDX );
  1480     sh4_x86.tstate = TSTATE_NONE;
  1481 :}
  1483 /* Control transfer instructions */
  1484 BF disp {:
       /* Branch (no delay slot) if T == 0: emit a host-side jump over the
        * block exit when T is set. */
  1485     COUNT_INST(I_BF);
  1486     if( sh4_x86.in_delay_slot ) {
  1487 	SLOTILLEGAL();
  1488     } else {
  1489 	sh4vma_t target = disp + pc + 4;
  1490 	JT_rel8( nottaken );
  1491 	exit_block_rel(target, pc+2 );
  1492 	JMP_TARGET(nottaken);
  1493 	return 2;
  1494     } // restored: closing brace dropped from this extract
  1495 :}
  1496 BF/S disp {:
       /* Delayed branch if T == 0.  When the delay slot is translatable the
        * slot instruction is translated twice — once on the taken path,
        * once on the fall-through — with a patched 32-bit conditional jump
        * between them. */
  1497     COUNT_INST(I_BFS);
  1498     if( sh4_x86.in_delay_slot ) {
  1499 	SLOTILLEGAL();
  1500     } else {
  1501 	sh4_x86.in_delay_slot = DELAY_PC;
  1502 	if( UNTRANSLATABLE(pc+2) ) {
  1503 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1504 	    JT_rel8(nottaken);
  1505 	    ADD_imm32_r32( disp, R_EAX );
  1506 	    JMP_TARGET(nottaken);
  1507 	    ADD_sh4r_r32( R_PC, R_EAX );
  1508 	    store_spreg( R_EAX, R_NEW_PC );
  1509 	    exit_block_emu(pc+2);
  1510 	    sh4_x86.branch_taken = TRUE;
  1511 	    return 2;
  1512 	} else {
  1513 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1514 		CMP_imm8s_sh4r( 1, R_T );
  1515 		sh4_x86.tstate = TSTATE_E;
  1516 	    } // restored: closing brace dropped from this extract
  1517 	    sh4vma_t target = disp + pc + 4;
  1518 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1519 	    sh4_translate_instruction(pc+2);
  1520 	    exit_block_rel( target, pc+4 );
  1522 	    // not taken
  1523 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1524 	    sh4_translate_instruction(pc+2);
  1525 	    return 4;
  1526 	} // restored: closing brace dropped from this extract
  1527     } // restored: closing brace dropped from this extract
  1528 :}
  1529 BRA disp {:  
       /* Unconditional delayed branch to pc + disp + 4.  If the delay slot
        * can't be translated, fall back to the emulator with NEW_PC set. */
  1530     COUNT_INST(I_BRA);
  1531     if( sh4_x86.in_delay_slot ) {
  1532 	SLOTILLEGAL();
  1533     } else {
  1534 	sh4_x86.in_delay_slot = DELAY_PC;
  1535 	sh4_x86.branch_taken = TRUE;
  1536 	if( UNTRANSLATABLE(pc+2) ) {
  1537 	    load_spreg( R_EAX, R_PC );
  1538 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1539 	    store_spreg( R_EAX, R_NEW_PC );
  1540 	    exit_block_emu(pc+2);
  1541 	    return 2;
  1542 	} else {
  1543 	    sh4_translate_instruction( pc + 2 );
  1544 	    exit_block_rel( disp + pc + 4, pc+4 );
  1545 	    return 4;
  1546 	} // restored: closing brace dropped from this extract
  1547     } // restored: closing brace dropped from this extract
  1548 :}
  1549 BRAF Rn {:  
       /* Delayed branch to PC + 4 + Rn (register-relative, runtime target:
        * NEW_PC is always computed). */
  1550     COUNT_INST(I_BRAF);
  1551     if( sh4_x86.in_delay_slot ) {
  1552 	SLOTILLEGAL();
  1553     } else {
  1554 	load_spreg( R_EAX, R_PC );
  1555 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1556 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1557 	store_spreg( R_EAX, R_NEW_PC );
  1558 	sh4_x86.in_delay_slot = DELAY_PC;
  1559 	sh4_x86.tstate = TSTATE_NONE;
  1560 	sh4_x86.branch_taken = TRUE;
  1561 	if( UNTRANSLATABLE(pc+2) ) {
  1562 	    exit_block_emu(pc+2);
  1563 	    return 2;
  1564 	} else {
  1565 	    sh4_translate_instruction( pc + 2 );
  1566 	    exit_block_newpcset(pc+2);
  1567 	    return 4;
  1568 	} // restored: closing brace dropped from this extract
  1569     } // restored: closing brace dropped from this extract
  1570 :}
  1571 BSR disp {:  
       /* Delayed subroutine call: PR = PC + 4, branch to pc + disp + 4. */
  1572     COUNT_INST(I_BSR);
  1573     if( sh4_x86.in_delay_slot ) {
  1574 	SLOTILLEGAL();
  1575     } else {
  1576 	load_spreg( R_EAX, R_PC );
  1577 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1578 	store_spreg( R_EAX, R_PR );
  1579 	sh4_x86.in_delay_slot = DELAY_PC;
  1580 	sh4_x86.branch_taken = TRUE;
  1581 	sh4_x86.tstate = TSTATE_NONE;
  1582 	if( UNTRANSLATABLE(pc+2) ) {
  1583 	    ADD_imm32_r32( disp, R_EAX );
  1584 	    store_spreg( R_EAX, R_NEW_PC );
  1585 	    exit_block_emu(pc+2);
  1586 	    return 2;
  1587 	} else {
  1588 	    sh4_translate_instruction( pc + 2 );
  1589 	    exit_block_rel( disp + pc + 4, pc+4 );
  1590 	    return 4;
  1591 	} // restored: closing brace dropped from this extract
  1592     } // restored: closing brace dropped from this extract
  1593 :}
  1594 BSRF Rn {:  
       /* Delayed subroutine call: PR = PC + 4, branch to PR + Rn. */
  1595     COUNT_INST(I_BSRF);
  1596     if( sh4_x86.in_delay_slot ) {
  1597 	SLOTILLEGAL();
  1598     } else {
  1599 	load_spreg( R_EAX, R_PC );
  1600 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1601 	store_spreg( R_EAX, R_PR );
  1602 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1603 	store_spreg( R_EAX, R_NEW_PC );
  1605 	sh4_x86.in_delay_slot = DELAY_PC;
  1606 	sh4_x86.tstate = TSTATE_NONE;
  1607 	sh4_x86.branch_taken = TRUE;
  1608 	if( UNTRANSLATABLE(pc+2) ) {
  1609 	    exit_block_emu(pc+2);
  1610 	    return 2;
  1611 	} else {
  1612 	    sh4_translate_instruction( pc + 2 );
  1613 	    exit_block_newpcset(pc+2);
  1614 	    return 4;
  1615 	} // restored: closing brace dropped from this extract
  1616     } // restored: closing brace dropped from this extract
  1617 :}
  1618 BT disp {:
       /* Branch (no delay slot) if T == 1: jump over the block exit when
        * T is clear. */
  1619     COUNT_INST(I_BT);
  1620     if( sh4_x86.in_delay_slot ) {
  1621 	SLOTILLEGAL();
  1622     } else {
  1623 	sh4vma_t target = disp + pc + 4;
  1624 	JF_rel8( nottaken );
  1625 	exit_block_rel(target, pc+2 );
  1626 	JMP_TARGET(nottaken);
  1627 	return 2;
  1628     } // restored: closing brace dropped from this extract
  1629 :}
  1630 BT/S disp {:
       /* Delayed branch if T == 1; mirror image of BF/S (condition code is
        * inverted via tstate^1 when emitting the JF rel32). */
  1631     COUNT_INST(I_BTS);
  1632     if( sh4_x86.in_delay_slot ) {
  1633 	SLOTILLEGAL();
  1634     } else {
  1635 	sh4_x86.in_delay_slot = DELAY_PC;
  1636 	if( UNTRANSLATABLE(pc+2) ) {
  1637 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1638 	    JF_rel8(nottaken);
  1639 	    ADD_imm32_r32( disp, R_EAX );
  1640 	    JMP_TARGET(nottaken);
  1641 	    ADD_sh4r_r32( R_PC, R_EAX );
  1642 	    store_spreg( R_EAX, R_NEW_PC );
  1643 	    exit_block_emu(pc+2);
  1644 	    sh4_x86.branch_taken = TRUE;
  1645 	    return 2;
  1646 	} else {
  1647 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1648 		CMP_imm8s_sh4r( 1, R_T );
  1649 		sh4_x86.tstate = TSTATE_E;
  1650 	    } // restored: closing brace dropped from this extract
  1651 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1652 	    sh4_translate_instruction(pc+2);
  1653 	    exit_block_rel( disp + pc + 4, pc+4 );
  1654 	    // not taken
  1655 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1656 	    sh4_translate_instruction(pc+2);
  1657 	    return 4;
  1658 	} // restored: closing brace dropped from this extract
  1659     } // restored: closing brace dropped from this extract
  1660 :}
  1661 JMP @Rn {:  
       /* Delayed indirect jump: NEW_PC = Rn. */
  1662     COUNT_INST(I_JMP);
  1663     if( sh4_x86.in_delay_slot ) {
  1664 	SLOTILLEGAL();
  1665     } else {
  1666 	load_reg( R_ECX, Rn );
  1667 	store_spreg( R_ECX, R_NEW_PC );
  1668 	sh4_x86.in_delay_slot = DELAY_PC;
  1669 	sh4_x86.branch_taken = TRUE;
  1670 	if( UNTRANSLATABLE(pc+2) ) {
  1671 	    exit_block_emu(pc+2);
  1672 	    return 2;
  1673 	} else {
  1674 	    sh4_translate_instruction(pc+2);
  1675 	    exit_block_newpcset(pc+2);
  1676 	    return 4;
  1677 	} // restored: closing brace dropped from this extract
  1678     } // restored: closing brace dropped from this extract
  1679 :}
  1680 JSR @Rn {:  
       /* Delayed indirect call: PR = PC + 4, NEW_PC = Rn. */
  1681     COUNT_INST(I_JSR);
  1682     if( sh4_x86.in_delay_slot ) {
  1683 	SLOTILLEGAL();
  1684     } else {
  1685 	load_spreg( R_EAX, R_PC );
  1686 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1687 	store_spreg( R_EAX, R_PR );
  1688 	load_reg( R_ECX, Rn );
  1689 	store_spreg( R_ECX, R_NEW_PC );
  1690 	sh4_x86.in_delay_slot = DELAY_PC;
  1691 	sh4_x86.branch_taken = TRUE;
  1692 	sh4_x86.tstate = TSTATE_NONE;
  1693 	if( UNTRANSLATABLE(pc+2) ) {
  1694 	    exit_block_emu(pc+2);
  1695 	    return 2;
  1696 	} else {
  1697 	    sh4_translate_instruction(pc+2);
  1698 	    exit_block_newpcset(pc+2);
  1699 	    return 4;
  1700 	} // restored: closing brace dropped from this extract
  1701     } // restored: closing brace dropped from this extract
  1702 :}
  1703 RTE {:  
       /* Return from exception (privileged): NEW_PC = SPC, SR = SSR via
        * sh4_write_sr.  Writing SR can change register banks/privilege, so
        * the cached priv/fpuen checks are invalidated. */
  1704     COUNT_INST(I_RTE);
  1705     if( sh4_x86.in_delay_slot ) {
  1706 	SLOTILLEGAL();
  1707     } else {
  1708 	check_priv();
  1709 	load_spreg( R_ECX, R_SPC );
  1710 	store_spreg( R_ECX, R_NEW_PC );
  1711 	load_spreg( R_EAX, R_SSR );
  1712 	call_func1( sh4_write_sr, R_EAX );
  1713 	sh4_x86.in_delay_slot = DELAY_PC;
  1714 	sh4_x86.priv_checked = FALSE;
  1715 	sh4_x86.fpuen_checked = FALSE;
  1716 	sh4_x86.tstate = TSTATE_NONE;
  1717 	sh4_x86.branch_taken = TRUE;
  1718 	if( UNTRANSLATABLE(pc+2) ) {
  1719 	    exit_block_emu(pc+2);
  1720 	    return 2;
  1721 	} else {
  1722 	    sh4_translate_instruction(pc+2);
  1723 	    exit_block_newpcset(pc+2);
  1724 	    return 4;
  1725 	} // restored: closing brace dropped from this extract
  1726     } // restored: closing brace dropped from this extract
  1727 :}
  1728 RTS {:  
       /* Return from subroutine: NEW_PC = PR (delayed). */
  1729     COUNT_INST(I_RTS);
  1730     if( sh4_x86.in_delay_slot ) {
  1731 	SLOTILLEGAL();
  1732     } else {
  1733 	load_spreg( R_ECX, R_PR );
  1734 	store_spreg( R_ECX, R_NEW_PC );
  1735 	sh4_x86.in_delay_slot = DELAY_PC;
  1736 	sh4_x86.branch_taken = TRUE;
  1737 	if( UNTRANSLATABLE(pc+2) ) {
  1738 	    exit_block_emu(pc+2);
  1739 	    return 2;
  1740 	} else {
  1741 	    sh4_translate_instruction(pc+2);
  1742 	    exit_block_newpcset(pc+2);
  1743 	    return 4;
  1744 	} // restored: closing brace dropped from this extract
  1745     } // restored: closing brace dropped from this extract
  1746 :}
  1747 TRAPA #imm {:  
       /* Software trap: commit PC (pc+2), then raise the trap through
        * sh4_raise_trap and leave the block with PC already set. */
  1748     COUNT_INST(I_TRAPA);
  1749     if( sh4_x86.in_delay_slot ) {
  1750 	SLOTILLEGAL();
  1751     } else {
  1752 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1753 	ADD_r32_sh4r( R_ECX, R_PC );
  1754 	load_imm32( R_EAX, imm );
  1755 	call_func1( sh4_raise_trap, R_EAX );
  1756 	sh4_x86.tstate = TSTATE_NONE;
  1757 	exit_block_pcset(pc);
  1758 	sh4_x86.branch_taken = TRUE;
  1759 	return 2;
  1760     } // restored: closing brace dropped from this extract
  1761 :}
  1762 UNDEF {:  
       /* Undefined opcode outside a delay slot: raise the illegal
        * instruction exception. */
  1763     COUNT_INST(I_UNDEF);
  1764     if( sh4_x86.in_delay_slot ) {
  1765 	SLOTILLEGAL();
  1766     } else {
  1767 	JMP_exc(EXC_ILLEGAL);
  1768 	return 2;
  1769     } // restored: closing brace dropped from this extract
  1770 :}
  1772 CLRMAC {:  
       /* MACH = MACL = 0. */
  1773     COUNT_INST(I_CLRMAC);
  1774     XOR_r32_r32(R_EAX, R_EAX);
  1775     store_spreg( R_EAX, R_MACL );
  1776     store_spreg( R_EAX, R_MACH );
  1777     sh4_x86.tstate = TSTATE_NONE;
  1778 :}
  1779 CLRS {:
       /* S = 0, via clearing the x86 carry flag then storing it. */
  1780     COUNT_INST(I_CLRS);
  1781     CLC();
  1782     SETC_sh4r(R_S);
  1783     sh4_x86.tstate = TSTATE_C;
  1784 :}
  1785 CLRT {:  
       /* T = 0. */
  1786     COUNT_INST(I_CLRT);
  1787     CLC();
  1788     SETC_t();
  1789     sh4_x86.tstate = TSTATE_C;
  1790 :}
  1791 SETS {:  
       /* S = 1. */
  1792     COUNT_INST(I_SETS);
  1793     STC();
  1794     SETC_sh4r(R_S);
  1795     sh4_x86.tstate = TSTATE_C;
  1796 :}
  1797 SETT {:  
       /* T = 1. */
  1798     COUNT_INST(I_SETT);
  1799     STC();
  1800     SETC_t();
  1801     sh4_x86.tstate = TSTATE_C;
  1802 :}
  1804 /* Floating point moves */
  1805 FMOV FRm, FRn {:  
       /* FP register move.  FPSCR.SZ is tested at runtime: SZ=0 moves one
        * 32-bit register, SZ=1 moves a 64-bit pair (dr0/dr1 halves). */
  1806     COUNT_INST(I_FMOV1);
  1807     check_fpuen();
  1808     load_spreg( R_ECX, R_FPSCR );
  1809     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1810     JNE_rel8(doublesize);
  1811     load_fr( R_EAX, FRm ); // SZ=0 branch
  1812     store_fr( R_EAX, FRn );
  1813     JMP_rel8(end);
  1814     JMP_TARGET(doublesize);
  1815     load_dr0( R_EAX, FRm );
  1816     load_dr1( R_ECX, FRm );
  1817     store_dr0( R_EAX, FRn );
  1818     store_dr1( R_ECX, FRn );
  1819     JMP_TARGET(end);
  1820     sh4_x86.tstate = TSTATE_NONE;
  1821 :}
  1822 FMOV FRm, @Rn {: 
       /* FP store to @Rn.  Runtime FPSCR.SZ test selects 32-bit (align 4)
        * vs 64-bit pair (align 8) store paths. */
  1823     COUNT_INST(I_FMOV2);
  1824     check_fpuen();
  1825     load_reg( R_EAX, Rn );
  1826     load_spreg( R_EDX, R_FPSCR );
  1827     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1828     JNE_rel8(doublesize);
  1830     check_walign32( R_EAX );
  1831     MMU_TRANSLATE_WRITE( R_EAX );
  1832     load_fr( R_ECX, FRm );
  1833     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1834     JMP_rel8(end);
  1836     JMP_TARGET(doublesize);
  1837     check_walign64( R_EAX );
  1838     MMU_TRANSLATE_WRITE( R_EAX );
  1839     load_dr0( R_ECX, FRm );
  1840     load_dr1( R_EDX, FRm );
  1841     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1842     JMP_TARGET(end);
  1843     sh4_x86.tstate = TSTATE_NONE;
  1844 :}
  1845 FMOV @Rm, FRn {:  
       /* FP load from @Rm; SZ selects 32-bit vs 64-bit pair read. */
  1846     COUNT_INST(I_FMOV5);
  1847     check_fpuen();
  1848     load_reg( R_EAX, Rm );
  1849     load_spreg( R_EDX, R_FPSCR );
  1850     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1851     JNE_rel8(doublesize);
  1853     check_ralign32( R_EAX );
  1854     MMU_TRANSLATE_READ( R_EAX );
  1855     MEM_READ_LONG( R_EAX, R_EAX );
  1856     store_fr( R_EAX, FRn );
  1857     JMP_rel8(end);
  1859     JMP_TARGET(doublesize);
  1860     check_ralign64( R_EAX );
  1861     MMU_TRANSLATE_READ( R_EAX );
  1862     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1863     store_dr0( R_ECX, FRn );
  1864     store_dr1( R_EAX, FRn );
  1865     JMP_TARGET(end);
  1866     sh4_x86.tstate = TSTATE_NONE;
  1867 :}
  1868 FMOV FRm, @-Rn {:  
       /* Pre-decrement FP store: Rn -= 4 (SZ=0) or 8 (SZ=1).  The SH4
        * register is decremented only after address translation succeeds. */
  1869     COUNT_INST(I_FMOV3);
  1870     check_fpuen();
  1871     load_reg( R_EAX, Rn );
  1872     load_spreg( R_EDX, R_FPSCR );
  1873     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1874     JNE_rel8(doublesize);
  1876     check_walign32( R_EAX );
  1877     ADD_imm8s_r32( -4, R_EAX );
  1878     MMU_TRANSLATE_WRITE( R_EAX );
  1879     load_fr( R_ECX, FRm );
  1880     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1881     MEM_WRITE_LONG( R_EAX, R_ECX );
  1882     JMP_rel8(end);
  1884     JMP_TARGET(doublesize);
  1885     check_walign64( R_EAX );
  1886     ADD_imm8s_r32(-8,R_EAX);
  1887     MMU_TRANSLATE_WRITE( R_EAX );
  1888     load_dr0( R_ECX, FRm );
  1889     load_dr1( R_EDX, FRm );
  1890     ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1891     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1892     JMP_TARGET(end);
  1894     sh4_x86.tstate = TSTATE_NONE;
  1895 :}
  1896 FMOV @Rm+, FRn {:
       /* Post-increment FP load: Rm += 4 (SZ=0) or 8 (SZ=1), incremented
        * after translation so a TLB miss leaves Rm unchanged. */
  1897     COUNT_INST(I_FMOV6);
  1898     check_fpuen();
  1899     load_reg( R_EAX, Rm );
  1900     load_spreg( R_EDX, R_FPSCR );
  1901     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1902     JNE_rel8(doublesize);
  1904     check_ralign32( R_EAX );
  1905     MMU_TRANSLATE_READ( R_EAX );
  1906     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1907     MEM_READ_LONG( R_EAX, R_EAX );
  1908     store_fr( R_EAX, FRn );
  1909     JMP_rel8(end);
  1911     JMP_TARGET(doublesize);
  1912     check_ralign64( R_EAX );
  1913     MMU_TRANSLATE_READ( R_EAX );
  1914     ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1915     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1916     store_dr0( R_ECX, FRn );
  1917     store_dr1( R_EAX, FRn );
  1918     JMP_TARGET(end);
  1920     sh4_x86.tstate = TSTATE_NONE;
  1921 :}
  1922 FMOV FRm, @(R0, Rn) {:  
  1923     COUNT_INST(I_FMOV4);
  1924     check_fpuen();
  1925     load_reg( R_EAX, Rn );
  1926     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1927     load_spreg( R_EDX, R_FPSCR );
  1928     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1929     JNE_rel8(doublesize);
  1931     check_walign32( R_EAX );
  1932     MMU_TRANSLATE_WRITE( R_EAX );
  1933     load_fr( R_ECX, FRm );
  1934     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1935     JMP_rel8(end);
  1937     JMP_TARGET(doublesize);
  1938     check_walign64( R_EAX );
  1939     MMU_TRANSLATE_WRITE( R_EAX );
  1940     load_dr0( R_ECX, FRm );
  1941     load_dr1( R_EDX, FRm );
  1942     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1943     JMP_TARGET(end);
  1945     sh4_x86.tstate = TSTATE_NONE;
  1946 :}
  1947 FMOV @(R0, Rm), FRn {:  
  1948     COUNT_INST(I_FMOV7);
  1949     check_fpuen();
  1950     load_reg( R_EAX, Rm );
  1951     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1952     load_spreg( R_EDX, R_FPSCR );
  1953     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1954     JNE_rel8(doublesize);
  1956     check_ralign32( R_EAX );
  1957     MMU_TRANSLATE_READ( R_EAX );
  1958     MEM_READ_LONG( R_EAX, R_EAX );
  1959     store_fr( R_EAX, FRn );
  1960     JMP_rel8(end);
  1962     JMP_TARGET(doublesize);
  1963     check_ralign64( R_EAX );
  1964     MMU_TRANSLATE_READ( R_EAX );
  1965     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1966     store_dr0( R_ECX, FRn );
  1967     store_dr1( R_EAX, FRn );
  1968     JMP_TARGET(end);
  1970     sh4_x86.tstate = TSTATE_NONE;
  1971 :}
/* FLDI0 FRn: load constant 0.0f into FRn. Defined only when FPSCR.PR=0;
 * when PR=1 the store is skipped (emitted code is a no-op). */
FLDI0 FRn {:  /* IFF PR=0 */
    COUNT_INST(I_FLDI0);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(end);
    XOR_r32_r32( R_EAX, R_EAX );
    store_fr( R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FLDI1 FRn: load constant 1.0f (IEEE-754 bits 0x3F800000) into FRn.
 * Defined only when FPSCR.PR=0; skipped when PR=1. */
FLDI1 FRn {:  /* IFF PR=0 */
    COUNT_INST(I_FLDI1);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(end);
    load_imm32(R_EAX, 0x3F800000);
    store_fr( R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FLOAT FPUL, FRn: convert the integer in FPUL to floating point on the x87
 * stack (FILD), then pop as single (PR=0) or double (PR=1). */
FLOAT FPUL, FRn {:  
    COUNT_INST(I_FLOAT);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    FILD_sh4r(R_FPUL);
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    pop_fr( FRn );
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    pop_dr( FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FTRC FRm, FPUL: convert float/double (per FPSCR.PR) to a 32-bit integer,
 * truncating toward zero and saturating to max_int/min_int on overflow.
 * The x87 control word is saved and swapped for a truncating one around
 * FISTP, then restored. */
FTRC FRm, FPUL {:  
    COUNT_INST(I_FTRC);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr( FRm );
    JMP_rel8(doop);
    JMP_TARGET(doubleprec);
    push_dr( FRm );
    JMP_TARGET( doop );
    /* Saturation checks: compare operand against max_int and min_int */
    load_imm32( R_ECX, (uint32_t)&max_int );
    FILD_r32ind( R_ECX );
    FCOMIP_st(1);
    JNA_rel8( sat );
    load_imm32( R_ECX, (uint32_t)&min_int );  // 5
    FILD_r32ind( R_ECX );           // 2
    FCOMIP_st(1);                   // 2
    JAE_rel8( sat2 );            // 2
    /* In range: truncate with a temporary round-to-zero control word */
    load_imm32( R_EAX, (uint32_t)&save_fcw );
    FNSTCW_r32ind( R_EAX );
    load_imm32( R_EDX, (uint32_t)&trunc_fcw );
    FLDCW_r32ind( R_EDX );
    FISTP_sh4r(R_FPUL);             // 3
    FLDCW_r32ind( R_EAX );
    JMP_rel8(end);             // 2
    /* Saturated: store the limit value (R_ECX still points at it) */
    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    MOV_r32ind_r32( R_ECX, R_ECX ); // 2
    store_spreg( R_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FLDS FRm, FPUL: raw 32-bit move FRm -> FPUL (no conversion). */
FLDS FRm, FPUL {:  
    COUNT_INST(I_FLDS);
    check_fpuen();
    load_fr( R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSTS FPUL, FRn: raw 32-bit move FPUL -> FRn (no conversion). */
FSTS FPUL, FRn {:  
    COUNT_INST(I_FSTS);
    check_fpuen();
    load_spreg( R_EAX, R_FPUL );
    store_fr( R_EAX, FRn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FCNVDS FRm, FPUL: narrow double DRm to single in FPUL.
 * Only performed when FPSCR.PR=1; otherwise skipped. */
FCNVDS FRm, FPUL {:  
    COUNT_INST(I_FCNVDS);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(end); // only when PR=1
    push_dr( FRm );
    pop_fpul();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FCNVSD FPUL, FRn: widen single in FPUL to double DRn.
 * Only performed when FPSCR.PR=1; otherwise skipped. */
FCNVSD FPUL, FRn {:  
    COUNT_INST(I_FCNVSD);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(end); // only when PR=1
    push_fpul();
    pop_dr( FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
  2081 /* Floating point instructions */
/* FP arithmetic: each op tests FPSCR.PR and emits a single-precision
 * (push_fr/pop_fr) or double-precision (push_dr/pop_dr) x87 sequence.
 * Operand push order determines which value ends up in st(0). */
FABS FRn {:  
    COUNT_INST(I_FABS);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn); // 6
    FABS_st0(); // 2
    pop_fr(FRn); //6
    JMP_rel8(end); // 2
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    FABS_st0();
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FADD FRm, FRn: FRn += FRm */
FADD FRm, FRn {:  
    COUNT_INST(I_FADD);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRm);
    push_fr(FRn);
    FADDP_st(1);
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRm);
    push_dr(FRn);
    FADDP_st(1);
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FDIV FRm, FRn: FRn /= FRm (FRn pushed first so FDIVP computes FRn/FRm) */
FDIV FRm, FRn {:  
    COUNT_INST(I_FDIV);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn);
    push_fr(FRm);
    FDIVP_st(1);
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    push_dr(FRm);
    FDIVP_st(1);
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FMAC FR0, FRm, FRn: FRn += FR0 * FRm */
FMAC FR0, FRm, FRn {:  
    COUNT_INST(I_FMAC);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr( 0 );
    push_fr( FRm );
    FMULP_st(1);
    push_fr( FRn );
    FADDP_st(1);
    pop_fr( FRn );
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr( 0 );
    push_dr( FRm );
    FMULP_st(1);
    push_dr( FRn );
    FADDP_st(1);
    pop_dr( FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FMUL FRm, FRn: FRn *= FRm */
FMUL FRm, FRn {:  
    COUNT_INST(I_FMUL);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRm);
    push_fr(FRn);
    FMULP_st(1);
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRm);
    push_dr(FRn);
    FMULP_st(1);
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FNEG FRn: FRn = -FRn */
FNEG FRn {:  
    COUNT_INST(I_FNEG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn);
    FCHS_st0();
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    FCHS_st0();
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSRRA FRn: FRn = 1/sqrt(FRn). Single-precision only (skipped when PR=1). */
FSRRA FRn {:  
    COUNT_INST(I_FSRRA);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(end); // PR=0 only
    FLD1_st0();
    push_fr(FRn);
    FSQRT_st0();
    FDIVP_st(1);
    pop_fr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSQRT FRn: FRn = sqrt(FRn) */
FSQRT FRn {:  
    COUNT_INST(I_FSQRT);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn);
    FSQRT_st0();
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    FSQRT_st0();
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSUB FRm, FRn: FRn -= FRm (FRn pushed first so FSUBP computes FRn-FRm) */
FSUB FRm, FRn {:  
    COUNT_INST(I_FSUB);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRn);
    push_fr(FRm);
    FSUBP_st(1);
    pop_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRn);
    push_dr(FRm);
    FSUBP_st(1);
    pop_dr(FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FCMP/EQ FRm, FRn: T = (FRn == FRm). Both precision paths push FRm then
 * FRn, then share the FCOMIP compare tail after the 'end' label; FCOMIP
 * compares st(0)=FRn with st(1)=FRm and FPOP discards the remaining value. */
FCMP/EQ FRm, FRn {:  
    COUNT_INST(I_FCMPEQ);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRm);
    push_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRm);
    push_dr(FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETE_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FCMP/GT FRm, FRn: T = (FRn > FRm), same shared-tail structure as FCMP/EQ. */
FCMP/GT FRm, FRn {:  
    COUNT_INST(I_FCMPGT);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec);
    push_fr(FRm);
    push_fr(FRn);
    JMP_rel8(end);
    JMP_TARGET(doubleprec);
    push_dr(FRm);
    push_dr(FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSCA FPUL, FRn: sine/cosine of the FPUL angle written to the even-aligned
 * FR pair (FRn&0x0E) via the sh4_fsca helper. Performed only when PR=0;
 * when PR=1 the emitted code falls through doing nothing. */
FSCA FPUL, FRn {:  
    COUNT_INST(I_FSCA);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(doubleprec );
    LEA_sh4r_r32( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
    load_spreg( R_EDX, R_FPUL );
    call_func2( sh4_fsca, R_EDX, R_ECX );
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FIPR FVm, FVn: 4-component inner product FVm.FVn, accumulated on the x87
 * stack, result stored to FR[(FVn<<2)+3]. Performed only when PR=0. */
FIPR FVm, FVn {:  
    COUNT_INST(I_FIPR);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( doubleprec);
    push_fr( FVm<<2 );
    push_fr( FVn<<2 );
    FMULP_st(1);
    push_fr( (FVm<<2)+1);
    push_fr( (FVn<<2)+1);
    FMULP_st(1);
    FADDP_st(1);
    push_fr( (FVm<<2)+2);
    push_fr( (FVn<<2)+2);
    FMULP_st(1);
    FADDP_st(1);
    push_fr( (FVm<<2)+3);
    push_fr( (FVn<<2)+3);
    FMULP_st(1);
    FADDP_st(1);
    pop_fr( (FVn<<2)+3);
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FTRV XMTRX, FVn: matrix-vector transform of FVn via the sh4_ftrv helper,
 * passing the address of the vector. Performed only when PR=0. */
FTRV XMTRX, FVn {:  
    COUNT_INST(I_FTRV);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( doubleprec );
    LEA_sh4r_r32( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
    call_func1( sh4_ftrv, R_EDX );  // 12
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FRCHG: toggle FPSCR.FR and swap the FP register banks via
 * sh4_switch_fr_banks. */
FRCHG {:  
    COUNT_INST(I_FRCHG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_FR, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    call_func0( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* FSCHG: toggle FPSCR.SZ (single vs pair transfer size for FMOV). */
FSCHG {:  
    COUNT_INST(I_FSCHG);
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_SZ, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2353 /* Processor control instructions */
  2354 LDC Rm, SR {:
  2355     COUNT_INST(I_LDCSR);
  2356     if( sh4_x86.in_delay_slot ) {
  2357 	SLOTILLEGAL();
  2358     } else {
  2359 	check_priv();
  2360 	load_reg( R_EAX, Rm );
  2361 	call_func1( sh4_write_sr, R_EAX );
  2362 	sh4_x86.priv_checked = FALSE;
  2363 	sh4_x86.fpuen_checked = FALSE;
  2364 	sh4_x86.tstate = TSTATE_NONE;
  2366 :}
/* LDC Rm, GBR: GBR is writable from user mode, so no check_priv(). */
LDC Rm, GBR {: 
    COUNT_INST(I_LDC);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );
:}
/* LDC Rm, <creg>: privileged moves from Rm into a control register. */
LDC Rm, VBR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDC Rm, Rn_BANK: write to the alternate register bank (privileged). */
LDC Rm, Rn_BANK {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDC.L @Rm+, GBR: post-increment load into GBR. User-mode accessible, so
 * no check_priv(). Rm is incremented only after MMU translation succeeds. */
LDC.L @Rm+, GBR {:  
    COUNT_INST(I_LDCM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2424 LDC.L @Rm+, SR {:
  2425     COUNT_INST(I_LDCSRM);
  2426     if( sh4_x86.in_delay_slot ) {
  2427 	SLOTILLEGAL();
  2428     } else {
  2429 	check_priv();
  2430 	load_reg( R_EAX, Rm );
  2431 	check_ralign32( R_EAX );
  2432 	MMU_TRANSLATE_READ( R_EAX );
  2433 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2434 	MEM_READ_LONG( R_EAX, R_EAX );
  2435 	call_func1( sh4_write_sr, R_EAX );
  2436 	sh4_x86.priv_checked = FALSE;
  2437 	sh4_x86.fpuen_checked = FALSE;
  2438 	sh4_x86.tstate = TSTATE_NONE;
  2440 :}
/* LDC.L @Rm+, <creg>: privileged post-increment loads into control
 * registers. In every variant the Rm increment is committed only after
 * MMU translation succeeds, so a TLB exception leaves Rm intact. */
LDC.L @Rm+, VBR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDC.L @Rm+, Rn_BANK: post-increment load into the alternate bank. */
LDC.L @Rm+, Rn_BANK {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDS Rm, FPSCR: FPSCR writes go through sh4_write_fpscr (the helper owns
 * the side effects of changing FPSCR, e.g. bank/size mode bits). */
LDS Rm, FPSCR {:
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    load_reg( R_EAX, Rm );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDS.L @Rm+, FPSCR: post-increment load; Rm is incremented only after
 * MMU translation succeeds. */
LDS.L @Rm+, FPSCR {:  
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    call_func1( sh4_write_fpscr, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDS Rm, FPUL: plain 32-bit move (needs FPU enabled). */
LDS Rm, FPUL {:  
    COUNT_INST(I_LDS);
    check_fpuen();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:  
    COUNT_INST(I_LDSM);
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDS Rm, MACH/MACL/PR: unprivileged system-register moves. */
LDS Rm, MACH {: 
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:  
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:  
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:  
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:  
    COUNT_INST(I_LDS);
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );
:}
LDS.L @Rm+, PR {:  
    COUNT_INST(I_LDSM);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* LDTLB: load the current PTEH/PTEL entry into the UTLB (done by helper). */
LDTLB {:  
    COUNT_INST(I_LDTLB);
    call_func0( MMU_ldtlb );
:}
/* OCBI/OCBP/OCBWB: cache-block invalidate/purge/write-back. The emulator
 * does not model the operand cache, so these only count the instruction. */
OCBI @Rn {:
    COUNT_INST(I_OCBI);
:}
OCBP @Rn {:
    COUNT_INST(I_OCBP);
:}
OCBWB @Rn {:
    COUNT_INST(I_OCBWB);
:}
/* PREF @Rn: prefetch hint. The only architecturally visible effect emulated
 * is a store-queue flush: if Rn lies in the SQ region (top 6 bits == 0xE0,
 * i.e. 0xE0000000-0xE3FFFFFF after masking with 0xFC000000) the queue is
 * flushed via sh4_flush_store_queue; a zero return from the helper raises
 * the pending exception (JE_exc). All other addresses are a no-op. */
PREF @Rn {:
    COUNT_INST(I_PREF);
    load_reg( R_EAX, Rn );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_EAX );
    CMP_imm32_r32( 0xE0000000, R_EAX );
    JNE_rel8(end);
    call_func1( sh4_flush_store_queue, R_ECX );
    TEST_r32_r32( R_EAX, R_EAX );
    JE_exc(-1);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* SLEEP: privileged low-power wait, handled by the sh4_sleep helper.
 * Returns 2 to terminate the current translation block immediately. */
SLEEP {: 
    COUNT_INST(I_SLEEP);
    check_priv();
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
/* STC SR, Rn: SR must be assembled from its component flags, so it is read
 * via the sh4_read_sr helper (privileged). */
STC SR, Rn {:
    COUNT_INST(I_STCSR);
    check_priv();
    call_func0(sh4_read_sr);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC GBR, Rn: GBR is readable from user mode, so no check_priv(). */
STC GBR, Rn {:  
    COUNT_INST(I_STC);
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );
:}
/* STC <creg>, Rn: privileged control-register reads into Rn. */
STC VBR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC Rm_BANK, Rn: read from the alternate register bank (privileged). */
STC Rm_BANK, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC.L SR, @-Rn: pre-decrement store of SR. The translated address is
 * preserved across the sh4_read_sr call on the host stack (PUSH/POP), and
 * the Rn decrement is committed only after MMU translation succeeds. Note
 * the MEM_WRITE_LONG operands here are (addr=R_ECX, value=R_EAX), the
 * reverse register assignment of the other STC.L variants. */
STC.L SR, @-Rn {:
    COUNT_INST(I_STCSRM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    call_func0( sh4_read_sr );
    POP_realigned_r32( R_ECX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC.L <creg>, @-Rn: privileged pre-decrement stores. In every variant
 * the Rn decrement is committed only after MMU translation succeeds. */
STC.L VBR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_VBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SSR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SPC );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SGR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_DBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC.L Rm_BANK, @-Rn: pre-decrement store from the alternate bank. */
STC.L Rm_BANK, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC.L GBR, @-Rn: user-mode accessible, so no check_priv(). */
STC.L GBR, @-Rn {:  
    COUNT_INST(I_STCM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_GBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STS FPSCR, Rn / STS.L FPSCR, @-Rn: read FPSCR (needs FPU enabled). */
STS FPSCR, Rn {:  
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );
:}
/* Pre-decrement variants: the Rn decrement is committed only after MMU
 * translation succeeds, so a TLB exception leaves Rn unmodified. */
STS.L FPSCR, @-Rn {:  
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:  
    COUNT_INST(I_STS);
    check_fpuen();
    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );
:}
STS.L FPUL, @-Rn {:  
    COUNT_INST(I_STSM);
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPUL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MACH/MACL/PR variants are unprivileged and need no FPU check. */
STS MACH, Rn {:  
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );
:}
STS.L MACH, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACH );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:  
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );
:}
STS.L MACL, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:  
    COUNT_INST(I_STS);
    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );
:}
STS.L PR, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_PR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* NOP: emits no host code at all; only the instruction counter is bumped. */
NOP {: 
    COUNT_INST(I_NOP);
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
:}
  2861 %%
  2862     sh4_x86.in_delay_slot = DELAY_NONE;
  2863     return 0;
.