lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 671:a530ea88eebd
prev 669:ab344e42bca9
next 673:44c579439d73
author nkeynes
date Thu May 15 10:22:39 2008 +0000
permissions -rw-r--r--
last change Permanently add SH4 instruction statistics tracking (enabled with --enable-sh4stats)
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4stat.h"
    32 #include "sh4/sh4mmio.h"
    33 #include "sh4/x86op.h"
    34 #include "clock.h"
    36 #define DEFAULT_BACKPATCH_SIZE 4096
    38 struct backpatch_record {
    39     uint32_t fixup_offset;
    40     uint32_t fixup_icount;
    41     int32_t exc_code;
    42 };
    44 #define MAX_RECOVERY_SIZE 2048
    46 #define DELAY_NONE 0
    47 #define DELAY_PC 1
    48 #define DELAY_PC_PR 2
    50 /** 
    51  * Struct to manage internal translation state. This state is not saved -
    52  * it is only valid between calls to sh4_translate_begin_block() and
    53  * sh4_translate_end_block()
    54  */
    55 struct sh4_x86_state {
    56     int in_delay_slot;
    57     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    58     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    59     gboolean branch_taken; /* true if we branched unconditionally */
    60     uint32_t block_start_pc;
    61     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    62     int tstate;
    64     /* mode flags */
    65     gboolean tlb_on; /* True if tlb translation is active */
    67     /* Allocated memory for the (block-wide) back-patch list */
    68     struct backpatch_record *backpatch_list;
    69     uint32_t backpatch_posn;
    70     uint32_t backpatch_size;
    71 };
    73 #define TSTATE_NONE -1
    74 #define TSTATE_O    0
    75 #define TSTATE_C    2
    76 #define TSTATE_E    4
    77 #define TSTATE_NE   5
    78 #define TSTATE_G    0xF
    79 #define TSTATE_GE   0xD
    80 #define TSTATE_A    7
    81 #define TSTATE_AE   3
    83 #ifdef ENABLE_SH4STATS
    84 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    85 #else
    86 #define COUNT_INST(id)
    87 #endif
    89 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    90 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    91 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    92     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    94 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    95 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    96 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    97     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
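/* How the T-state cache plays out (illustrative sketch, not emitted verbatim):
 * the TSTATE_* values above are x86 condition codes, so OP(0x70+tstate) is the
 * matching Jcc rel8 opcode and flipping bit 0 (tstate^1) inverts the
 * condition. For example, after a CMP/EQ leaves tstate == TSTATE_E (4):
 *
 *   JT_rel8(l)  emits 0x74   // JE  rel8 - branch taken iff T was set
 *   JF_rel8(l)  emits 0x75   // JNE rel8 - the inverted condition
 *
 * Only when tstate == TSTATE_NONE does either macro first emit
 * CMP_imm8s_sh4r(1, R_T) to re-derive the flags from sh4r.t in memory.
 */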
    99 static struct sh4_x86_state sh4_x86;
   101 static uint32_t max_int = 0x7FFFFFFF;
   102 static uint32_t min_int = 0x80000000;
   103 static uint32_t save_fcw; /* save value for fpu control word */
   104 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
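/* Sketch of the control-word values above: 0x0F7F is the x87 power-on default
 * 0x037F with the RC field (bits 11:10) forced to 11b, i.e. round-toward-zero,
 * which is what SH4 truncating conversions require. A conversion would then
 * be bracketed roughly as:
 *
 *   FLDCW trunc_fcw    ; switch to truncation mode
 *   FISTP dword [dst]  ; integer conversion now rounds toward zero
 *   FLDCW save_fcw     ; restore the caller's rounding mode
 */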
   106 void sh4_translate_init(void)
   107 {
   108     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   109     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   110 }
   113 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   114 {
   115     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   116 	sh4_x86.backpatch_size <<= 1;
   117 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   118 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   119 	assert( sh4_x86.backpatch_list != NULL );
   120     }
   121     if( sh4_x86.in_delay_slot ) {
   122 	fixup_pc -= 2;
   123     }
   124     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   125 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   126     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   127     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   128     sh4_x86.backpatch_posn++;
   129 }
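/* Worked example of the icount arithmetic above (sketch): with
 * block_start_pc = 0x8C0010A0 and a fixup at fixup_pc = 0x8C0010A8,
 * fixup_icount = (0x8C0010A8 - 0x8C0010A0) >> 1 = 4 - the fifth 16-bit
 * instruction of the block. The in_delay_slot adjustment subtracts 2 first,
 * attributing the exception to the branch that owns the delay slot.
 */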
   131 /**
   132  * Emit an instruction to load an SH4 reg into a real register
   133  */
   134 static inline void load_reg( int x86reg, int sh4reg ) 
   135 {
   136     /* mov [bp+n], reg */
   137     OP(0x8B);
   138     OP(0x45 + (x86reg<<3));
   139     OP(REG_OFFSET(r[sh4reg]));
   140 }
   142 static inline void load_reg16s( int x86reg, int sh4reg )
   143 {
   144     OP(0x0F);
   145     OP(0xBF);
   146     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   147 }
   149 static inline void load_reg16u( int x86reg, int sh4reg )
   150 {
   151     OP(0x0F);
   152     OP(0xB7);
   153     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   155 }
   157 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   158 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   159 /**
   160  * Emit an instruction to load an immediate value into a register
   161  */
   162 static inline void load_imm32( int x86reg, uint32_t value ) {
   163     /* mov #value, reg */
   164     OP(0xB8 + x86reg);
   165     OP32(value);
   166 }
   168 /**
   169  * Load an immediate 64-bit quantity (note: x86-64 only)
   170  */
   171 static inline void load_imm64( int x86reg, uint64_t value ) {
   172     /* mov #value, reg */
   173     REXW();
   174     OP(0xB8 + x86reg);
   175     OP64(value);
   176 }
   178 /**
   179  * Emit an instruction to store an SH4 reg (RN)
   180  */
   181 static inline void store_reg( int x86reg, int sh4reg ) {
   182     /* mov reg, [bp+n] */
   183     OP(0x89);
   184     OP(0x45 + (x86reg<<3));
   185     OP(REG_OFFSET(r[sh4reg]));
   186 }
   188 /**
   189  * Load an FR register (single-precision floating point) into an integer x86
   190  * register (eg for register-to-register moves)
   191  */
   192 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   193 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   195 /**
   196  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   197  */
   198 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   199 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   201 /**
   202  * Store an FR register (single-precision floating point) from an integer x86
   203  * register (eg for register-to-register moves)
   204  */
   205 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   206 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   208 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   209 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
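/* Sketch of the index arithmetic above: load_dr0/store_dr0 use fr[bank][frm|1]
 * for the low word and load_dr1/store_dr1 use fr[bank][frm&0x0E] for the high
 * word - presumably because each double is kept as two host-ordered 32-bit
 * halves, with the single-precision macros XOR-ing bit 0 to compensate.
 * E.g. for DR2 (frm=2, bank 0): dr0 -> fr[0][3], dr1 -> fr[0][2].
 */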
   212 #define push_fpul()  FLDF_sh4r(R_FPUL)
   213 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   214 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   215 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   216 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   217 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   218 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   219 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   220 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   221 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   225 /* Exception checks - Note that all exception checks will clobber EAX */
   227 #define check_priv( ) \
   228     if( !sh4_x86.priv_checked ) { \
   229 	sh4_x86.priv_checked = TRUE;\
   230 	load_spreg( R_EAX, R_SR );\
   231 	AND_imm32_r32( SR_MD, R_EAX );\
   232 	if( sh4_x86.in_delay_slot ) {\
   233 	    JE_exc( EXC_SLOT_ILLEGAL );\
   234 	} else {\
   235 	    JE_exc( EXC_ILLEGAL );\
   236 	}\
   237     }\
   239 #define check_fpuen( ) \
   240     if( !sh4_x86.fpuen_checked ) {\
   241 	sh4_x86.fpuen_checked = TRUE;\
   242 	load_spreg( R_EAX, R_SR );\
   243 	AND_imm32_r32( SR_FD, R_EAX );\
   244 	if( sh4_x86.in_delay_slot ) {\
   245 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   246 	} else {\
   247 	    JNE_exc(EXC_FPU_DISABLED);\
   248 	}\
   249     }
   251 #define check_ralign16( x86reg ) \
   252     TEST_imm32_r32( 0x00000001, x86reg ); \
   253     JNE_exc(EXC_DATA_ADDR_READ)
   255 #define check_walign16( x86reg ) \
   256     TEST_imm32_r32( 0x00000001, x86reg ); \
   257     JNE_exc(EXC_DATA_ADDR_WRITE);
   259 #define check_ralign32( x86reg ) \
   260     TEST_imm32_r32( 0x00000003, x86reg ); \
   261     JNE_exc(EXC_DATA_ADDR_READ)
   263 #define check_walign32( x86reg ) \
   264     TEST_imm32_r32( 0x00000003, x86reg ); \
   265     JNE_exc(EXC_DATA_ADDR_WRITE);
   267 #define UNDEF()
   268 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   269 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   270 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   271 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   272 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   273 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   274 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   276 /**
   277  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   278  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   279  */
   280 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   282 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
   283 /**
   284  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   285  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   286  */
   287 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
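/* Runtime effect of the sequence either translate macro emits (sketch, TLB on):
 *
 *   eax = mmu_vma_to_phys_write(addr);   // or mmu_vma_to_phys_read
 *   if( eax == MMU_VMA_ERROR )
 *       goto exception_exit;             // JE_exc: forward-patched stub
 *   addr_reg = eax;                      // MEM_RESULT: physical address
 *
 * With the TLB off the macros emit nothing and addr_reg keeps the virtual
 * address unchanged.
 */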
   289 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
   290 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
   291 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
   293 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   295 /****** Import appropriate calling conventions ******/
   296 #if SH4_TRANSLATOR == TARGET_X86_64
   297 #include "sh4/ia64abi.h"
   298 #else /* SH4_TRANSLATOR == TARGET_X86 */
   299 #ifdef APPLE_BUILD
   300 #include "sh4/ia32mac.h"
   301 #else
   302 #include "sh4/ia32abi.h"
   303 #endif
   304 #endif
   306 uint32_t sh4_translate_end_block_size()
   307 {
   308     if( sh4_x86.backpatch_posn <= 3 ) {
   309 	return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   310     } else {
   311 	return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   312     }
   313 }
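/* Worked examples of the size estimate above (sketch):
 *   backpatch_posn = 2  ->  EPILOGUE_SIZE + 2*12      = EPILOGUE_SIZE + 24
 *   backpatch_posn = 5  ->  EPILOGUE_SIZE + 48 + 2*15 = EPILOGUE_SIZE + 78
 * Up to three recovery stubs are budgeted at 12 bytes each; beyond that a
 * 48-byte base covers the first three and each extra stub costs 15 bytes.
 */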
   316 /**
   317  * Embed a breakpoint into the generated code
   318  */
   319 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   320 {
   321     load_imm32( R_EAX, pc );
   322     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   323 }
   326 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   328 /**
   329  * Embed a call to sh4_execute_instruction for situations that we
   330  * can't translate (just page-crossing delay slots at the moment).
   331  * Caller is responsible for setting new_pc before calling this function.
   332  *
   333  * Performs:
   334  *   Set PC = endpc
   335  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   336  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   337  *   Call sh4_execute_instruction
   338  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   339  */
   340 void exit_block_emu( sh4vma_t endpc )
   341 {
   342     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   343     ADD_r32_sh4r( R_ECX, R_PC );
   345     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   346     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   347     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   348     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   350     call_func0( sh4_execute_instruction );    
   351     load_spreg( R_EAX, R_PC );
   352     if( sh4_x86.tlb_on ) {
   353 	call_func1(xlat_get_code_by_vma,R_EAX);
   354     } else {
   355 	call_func1(xlat_get_code,R_EAX);
   356     }
   357     AND_imm8s_rptr( 0xFC, R_EAX );
   358     POP_r32(R_EBP);
   359     RET();
   360 } 
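/* Cycle accounting in the emitter above (sketch): if endpc is 4 bytes past
 * block_start_pc, the second load_imm32 folds in
 * ((4>>1)+1) * sh4_cpu_period = 3 instruction periods - the two already
 * counted plus the one sh4_execute_instruction is about to run, since
 * single-stepping does not advance slice_cycle by itself.
 */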
   362 /**
   363  * Translate a single instruction. Delayed branches are handled specially
    364  * by translating both branch and delayed instruction as a single unit (as
    365  * the delay-slot instruction must be executed before the branch takes effect).
   366  * The instruction MUST be in the icache (assert check)
   367  *
   368  * @return true if the instruction marks the end of a basic block
    369  * (eg a branch or other control-flow transfer).
   370  */
   371 uint32_t sh4_translate_instruction( sh4vma_t pc )
   372 {
   373     uint32_t ir;
   374     /* Read instruction from icache */
   375     assert( IS_IN_ICACHE(pc) );
   376     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   378 	/* PC is not in the current icache - this usually means we're running
   379 	 * with MMU on, and we've gone past the end of the page. And since 
   380 	 * sh4_translate_block is pretty careful about this, it means we're
   381 	 * almost certainly in a delay slot.
   382 	 *
   383 	 * Since we can't assume the page is present (and we can't fault it in
    384 	 * at this point), inline a call to sh4_execute_instruction (with a few
   385 	 * small repairs to cope with the different environment).
   386 	 */
   388     if( !sh4_x86.in_delay_slot ) {
   389 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   390     }
   391 %%
   392 /* ALU operations */
   393 ADD Rm, Rn {:
   394     COUNT_INST(I_ADD);
   395     load_reg( R_EAX, Rm );
   396     load_reg( R_ECX, Rn );
   397     ADD_r32_r32( R_EAX, R_ECX );
   398     store_reg( R_ECX, Rn );
   399     sh4_x86.tstate = TSTATE_NONE;
   400 :}
   401 ADD #imm, Rn {:  
   402     COUNT_INST(I_ADDI);
   403     load_reg( R_EAX, Rn );
   404     ADD_imm8s_r32( imm, R_EAX );
   405     store_reg( R_EAX, Rn );
   406     sh4_x86.tstate = TSTATE_NONE;
   407 :}
   408 ADDC Rm, Rn {:
   409     COUNT_INST(I_ADDC);
   410     if( sh4_x86.tstate != TSTATE_C ) {
   411 	LDC_t();
   412     }
   413     load_reg( R_EAX, Rm );
   414     load_reg( R_ECX, Rn );
   415     ADC_r32_r32( R_EAX, R_ECX );
   416     store_reg( R_ECX, Rn );
   417     SETC_t();
   418     sh4_x86.tstate = TSTATE_C;
   419 :}
   420 ADDV Rm, Rn {:
   421     COUNT_INST(I_ADDV);
   422     load_reg( R_EAX, Rm );
   423     load_reg( R_ECX, Rn );
   424     ADD_r32_r32( R_EAX, R_ECX );
   425     store_reg( R_ECX, Rn );
   426     SETO_t();
   427     sh4_x86.tstate = TSTATE_O;
   428 :}
   429 AND Rm, Rn {:
   430     COUNT_INST(I_AND);
   431     load_reg( R_EAX, Rm );
   432     load_reg( R_ECX, Rn );
   433     AND_r32_r32( R_EAX, R_ECX );
   434     store_reg( R_ECX, Rn );
   435     sh4_x86.tstate = TSTATE_NONE;
   436 :}
   437 AND #imm, R0 {:  
   438     COUNT_INST(I_ANDI);
   439     load_reg( R_EAX, 0 );
   440     AND_imm32_r32(imm, R_EAX); 
   441     store_reg( R_EAX, 0 );
   442     sh4_x86.tstate = TSTATE_NONE;
   443 :}
   444 AND.B #imm, @(R0, GBR) {: 
   445     COUNT_INST(I_ANDB);
   446     load_reg( R_EAX, 0 );
   447     load_spreg( R_ECX, R_GBR );
   448     ADD_r32_r32( R_ECX, R_EAX );
   449     MMU_TRANSLATE_WRITE( R_EAX );
   450     PUSH_realigned_r32(R_EAX);
   451     MEM_READ_BYTE( R_EAX, R_EAX );
   452     POP_realigned_r32(R_ECX);
   453     AND_imm32_r32(imm, R_EAX );
   454     MEM_WRITE_BYTE( R_ECX, R_EAX );
   455     sh4_x86.tstate = TSTATE_NONE;
   456 :}
   457 CMP/EQ Rm, Rn {:  
   458     COUNT_INST(I_CMPEQ);
   459     load_reg( R_EAX, Rm );
   460     load_reg( R_ECX, Rn );
   461     CMP_r32_r32( R_EAX, R_ECX );
   462     SETE_t();
   463     sh4_x86.tstate = TSTATE_E;
   464 :}
   465 CMP/EQ #imm, R0 {:  
   466     COUNT_INST(I_CMPEQI);
   467     load_reg( R_EAX, 0 );
   468     CMP_imm8s_r32(imm, R_EAX);
   469     SETE_t();
   470     sh4_x86.tstate = TSTATE_E;
   471 :}
   472 CMP/GE Rm, Rn {:  
   473     COUNT_INST(I_CMPGE);
   474     load_reg( R_EAX, Rm );
   475     load_reg( R_ECX, Rn );
   476     CMP_r32_r32( R_EAX, R_ECX );
   477     SETGE_t();
   478     sh4_x86.tstate = TSTATE_GE;
   479 :}
   480 CMP/GT Rm, Rn {: 
   481     COUNT_INST(I_CMPGT);
   482     load_reg( R_EAX, Rm );
   483     load_reg( R_ECX, Rn );
   484     CMP_r32_r32( R_EAX, R_ECX );
   485     SETG_t();
   486     sh4_x86.tstate = TSTATE_G;
   487 :}
   488 CMP/HI Rm, Rn {:  
   489     COUNT_INST(I_CMPHI);
   490     load_reg( R_EAX, Rm );
   491     load_reg( R_ECX, Rn );
   492     CMP_r32_r32( R_EAX, R_ECX );
   493     SETA_t();
   494     sh4_x86.tstate = TSTATE_A;
   495 :}
   496 CMP/HS Rm, Rn {: 
   497     COUNT_INST(I_CMPHS);
   498     load_reg( R_EAX, Rm );
   499     load_reg( R_ECX, Rn );
   500     CMP_r32_r32( R_EAX, R_ECX );
   501     SETAE_t();
   502     sh4_x86.tstate = TSTATE_AE;
   503  :}
   504 CMP/PL Rn {: 
   505     COUNT_INST(I_CMPPL);
   506     load_reg( R_EAX, Rn );
   507     CMP_imm8s_r32( 0, R_EAX );
   508     SETG_t();
   509     sh4_x86.tstate = TSTATE_G;
   510 :}
   511 CMP/PZ Rn {:  
   512     COUNT_INST(I_CMPPZ);
   513     load_reg( R_EAX, Rn );
   514     CMP_imm8s_r32( 0, R_EAX );
   515     SETGE_t();
   516     sh4_x86.tstate = TSTATE_GE;
   517 :}
   518 CMP/STR Rm, Rn {:  
   519     COUNT_INST(I_CMPSTR);
   520     load_reg( R_EAX, Rm );
   521     load_reg( R_ECX, Rn );
   522     XOR_r32_r32( R_ECX, R_EAX );
   523     TEST_r8_r8( R_AL, R_AL );
   524     JE_rel8(target1);
   525     TEST_r8_r8( R_AH, R_AH );
   526     JE_rel8(target2);
   527     SHR_imm8_r32( 16, R_EAX );
   528     TEST_r8_r8( R_AL, R_AL );
   529     JE_rel8(target3);
   530     TEST_r8_r8( R_AH, R_AH );
   531     JMP_TARGET(target1);
   532     JMP_TARGET(target2);
   533     JMP_TARGET(target3);
   534     SETE_t();
   535     sh4_x86.tstate = TSTATE_E;
   536 :}
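/* C model of the test above (sketch): CMP/STR sets T iff any of the four
 * byte lanes of Rm and Rn match, i.e. some byte of Rm^Rn is zero:
 *
 *   uint32_t x = rm ^ rn;
 *   int t = ((x & 0x000000FF) == 0) || ((x & 0x0000FF00) == 0) ||
 *           ((x & 0x00FF0000) == 0) || ((x & 0xFF000000) == 0);
 *
 * The generated code short-circuits to SETE with ZF already set as soon as
 * a TEST finds a zero byte in AL/AH, before and after the 16-bit shift.
 */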
   537 DIV0S Rm, Rn {:
   538     COUNT_INST(I_DIV0S);
   539     load_reg( R_EAX, Rm );
   540     load_reg( R_ECX, Rn );
   541     SHR_imm8_r32( 31, R_EAX );
   542     SHR_imm8_r32( 31, R_ECX );
   543     store_spreg( R_EAX, R_M );
   544     store_spreg( R_ECX, R_Q );
   545     CMP_r32_r32( R_EAX, R_ECX );
   546     SETNE_t();
   547     sh4_x86.tstate = TSTATE_NE;
   548 :}
   549 DIV0U {:  
   550     COUNT_INST(I_DIV0U);
   551     XOR_r32_r32( R_EAX, R_EAX );
   552     store_spreg( R_EAX, R_Q );
   553     store_spreg( R_EAX, R_M );
   554     store_spreg( R_EAX, R_T );
   555     sh4_x86.tstate = TSTATE_C; // works for DIV1
   556 :}
   557 DIV1 Rm, Rn {:
   558     COUNT_INST(I_DIV1);
   559     load_spreg( R_ECX, R_M );
   560     load_reg( R_EAX, Rn );
   561     if( sh4_x86.tstate != TSTATE_C ) {
   562 	LDC_t();
   563     }
   564     RCL1_r32( R_EAX );
   565     SETC_r8( R_DL ); // Q'
   566     CMP_sh4r_r32( R_Q, R_ECX );
   567     JE_rel8(mqequal);
   568     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   569     JMP_rel8(end);
   570     JMP_TARGET(mqequal);
   571     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   572     JMP_TARGET(end);
   573     store_reg( R_EAX, Rn ); // Done with Rn now
   574     SETC_r8(R_AL); // tmp1
    575     XOR_r8_r8( R_DL, R_AL ); // AL = Q' ^ tmp1
    576     XOR_r8_r8( R_AL, R_CL ); // Q = Q' ^ tmp1 ^ M
    577     store_spreg( R_ECX, R_Q );
    578     XOR_imm8s_r32( 1, R_AL );   // T = !(Q' ^ tmp1)
   579     MOVZX_r8_r32( R_AL, R_EAX );
   580     store_spreg( R_EAX, R_T );
   581     sh4_x86.tstate = TSTATE_NONE;
   582 :}
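/* C transliteration of the DIV1 step above (sketch of one non-restoring
 * division step, widened to 64 bits to make the carries explicit):
 *
 *   uint32_t rn = sh4r.r[Rn], rm = sh4r.r[Rm];
 *   int q1 = rn >> 31;                        // bit RCL shifts out ("Q'")
 *   rn = (rn << 1) | sh4r.t;                  // RCL1: T shifts in
 *   uint64_t wide = (sh4r.q == sh4r.m) ? (uint64_t)rn - rm
 *                                      : (uint64_t)rn + rm;
 *   int carry = (wide >> 32) & 1;             // borrow/carry out ("tmp1")
 *   sh4r.r[Rn] = (uint32_t)wide;
 *   sh4r.q = q1 ^ carry ^ sh4r.m;
 *   sh4r.t = (q1 ^ carry) ^ 1;
 */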
   583 DMULS.L Rm, Rn {:  
   584     COUNT_INST(I_DMULS);
   585     load_reg( R_EAX, Rm );
   586     load_reg( R_ECX, Rn );
   587     IMUL_r32(R_ECX);
   588     store_spreg( R_EDX, R_MACH );
   589     store_spreg( R_EAX, R_MACL );
   590     sh4_x86.tstate = TSTATE_NONE;
   591 :}
   592 DMULU.L Rm, Rn {:  
   593     COUNT_INST(I_DMULU);
   594     load_reg( R_EAX, Rm );
   595     load_reg( R_ECX, Rn );
   596     MUL_r32(R_ECX);
   597     store_spreg( R_EDX, R_MACH );
   598     store_spreg( R_EAX, R_MACL );    
   599     sh4_x86.tstate = TSTATE_NONE;
   600 :}
   601 DT Rn {:  
   602     COUNT_INST(I_DT);
   603     load_reg( R_EAX, Rn );
   604     ADD_imm8s_r32( -1, R_EAX );
   605     store_reg( R_EAX, Rn );
   606     SETE_t();
   607     sh4_x86.tstate = TSTATE_E;
   608 :}
   609 EXTS.B Rm, Rn {:  
   610     COUNT_INST(I_EXTSB);
   611     load_reg( R_EAX, Rm );
   612     MOVSX_r8_r32( R_EAX, R_EAX );
   613     store_reg( R_EAX, Rn );
   614 :}
   615 EXTS.W Rm, Rn {:  
   616     COUNT_INST(I_EXTSW);
   617     load_reg( R_EAX, Rm );
   618     MOVSX_r16_r32( R_EAX, R_EAX );
   619     store_reg( R_EAX, Rn );
   620 :}
   621 EXTU.B Rm, Rn {:  
   622     COUNT_INST(I_EXTUB);
   623     load_reg( R_EAX, Rm );
   624     MOVZX_r8_r32( R_EAX, R_EAX );
   625     store_reg( R_EAX, Rn );
   626 :}
   627 EXTU.W Rm, Rn {:  
   628     COUNT_INST(I_EXTUW);
   629     load_reg( R_EAX, Rm );
   630     MOVZX_r16_r32( R_EAX, R_EAX );
   631     store_reg( R_EAX, Rn );
   632 :}
   633 MAC.L @Rm+, @Rn+ {:
   634     COUNT_INST(I_MACL);
   635     if( Rm == Rn ) {
   636 	load_reg( R_EAX, Rm );
   637 	check_ralign32( R_EAX );
   638 	MMU_TRANSLATE_READ( R_EAX );
   639 	PUSH_realigned_r32( R_EAX );
   640 	load_reg( R_EAX, Rn );
   641 	ADD_imm8s_r32( 4, R_EAX );
   642 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   643 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
    644 	// Note: translate the second address too, in case the two accesses
    645 	// straddle a page boundary. Maybe worth a check to skip the second translation
   646     } else {
   647 	load_reg( R_EAX, Rm );
   648 	check_ralign32( R_EAX );
   649 	MMU_TRANSLATE_READ( R_EAX );
   650 	load_reg( R_ECX, Rn );
   651 	check_ralign32( R_ECX );
   652 	PUSH_realigned_r32( R_EAX );
   653 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   654 	MOV_r32_r32( R_ECX, R_EAX );
   655 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   656 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   657     }
   658     MEM_READ_LONG( R_EAX, R_EAX );
   659     POP_r32( R_ECX );
   660     PUSH_r32( R_EAX );
   661     MEM_READ_LONG( R_ECX, R_EAX );
   662     POP_realigned_r32( R_ECX );
   664     IMUL_r32( R_ECX );
   665     ADD_r32_sh4r( R_EAX, R_MACL );
   666     ADC_r32_sh4r( R_EDX, R_MACH );
   668     load_spreg( R_ECX, R_S );
   669     TEST_r32_r32(R_ECX, R_ECX);
   670     JE_rel8( nosat );
   671     call_func0( signsat48 );
   672     JMP_TARGET( nosat );
   673     sh4_x86.tstate = TSTATE_NONE;
   674 :}
   675 MAC.W @Rm+, @Rn+ {:  
   676     COUNT_INST(I_MACW);
   677     if( Rm == Rn ) {
   678 	load_reg( R_EAX, Rm );
   679 	check_ralign16( R_EAX );
   680 	MMU_TRANSLATE_READ( R_EAX );
   681 	PUSH_realigned_r32( R_EAX );
   682 	load_reg( R_EAX, Rn );
   683 	ADD_imm8s_r32( 2, R_EAX );
   684 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   685 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
    686 	// Note: translate the second address too, in case the two accesses
    687 	// straddle a page boundary. Maybe worth a check to skip the second translation
   688     } else {
   689 	load_reg( R_EAX, Rm );
   690 	check_ralign16( R_EAX );
   691 	MMU_TRANSLATE_READ( R_EAX );
   692 	load_reg( R_ECX, Rn );
   693 	check_ralign16( R_ECX );
   694 	PUSH_realigned_r32( R_EAX );
   695 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   696 	MOV_r32_r32( R_ECX, R_EAX );
   697 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   698 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   699     }
   700     MEM_READ_WORD( R_EAX, R_EAX );
   701     POP_r32( R_ECX );
   702     PUSH_r32( R_EAX );
   703     MEM_READ_WORD( R_ECX, R_EAX );
   704     POP_realigned_r32( R_ECX );
   705     IMUL_r32( R_ECX );
   707     load_spreg( R_ECX, R_S );
   708     TEST_r32_r32( R_ECX, R_ECX );
   709     JE_rel8( nosat );
   711     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   712     JNO_rel8( end );            // 2
   713     load_imm32( R_EDX, 1 );         // 5
   714     store_spreg( R_EDX, R_MACH );   // 6
   715     JS_rel8( positive );        // 2
   716     load_imm32( R_EAX, 0x80000000 );// 5
   717     store_spreg( R_EAX, R_MACL );   // 6
   718     JMP_rel8(end2);           // 2
   720     JMP_TARGET(positive);
   721     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   722     store_spreg( R_EAX, R_MACL );   // 6
   723     JMP_rel8(end3);            // 2
   725     JMP_TARGET(nosat);
   726     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   727     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   728     JMP_TARGET(end);
   729     JMP_TARGET(end2);
   730     JMP_TARGET(end3);
   731     sh4_x86.tstate = TSTATE_NONE;
   732 :}
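/* C sketch of the saturating path above (taken when the S flag is set):
 * MAC.W then accumulates into 32 bits, clamping on overflow and recording
 * it by writing 1 to MACH.
 *
 *   int32_t product = (int16_t)a * (int16_t)b;       // the IMUL result
 *   int64_t sum = (int64_t)(int32_t)MACL + product;
 *   if( sum > 0x7FFFFFFFLL )       { MACL = 0x7FFFFFFF; MACH = 1; }
 *   else if( sum < -0x80000000LL ) { MACL = 0x80000000; MACH = 1; }
 *   else                             MACL = (uint32_t)sum;
 *
 * The generated code detects overflow with JNO after the 32-bit ADD and
 * picks the clamp value from the sign flag (negative result => the operands
 * were positive => clamp high).
 */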
   733 MOVT Rn {:  
   734     COUNT_INST(I_MOVT);
   735     load_spreg( R_EAX, R_T );
   736     store_reg( R_EAX, Rn );
   737 :}
   738 MUL.L Rm, Rn {:  
   739     COUNT_INST(I_MULL);
   740     load_reg( R_EAX, Rm );
   741     load_reg( R_ECX, Rn );
   742     MUL_r32( R_ECX );
   743     store_spreg( R_EAX, R_MACL );
   744     sh4_x86.tstate = TSTATE_NONE;
   745 :}
   746 MULS.W Rm, Rn {:
   747     COUNT_INST(I_MULSW);
   748     load_reg16s( R_EAX, Rm );
   749     load_reg16s( R_ECX, Rn );
   750     MUL_r32( R_ECX );
   751     store_spreg( R_EAX, R_MACL );
   752     sh4_x86.tstate = TSTATE_NONE;
   753 :}
   754 MULU.W Rm, Rn {:  
   755     COUNT_INST(I_MULUW);
   756     load_reg16u( R_EAX, Rm );
   757     load_reg16u( R_ECX, Rn );
   758     MUL_r32( R_ECX );
   759     store_spreg( R_EAX, R_MACL );
   760     sh4_x86.tstate = TSTATE_NONE;
   761 :}
   762 NEG Rm, Rn {:
   763     COUNT_INST(I_NEG);
   764     load_reg( R_EAX, Rm );
   765     NEG_r32( R_EAX );
   766     store_reg( R_EAX, Rn );
   767     sh4_x86.tstate = TSTATE_NONE;
   768 :}
   769 NEGC Rm, Rn {:  
   770     COUNT_INST(I_NEGC);
   771     load_reg( R_EAX, Rm );
   772     XOR_r32_r32( R_ECX, R_ECX );
   773     LDC_t();
   774     SBB_r32_r32( R_EAX, R_ECX );
   775     store_reg( R_ECX, Rn );
   776     SETC_t();
   777     sh4_x86.tstate = TSTATE_C;
   778 :}
   779 NOT Rm, Rn {:  
   780     COUNT_INST(I_NOT);
   781     load_reg( R_EAX, Rm );
   782     NOT_r32( R_EAX );
   783     store_reg( R_EAX, Rn );
   784     sh4_x86.tstate = TSTATE_NONE;
   785 :}
   786 OR Rm, Rn {:  
   787     COUNT_INST(I_OR);
   788     load_reg( R_EAX, Rm );
   789     load_reg( R_ECX, Rn );
   790     OR_r32_r32( R_EAX, R_ECX );
   791     store_reg( R_ECX, Rn );
   792     sh4_x86.tstate = TSTATE_NONE;
   793 :}
   794 OR #imm, R0 {:
   795     COUNT_INST(I_ORI);
   796     load_reg( R_EAX, 0 );
   797     OR_imm32_r32(imm, R_EAX);
   798     store_reg( R_EAX, 0 );
   799     sh4_x86.tstate = TSTATE_NONE;
   800 :}
   801 OR.B #imm, @(R0, GBR) {:  
   802     COUNT_INST(I_ORB);
   803     load_reg( R_EAX, 0 );
   804     load_spreg( R_ECX, R_GBR );
   805     ADD_r32_r32( R_ECX, R_EAX );
   806     MMU_TRANSLATE_WRITE( R_EAX );
   807     PUSH_realigned_r32(R_EAX);
   808     MEM_READ_BYTE( R_EAX, R_EAX );
   809     POP_realigned_r32(R_ECX);
   810     OR_imm32_r32(imm, R_EAX );
   811     MEM_WRITE_BYTE( R_ECX, R_EAX );
   812     sh4_x86.tstate = TSTATE_NONE;
   813 :}
   814 ROTCL Rn {:
   815     COUNT_INST(I_ROTCL);
   816     load_reg( R_EAX, Rn );
   817     if( sh4_x86.tstate != TSTATE_C ) {
   818 	LDC_t();
   819     }
   820     RCL1_r32( R_EAX );
   821     store_reg( R_EAX, Rn );
   822     SETC_t();
   823     sh4_x86.tstate = TSTATE_C;
   824 :}
   825 ROTCR Rn {:  
   826     COUNT_INST(I_ROTCR);
   827     load_reg( R_EAX, Rn );
   828     if( sh4_x86.tstate != TSTATE_C ) {
   829 	LDC_t();
   830     }
   831     RCR1_r32( R_EAX );
   832     store_reg( R_EAX, Rn );
   833     SETC_t();
   834     sh4_x86.tstate = TSTATE_C;
   835 :}
   836 ROTL Rn {:  
   837     COUNT_INST(I_ROTL);
   838     load_reg( R_EAX, Rn );
   839     ROL1_r32( R_EAX );
   840     store_reg( R_EAX, Rn );
   841     SETC_t();
   842     sh4_x86.tstate = TSTATE_C;
   843 :}
   844 ROTR Rn {:  
   845     COUNT_INST(I_ROTR);
   846     load_reg( R_EAX, Rn );
   847     ROR1_r32( R_EAX );
   848     store_reg( R_EAX, Rn );
   849     SETC_t();
   850     sh4_x86.tstate = TSTATE_C;
   851 :}
   852 SHAD Rm, Rn {:
   853     COUNT_INST(I_SHAD);
   854     /* Annoyingly enough, not directly convertible */
   855     load_reg( R_EAX, Rn );
   856     load_reg( R_ECX, Rm );
   857     CMP_imm32_r32( 0, R_ECX );
   858     JGE_rel8(doshl);
   860     NEG_r32( R_ECX );      // 2
   861     AND_imm8_r8( 0x1F, R_CL ); // 3
   862     JE_rel8(emptysar);     // 2
   863     SAR_r32_CL( R_EAX );       // 2
   864     JMP_rel8(end);          // 2
   866     JMP_TARGET(emptysar);
   867     SAR_imm8_r32(31, R_EAX );  // 3
   868     JMP_rel8(end2);
   870     JMP_TARGET(doshl);
   871     AND_imm8_r8( 0x1F, R_CL ); // 3
   872     SHL_r32_CL( R_EAX );       // 2
   873     JMP_TARGET(end);
   874     JMP_TARGET(end2);
   875     store_reg( R_EAX, Rn );
   876     sh4_x86.tstate = TSTATE_NONE;
   877 :}
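/* C model of SHAD above (sketch): the low five bits of Rm give the count,
 * the sign of Rm the direction, with a count of zero on the right-shift
 * path special-cased to a full 31-bit arithmetic shift:
 *
 *   int32_t rn = sh4r.r[Rn], rm = sh4r.r[Rm];
 *   if( rm >= 0 )
 *       rn <<= (rm & 0x1F);
 *   else {
 *       int s = (-rm) & 0x1F;
 *       rn = (s != 0) ? (rn >> s) : (rn >> 31);   // arithmetic shifts
 *   }
 *
 * SHLD below has the same shape with logical shifts, and yields zero in
 * the right-shift-by-32 case.
 */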
   878 SHLD Rm, Rn {:  
   879     COUNT_INST(I_SHLD);
   880     load_reg( R_EAX, Rn );
   881     load_reg( R_ECX, Rm );
   882     CMP_imm32_r32( 0, R_ECX );
   883     JGE_rel8(doshl);
   885     NEG_r32( R_ECX );      // 2
   886     AND_imm8_r8( 0x1F, R_CL ); // 3
   887     JE_rel8(emptyshr );
   888     SHR_r32_CL( R_EAX );       // 2
   889     JMP_rel8(end);          // 2
   891     JMP_TARGET(emptyshr);
   892     XOR_r32_r32( R_EAX, R_EAX );
   893     JMP_rel8(end2);
   895     JMP_TARGET(doshl);
   896     AND_imm8_r8( 0x1F, R_CL ); // 3
   897     SHL_r32_CL( R_EAX );       // 2
   898     JMP_TARGET(end);
   899     JMP_TARGET(end2);
   900     store_reg( R_EAX, Rn );
   901     sh4_x86.tstate = TSTATE_NONE;
   902 :}
   903 SHAL Rn {: 
   904     COUNT_INST(I_SHAL);
   905     load_reg( R_EAX, Rn );
   906     SHL1_r32( R_EAX );
   907     SETC_t();
   908     store_reg( R_EAX, Rn );
   909     sh4_x86.tstate = TSTATE_C;
   910 :}
   911 SHAR Rn {:  
   912     COUNT_INST(I_SHAR);
   913     load_reg( R_EAX, Rn );
   914     SAR1_r32( R_EAX );
   915     SETC_t();
   916     store_reg( R_EAX, Rn );
   917     sh4_x86.tstate = TSTATE_C;
   918 :}
   919 SHLL Rn {:  
   920     COUNT_INST(I_SHLL);
   921     load_reg( R_EAX, Rn );
   922     SHL1_r32( R_EAX );
   923     SETC_t();
   924     store_reg( R_EAX, Rn );
   925     sh4_x86.tstate = TSTATE_C;
   926 :}
   927 SHLL2 Rn {:
   928     COUNT_INST(I_SHLL);
   929     load_reg( R_EAX, Rn );
   930     SHL_imm8_r32( 2, R_EAX );
   931     store_reg( R_EAX, Rn );
   932     sh4_x86.tstate = TSTATE_NONE;
   933 :}
   934 SHLL8 Rn {:  
   935     COUNT_INST(I_SHLL);
   936     load_reg( R_EAX, Rn );
   937     SHL_imm8_r32( 8, R_EAX );
   938     store_reg( R_EAX, Rn );
   939     sh4_x86.tstate = TSTATE_NONE;
   940 :}
   941 SHLL16 Rn {:  
   942     COUNT_INST(I_SHLL);
   943     load_reg( R_EAX, Rn );
   944     SHL_imm8_r32( 16, R_EAX );
   945     store_reg( R_EAX, Rn );
   946     sh4_x86.tstate = TSTATE_NONE;
   947 :}
   948 SHLR Rn {:  
   949     COUNT_INST(I_SHLR);
   950     load_reg( R_EAX, Rn );
   951     SHR1_r32( R_EAX );
   952     SETC_t();
   953     store_reg( R_EAX, Rn );
   954     sh4_x86.tstate = TSTATE_C;
   955 :}
   956 SHLR2 Rn {:  
   957     COUNT_INST(I_SHLR);
   958     load_reg( R_EAX, Rn );
   959     SHR_imm8_r32( 2, R_EAX );
   960     store_reg( R_EAX, Rn );
   961     sh4_x86.tstate = TSTATE_NONE;
   962 :}
   963 SHLR8 Rn {:  
   964     COUNT_INST(I_SHLR);
   965     load_reg( R_EAX, Rn );
   966     SHR_imm8_r32( 8, R_EAX );
   967     store_reg( R_EAX, Rn );
   968     sh4_x86.tstate = TSTATE_NONE;
   969 :}
   970 SHLR16 Rn {:  
   971     COUNT_INST(I_SHLR);
   972     load_reg( R_EAX, Rn );
   973     SHR_imm8_r32( 16, R_EAX );
   974     store_reg( R_EAX, Rn );
   975     sh4_x86.tstate = TSTATE_NONE;
   976 :}
   977 SUB Rm, Rn {:  
   978     COUNT_INST(I_SUB);
   979     load_reg( R_EAX, Rm );
   980     load_reg( R_ECX, Rn );
   981     SUB_r32_r32( R_EAX, R_ECX );
   982     store_reg( R_ECX, Rn );
   983     sh4_x86.tstate = TSTATE_NONE;
   984 :}
   985 SUBC Rm, Rn {:  
   986     COUNT_INST(I_SUBC);
   987     load_reg( R_EAX, Rm );
   988     load_reg( R_ECX, Rn );
   989     if( sh4_x86.tstate != TSTATE_C ) {
   990 	LDC_t();
   991     }
   992     SBB_r32_r32( R_EAX, R_ECX );
   993     store_reg( R_ECX, Rn );
   994     SETC_t();
   995     sh4_x86.tstate = TSTATE_C;
   996 :}
   997 SUBV Rm, Rn {:  
   998     COUNT_INST(I_SUBV);
   999     load_reg( R_EAX, Rm );
  1000     load_reg( R_ECX, Rn );
  1001     SUB_r32_r32( R_EAX, R_ECX );
  1002     store_reg( R_ECX, Rn );
  1003     SETO_t();
  1004     sh4_x86.tstate = TSTATE_O;
  1005 :}
  1006 SWAP.B Rm, Rn {:  
  1007     COUNT_INST(I_SWAPB);
  1008     load_reg( R_EAX, Rm );
  1009     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1010     store_reg( R_EAX, Rn );
  1011 :}
  1012 SWAP.W Rm, Rn {:  
   1013     COUNT_INST(I_SWAPW);
  1014     load_reg( R_EAX, Rm );
  1015     MOV_r32_r32( R_EAX, R_ECX );
  1016     SHL_imm8_r32( 16, R_ECX );
  1017     SHR_imm8_r32( 16, R_EAX );
  1018     OR_r32_r32( R_EAX, R_ECX );
  1019     store_reg( R_ECX, Rn );
  1020     sh4_x86.tstate = TSTATE_NONE;
  1021 :}
  1022 TAS.B @Rn {:  
  1023     COUNT_INST(I_TASB);
  1024     load_reg( R_EAX, Rn );
  1025     MMU_TRANSLATE_WRITE( R_EAX );
  1026     PUSH_realigned_r32( R_EAX );
  1027     MEM_READ_BYTE( R_EAX, R_EAX );
  1028     TEST_r8_r8( R_AL, R_AL );
  1029     SETE_t();
  1030     OR_imm8_r8( 0x80, R_AL );
  1031     POP_realigned_r32( R_ECX );
  1032     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1033     sh4_x86.tstate = TSTATE_NONE;
  1034 :}
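/* C model of TAS.B above (sketch): a test-and-set that is bus-locked on
 * real hardware but can be a plain read-modify-write in the emulator:
 *
 *   uint8_t v = read_byte(addr);   // addr = translated Rn
 *   sh4r.t = (v == 0);
 *   write_byte(addr, v | 0x80);
 */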
  1035 TST Rm, Rn {:  
  1036     COUNT_INST(I_TST);
  1037     load_reg( R_EAX, Rm );
  1038     load_reg( R_ECX, Rn );
  1039     TEST_r32_r32( R_EAX, R_ECX );
  1040     SETE_t();
  1041     sh4_x86.tstate = TSTATE_E;
  1042 :}
  1043 TST #imm, R0 {:  
  1044     COUNT_INST(I_TSTI);
  1045     load_reg( R_EAX, 0 );
  1046     TEST_imm32_r32( imm, R_EAX );
  1047     SETE_t();
  1048     sh4_x86.tstate = TSTATE_E;
  1049 :}
  1050 TST.B #imm, @(R0, GBR) {:  
  1051     COUNT_INST(I_TSTB);
  1052     load_reg( R_EAX, 0);
   1053     load_spreg( R_ECX, R_GBR );
  1054     ADD_r32_r32( R_ECX, R_EAX );
  1055     MMU_TRANSLATE_READ( R_EAX );
  1056     MEM_READ_BYTE( R_EAX, R_EAX );
  1057     TEST_imm8_r8( imm, R_AL );
  1058     SETE_t();
  1059     sh4_x86.tstate = TSTATE_E;
  1060 :}
  1061 XOR Rm, Rn {:  
  1062     COUNT_INST(I_XOR);
  1063     load_reg( R_EAX, Rm );
  1064     load_reg( R_ECX, Rn );
  1065     XOR_r32_r32( R_EAX, R_ECX );
  1066     store_reg( R_ECX, Rn );
  1067     sh4_x86.tstate = TSTATE_NONE;
  1068 :}
  1069 XOR #imm, R0 {:  
  1070     COUNT_INST(I_XORI);
  1071     load_reg( R_EAX, 0 );
  1072     XOR_imm32_r32( imm, R_EAX );
  1073     store_reg( R_EAX, 0 );
  1074     sh4_x86.tstate = TSTATE_NONE;
  1075 :}
  1076 XOR.B #imm, @(R0, GBR) {:  
  1077     COUNT_INST(I_XORB);
  1078     load_reg( R_EAX, 0 );
  1079     load_spreg( R_ECX, R_GBR );
  1080     ADD_r32_r32( R_ECX, R_EAX );
  1081     MMU_TRANSLATE_WRITE( R_EAX );
  1082     PUSH_realigned_r32(R_EAX);
  1083     MEM_READ_BYTE(R_EAX, R_EAX);
  1084     POP_realigned_r32(R_ECX);
  1085     XOR_imm32_r32( imm, R_EAX );
  1086     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1087     sh4_x86.tstate = TSTATE_NONE;
  1088 :}
  1089 XTRCT Rm, Rn {:
  1090     COUNT_INST(I_XTRCT);
  1091     load_reg( R_EAX, Rm );
  1092     load_reg( R_ECX, Rn );
  1093     SHL_imm8_r32( 16, R_EAX );
  1094     SHR_imm8_r32( 16, R_ECX );
  1095     OR_r32_r32( R_EAX, R_ECX );
  1096     store_reg( R_ECX, Rn );
  1097     sh4_x86.tstate = TSTATE_NONE;
  1098 :}
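/* C model of XTRCT above (sketch): extract the middle 32 bits of the
 * 64-bit concatenation Rm:Rn:
 *
 *   sh4r.r[Rn] = (sh4r.r[Rm] << 16) | (sh4r.r[Rn] >> 16);
 */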
  1100 /* Data move instructions */
  1101 MOV Rm, Rn {:  
  1102     COUNT_INST(I_MOV);
  1103     load_reg( R_EAX, Rm );
  1104     store_reg( R_EAX, Rn );
  1105 :}
  1106 MOV #imm, Rn {:  
  1107     COUNT_INST(I_MOVI);
  1108     load_imm32( R_EAX, imm );
  1109     store_reg( R_EAX, Rn );
  1110 :}
  1111 MOV.B Rm, @Rn {:  
  1112     COUNT_INST(I_MOVB);
  1113     load_reg( R_EAX, Rn );
  1114     MMU_TRANSLATE_WRITE( R_EAX );
  1115     load_reg( R_EDX, Rm );
  1116     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1117     sh4_x86.tstate = TSTATE_NONE;
  1118 :}
  1119 MOV.B Rm, @-Rn {:  
  1120     COUNT_INST(I_MOVB);
  1121     load_reg( R_EAX, Rn );
  1122     ADD_imm8s_r32( -1, R_EAX );
  1123     MMU_TRANSLATE_WRITE( R_EAX );
  1124     load_reg( R_EDX, Rm );
  1125     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1126     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1127     sh4_x86.tstate = TSTATE_NONE;
  1128 :}
  1129 MOV.B Rm, @(R0, Rn) {:  
  1130     COUNT_INST(I_MOVB);
  1131     load_reg( R_EAX, 0 );
  1132     load_reg( R_ECX, Rn );
  1133     ADD_r32_r32( R_ECX, R_EAX );
  1134     MMU_TRANSLATE_WRITE( R_EAX );
  1135     load_reg( R_EDX, Rm );
  1136     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1137     sh4_x86.tstate = TSTATE_NONE;
  1138 :}
  1139 MOV.B R0, @(disp, GBR) {:  
  1140     COUNT_INST(I_MOVB);
  1141     load_spreg( R_EAX, R_GBR );
  1142     ADD_imm32_r32( disp, R_EAX );
  1143     MMU_TRANSLATE_WRITE( R_EAX );
  1144     load_reg( R_EDX, 0 );
  1145     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1146     sh4_x86.tstate = TSTATE_NONE;
  1147 :}
  1148 MOV.B R0, @(disp, Rn) {:  
  1149     COUNT_INST(I_MOVB);
  1150     load_reg( R_EAX, Rn );
  1151     ADD_imm32_r32( disp, R_EAX );
  1152     MMU_TRANSLATE_WRITE( R_EAX );
  1153     load_reg( R_EDX, 0 );
  1154     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1155     sh4_x86.tstate = TSTATE_NONE;
  1156 :}
  1157 MOV.B @Rm, Rn {:  
  1158     COUNT_INST(I_MOVB);
  1159     load_reg( R_EAX, Rm );
  1160     MMU_TRANSLATE_READ( R_EAX );
  1161     MEM_READ_BYTE( R_EAX, R_EAX );
  1162     store_reg( R_EAX, Rn );
  1163     sh4_x86.tstate = TSTATE_NONE;
  1164 :}
  1165 MOV.B @Rm+, Rn {:  
  1166     COUNT_INST(I_MOVB);
  1167     load_reg( R_EAX, Rm );
  1168     MMU_TRANSLATE_READ( R_EAX );
  1169     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1170     MEM_READ_BYTE( R_EAX, R_EAX );
  1171     store_reg( R_EAX, Rn );
  1172     sh4_x86.tstate = TSTATE_NONE;
  1173 :}
  1174 MOV.B @(R0, Rm), Rn {:  
  1175     COUNT_INST(I_MOVB);
  1176     load_reg( R_EAX, 0 );
  1177     load_reg( R_ECX, Rm );
  1178     ADD_r32_r32( R_ECX, R_EAX );
   1179     MMU_TRANSLATE_READ( R_EAX );
  1180     MEM_READ_BYTE( R_EAX, R_EAX );
  1181     store_reg( R_EAX, Rn );
  1182     sh4_x86.tstate = TSTATE_NONE;
  1183 :}
  1184 MOV.B @(disp, GBR), R0 {:  
  1185     COUNT_INST(I_MOVB);
  1186     load_spreg( R_EAX, R_GBR );
  1187     ADD_imm32_r32( disp, R_EAX );
  1188     MMU_TRANSLATE_READ( R_EAX );
  1189     MEM_READ_BYTE( R_EAX, R_EAX );
  1190     store_reg( R_EAX, 0 );
  1191     sh4_x86.tstate = TSTATE_NONE;
  1192 :}
  1193 MOV.B @(disp, Rm), R0 {:  
  1194     COUNT_INST(I_MOVB);
  1195     load_reg( R_EAX, Rm );
  1196     ADD_imm32_r32( disp, R_EAX );
  1197     MMU_TRANSLATE_READ( R_EAX );
  1198     MEM_READ_BYTE( R_EAX, R_EAX );
  1199     store_reg( R_EAX, 0 );
  1200     sh4_x86.tstate = TSTATE_NONE;
  1201 :}
  1202 MOV.L Rm, @Rn {:
  1203     COUNT_INST(I_MOVL);
  1204     load_reg( R_EAX, Rn );
  1205     check_walign32(R_EAX);
  1206     MMU_TRANSLATE_WRITE( R_EAX );
  1207     load_reg( R_EDX, Rm );
  1208     MEM_WRITE_LONG( R_EAX, R_EDX );
  1209     sh4_x86.tstate = TSTATE_NONE;
  1210 :}
  1211 MOV.L Rm, @-Rn {:  
  1212     COUNT_INST(I_MOVL);
  1213     load_reg( R_EAX, Rn );
  1214     ADD_imm8s_r32( -4, R_EAX );
  1215     check_walign32( R_EAX );
  1216     MMU_TRANSLATE_WRITE( R_EAX );
  1217     load_reg( R_EDX, Rm );
  1218     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1219     MEM_WRITE_LONG( R_EAX, R_EDX );
  1220     sh4_x86.tstate = TSTATE_NONE;
  1221 :}
  1222 MOV.L Rm, @(R0, Rn) {:  
  1223     COUNT_INST(I_MOVL);
  1224     load_reg( R_EAX, 0 );
  1225     load_reg( R_ECX, Rn );
  1226     ADD_r32_r32( R_ECX, R_EAX );
  1227     check_walign32( R_EAX );
  1228     MMU_TRANSLATE_WRITE( R_EAX );
  1229     load_reg( R_EDX, Rm );
  1230     MEM_WRITE_LONG( R_EAX, R_EDX );
  1231     sh4_x86.tstate = TSTATE_NONE;
  1232 :}
  1233 MOV.L R0, @(disp, GBR) {:  
  1234     COUNT_INST(I_MOVL);
  1235     load_spreg( R_EAX, R_GBR );
  1236     ADD_imm32_r32( disp, R_EAX );
  1237     check_walign32( R_EAX );
  1238     MMU_TRANSLATE_WRITE( R_EAX );
  1239     load_reg( R_EDX, 0 );
  1240     MEM_WRITE_LONG( R_EAX, R_EDX );
  1241     sh4_x86.tstate = TSTATE_NONE;
  1242 :}
  1243 MOV.L Rm, @(disp, Rn) {:  
  1244     COUNT_INST(I_MOVL);
  1245     load_reg( R_EAX, Rn );
  1246     ADD_imm32_r32( disp, R_EAX );
  1247     check_walign32( R_EAX );
  1248     MMU_TRANSLATE_WRITE( R_EAX );
  1249     load_reg( R_EDX, Rm );
  1250     MEM_WRITE_LONG( R_EAX, R_EDX );
  1251     sh4_x86.tstate = TSTATE_NONE;
  1252 :}
  1253 MOV.L @Rm, Rn {:  
  1254     COUNT_INST(I_MOVL);
  1255     load_reg( R_EAX, Rm );
  1256     check_ralign32( R_EAX );
  1257     MMU_TRANSLATE_READ( R_EAX );
  1258     MEM_READ_LONG( R_EAX, R_EAX );
  1259     store_reg( R_EAX, Rn );
  1260     sh4_x86.tstate = TSTATE_NONE;
  1261 :}
  1262 MOV.L @Rm+, Rn {:  
  1263     COUNT_INST(I_MOVL);
  1264     load_reg( R_EAX, Rm );
  1265     check_ralign32( R_EAX );
  1266     MMU_TRANSLATE_READ( R_EAX );
  1267     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1268     MEM_READ_LONG( R_EAX, R_EAX );
  1269     store_reg( R_EAX, Rn );
  1270     sh4_x86.tstate = TSTATE_NONE;
  1271 :}
  1272 MOV.L @(R0, Rm), Rn {:  
  1273     COUNT_INST(I_MOVL);
  1274     load_reg( R_EAX, 0 );
  1275     load_reg( R_ECX, Rm );
  1276     ADD_r32_r32( R_ECX, R_EAX );
  1277     check_ralign32( R_EAX );
  1278     MMU_TRANSLATE_READ( R_EAX );
  1279     MEM_READ_LONG( R_EAX, R_EAX );
  1280     store_reg( R_EAX, Rn );
  1281     sh4_x86.tstate = TSTATE_NONE;
  1282 :}
  1283 MOV.L @(disp, GBR), R0 {:
  1284     COUNT_INST(I_MOVL);
  1285     load_spreg( R_EAX, R_GBR );
  1286     ADD_imm32_r32( disp, R_EAX );
  1287     check_ralign32( R_EAX );
  1288     MMU_TRANSLATE_READ( R_EAX );
  1289     MEM_READ_LONG( R_EAX, R_EAX );
  1290     store_reg( R_EAX, 0 );
  1291     sh4_x86.tstate = TSTATE_NONE;
  1292 :}
  1293 MOV.L @(disp, PC), Rn {:  
  1294     COUNT_INST(I_MOVLPC);
  1295     if( sh4_x86.in_delay_slot ) {
  1296 	SLOTILLEGAL();
  1297     } else {
  1298 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1299 	if( IS_IN_ICACHE(target) ) {
  1300 	    // If the target address is in the same page as the code, it's
  1301 	    // pretty safe to just ref it directly and circumvent the whole
  1302 	    // memory subsystem. (this is a big performance win)
  1304 	    // FIXME: There's a corner-case that's not handled here when
  1305 	    // the current code-page is in the ITLB but not in the UTLB.
  1306 	    // (should generate a TLB miss although need to test SH4 
  1307 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1308 	    // behaviour though.
  1309 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1310 	    MOV_moff32_EAX( ptr );
  1311 	} else {
  1312 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1313 	    // different virtual address than the translation was done with,
  1314 	    // but we can safely assume that the low bits are the same.
  1315 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1316 	    ADD_sh4r_r32( R_PC, R_EAX );
  1317 	    MMU_TRANSLATE_READ( R_EAX );
  1318 	    MEM_READ_LONG( R_EAX, R_EAX );
   1319 	    sh4_x86.tstate = TSTATE_NONE;
   1320 	}
   1321 	store_reg( R_EAX, Rn );
   1322     }
  1323 :}
  1324 MOV.L @(disp, Rm), Rn {:  
  1325     COUNT_INST(I_MOVL);
  1326     load_reg( R_EAX, Rm );
  1327     ADD_imm8s_r32( disp, R_EAX );
  1328     check_ralign32( R_EAX );
  1329     MMU_TRANSLATE_READ( R_EAX );
  1330     MEM_READ_LONG( R_EAX, R_EAX );
  1331     store_reg( R_EAX, Rn );
  1332     sh4_x86.tstate = TSTATE_NONE;
  1333 :}
  1334 MOV.W Rm, @Rn {:  
  1335     COUNT_INST(I_MOVW);
  1336     load_reg( R_EAX, Rn );
  1337     check_walign16( R_EAX );
   1338     MMU_TRANSLATE_WRITE( R_EAX );
  1339     load_reg( R_EDX, Rm );
  1340     MEM_WRITE_WORD( R_EAX, R_EDX );
  1341     sh4_x86.tstate = TSTATE_NONE;
  1342 :}
  1343 MOV.W Rm, @-Rn {:  
  1344     COUNT_INST(I_MOVW);
  1345     load_reg( R_EAX, Rn );
  1346     ADD_imm8s_r32( -2, R_EAX );
  1347     check_walign16( R_EAX );
  1348     MMU_TRANSLATE_WRITE( R_EAX );
  1349     load_reg( R_EDX, Rm );
  1350     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1351     MEM_WRITE_WORD( R_EAX, R_EDX );
  1352     sh4_x86.tstate = TSTATE_NONE;
  1353 :}
  1354 MOV.W Rm, @(R0, Rn) {:  
  1355     COUNT_INST(I_MOVW);
  1356     load_reg( R_EAX, 0 );
  1357     load_reg( R_ECX, Rn );
  1358     ADD_r32_r32( R_ECX, R_EAX );
  1359     check_walign16( R_EAX );
  1360     MMU_TRANSLATE_WRITE( R_EAX );
  1361     load_reg( R_EDX, Rm );
  1362     MEM_WRITE_WORD( R_EAX, R_EDX );
  1363     sh4_x86.tstate = TSTATE_NONE;
  1364 :}
  1365 MOV.W R0, @(disp, GBR) {:  
  1366     COUNT_INST(I_MOVW);
  1367     load_spreg( R_EAX, R_GBR );
  1368     ADD_imm32_r32( disp, R_EAX );
  1369     check_walign16( R_EAX );
  1370     MMU_TRANSLATE_WRITE( R_EAX );
  1371     load_reg( R_EDX, 0 );
  1372     MEM_WRITE_WORD( R_EAX, R_EDX );
  1373     sh4_x86.tstate = TSTATE_NONE;
  1374 :}
  1375 MOV.W R0, @(disp, Rn) {:  
  1376     COUNT_INST(I_MOVW);
  1377     load_reg( R_EAX, Rn );
  1378     ADD_imm32_r32( disp, R_EAX );
  1379     check_walign16( R_EAX );
  1380     MMU_TRANSLATE_WRITE( R_EAX );
  1381     load_reg( R_EDX, 0 );
  1382     MEM_WRITE_WORD( R_EAX, R_EDX );
  1383     sh4_x86.tstate = TSTATE_NONE;
  1384 :}
  1385 MOV.W @Rm, Rn {:  
  1386     COUNT_INST(I_MOVW);
  1387     load_reg( R_EAX, Rm );
  1388     check_ralign16( R_EAX );
  1389     MMU_TRANSLATE_READ( R_EAX );
  1390     MEM_READ_WORD( R_EAX, R_EAX );
  1391     store_reg( R_EAX, Rn );
  1392     sh4_x86.tstate = TSTATE_NONE;
  1393 :}
  1394 MOV.W @Rm+, Rn {:  
  1395     COUNT_INST(I_MOVW);
  1396     load_reg( R_EAX, Rm );
  1397     check_ralign16( R_EAX );
  1398     MMU_TRANSLATE_READ( R_EAX );
  1399     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1400     MEM_READ_WORD( R_EAX, R_EAX );
  1401     store_reg( R_EAX, Rn );
  1402     sh4_x86.tstate = TSTATE_NONE;
  1403 :}
  1404 MOV.W @(R0, Rm), Rn {:  
  1405     COUNT_INST(I_MOVW);
  1406     load_reg( R_EAX, 0 );
  1407     load_reg( R_ECX, Rm );
  1408     ADD_r32_r32( R_ECX, R_EAX );
  1409     check_ralign16( R_EAX );
  1410     MMU_TRANSLATE_READ( R_EAX );
  1411     MEM_READ_WORD( R_EAX, R_EAX );
  1412     store_reg( R_EAX, Rn );
  1413     sh4_x86.tstate = TSTATE_NONE;
  1414 :}
  1415 MOV.W @(disp, GBR), R0 {:  
  1416     COUNT_INST(I_MOVW);
  1417     load_spreg( R_EAX, R_GBR );
  1418     ADD_imm32_r32( disp, R_EAX );
  1419     check_ralign16( R_EAX );
  1420     MMU_TRANSLATE_READ( R_EAX );
  1421     MEM_READ_WORD( R_EAX, R_EAX );
  1422     store_reg( R_EAX, 0 );
  1423     sh4_x86.tstate = TSTATE_NONE;
  1424 :}
  1425 MOV.W @(disp, PC), Rn {:  
  1426     COUNT_INST(I_MOVW);
  1427     if( sh4_x86.in_delay_slot ) {
  1428 	SLOTILLEGAL();
  1429     } else {
  1430 	// See comments for MOV.L @(disp, PC), Rn
  1431 	uint32_t target = pc + disp + 4;
  1432 	if( IS_IN_ICACHE(target) ) {
  1433 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1434 	    MOV_moff32_EAX( ptr );
  1435 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1436 	} else {
  1437 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1438 	    ADD_sh4r_r32( R_PC, R_EAX );
  1439 	    MMU_TRANSLATE_READ( R_EAX );
  1440 	    MEM_READ_WORD( R_EAX, R_EAX );
   1441 	    sh4_x86.tstate = TSTATE_NONE;
   1442 	}
   1443 	store_reg( R_EAX, Rn );
   1444     }
  1445 :}
  1446 MOV.W @(disp, Rm), R0 {:  
  1447     COUNT_INST(I_MOVW);
  1448     load_reg( R_EAX, Rm );
  1449     ADD_imm32_r32( disp, R_EAX );
  1450     check_ralign16( R_EAX );
  1451     MMU_TRANSLATE_READ( R_EAX );
  1452     MEM_READ_WORD( R_EAX, R_EAX );
  1453     store_reg( R_EAX, 0 );
  1454     sh4_x86.tstate = TSTATE_NONE;
  1455 :}
  1456 MOVA @(disp, PC), R0 {:  
  1457     COUNT_INST(I_MOVA);
  1458     if( sh4_x86.in_delay_slot ) {
  1459 	SLOTILLEGAL();
  1460     } else {
  1461 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1462 	ADD_sh4r_r32( R_PC, R_ECX );
  1463 	store_reg( R_ECX, 0 );
   1464 	sh4_x86.tstate = TSTATE_NONE;
   1465     }
  1466 :}
  1467 MOVCA.L R0, @Rn {:  
  1468     COUNT_INST(I_MOVCA);
  1469     load_reg( R_EAX, Rn );
  1470     check_walign32( R_EAX );
  1471     MMU_TRANSLATE_WRITE( R_EAX );
  1472     load_reg( R_EDX, 0 );
  1473     MEM_WRITE_LONG( R_EAX, R_EDX );
  1474     sh4_x86.tstate = TSTATE_NONE;
  1475 :}
  1477 /* Control transfer instructions */
  1478 BF disp {:
  1479     COUNT_INST(I_BF);
  1480     if( sh4_x86.in_delay_slot ) {
  1481 	SLOTILLEGAL();
  1482     } else {
  1483 	sh4vma_t target = disp + pc + 4;
  1484 	JT_rel8( nottaken );
  1485 	exit_block_rel(target, pc+2 );
  1486 	JMP_TARGET(nottaken);
   1487 	return 2;
   1488     }
  1489 :}
  1490 BF/S disp {:
  1491     COUNT_INST(I_BFS);
  1492     if( sh4_x86.in_delay_slot ) {
  1493 	SLOTILLEGAL();
  1494     } else {
  1495 	sh4_x86.in_delay_slot = DELAY_PC;
  1496 	if( UNTRANSLATABLE(pc+2) ) {
  1497 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1498 	    JT_rel8(nottaken);
  1499 	    ADD_imm32_r32( disp, R_EAX );
  1500 	    JMP_TARGET(nottaken);
  1501 	    ADD_sh4r_r32( R_PC, R_EAX );
  1502 	    store_spreg( R_EAX, R_NEW_PC );
  1503 	    exit_block_emu(pc+2);
  1504 	    sh4_x86.branch_taken = TRUE;
  1505 	    return 2;
  1506 	} else {
  1507 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1508 		CMP_imm8s_sh4r( 1, R_T );
   1509 		sh4_x86.tstate = TSTATE_E;
   1510 	    }
  1511 	    sh4vma_t target = disp + pc + 4;
  1512 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1513 	    sh4_translate_instruction(pc+2);
  1514 	    exit_block_rel( target, pc+4 );
  1516 	    // not taken
  1517 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1518 	    sh4_translate_instruction(pc+2);
   1519 	    return 4;
   1520 	}
   1521     }
  1522 :}
  1523 BRA disp {:  
  1524     COUNT_INST(I_BRA);
  1525     if( sh4_x86.in_delay_slot ) {
  1526 	SLOTILLEGAL();
  1527     } else {
  1528 	sh4_x86.in_delay_slot = DELAY_PC;
  1529 	sh4_x86.branch_taken = TRUE;
  1530 	if( UNTRANSLATABLE(pc+2) ) {
  1531 	    load_spreg( R_EAX, R_PC );
  1532 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1533 	    store_spreg( R_EAX, R_NEW_PC );
  1534 	    exit_block_emu(pc+2);
  1535 	    return 2;
  1536 	} else {
  1537 	    sh4_translate_instruction( pc + 2 );
  1538 	    exit_block_rel( disp + pc + 4, pc+4 );
   1539 	    return 4;
   1540 	}
   1541     }
  1542 :}
  1543 BRAF Rn {:  
  1544     COUNT_INST(I_BRAF);
  1545     if( sh4_x86.in_delay_slot ) {
  1546 	SLOTILLEGAL();
  1547     } else {
  1548 	load_spreg( R_EAX, R_PC );
  1549 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1550 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1551 	store_spreg( R_EAX, R_NEW_PC );
  1552 	sh4_x86.in_delay_slot = DELAY_PC;
  1553 	sh4_x86.tstate = TSTATE_NONE;
  1554 	sh4_x86.branch_taken = TRUE;
  1555 	if( UNTRANSLATABLE(pc+2) ) {
  1556 	    exit_block_emu(pc+2);
  1557 	    return 2;
  1558 	} else {
  1559 	    sh4_translate_instruction( pc + 2 );
  1560 	    exit_block_newpcset(pc+2);
   1561 	    return 4;
   1562 	}
   1563     }
  1564 :}
  1565 BSR disp {:  
  1566     COUNT_INST(I_BSR);
  1567     if( sh4_x86.in_delay_slot ) {
  1568 	SLOTILLEGAL();
  1569     } else {
  1570 	load_spreg( R_EAX, R_PC );
  1571 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1572 	store_spreg( R_EAX, R_PR );
  1573 	sh4_x86.in_delay_slot = DELAY_PC;
  1574 	sh4_x86.branch_taken = TRUE;
  1575 	sh4_x86.tstate = TSTATE_NONE;
  1576 	if( UNTRANSLATABLE(pc+2) ) {
  1577 	    ADD_imm32_r32( disp, R_EAX );
  1578 	    store_spreg( R_EAX, R_NEW_PC );
  1579 	    exit_block_emu(pc+2);
  1580 	    return 2;
  1581 	} else {
  1582 	    sh4_translate_instruction( pc + 2 );
  1583 	    exit_block_rel( disp + pc + 4, pc+4 );
   1584 	    return 4;
   1585 	}
   1586     }
  1587 :}
  1588 BSRF Rn {:  
  1589     COUNT_INST(I_BSRF);
  1590     if( sh4_x86.in_delay_slot ) {
  1591 	SLOTILLEGAL();
  1592     } else {
  1593 	load_spreg( R_EAX, R_PC );
  1594 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1595 	store_spreg( R_EAX, R_PR );
  1596 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1597 	store_spreg( R_EAX, R_NEW_PC );
  1599 	sh4_x86.in_delay_slot = DELAY_PC;
  1600 	sh4_x86.tstate = TSTATE_NONE;
  1601 	sh4_x86.branch_taken = TRUE;
  1602 	if( UNTRANSLATABLE(pc+2) ) {
  1603 	    exit_block_emu(pc+2);
  1604 	    return 2;
  1605 	} else {
  1606 	    sh4_translate_instruction( pc + 2 );
  1607 	    exit_block_newpcset(pc+2);
   1608 	    return 4;
   1609 	}
   1610     }
  1611 :}
  1612 BT disp {:
  1613     COUNT_INST(I_BT);
  1614     if( sh4_x86.in_delay_slot ) {
  1615 	SLOTILLEGAL();
  1616     } else {
  1617 	sh4vma_t target = disp + pc + 4;
  1618 	JF_rel8( nottaken );
  1619 	exit_block_rel(target, pc+2 );
  1620 	JMP_TARGET(nottaken);
   1621 	return 2;
   1622     }
  1623 :}
  1624 BT/S disp {:
  1625     COUNT_INST(I_BTS);
  1626     if( sh4_x86.in_delay_slot ) {
  1627 	SLOTILLEGAL();
  1628     } else {
  1629 	sh4_x86.in_delay_slot = DELAY_PC;
  1630 	if( UNTRANSLATABLE(pc+2) ) {
  1631 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1632 	    JF_rel8(nottaken);
  1633 	    ADD_imm32_r32( disp, R_EAX );
  1634 	    JMP_TARGET(nottaken);
  1635 	    ADD_sh4r_r32( R_PC, R_EAX );
  1636 	    store_spreg( R_EAX, R_NEW_PC );
  1637 	    exit_block_emu(pc+2);
  1638 	    sh4_x86.branch_taken = TRUE;
  1639 	    return 2;
  1640 	} else {
  1641 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1642 		CMP_imm8s_sh4r( 1, R_T );
   1643 		sh4_x86.tstate = TSTATE_E;
   1644 	    }
  1645 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1646 	    sh4_translate_instruction(pc+2);
  1647 	    exit_block_rel( disp + pc + 4, pc+4 );
  1648 	    // not taken
  1649 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1650 	    sh4_translate_instruction(pc+2);
   1651 	    return 4;
   1652 	}
   1653     }
  1654 :}
  1655 JMP @Rn {:  
  1656     COUNT_INST(I_JMP);
  1657     if( sh4_x86.in_delay_slot ) {
  1658 	SLOTILLEGAL();
  1659     } else {
  1660 	load_reg( R_ECX, Rn );
  1661 	store_spreg( R_ECX, R_NEW_PC );
  1662 	sh4_x86.in_delay_slot = DELAY_PC;
  1663 	sh4_x86.branch_taken = TRUE;
  1664 	if( UNTRANSLATABLE(pc+2) ) {
  1665 	    exit_block_emu(pc+2);
  1666 	    return 2;
  1667 	} else {
  1668 	    sh4_translate_instruction(pc+2);
  1669 	    exit_block_newpcset(pc+2);
  1670 	    return 4;
  1671 	}
  1672     }
  1673 :}
  1674 JSR @Rn {:  
  1675     COUNT_INST(I_JSR);
  1676     if( sh4_x86.in_delay_slot ) {
  1677 	SLOTILLEGAL();
  1678     } else {
  1679 	load_spreg( R_EAX, R_PC );
  1680 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1681 	store_spreg( R_EAX, R_PR );
  1682 	load_reg( R_ECX, Rn );
  1683 	store_spreg( R_ECX, R_NEW_PC );
  1684 	sh4_x86.in_delay_slot = DELAY_PC;
  1685 	sh4_x86.branch_taken = TRUE;
  1686 	sh4_x86.tstate = TSTATE_NONE;
  1687 	if( UNTRANSLATABLE(pc+2) ) {
  1688 	    exit_block_emu(pc+2);
  1689 	    return 2;
  1690 	} else {
  1691 	    sh4_translate_instruction(pc+2);
  1692 	    exit_block_newpcset(pc+2);
  1693 	    return 4;
  1694 	}
  1695     }
  1696 :}
  1697 RTE {:  
  1698     COUNT_INST(I_RTE);
  1699     if( sh4_x86.in_delay_slot ) {
  1700 	SLOTILLEGAL();
  1701     } else {
  1702 	check_priv();
  1703 	load_spreg( R_ECX, R_SPC );
  1704 	store_spreg( R_ECX, R_NEW_PC );
  1705 	load_spreg( R_EAX, R_SSR );
  1706 	call_func1( sh4_write_sr, R_EAX );
  1707 	sh4_x86.in_delay_slot = DELAY_PC;
  1708 	sh4_x86.priv_checked = FALSE;
  1709 	sh4_x86.fpuen_checked = FALSE;
  1710 	sh4_x86.tstate = TSTATE_NONE;
  1711 	sh4_x86.branch_taken = TRUE;
  1712 	if( UNTRANSLATABLE(pc+2) ) {
  1713 	    exit_block_emu(pc+2);
  1714 	    return 2;
  1715 	} else {
  1716 	    sh4_translate_instruction(pc+2);
  1717 	    exit_block_newpcset(pc+2);
  1718 	    return 4;
  1719 	}
  1720     }
  1721 :}
  1722 RTS {:  
  1723     COUNT_INST(I_RTS);
  1724     if( sh4_x86.in_delay_slot ) {
  1725 	SLOTILLEGAL();
  1726     } else {
  1727 	load_spreg( R_ECX, R_PR );
  1728 	store_spreg( R_ECX, R_NEW_PC );
  1729 	sh4_x86.in_delay_slot = DELAY_PC;
  1730 	sh4_x86.branch_taken = TRUE;
  1731 	if( UNTRANSLATABLE(pc+2) ) {
  1732 	    exit_block_emu(pc+2);
  1733 	    return 2;
  1734 	} else {
  1735 	    sh4_translate_instruction(pc+2);
  1736 	    exit_block_newpcset(pc+2);
  1737 	    return 4;
  1738 	}
  1739     }
  1740 :}
  1741 TRAPA #imm {:  
  1742     COUNT_INST(I_TRAPA);
  1743     if( sh4_x86.in_delay_slot ) {
  1744 	SLOTILLEGAL();
  1745     } else {
  1746 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1747 	ADD_r32_sh4r( R_ECX, R_PC );
  1748 	load_imm32( R_EAX, imm );
  1749 	call_func1( sh4_raise_trap, R_EAX );
  1750 	sh4_x86.tstate = TSTATE_NONE;
  1751 	exit_block_pcset(pc);
  1752 	sh4_x86.branch_taken = TRUE;
  1753 	return 2;
  1754     }
  1755 :}
  1756 UNDEF {:  
  1757     COUNT_INST(I_UNDEF);
  1758     if( sh4_x86.in_delay_slot ) {
  1759 	SLOTILLEGAL();
  1760     } else {
  1761 	JMP_exc(EXC_ILLEGAL);
  1762 	return 2;
  1763     }
  1764 :}
  1766 CLRMAC {:  
  1767     COUNT_INST(I_CLRMAC);
  1768     XOR_r32_r32(R_EAX, R_EAX);
  1769     store_spreg( R_EAX, R_MACL );
  1770     store_spreg( R_EAX, R_MACH );
  1771     sh4_x86.tstate = TSTATE_NONE;
  1772 :}
  1773 CLRS {:
  1774     COUNT_INST(I_CLRS);
  1775     CLC();
  1776     SETC_sh4r(R_S);
  1777     sh4_x86.tstate = TSTATE_C;
  1778 :}
  1779 CLRT {:  
  1780     COUNT_INST(I_CLRT);
  1781     CLC();
  1782     SETC_t();
  1783     sh4_x86.tstate = TSTATE_C;
  1784 :}
  1785 SETS {:  
  1786     COUNT_INST(I_SETS);
  1787     STC();
  1788     SETC_sh4r(R_S);
  1789     sh4_x86.tstate = TSTATE_C;
  1790 :}
  1791 SETT {:  
  1792     COUNT_INST(I_SETT);
  1793     STC();
  1794     SETC_t();
  1795     sh4_x86.tstate = TSTATE_C;
  1796 :}
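/* [Editor's note] CLRS/CLRT/SETS/SETT go through the host carry flag
 * (CLC/STC followed by a SETC store) instead of writing an immediate, so
 * that sh4_x86.tstate can record that the x86 carry now mirrors T; a
 * following JT/JF can then branch on the live flag without reloading
 * sh4r.t from memory. The architectural effect is simply sh4r.t = 0/1
 * (resp. sh4r.s for CLRS/SETS). */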
  1798 /* Floating point moves */
  1799 FMOV FRm, FRn {:  
  1800     COUNT_INST(I_FMOV1);
  1801     /* As horrible as this looks, it's actually covering 5 separate cases:
  1802      * 1. 32-bit fr-to-fr (PR=0)
  1803      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1804      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1805      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1806      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1807      */
  1808     check_fpuen();
  1809     load_spreg( R_ECX, R_FPSCR );
  1810     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1811     JNE_rel8(doublesize);
  1812     load_fr( R_EAX, FRm ); // PR=0 branch
  1813     store_fr( R_EAX, FRn );
  1814     JMP_rel8(end);
  1815     JMP_TARGET(doublesize);
  1816     load_dr0( R_EAX, FRm );
  1817     load_dr1( R_ECX, FRm );
  1818     store_dr0( R_EAX, FRn );
  1819     store_dr1( R_ECX, FRn );
  1820     JMP_TARGET(end);
  1821     sh4_x86.tstate = TSTATE_NONE;
  1822 :}
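/* [Editor's note] FPSCR.SZ is not known at translation time, so the block
 * above emits both widths behind a runtime test. In C terms the generated
 * code behaves like the sketch below (register-file indexing as implied by
 * the REG_OFFSET(fr[...]) uses elsewhere in this file; the helper itself is
 * illustrative): */
static void fmov_rr_sketch( int FRm, int FRn )
{
    if( sh4r.fpscr & FPSCR_SZ ) {
	/* SZ=1: 64-bit pair move; the operands' low bits select the front
	 * (DR) or back (XD) bank, which load_dr0/load_dr1 encapsulate */
    } else {
	sh4r.fr[0][FRn] = sh4r.fr[0][FRm]; /* SZ=0: plain 32-bit move */
    }
}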
  1823 FMOV FRm, @Rn {: 
  1824     COUNT_INST(I_FMOV2);
  1825     check_fpuen();
  1826     load_reg( R_EAX, Rn );
  1827     check_walign32( R_EAX );
  1828     MMU_TRANSLATE_WRITE( R_EAX );
  1829     load_spreg( R_EDX, R_FPSCR );
  1830     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1831     JNE_rel8(doublesize);
  1833     load_fr( R_ECX, FRm );
  1834     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1835     JMP_rel8(end);
  1837     JMP_TARGET(doublesize);
  1838     load_dr0( R_ECX, FRm );
  1839     load_dr1( R_EDX, FRm );
  1840     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1841     JMP_TARGET(end);
  1842     sh4_x86.tstate = TSTATE_NONE;
  1843 :}
  1844 FMOV @Rm, FRn {:  
  1845     COUNT_INST(I_FMOV5);
  1846     check_fpuen();
  1847     load_reg( R_EAX, Rm );
  1848     check_ralign32( R_EAX );
  1849     MMU_TRANSLATE_READ( R_EAX );
  1850     load_spreg( R_EDX, R_FPSCR );
  1851     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1852     JNE_rel8(doublesize);
  1854     MEM_READ_LONG( R_EAX, R_EAX );
  1855     store_fr( R_EAX, FRn );
  1856     JMP_rel8(end);
  1858     JMP_TARGET(doublesize);
  1859     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1860     store_dr0( R_ECX, FRn );
  1861     store_dr1( R_EAX, FRn );
  1862     JMP_TARGET(end);
  1863     sh4_x86.tstate = TSTATE_NONE;
  1864 :}
  1865 FMOV FRm, @-Rn {:  
  1866     COUNT_INST(I_FMOV3);
  1867     check_fpuen();
  1868     load_reg( R_EAX, Rn );
  1869     check_walign32( R_EAX );
  1870     load_spreg( R_EDX, R_FPSCR );
  1871     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1872     JNE_rel8(doublesize);
  1874     ADD_imm8s_r32( -4, R_EAX );
  1875     MMU_TRANSLATE_WRITE( R_EAX );
  1876     load_fr( R_ECX, FRm );
  1877     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1878     MEM_WRITE_LONG( R_EAX, R_ECX );
  1879     JMP_rel8(end);
  1881     JMP_TARGET(doublesize);
  1882     ADD_imm8s_r32(-8,R_EAX);
  1883     MMU_TRANSLATE_WRITE( R_EAX );
  1884     load_dr0( R_ECX, FRm );
  1885     load_dr1( R_EDX, FRm );
  1886     ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1887     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1888     JMP_TARGET(end);
  1890     sh4_x86.tstate = TSTATE_NONE;
  1891 :}
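/* [Editor's note] Ordering matters in the pre-decrement store above: the
 * decremented address is formed in R_EAX and passed through
 * MMU_TRANSLATE_WRITE *before* Rn itself is decremented in sh4r, so a TLB
 * miss or address error unwinds with Rn still intact, as on hardware. The
 * same discipline appears in every @-Rn form below. */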
  1892 FMOV @Rm+, FRn {:
  1893     COUNT_INST(I_FMOV6);
  1894     check_fpuen();
  1895     load_reg( R_EAX, Rm );
  1896     check_ralign32( R_EAX );
  1897     MMU_TRANSLATE_READ( R_EAX );
  1898     load_spreg( R_EDX, R_FPSCR );
  1899     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1900     JNE_rel8(doublesize);
  1902     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1903     MEM_READ_LONG( R_EAX, R_EAX );
  1904     store_fr( R_EAX, FRn );
  1905     JMP_rel8(end);
  1907     JMP_TARGET(doublesize);
  1908     ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1909     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1910     store_dr0( R_ECX, FRn );
  1911     store_dr1( R_EAX, FRn );
  1912     JMP_TARGET(end);
  1914     sh4_x86.tstate = TSTATE_NONE;
  1915 :}
  1916 FMOV FRm, @(R0, Rn) {:  
  1917     COUNT_INST(I_FMOV4);
  1918     check_fpuen();
  1919     load_reg( R_EAX, Rn );
  1920     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1921     check_walign32( R_EAX );
  1922     MMU_TRANSLATE_WRITE( R_EAX );
  1923     load_spreg( R_EDX, R_FPSCR );
  1924     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1925     JNE_rel8(doublesize);
  1927     load_fr( R_ECX, FRm );
  1928     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1929     JMP_rel8(end);
  1931     JMP_TARGET(doublesize);
  1932     load_dr0( R_ECX, FRm );
  1933     load_dr1( R_EDX, FRm );
  1934     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1935     JMP_TARGET(end);
  1937     sh4_x86.tstate = TSTATE_NONE;
  1938 :}
  1939 FMOV @(R0, Rm), FRn {:  
  1940     COUNT_INST(I_FMOV7);
  1941     check_fpuen();
  1942     load_reg( R_EAX, Rm );
  1943     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1944     check_ralign32( R_EAX );
  1945     MMU_TRANSLATE_READ( R_EAX );
  1946     load_spreg( R_EDX, R_FPSCR );
  1947     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1948     JNE_rel8(doublesize);
  1950     MEM_READ_LONG( R_EAX, R_EAX );
  1951     store_fr( R_EAX, FRn );
  1952     JMP_rel8(end);
  1954     JMP_TARGET(doublesize);
  1955     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1956     store_dr0( R_ECX, FRn );
  1957     store_dr1( R_EAX, FRn );
  1958     JMP_TARGET(end);
  1960     sh4_x86.tstate = TSTATE_NONE;
  1961 :}
  1962 FLDI0 FRn {:  /* IFF PR=0 */
  1963     COUNT_INST(I_FLDI0);
  1964     check_fpuen();
  1965     load_spreg( R_ECX, R_FPSCR );
  1966     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1967     JNE_rel8(end);
  1968     XOR_r32_r32( R_EAX, R_EAX );
  1969     store_fr( R_EAX, FRn );
  1970     JMP_TARGET(end);
  1971     sh4_x86.tstate = TSTATE_NONE;
  1972 :}
  1973 FLDI1 FRn {:  /* IFF PR=0 */
  1974     COUNT_INST(I_FLDI1);
  1975     check_fpuen();
  1976     load_spreg( R_ECX, R_FPSCR );
  1977     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1978     JNE_rel8(end);
  1979     load_imm32(R_EAX, 0x3F800000);
  1980     store_fr( R_EAX, FRn );
  1981     JMP_TARGET(end);
  1982     sh4_x86.tstate = TSTATE_NONE;
  1983 :}
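/* [Editor's note] 0x3F800000 is the IEEE-754 single-precision bit pattern
 * for 1.0f (sign 0, biased exponent 127, mantissa 0), which is why FLDI1
 * can store the constant as a plain integer. Quick sanity check (sketch): */
static float fldi1_constant_sketch( void )
{
    union { uint32_t i; float f; } u;
    u.i = 0x3F800000;                     /* exponent bits 0x7F << 23 */
    return u.f;                           /* == 1.0f */
}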
  1985 FLOAT FPUL, FRn {:  
  1986     COUNT_INST(I_FLOAT);
  1987     check_fpuen();
  1988     load_spreg( R_ECX, R_FPSCR );
  1989     FILD_sh4r(R_FPUL);
  1990     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1991     JNE_rel8(doubleprec);
  1992     pop_fr( FRn );
  1993     JMP_rel8(end);
  1994     JMP_TARGET(doubleprec);
  1995     pop_dr( FRn );
  1996     JMP_TARGET(end);
  1997     sh4_x86.tstate = TSTATE_NONE;
  1998 :}
  1999 FTRC FRm, FPUL {:  
  2000     COUNT_INST(I_FTRC);
  2001     check_fpuen();
  2002     load_spreg( R_ECX, R_FPSCR );
  2003     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2004     JNE_rel8(doubleprec);
  2005     push_fr( FRm );
  2006     JMP_rel8(doop);
  2007     JMP_TARGET(doubleprec);
  2008     push_dr( FRm );
  2009     JMP_TARGET( doop );
  2010     load_imm32( R_ECX, (uint32_t)&max_int );
  2011     FILD_r32ind( R_ECX );
  2012     FCOMIP_st(1);
  2013     JNA_rel8( sat );
  2014     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  2015     FILD_r32ind( R_ECX );           // 2
  2016     FCOMIP_st(1);                   // 2
  2017     JAE_rel8( sat2 );            // 2
  2018     load_imm32( R_EAX, (uint32_t)&save_fcw );
  2019     FNSTCW_r32ind( R_EAX );
  2020     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  2021     FLDCW_r32ind( R_EDX );
  2022     FISTP_sh4r(R_FPUL);             // 3
  2023     FLDCW_r32ind( R_EAX );
  2024     JMP_rel8(end);             // 2
  2026     JMP_TARGET(sat);
  2027     JMP_TARGET(sat2);
  2028     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2029     store_spreg( R_ECX, R_FPUL );
  2030     FPOP_st();
  2031     JMP_TARGET(end);
  2032     sh4_x86.tstate = TSTATE_NONE;
  2033 :}
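/* [Editor's note] FTRC wants round-toward-zero, but the x87 FPU defaults to
 * round-to-nearest, hence the FNSTCW/FLDCW pair around FISTP above
 * (trunc_fcw presumably sets the RC field to truncate; save_fcw restores
 * the caller's mode). The explicit max_int/min_int compares implement SH4
 * saturation, since FISTP would otherwise write the x87 "integer
 * indefinite" value on overflow. Portable C equivalent (sketch): */
static int32_t ftrc_sketch( double val )
{
    if( val >= 2147483647.0 )             /* cf. max_int */
	return 0x7FFFFFFF;
    if( val <= -2147483648.0 )            /* cf. min_int */
	return (int32_t)0x80000000u;
    return (int32_t)val;                  /* C casts truncate toward zero */
}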
  2034 FLDS FRm, FPUL {:  
  2035     COUNT_INST(I_FLDS);
  2036     check_fpuen();
  2037     load_fr( R_EAX, FRm );
  2038     store_spreg( R_EAX, R_FPUL );
  2039     sh4_x86.tstate = TSTATE_NONE;
  2040 :}
  2041 FSTS FPUL, FRn {:  
  2042     COUNT_INST(I_FSTS);
  2043     check_fpuen();
  2044     load_spreg( R_EAX, R_FPUL );
  2045     store_fr( R_EAX, FRn );
  2046     sh4_x86.tstate = TSTATE_NONE;
  2047 :}
  2048 FCNVDS FRm, FPUL {:  
  2049     COUNT_INST(I_FCNVDS);
  2050     check_fpuen();
  2051     load_spreg( R_ECX, R_FPSCR );
  2052     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2053     JE_rel8(end); // only when PR=1
  2054     push_dr( FRm );
  2055     pop_fpul();
  2056     JMP_TARGET(end);
  2057     sh4_x86.tstate = TSTATE_NONE;
  2058 :}
  2059 FCNVSD FPUL, FRn {:  
  2060     COUNT_INST(I_FCNVSD);
  2061     check_fpuen();
  2062     load_spreg( R_ECX, R_FPSCR );
  2063     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2064     JE_rel8(end); // only when PR=1
  2065     push_fpul();
  2066     pop_dr( FRn );
  2067     JMP_TARGET(end);
  2068     sh4_x86.tstate = TSTATE_NONE;
  2069 :}
  2071 /* Floating point instructions */
  2072 FABS FRn {:  
  2073     COUNT_INST(I_FABS);
  2074     check_fpuen();
  2075     load_spreg( R_ECX, R_FPSCR );
  2076     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2077     JNE_rel8(doubleprec);
  2078     push_fr(FRn); // 6
  2079     FABS_st0(); // 2
  2080     pop_fr(FRn); //6
  2081     JMP_rel8(end); // 2
  2082     JMP_TARGET(doubleprec);
  2083     push_dr(FRn);
  2084     FABS_st0();
  2085     pop_dr(FRn);
  2086     JMP_TARGET(end);
  2087     sh4_x86.tstate = TSTATE_NONE;
  2088 :}
  2089 FADD FRm, FRn {:  
  2090     COUNT_INST(I_FADD);
  2091     check_fpuen();
  2092     load_spreg( R_ECX, R_FPSCR );
  2093     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2094     JNE_rel8(doubleprec);
  2095     push_fr(FRm);
  2096     push_fr(FRn);
  2097     FADDP_st(1);
  2098     pop_fr(FRn);
  2099     JMP_rel8(end);
  2100     JMP_TARGET(doubleprec);
  2101     push_dr(FRm);
  2102     push_dr(FRn);
  2103     FADDP_st(1);
  2104     pop_dr(FRn);
  2105     JMP_TARGET(end);
  2106     sh4_x86.tstate = TSTATE_NONE;
  2107 :}
  2108 FDIV FRm, FRn {:  
  2109     COUNT_INST(I_FDIV);
  2110     check_fpuen();
  2111     load_spreg( R_ECX, R_FPSCR );
  2112     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2113     JNE_rel8(doubleprec);
  2114     push_fr(FRn);
  2115     push_fr(FRm);
  2116     FDIVP_st(1);
  2117     pop_fr(FRn);
  2118     JMP_rel8(end);
  2119     JMP_TARGET(doubleprec);
  2120     push_dr(FRn);
  2121     push_dr(FRm);
  2122     FDIVP_st(1);
  2123     pop_dr(FRn);
  2124     JMP_TARGET(end);
  2125     sh4_x86.tstate = TSTATE_NONE;
  2126 :}
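/* [Editor's note] Operand order is what makes the non-commutative ops come
 * out right: FDIV pushes FRn then FRm, and FDIVP st(1) computes
 * st(1)/st(0) = FRn/FRm before popping, matching the SH4 semantics
 * FRn := FRn / FRm. FSUB below relies on the same ordering for
 * FRn := FRn - FRm. */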
  2127 FMAC FR0, FRm, FRn {:  
  2128     COUNT_INST(I_FMAC);
  2129     check_fpuen();
  2130     load_spreg( R_ECX, R_FPSCR );
  2131     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2132     JNE_rel8(doubleprec);
  2133     push_fr( 0 );
  2134     push_fr( FRm );
  2135     FMULP_st(1);
  2136     push_fr( FRn );
  2137     FADDP_st(1);
  2138     pop_fr( FRn );
  2139     JMP_rel8(end);
  2140     JMP_TARGET(doubleprec);
  2141     push_dr( 0 );
  2142     push_dr( FRm );
  2143     FMULP_st(1);
  2144     push_dr( FRn );
  2145     FADDP_st(1);
  2146     pop_dr( FRn );
  2147     JMP_TARGET(end);
  2148     sh4_x86.tstate = TSTATE_NONE;
  2149 :}
  2151 FMUL FRm, FRn {:  
  2152     COUNT_INST(I_FMUL);
  2153     check_fpuen();
  2154     load_spreg( R_ECX, R_FPSCR );
  2155     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2156     JNE_rel8(doubleprec);
  2157     push_fr(FRm);
  2158     push_fr(FRn);
  2159     FMULP_st(1);
  2160     pop_fr(FRn);
  2161     JMP_rel8(end);
  2162     JMP_TARGET(doubleprec);
  2163     push_dr(FRm);
  2164     push_dr(FRn);
  2165     FMULP_st(1);
  2166     pop_dr(FRn);
  2167     JMP_TARGET(end);
  2168     sh4_x86.tstate = TSTATE_NONE;
  2169 :}
  2170 FNEG FRn {:  
  2171     COUNT_INST(I_FNEG);
  2172     check_fpuen();
  2173     load_spreg( R_ECX, R_FPSCR );
  2174     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2175     JNE_rel8(doubleprec);
  2176     push_fr(FRn);
  2177     FCHS_st0();
  2178     pop_fr(FRn);
  2179     JMP_rel8(end);
  2180     JMP_TARGET(doubleprec);
  2181     push_dr(FRn);
  2182     FCHS_st0();
  2183     pop_dr(FRn);
  2184     JMP_TARGET(end);
  2185     sh4_x86.tstate = TSTATE_NONE;
  2186 :}
  2187 FSRRA FRn {:  
  2188     COUNT_INST(I_FSRRA);
  2189     check_fpuen();
  2190     load_spreg( R_ECX, R_FPSCR );
  2191     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2192     JNE_rel8(end); // PR=0 only
  2193     FLD1_st0();
  2194     push_fr(FRn);
  2195     FSQRT_st0();
  2196     FDIVP_st(1);
  2197     pop_fr(FRn);
  2198     JMP_TARGET(end);
  2199     sh4_x86.tstate = TSTATE_NONE;
  2200 :}
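/* [Editor's note] FSRRA is the reciprocal square root; the translation
 * computes it exactly as 1/sqrt on the x87 stack (FLD1; push FRn; FSQRT;
 * FDIVP). C equivalent (sketch; <math.h> is already included at the top of
 * this file): */
static float fsrra_sketch( float x )
{
    return 1.0f / sqrtf( x );
}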
  2201 FSQRT FRn {:  
  2202     COUNT_INST(I_FSQRT);
  2203     check_fpuen();
  2204     load_spreg( R_ECX, R_FPSCR );
  2205     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2206     JNE_rel8(doubleprec);
  2207     push_fr(FRn);
  2208     FSQRT_st0();
  2209     pop_fr(FRn);
  2210     JMP_rel8(end);
  2211     JMP_TARGET(doubleprec);
  2212     push_dr(FRn);
  2213     FSQRT_st0();
  2214     pop_dr(FRn);
  2215     JMP_TARGET(end);
  2216     sh4_x86.tstate = TSTATE_NONE;
  2217 :}
  2218 FSUB FRm, FRn {:  
  2219     COUNT_INST(I_FSUB);
  2220     check_fpuen();
  2221     load_spreg( R_ECX, R_FPSCR );
  2222     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2223     JNE_rel8(doubleprec);
  2224     push_fr(FRn);
  2225     push_fr(FRm);
  2226     FSUBP_st(1);
  2227     pop_fr(FRn);
  2228     JMP_rel8(end);
  2229     JMP_TARGET(doubleprec);
  2230     push_dr(FRn);
  2231     push_dr(FRm);
  2232     FSUBP_st(1);
  2233     pop_dr(FRn);
  2234     JMP_TARGET(end);
  2235     sh4_x86.tstate = TSTATE_NONE;
  2236 :}
  2238 FCMP/EQ FRm, FRn {:  
  2239     COUNT_INST(I_FCMPEQ);
  2240     check_fpuen();
  2241     load_spreg( R_ECX, R_FPSCR );
  2242     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2243     JNE_rel8(doubleprec);
  2244     push_fr(FRm);
  2245     push_fr(FRn);
  2246     JMP_rel8(end);
  2247     JMP_TARGET(doubleprec);
  2248     push_dr(FRm);
  2249     push_dr(FRn);
  2250     JMP_TARGET(end);
  2251     FCOMIP_st(1);
  2252     SETE_t();
  2253     FPOP_st();
  2254     sh4_x86.tstate = TSTATE_NONE;
  2255 :}
  2256 FCMP/GT FRm, FRn {:  
  2257     COUNT_INST(I_FCMPGT);
  2258     check_fpuen();
  2259     load_spreg( R_ECX, R_FPSCR );
  2260     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2261     JNE_rel8(doubleprec);
  2262     push_fr(FRm);
  2263     push_fr(FRn);
  2264     JMP_rel8(end);
  2265     JMP_TARGET(doubleprec);
  2266     push_dr(FRm);
  2267     push_dr(FRn);
  2268     JMP_TARGET(end);
  2269     FCOMIP_st(1);
  2270     SETA_t();
  2271     FPOP_st();
  2272     sh4_x86.tstate = TSTATE_NONE;
  2273 :}
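/* [Editor's note] Both FCMP forms share one FCOMIP + SETcc tail: FCOMIP
 * compares st(0) with st(1), sets the host ZF/CF like an unsigned compare,
 * and pops one operand; SETE_t/SETA_t then latch the result into sh4r.t.
 * Since FRn was pushed last, st(0)=FRn and st(1)=FRm, so SETA yields
 * FRn > FRm as required. (FCOMIP reports unordered results with ZF=1, so
 * NaN operands would satisfy the EQ case here, where hardware clears T; an
 * apparent corner-case divergence of flag-based compares.) */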
  2275 FSCA FPUL, FRn {:  
  2276     COUNT_INST(I_FSCA);
  2277     check_fpuen();
  2278     load_spreg( R_ECX, R_FPSCR );
  2279     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2280     JNE_rel8(doubleprec );
  2281     LEA_sh4r_r32( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
  2282     load_spreg( R_EDX, R_FPUL );
  2283     call_func2( sh4_fsca, R_EDX, R_ECX );
  2284     JMP_TARGET(doubleprec);
  2285     sh4_x86.tstate = TSTATE_NONE;
  2286 :}
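/* [Editor's note] FSCA writes sin/cos of FPUL into the even-aligned pair
 * FRn/FRn+1 (hence the FRn&0x0E in the LEA above), treating FPUL as a
 * fixed-point angle in which 0x10000 is one full turn. Behavioural sketch
 * of what the C helper sh4_fsca computes; the real helper may differ in
 * precision and rounding: */
static void fsca_sketch( uint32_t fpul, float *pair )
{
    double angle = (fpul & 0xFFFF) * (2.0 * M_PI / 65536.0);
    pair[0] = (float)sin( angle );        /* -> FRn   */
    pair[1] = (float)cos( angle );        /* -> FRn+1 */
}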
  2287 FIPR FVm, FVn {:  
  2288     COUNT_INST(I_FIPR);
  2289     check_fpuen();
  2290     load_spreg( R_ECX, R_FPSCR );
  2291     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2292     JNE_rel8( doubleprec);
  2294     push_fr( FVm<<2 );
  2295     push_fr( FVn<<2 );
  2296     FMULP_st(1);
  2297     push_fr( (FVm<<2)+1);
  2298     push_fr( (FVn<<2)+1);
  2299     FMULP_st(1);
  2300     FADDP_st(1);
  2301     push_fr( (FVm<<2)+2);
  2302     push_fr( (FVn<<2)+2);
  2303     FMULP_st(1);
  2304     FADDP_st(1);
  2305     push_fr( (FVm<<2)+3);
  2306     push_fr( (FVn<<2)+3);
  2307     FMULP_st(1);
  2308     FADDP_st(1);
  2309     pop_fr( (FVn<<2)+3);
  2310     JMP_TARGET(doubleprec);
  2311     sh4_x86.tstate = TSTATE_NONE;
  2312 :}
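/* [Editor's note] FIPR is a four-component dot product FVm·FVn, accumulated
 * pairwise on the x87 stack above and stored to the last element of FVn.
 * C equivalent (sketch): */
static void fipr_sketch( const float *fvm, float *fvn )
{
    fvn[3] = fvm[0]*fvn[0] + fvm[1]*fvn[1]
	   + fvm[2]*fvn[2] + fvm[3]*fvn[3];
}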
  2313 FTRV XMTRX, FVn {:  
  2314     COUNT_INST(I_FTRV);
  2315     check_fpuen();
  2316     load_spreg( R_ECX, R_FPSCR );
  2317     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2318     JNE_rel8( doubleprec );
  2319     LEA_sh4r_r32( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
  2320     call_func1( sh4_ftrv, R_EDX );  // 12
  2321     JMP_TARGET(doubleprec);
  2322     sh4_x86.tstate = TSTATE_NONE;
  2323 :}
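/* [Editor's note] FTRV transforms FVn in place by the 4x4 matrix XMTRX held
 * in the back register bank; the translation simply hands the vector's
 * address to the C helper sh4_ftrv. Behavioural sketch (the column-major
 * layout below is an assumption; sh4_ftrv is authoritative): */
static void ftrv_sketch( const float xmtrx[16], float fv[4] )
{
    float r[4];
    int i;
    for( i = 0; i < 4; i++ ) {
	r[i] = xmtrx[i]    * fv[0] + xmtrx[i+4]  * fv[1]
	     + xmtrx[i+8]  * fv[2] + xmtrx[i+12] * fv[3];
    }
    fv[0] = r[0]; fv[1] = r[1]; fv[2] = r[2]; fv[3] = r[3];
}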
  2325 FRCHG {:  
  2326     COUNT_INST(I_FRCHG);
  2327     check_fpuen();
  2328     load_spreg( R_ECX, R_FPSCR );
  2329     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2330     store_spreg( R_ECX, R_FPSCR );
  2331     call_func0( sh4_switch_fr_banks );
  2332     sh4_x86.tstate = TSTATE_NONE;
  2333 :}
  2334 FSCHG {:  
  2335     COUNT_INST(I_FSCHG);
  2336     check_fpuen();
  2337     load_spreg( R_ECX, R_FPSCR );
  2338     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2339     store_spreg( R_ECX, R_FPSCR );
  2340     sh4_x86.tstate = TSTATE_NONE;
  2341 :}
  2343 /* Processor control instructions */
  2344 LDC Rm, SR {:
  2345     COUNT_INST(I_LDCSR);
  2346     if( sh4_x86.in_delay_slot ) {
  2347 	SLOTILLEGAL();
  2348     } else {
  2349 	check_priv();
  2350 	load_reg( R_EAX, Rm );
  2351 	call_func1( sh4_write_sr, R_EAX );
  2352 	sh4_x86.priv_checked = FALSE;
  2353 	sh4_x86.fpuen_checked = FALSE;
  2354 	sh4_x86.tstate = TSTATE_NONE;
  2355     }
  2356 :}
  2357 LDC Rm, GBR {: 
  2358     COUNT_INST(I_LDC);
  2359     load_reg( R_EAX, Rm );
  2360     store_spreg( R_EAX, R_GBR );
  2361 :}
  2362 LDC Rm, VBR {:  
  2363     COUNT_INST(I_LDC);
  2364     check_priv();
  2365     load_reg( R_EAX, Rm );
  2366     store_spreg( R_EAX, R_VBR );
  2367     sh4_x86.tstate = TSTATE_NONE;
  2368 :}
  2369 LDC Rm, SSR {:  
  2370     COUNT_INST(I_LDC);
  2371     check_priv();
  2372     load_reg( R_EAX, Rm );
  2373     store_spreg( R_EAX, R_SSR );
  2374     sh4_x86.tstate = TSTATE_NONE;
  2375 :}
  2376 LDC Rm, SGR {:  
  2377     COUNT_INST(I_LDC);
  2378     check_priv();
  2379     load_reg( R_EAX, Rm );
  2380     store_spreg( R_EAX, R_SGR );
  2381     sh4_x86.tstate = TSTATE_NONE;
  2382 :}
  2383 LDC Rm, SPC {:  
  2384     COUNT_INST(I_LDC);
  2385     check_priv();
  2386     load_reg( R_EAX, Rm );
  2387     store_spreg( R_EAX, R_SPC );
  2388     sh4_x86.tstate = TSTATE_NONE;
  2389 :}
  2390 LDC Rm, DBR {:  
  2391     COUNT_INST(I_LDC);
  2392     check_priv();
  2393     load_reg( R_EAX, Rm );
  2394     store_spreg( R_EAX, R_DBR );
  2395     sh4_x86.tstate = TSTATE_NONE;
  2396 :}
  2397 LDC Rm, Rn_BANK {:  
  2398     COUNT_INST(I_LDC);
  2399     check_priv();
  2400     load_reg( R_EAX, Rm );
  2401     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2402     sh4_x86.tstate = TSTATE_NONE;
  2403 :}
  2404 LDC.L @Rm+, GBR {:  
  2405     COUNT_INST(I_LDCM);
  2406     load_reg( R_EAX, Rm );
  2407     check_ralign32( R_EAX );
  2408     MMU_TRANSLATE_READ( R_EAX );
  2409     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2410     MEM_READ_LONG( R_EAX, R_EAX );
  2411     store_spreg( R_EAX, R_GBR );
  2412     sh4_x86.tstate = TSTATE_NONE;
  2413 :}
  2414 LDC.L @Rm+, SR {:
  2415     COUNT_INST(I_LDCSRM);
  2416     if( sh4_x86.in_delay_slot ) {
  2417 	SLOTILLEGAL();
  2418     } else {
  2419 	check_priv();
  2420 	load_reg( R_EAX, Rm );
  2421 	check_ralign32( R_EAX );
  2422 	MMU_TRANSLATE_READ( R_EAX );
  2423 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2424 	MEM_READ_LONG( R_EAX, R_EAX );
  2425 	call_func1( sh4_write_sr, R_EAX );
  2426 	sh4_x86.priv_checked = FALSE;
  2427 	sh4_x86.fpuen_checked = FALSE;
  2428 	sh4_x86.tstate = TSTATE_NONE;
  2429     }
  2430 :}
  2431 LDC.L @Rm+, VBR {:  
  2432     COUNT_INST(I_LDCM);
  2433     check_priv();
  2434     load_reg( R_EAX, Rm );
  2435     check_ralign32( R_EAX );
  2436     MMU_TRANSLATE_READ( R_EAX );
  2437     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2438     MEM_READ_LONG( R_EAX, R_EAX );
  2439     store_spreg( R_EAX, R_VBR );
  2440     sh4_x86.tstate = TSTATE_NONE;
  2441 :}
  2442 LDC.L @Rm+, SSR {:
  2443     COUNT_INST(I_LDCM);
  2444     check_priv();
  2445     load_reg( R_EAX, Rm );
  2446     check_ralign32( R_EAX );
  2447     MMU_TRANSLATE_READ( R_EAX );
  2448     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2449     MEM_READ_LONG( R_EAX, R_EAX );
  2450     store_spreg( R_EAX, R_SSR );
  2451     sh4_x86.tstate = TSTATE_NONE;
  2452 :}
  2453 LDC.L @Rm+, SGR {:  
  2454     COUNT_INST(I_LDCM);
  2455     check_priv();
  2456     load_reg( R_EAX, Rm );
  2457     check_ralign32( R_EAX );
  2458     MMU_TRANSLATE_READ( R_EAX );
  2459     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2460     MEM_READ_LONG( R_EAX, R_EAX );
  2461     store_spreg( R_EAX, R_SGR );
  2462     sh4_x86.tstate = TSTATE_NONE;
  2463 :}
  2464 LDC.L @Rm+, SPC {:  
  2465     COUNT_INST(I_LDCM);
  2466     check_priv();
  2467     load_reg( R_EAX, Rm );
  2468     check_ralign32( R_EAX );
  2469     MMU_TRANSLATE_READ( R_EAX );
  2470     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2471     MEM_READ_LONG( R_EAX, R_EAX );
  2472     store_spreg( R_EAX, R_SPC );
  2473     sh4_x86.tstate = TSTATE_NONE;
  2474 :}
  2475 LDC.L @Rm+, DBR {:  
  2476     COUNT_INST(I_LDCM);
  2477     check_priv();
  2478     load_reg( R_EAX, Rm );
  2479     check_ralign32( R_EAX );
  2480     MMU_TRANSLATE_READ( R_EAX );
  2481     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2482     MEM_READ_LONG( R_EAX, R_EAX );
  2483     store_spreg( R_EAX, R_DBR );
  2484     sh4_x86.tstate = TSTATE_NONE;
  2485 :}
  2486 LDC.L @Rm+, Rn_BANK {:  
  2487     COUNT_INST(I_LDCM);
  2488     check_priv();
  2489     load_reg( R_EAX, Rm );
  2490     check_ralign32( R_EAX );
  2491     MMU_TRANSLATE_READ( R_EAX );
  2492     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2493     MEM_READ_LONG( R_EAX, R_EAX );
  2494     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2495     sh4_x86.tstate = TSTATE_NONE;
  2496 :}
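/* [Editor's note] All of the LDC.L/LDS.L forms above and below share one
 * read-with-post-increment template: translate the address first (so a TLB
 * exception leaves Rm untouched), bump Rm by 4, then perform the load:
 *
 *     addr = Rm;  check_ralign32(addr);  translate(addr);  Rm += 4;
 *     dest = mem_read_long(addr);
 */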
  2497 LDS Rm, FPSCR {:
  2498     COUNT_INST(I_LDS);
  2499     check_fpuen();
  2500     load_reg( R_EAX, Rm );
  2501     call_func1( sh4_write_fpscr, R_EAX );
  2502     sh4_x86.tstate = TSTATE_NONE;
  2503 :}
  2504 LDS.L @Rm+, FPSCR {:  
  2505     COUNT_INST(I_LDS);
  2506     check_fpuen();
  2507     load_reg( R_EAX, Rm );
  2508     check_ralign32( R_EAX );
  2509     MMU_TRANSLATE_READ( R_EAX );
  2510     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2511     MEM_READ_LONG( R_EAX, R_EAX );
  2512     call_func1( sh4_write_fpscr, R_EAX );
  2513     sh4_x86.tstate = TSTATE_NONE;
  2514 :}
  2515 LDS Rm, FPUL {:  
  2516     COUNT_INST(I_LDS);
  2517     check_fpuen();
  2518     load_reg( R_EAX, Rm );
  2519     store_spreg( R_EAX, R_FPUL );
  2520 :}
  2521 LDS.L @Rm+, FPUL {:  
  2522     COUNT_INST(I_LDSM);
  2523     check_fpuen();
  2524     load_reg( R_EAX, Rm );
  2525     check_ralign32( R_EAX );
  2526     MMU_TRANSLATE_READ( R_EAX );
  2527     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2528     MEM_READ_LONG( R_EAX, R_EAX );
  2529     store_spreg( R_EAX, R_FPUL );
  2530     sh4_x86.tstate = TSTATE_NONE;
  2531 :}
  2532 LDS Rm, MACH {: 
  2533     COUNT_INST(I_LDS);
  2534     load_reg( R_EAX, Rm );
  2535     store_spreg( R_EAX, R_MACH );
  2536 :}
  2537 LDS.L @Rm+, MACH {:  
  2538     COUNT_INST(I_LDSM);
  2539     load_reg( R_EAX, Rm );
  2540     check_ralign32( R_EAX );
  2541     MMU_TRANSLATE_READ( R_EAX );
  2542     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2543     MEM_READ_LONG( R_EAX, R_EAX );
  2544     store_spreg( R_EAX, R_MACH );
  2545     sh4_x86.tstate = TSTATE_NONE;
  2546 :}
  2547 LDS Rm, MACL {:  
  2548     COUNT_INST(I_LDS);
  2549     load_reg( R_EAX, Rm );
  2550     store_spreg( R_EAX, R_MACL );
  2551 :}
  2552 LDS.L @Rm+, MACL {:  
  2553     COUNT_INST(I_LDSM);
  2554     load_reg( R_EAX, Rm );
  2555     check_ralign32( R_EAX );
  2556     MMU_TRANSLATE_READ( R_EAX );
  2557     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2558     MEM_READ_LONG( R_EAX, R_EAX );
  2559     store_spreg( R_EAX, R_MACL );
  2560     sh4_x86.tstate = TSTATE_NONE;
  2561 :}
  2562 LDS Rm, PR {:  
  2563     COUNT_INST(I_LDS);
  2564     load_reg( R_EAX, Rm );
  2565     store_spreg( R_EAX, R_PR );
  2566 :}
  2567 LDS.L @Rm+, PR {:  
  2568     COUNT_INST(I_LDSM);
  2569     load_reg( R_EAX, Rm );
  2570     check_ralign32( R_EAX );
  2571     MMU_TRANSLATE_READ( R_EAX );
  2572     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2573     MEM_READ_LONG( R_EAX, R_EAX );
  2574     store_spreg( R_EAX, R_PR );
  2575     sh4_x86.tstate = TSTATE_NONE;
  2576 :}
  2577 LDTLB {:  
  2578     COUNT_INST(I_LDTLB);
  2579     call_func0( MMU_ldtlb );
  2580 :}
  2581 OCBI @Rn {:
  2582     COUNT_INST(I_OCBI);
  2583 :}
  2584 OCBP @Rn {:
  2585     COUNT_INST(I_OCBP);
  2586 :}
  2587 OCBWB @Rn {:
  2588     COUNT_INST(I_OCBWB);
  2589 :}
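/* [Editor's note] OCBI/OCBP/OCBWB are operand-cache block
 * invalidate/purge/write-back hints; lxdream does not model the operand
 * cache, so beyond the instruction count they intentionally translate to
 * nothing. */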
  2590 PREF @Rn {:
  2591     COUNT_INST(I_PREF);
  2592     load_reg( R_EAX, Rn );
  2593     MOV_r32_r32( R_EAX, R_ECX );
  2594     AND_imm32_r32( 0xFC000000, R_EAX );
  2595     CMP_imm32_r32( 0xE0000000, R_EAX );
  2596     JNE_rel8(end);
  2597     call_func1( sh4_flush_store_queue, R_ECX );
  2598     TEST_r32_r32( R_EAX, R_EAX );
  2599     JE_exc(-1);
  2600     JMP_TARGET(end);
  2601     sh4_x86.tstate = TSTATE_NONE;
  2602 :}
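/* [Editor's note] PREF is only architecturally visible when the address
 * falls in the store-queue area 0xE0000000-0xE3FFFFFF, which is what the
 * mask-and-compare above tests before calling sh4_flush_store_queue; a zero
 * (failure) return then exits through JE_exc(-1) to surface the exception
 * the helper raised. The region test in C (sketch): */
static int is_store_queue_addr_sketch( uint32_t addr )
{
    return (addr & 0xFC000000) == 0xE0000000;
}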
  2603 SLEEP {: 
  2604     COUNT_INST(I_SLEEP);
  2605     check_priv();
  2606     call_func0( sh4_sleep );
  2607     sh4_x86.tstate = TSTATE_NONE;
  2608     sh4_x86.in_delay_slot = DELAY_NONE;
  2609     return 2;
  2610 :}
  2611 STC SR, Rn {:
  2612     COUNT_INST(I_STCSR);
  2613     check_priv();
  2614     call_func0(sh4_read_sr);
  2615     store_reg( R_EAX, Rn );
  2616     sh4_x86.tstate = TSTATE_NONE;
  2617 :}
  2618 STC GBR, Rn {:  
  2619     COUNT_INST(I_STC);
  2620     load_spreg( R_EAX, R_GBR );
  2621     store_reg( R_EAX, Rn );
  2622 :}
  2623 STC VBR, Rn {:  
  2624     COUNT_INST(I_STC);
  2625     check_priv();
  2626     load_spreg( R_EAX, R_VBR );
  2627     store_reg( R_EAX, Rn );
  2628     sh4_x86.tstate = TSTATE_NONE;
  2629 :}
  2630 STC SSR, Rn {:  
  2631     COUNT_INST(I_STC);
  2632     check_priv();
  2633     load_spreg( R_EAX, R_SSR );
  2634     store_reg( R_EAX, Rn );
  2635     sh4_x86.tstate = TSTATE_NONE;
  2636 :}
  2637 STC SPC, Rn {:  
  2638     COUNT_INST(I_STC);
  2639     check_priv();
  2640     load_spreg( R_EAX, R_SPC );
  2641     store_reg( R_EAX, Rn );
  2642     sh4_x86.tstate = TSTATE_NONE;
  2643 :}
  2644 STC SGR, Rn {:  
  2645     COUNT_INST(I_STC);
  2646     check_priv();
  2647     load_spreg( R_EAX, R_SGR );
  2648     store_reg( R_EAX, Rn );
  2649     sh4_x86.tstate = TSTATE_NONE;
  2650 :}
  2651 STC DBR, Rn {:  
  2652     COUNT_INST(I_STC);
  2653     check_priv();
  2654     load_spreg( R_EAX, R_DBR );
  2655     store_reg( R_EAX, Rn );
  2656     sh4_x86.tstate = TSTATE_NONE;
  2657 :}
  2658 STC Rm_BANK, Rn {:
  2659     COUNT_INST(I_STC);
  2660     check_priv();
  2661     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2662     store_reg( R_EAX, Rn );
  2663     sh4_x86.tstate = TSTATE_NONE;
  2664 :}
  2665 STC.L SR, @-Rn {:
  2666     COUNT_INST(I_STCSRM);
  2667     check_priv();
  2668     load_reg( R_EAX, Rn );
  2669     check_walign32( R_EAX );
  2670     ADD_imm8s_r32( -4, R_EAX );
  2671     MMU_TRANSLATE_WRITE( R_EAX );
  2672     PUSH_realigned_r32( R_EAX );
  2673     call_func0( sh4_read_sr );
  2674     POP_realigned_r32( R_ECX );
  2675     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2676     MEM_WRITE_LONG( R_ECX, R_EAX );
  2677     sh4_x86.tstate = TSTATE_NONE;
  2678 :}
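/* [Editor's note] SR is the one STC.L source that has to be assembled by a
 * C helper (sh4_read_sr), so the translated store address is preserved
 * across the call with PUSH_realigned_r32/POP_realigned_r32, the variants
 * that keep the native stack at ABI alignment (presumably tracked via
 * sh4_x86.stack_posn). That is also why the MEM_WRITE_LONG operand order is
 * swapped here: R_ECX carries the address and R_EAX the SR value. */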
  2679 STC.L VBR, @-Rn {:  
  2680     COUNT_INST(I_STCM);
  2681     check_priv();
  2682     load_reg( R_EAX, Rn );
  2683     check_walign32( R_EAX );
  2684     ADD_imm8s_r32( -4, R_EAX );
  2685     MMU_TRANSLATE_WRITE( R_EAX );
  2686     load_spreg( R_EDX, R_VBR );
  2687     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2688     MEM_WRITE_LONG( R_EAX, R_EDX );
  2689     sh4_x86.tstate = TSTATE_NONE;
  2690 :}
  2691 STC.L SSR, @-Rn {:  
  2692     COUNT_INST(I_STCM);
  2693     check_priv();
  2694     load_reg( R_EAX, Rn );
  2695     check_walign32( R_EAX );
  2696     ADD_imm8s_r32( -4, R_EAX );
  2697     MMU_TRANSLATE_WRITE( R_EAX );
  2698     load_spreg( R_EDX, R_SSR );
  2699     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2700     MEM_WRITE_LONG( R_EAX, R_EDX );
  2701     sh4_x86.tstate = TSTATE_NONE;
  2702 :}
  2703 STC.L SPC, @-Rn {:
  2704     COUNT_INST(I_STCM);
  2705     check_priv();
  2706     load_reg( R_EAX, Rn );
  2707     check_walign32( R_EAX );
  2708     ADD_imm8s_r32( -4, R_EAX );
  2709     MMU_TRANSLATE_WRITE( R_EAX );
  2710     load_spreg( R_EDX, R_SPC );
  2711     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2712     MEM_WRITE_LONG( R_EAX, R_EDX );
  2713     sh4_x86.tstate = TSTATE_NONE;
  2714 :}
  2715 STC.L SGR, @-Rn {:  
  2716     COUNT_INST(I_STCM);
  2717     check_priv();
  2718     load_reg( R_EAX, Rn );
  2719     check_walign32( R_EAX );
  2720     ADD_imm8s_r32( -4, R_EAX );
  2721     MMU_TRANSLATE_WRITE( R_EAX );
  2722     load_spreg( R_EDX, R_SGR );
  2723     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2724     MEM_WRITE_LONG( R_EAX, R_EDX );
  2725     sh4_x86.tstate = TSTATE_NONE;
  2726 :}
  2727 STC.L DBR, @-Rn {:  
  2728     COUNT_INST(I_STCM);
  2729     check_priv();
  2730     load_reg( R_EAX, Rn );
  2731     check_walign32( R_EAX );
  2732     ADD_imm8s_r32( -4, R_EAX );
  2733     MMU_TRANSLATE_WRITE( R_EAX );
  2734     load_spreg( R_EDX, R_DBR );
  2735     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2736     MEM_WRITE_LONG( R_EAX, R_EDX );
  2737     sh4_x86.tstate = TSTATE_NONE;
  2738 :}
  2739 STC.L Rm_BANK, @-Rn {:  
  2740     COUNT_INST(I_STCM);
  2741     check_priv();
  2742     load_reg( R_EAX, Rn );
  2743     check_walign32( R_EAX );
  2744     ADD_imm8s_r32( -4, R_EAX );
  2745     MMU_TRANSLATE_WRITE( R_EAX );
  2746     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2747     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2748     MEM_WRITE_LONG( R_EAX, R_EDX );
  2749     sh4_x86.tstate = TSTATE_NONE;
  2750 :}
  2751 STC.L GBR, @-Rn {:  
  2752     COUNT_INST(I_STCM);
  2753     load_reg( R_EAX, Rn );
  2754     check_walign32( R_EAX );
  2755     ADD_imm8s_r32( -4, R_EAX );
  2756     MMU_TRANSLATE_WRITE( R_EAX );
  2757     load_spreg( R_EDX, R_GBR );
  2758     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2759     MEM_WRITE_LONG( R_EAX, R_EDX );
  2760     sh4_x86.tstate = TSTATE_NONE;
  2761 :}
  2762 STS FPSCR, Rn {:  
  2763     COUNT_INST(I_STS);
  2764     check_fpuen();
  2765     load_spreg( R_EAX, R_FPSCR );
  2766     store_reg( R_EAX, Rn );
  2767 :}
  2768 STS.L FPSCR, @-Rn {:  
  2769     COUNT_INST(I_STSM);
  2770     check_fpuen();
  2771     load_reg( R_EAX, Rn );
  2772     check_walign32( R_EAX );
  2773     ADD_imm8s_r32( -4, R_EAX );
  2774     MMU_TRANSLATE_WRITE( R_EAX );
  2775     load_spreg( R_EDX, R_FPSCR );
  2776     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2777     MEM_WRITE_LONG( R_EAX, R_EDX );
  2778     sh4_x86.tstate = TSTATE_NONE;
  2779 :}
  2780 STS FPUL, Rn {:  
  2781     COUNT_INST(I_STS);
  2782     check_fpuen();
  2783     load_spreg( R_EAX, R_FPUL );
  2784     store_reg( R_EAX, Rn );
  2785 :}
  2786 STS.L FPUL, @-Rn {:  
  2787     COUNT_INST(I_STSM);
  2788     check_fpuen();
  2789     load_reg( R_EAX, Rn );
  2790     check_walign32( R_EAX );
  2791     ADD_imm8s_r32( -4, R_EAX );
  2792     MMU_TRANSLATE_WRITE( R_EAX );
  2793     load_spreg( R_EDX, R_FPUL );
  2794     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2795     MEM_WRITE_LONG( R_EAX, R_EDX );
  2796     sh4_x86.tstate = TSTATE_NONE;
  2797 :}
  2798 STS MACH, Rn {:  
  2799     COUNT_INST(I_STS);
  2800     load_spreg( R_EAX, R_MACH );
  2801     store_reg( R_EAX, Rn );
  2802 :}
  2803 STS.L MACH, @-Rn {:  
  2804     COUNT_INST(I_STSM);
  2805     load_reg( R_EAX, Rn );
  2806     check_walign32( R_EAX );
  2807     ADD_imm8s_r32( -4, R_EAX );
  2808     MMU_TRANSLATE_WRITE( R_EAX );
  2809     load_spreg( R_EDX, R_MACH );
  2810     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2811     MEM_WRITE_LONG( R_EAX, R_EDX );
  2812     sh4_x86.tstate = TSTATE_NONE;
  2813 :}
  2814 STS MACL, Rn {:  
  2815     COUNT_INST(I_STS);
  2816     load_spreg( R_EAX, R_MACL );
  2817     store_reg( R_EAX, Rn );
  2818 :}
  2819 STS.L MACL, @-Rn {:  
  2820     COUNT_INST(I_STSM);
  2821     load_reg( R_EAX, Rn );
  2822     check_walign32( R_EAX );
  2823     ADD_imm8s_r32( -4, R_EAX );
  2824     MMU_TRANSLATE_WRITE( R_EAX );
  2825     load_spreg( R_EDX, R_MACL );
  2826     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2827     MEM_WRITE_LONG( R_EAX, R_EDX );
  2828     sh4_x86.tstate = TSTATE_NONE;
  2829 :}
  2830 STS PR, Rn {:  
  2831     COUNT_INST(I_STS);
  2832     load_spreg( R_EAX, R_PR );
  2833     store_reg( R_EAX, Rn );
  2834 :}
  2835 STS.L PR, @-Rn {:  
  2836     COUNT_INST(I_STSM);
  2837     load_reg( R_EAX, Rn );
  2838     check_walign32( R_EAX );
  2839     ADD_imm8s_r32( -4, R_EAX );
  2840     MMU_TRANSLATE_WRITE( R_EAX );
  2841     load_spreg( R_EDX, R_PR );
  2842     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2843     MEM_WRITE_LONG( R_EAX, R_EDX );
  2844     sh4_x86.tstate = TSTATE_NONE;
  2845 :}
  2847 NOP {: 
  2848     COUNT_INST(I_NOP);
  2849     /* Do nothing. Well, we could emit a 0x90 (x86 NOP), but what would really be the point? */
  2850 :}
  2851 %%
  2852     sh4_x86.in_delay_slot = DELAY_NONE;
  2853     return 0;