lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 732:f05753bbe723
author nkeynes
date Thu Jul 10 01:46:00 2008 +0000
permissions -rw-r--r--
last change Fix alignment check for 64-bit FMOVs
Add missing MMU code etc to FMOV emu implementation
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#include "clock.h"
#define DEFAULT_BACKPATCH_SIZE 4096

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

#define MAX_RECOVERY_SIZE 2048

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};
#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3
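/* The TSTATE_* values are the x86 condition-code numbers for the matching
 * test (O=0, C/B=2, E/Z=4, NE=5, A=7, AE=3, GE=0xD, G=0xF), so a branch on
 * the cached T value can be emitted directly as OP(0x70+tstate) (Jcc rel8)
 * or OP(0x0F),OP(0x80+tstate) (Jcc rel32), and the sense inverted by
 * flipping the low bit -- see JT_rel8/JF_rel8 below. */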
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        ((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
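/* fixup_offset is relative to the start of the current code block, and
 * fixup_icount counts SH4 instructions from block_start_pc so the recovery
 * table can rebuild PC; a fixup emitted in a delay slot is attributed to
 * the branch itself (hence fixup_pc -= 2 above). */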
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )

/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}
/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg -- value must be 64-bit wide to match OP64 below */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
#define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
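/* NB: the ^1 on 32-bit FR/XF accesses and the |0x01 / &0x0E pairing on the
 * DR halves reflect how sh4r.fr[][] is laid out: each even/odd register
 * pair is stored word-swapped so that a double-precision DR/XD value can be
 * moved as a single native 64-bit quantity. */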
#define push_fpul()  FLDF_sh4r(R_FPUL)
#define pop_fpul()   FSTPF_sh4r(R_FPUL)
#define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
        sh4_x86.priv_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_MD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JE_exc( EXC_SLOT_ILLEGAL );\
        } else {\
            JE_exc( EXC_ILLEGAL );\
        }\
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_FD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
    }
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TEST_imm32_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
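/* The 64-bit checks (mask 0x7) guard the double-precision FMOV paths --
 * cf. this changeset's "Fix alignment check for 64-bit FMOVs". */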
#define UNDEF()
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg); }

/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MEM_READ_SIZE (CALL_FUNC1_SIZE)
#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
#define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
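/* MMU_TRANSLATE_SIZE: with the TLB active each translation costs a call
 * (CALL_FUNC1_SIZE) plus 12 bytes for the CMP/JE error check emitted by
 * the macros above; with the TLB off the macros expand to nothing. */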
#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;

/****** Import appropriate calling conventions ******/
#if SIZEOF_VOID_P == 8
#include "sh4/ia64abi.h"
#else /* 32-bit system */
#ifdef APPLE_BUILD
#include "sh4/ia32mac.h"
#else
#include "sh4/ia32abi.h"
#endif
#endif
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
    } else {
        return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
    }
}
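/* Size estimate for the end-of-block epilogue: up to three backpatched
 * exception exits are costed at 12 bytes each; beyond that the estimate
 * becomes 48 bytes plus 15 per additional exit. These constants have to
 * stay in sync with what the ABI header's end-of-block code actually
 * emits. */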
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
}
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    AND_imm8s_rptr( 0xFC, R_EAX );
    POP_r32(R_EBP);
    RET();
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * the delay-slot instruction cannot meaningfully be translated on its own).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an instruction that modifies PC)
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    /* PC is not in the current icache - this usually means we're running
     * with MMU on, and we've gone past the end of the page. And since
     * sh4_translate_block is pretty careful about this, it means we're
     * almost certainly in a delay slot.
     *
     * Since we can't assume the page is present (and we can't fault it in
     * at this point), inline a call to sh4_execute_instruction (with a few
     * small repairs to cope with the different environment).
     */

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
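/* NB: ADDC (like SUBC, ROTCL, ROTCR and DIV1 below) only reloads the x86
 * carry flag from sh4r.t via LDC_t() when tstate says the flag isn't
 * already live, so back-to-back carry operations chain on the host flags. */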
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    AND_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target1);
    TEST_r8_r8( R_AH, R_AH );
    JE_rel8(target2);
    SHR_imm8_r32( 16, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(target3);
    TEST_r8_r8( R_AH, R_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
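/* CMP/STR sets T if any of the four corresponding bytes of Rm and Rn are
 * equal: after the XOR a zero byte marks a match, so each byte of EAX is
 * tested in turn and all paths converge on a single SETE. */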
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
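/* DIV1 implements one step of the SH4's non-restoring division: the next
 * dividend bit is rotated into Rn through T, Rm is added or subtracted
 * depending on whether M == Q, and the carry is folded back into Q and T
 * as in the reference algorithm from the SH4 programming manual. */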
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 4, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign32( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );

    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
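/* When S is set, MAC.L saturates the accumulator to 48 bits; that case is
 * delegated to the signsat48() helper rather than being inlined here. */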
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 2, R_EAX );
        MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        load_reg( R_ECX, Rn );
        check_ralign16( R_ECX );
        PUSH_realigned_r32( R_EAX );
        MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
        MOV_r32_r32( R_ECX, R_EAX );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end2);           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8(end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
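/* When S is set, MAC.W saturates MACL to 32 bits (0x80000000/0x7FFFFFFF)
 * and flags the overflow by writing 1 to MACH; with S clear it accumulates
 * the full product into the 64-bit MACH:MACL pair. */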
MOVT Rn {:
    COUNT_INST(I_MOVT);
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    OR_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);          // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8(emptyshr);
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
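/* SHAD/SHLD shift left by Rm when Rm >= 0, else right by -Rm. Since x86
 * masks variable shift counts to 5 bits, the shift-by-32 case (result 0,
 * or sign-fill for the arithmetic right shift) needs the explicit
 * emptysar/emptyshr branches above. */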
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    COUNT_INST(I_SUBV);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
    store_reg( R_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    COUNT_INST(I_SWAPW);
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    COUNT_INST(I_TASB);
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    SETE_t();
    OR_imm8_r8( 0x80, R_AL );
    POP_realigned_r32( R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
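/* TAS.B reads the byte, sets T if it was zero, and writes it back with
 * bit 7 set. On hardware this is a locked read-modify-write bus cycle;
 * here it is emulated as an ordinary read followed by a write. */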
TST Rm, Rn {:
    COUNT_INST(I_TST);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    COUNT_INST(I_TSTI);
    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    COUNT_INST(I_TSTB);
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR ); // GBR is a special reg, as in AND.B/OR.B/XOR.B
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    COUNT_INST(I_XOR);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    COUNT_INST(I_XORI);
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_XORB);
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE(R_EAX, R_EAX);
    POP_realigned_r32(R_ECX);
    XOR_imm32_r32( imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    COUNT_INST(I_XTRCT);
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Data move instructions */
MOV Rm, Rn {:
    COUNT_INST(I_MOV);
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:
    COUNT_INST(I_MOVI);
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    COUNT_INST(I_MOVB);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    COUNT_INST(I_MOVB);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    COUNT_INST(I_MOVB);
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @-Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    COUNT_INST(I_MOVL);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    COUNT_INST(I_MOVL);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    COUNT_INST(I_MOVLPC);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_LONG( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    COUNT_INST(I_MOVL);
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -2, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    COUNT_INST(I_MOVW);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    COUNT_INST(I_MOVW);
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    COUNT_INST(I_MOVW);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
            MOVSX_r16_r32( R_EAX, R_EAX );
        } else {
            load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_WORD( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    COUNT_INST(I_MOVW);
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    COUNT_INST(I_MOVA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_ECX );
        store_reg( R_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
MOVCA.L R0, @Rn {:
    COUNT_INST(I_MOVCA);
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Control transfer instructions */
BF disp {:
    COUNT_INST(I_BF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JT_rel8( nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BF/S disp {:
    COUNT_INST(I_BFS);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
            JT_rel8(nottaken);
            ADD_imm32_r32( disp, R_EAX );
            JMP_TARGET(nottaken);
            ADD_sh4r_r32( R_PC, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMP_imm8s_sh4r( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            sh4vma_t target = disp + pc + 4;
            OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
            sh4_translate_instruction(pc+2);
            exit_block_rel( target, pc+4 );

            // not taken
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
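/* NB: for BF/S and BT/S the delay-slot instruction is translated twice --
 * once on the taken path ahead of exit_block_rel(), and once more on the
 * fall-through path after the rel32 branch is patched -- so that each
 * path carries its own copy of the slot. */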
BRA disp {:
    COUNT_INST(I_BRA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            load_spreg( R_EAX, R_PC );
            ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BRAF Rn {:
    COUNT_INST(I_BRAF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
BSR disp {:
    COUNT_INST(I_BSR);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        if( UNTRANSLATABLE(pc+2) ) {
            ADD_imm32_r32( disp, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
BSRF Rn {:
    COUNT_INST(I_BSRF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );

        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+2);
            return 4;
        }
    }
:}
BT disp {:
    COUNT_INST(I_BT);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JF_rel8( nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BT/S disp {:
    COUNT_INST(I_BTS);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
            JF_rel8(nottaken);
            ADD_imm32_r32( disp, R_EAX );
            JMP_TARGET(nottaken);
            ADD_sh4r_r32( R_PC, R_EAX );
            store_spreg( R_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMP_imm8s_sh4r( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
            sh4_translate_instruction(pc+2);
            exit_block_rel( disp + pc + 4, pc+4 );
            // not taken
            *patch = (xlat_output - ((uint8_t *)patch)) - 4;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
  1663 JMP @Rn {:  
  1664     COUNT_INST(I_JMP);
  1665     if( sh4_x86.in_delay_slot ) {
  1666 	SLOTILLEGAL();
  1667     } else {
  1668 	load_reg( R_ECX, Rn );
  1669 	store_spreg( R_ECX, R_NEW_PC );
  1670 	sh4_x86.in_delay_slot = DELAY_PC;
  1671 	sh4_x86.branch_taken = TRUE;
  1672 	if( UNTRANSLATABLE(pc+2) ) {
  1673 	    exit_block_emu(pc+2);
  1674 	    return 2;
  1675 	} else {
  1676 	    sh4_translate_instruction(pc+2);
  1677 	    exit_block_newpcset(pc+2);
  1678 	    return 4;
  1679 	}
  1680     }
  1681 :}
  1682 JSR @Rn {:  
  1683     COUNT_INST(I_JSR);
  1684     if( sh4_x86.in_delay_slot ) {
  1685 	SLOTILLEGAL();
  1686     } else {
  1687 	load_spreg( R_EAX, R_PC );
  1688 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1689 	store_spreg( R_EAX, R_PR );
  1690 	load_reg( R_ECX, Rn );
  1691 	store_spreg( R_ECX, R_NEW_PC );
  1692 	sh4_x86.in_delay_slot = DELAY_PC;
  1693 	sh4_x86.branch_taken = TRUE;
  1694 	sh4_x86.tstate = TSTATE_NONE;
  1695 	if( UNTRANSLATABLE(pc+2) ) {
  1696 	    exit_block_emu(pc+2);
  1697 	    return 2;
  1698 	} else {
  1699 	    sh4_translate_instruction(pc+2);
  1700 	    exit_block_newpcset(pc+2);
  1701 	    return 4;
  1702 	}
  1703     }
  1704 :}
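       /* RTE below (like the LDC ...,SR forms later on) writes SR through
        * sh4_write_sr(), which may change the processor mode and the FPU
        * disable bit. The cached priv_checked/fpuen_checked results are reset
        * so that any subsequent check_priv()/check_fpuen() in this block
        * re-emits its test instead of trusting a stale answer.
        */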
  1705 RTE {:  
  1706     COUNT_INST(I_RTE);
  1707     if( sh4_x86.in_delay_slot ) {
  1708 	SLOTILLEGAL();
  1709     } else {
  1710 	check_priv();
  1711 	load_spreg( R_ECX, R_SPC );
  1712 	store_spreg( R_ECX, R_NEW_PC );
  1713 	load_spreg( R_EAX, R_SSR );
  1714 	call_func1( sh4_write_sr, R_EAX );
  1715 	sh4_x86.in_delay_slot = DELAY_PC;
  1716 	sh4_x86.priv_checked = FALSE;
  1717 	sh4_x86.fpuen_checked = FALSE;
  1718 	sh4_x86.tstate = TSTATE_NONE;
  1719 	sh4_x86.branch_taken = TRUE;
  1720 	if( UNTRANSLATABLE(pc+2) ) {
  1721 	    exit_block_emu(pc+2);
  1722 	    return 2;
  1723 	} else {
  1724 	    sh4_translate_instruction(pc+2);
  1725 	    exit_block_newpcset(pc+2);
  1726 	    return 4;
  1727 	}
  1728     }
  1729 :}
  1730 RTS {:  
  1731     COUNT_INST(I_RTS);
  1732     if( sh4_x86.in_delay_slot ) {
  1733 	SLOTILLEGAL();
  1734     } else {
  1735 	load_spreg( R_ECX, R_PR );
  1736 	store_spreg( R_ECX, R_NEW_PC );
  1737 	sh4_x86.in_delay_slot = DELAY_PC;
  1738 	sh4_x86.branch_taken = TRUE;
  1739 	if( UNTRANSLATABLE(pc+2) ) {
  1740 	    exit_block_emu(pc+2);
  1741 	    return 2;
  1742 	} else {
  1743 	    sh4_translate_instruction(pc+2);
  1744 	    exit_block_newpcset(pc+2);
  1745 	    return 4;
  1746 	}
  1747     }
  1748 :}
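       /* TRAPA commits the updated PC (pc+2, computed relative to the block
        * start) to sh4r.pc before calling sh4_raise_trap(), presumably so the
        * trap can record the correct return address in SPC. The block then
        * exits unconditionally - execution never falls through a TRAPA.
        */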
  1749 TRAPA #imm {:  
  1750     COUNT_INST(I_TRAPA);
  1751     if( sh4_x86.in_delay_slot ) {
  1752 	SLOTILLEGAL();
  1753     } else {
  1754 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1755 	ADD_r32_sh4r( R_ECX, R_PC );
  1756 	load_imm32( R_EAX, imm );
  1757 	call_func1( sh4_raise_trap, R_EAX );
  1758 	sh4_x86.tstate = TSTATE_NONE;
  1759 	exit_block_pcset(pc);
  1760 	sh4_x86.branch_taken = TRUE;
  1761 	return 2;
  1762     }
  1763 :}
  1764 UNDEF {:  
  1765     COUNT_INST(I_UNDEF);
  1766     if( sh4_x86.in_delay_slot ) {
  1767 	SLOTILLEGAL();
  1768     } else {
  1769 	JMP_exc(EXC_ILLEGAL);
  1770 	return 2;
  1771     }
  1772 :}
  1774 CLRMAC {:  
  1775     COUNT_INST(I_CLRMAC);
  1776     XOR_r32_r32(R_EAX, R_EAX);
  1777     store_spreg( R_EAX, R_MACL );
  1778     store_spreg( R_EAX, R_MACH );
  1779     sh4_x86.tstate = TSTATE_NONE;
  1780 :}
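       /* CLRT/SETT below materialise the new T value through the host carry
        * flag: CLC()/STC() force CF to 0/1 and SETC_t() stores it out as a
        * byte. tstate = TSTATE_C then records that T is still live in the host
        * flags, so an immediately following conditional branch can test CF
        * directly instead of re-reading sh4r.t. CLRS/SETS use the same
        * CLC/STC + SETC trick to write the S bit.
        */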
  1781 CLRS {:
  1782     COUNT_INST(I_CLRS);
  1783     CLC();
  1784     SETC_sh4r(R_S);
  1785     sh4_x86.tstate = TSTATE_C;
  1786 :}
  1787 CLRT {:  
  1788     COUNT_INST(I_CLRT);
  1789     CLC();
  1790     SETC_t();
  1791     sh4_x86.tstate = TSTATE_C;
  1792 :}
  1793 SETS {:  
  1794     COUNT_INST(I_SETS);
  1795     STC();
  1796     SETC_sh4r(R_S);
  1797     sh4_x86.tstate = TSTATE_C;
  1798 :}
  1799 SETT {:  
  1800     COUNT_INST(I_SETT);
  1801     STC();
  1802     SETC_t();
  1803     sh4_x86.tstate = TSTATE_C;
  1804 :}
  1806 /* Floating point moves */
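       /* All of the FMOV variants test FPSCR.SZ at runtime: with SZ=0 the move
        * transfers a single 32-bit float, with SZ=1 it transfers a 64-bit pair
        * via the load_dr0/load_dr1 halves, with the correspondingly wider
        * alignment check (check_walign64/check_ralign64) on the memory forms.
        */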
  1807 FMOV FRm, FRn {:  
  1808     COUNT_INST(I_FMOV1);
  1809     check_fpuen();
  1810     load_spreg( R_ECX, R_FPSCR );
  1811     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1812     JNE_rel8(doublesize);
  1813     load_fr( R_EAX, FRm ); // SZ=0 branch
  1814     store_fr( R_EAX, FRn );
  1815     JMP_rel8(end);
  1816     JMP_TARGET(doublesize);
  1817     load_dr0( R_EAX, FRm );
  1818     load_dr1( R_ECX, FRm );
  1819     store_dr0( R_EAX, FRn );
  1820     store_dr1( R_ECX, FRn );
  1821     JMP_TARGET(end);
  1822     sh4_x86.tstate = TSTATE_NONE;
  1823 :}
  1824 FMOV FRm, @Rn {: 
  1825     COUNT_INST(I_FMOV2);
  1826     check_fpuen();
  1827     load_reg( R_EAX, Rn );
  1828     load_spreg( R_EDX, R_FPSCR );
  1829     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1830     JNE_rel8(doublesize);
  1832     check_walign32( R_EAX );
  1833     MMU_TRANSLATE_WRITE( R_EAX );
  1834     load_fr( R_ECX, FRm );
  1835     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1836     JMP_rel8(end);
  1838     JMP_TARGET(doublesize);
  1839     check_walign64( R_EAX );
  1840     MMU_TRANSLATE_WRITE( R_EAX );
  1841     load_dr0( R_ECX, FRm );
  1842     load_dr1( R_EDX, FRm );
  1843     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1844     JMP_TARGET(end);
  1845     sh4_x86.tstate = TSTATE_NONE;
  1846 :}
  1847 FMOV @Rm, FRn {:  
  1848     COUNT_INST(I_FMOV5);
  1849     check_fpuen();
  1850     load_reg( R_EAX, Rm );
  1851     load_spreg( R_EDX, R_FPSCR );
  1852     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1853     JNE_rel8(doublesize);
  1855     check_ralign32( R_EAX );
  1856     MMU_TRANSLATE_READ( R_EAX );
  1857     MEM_READ_LONG( R_EAX, R_EAX );
  1858     store_fr( R_EAX, FRn );
  1859     JMP_rel8(end);
  1861     JMP_TARGET(doublesize);
  1862     check_ralign64( R_EAX );
  1863     MMU_TRANSLATE_READ( R_EAX );
  1864     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1865     store_dr0( R_ECX, FRn );
  1866     store_dr1( R_EAX, FRn );
  1867     JMP_TARGET(end);
  1868     sh4_x86.tstate = TSTATE_NONE;
  1869 :}
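       /* Ordering note for the pre-decrement/post-increment forms below (and
        * the LDC.L/LDS.L/STC.L/STS.L families later): the MMU translation is
        * performed on the adjusted address *before* the register itself is
        * updated via ADD_imm8s_sh4r(). If the translation raises a TLB
        * exception, Rm/Rn is left unmodified and the instruction can be
        * restarted cleanly once the miss has been serviced.
        */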
  1870 FMOV FRm, @-Rn {:  
  1871     COUNT_INST(I_FMOV3);
  1872     check_fpuen();
  1873     load_reg( R_EAX, Rn );
  1874     load_spreg( R_EDX, R_FPSCR );
  1875     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1876     JNE_rel8(doublesize);
  1878     check_walign32( R_EAX );
  1879     ADD_imm8s_r32( -4, R_EAX );
  1880     MMU_TRANSLATE_WRITE( R_EAX );
  1881     load_fr( R_ECX, FRm );
  1882     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1883     MEM_WRITE_LONG( R_EAX, R_ECX );
  1884     JMP_rel8(end);
  1886     JMP_TARGET(doublesize);
  1887     check_walign64( R_EAX );
  1888     ADD_imm8s_r32(-8,R_EAX);
  1889     MMU_TRANSLATE_WRITE( R_EAX );
  1890     load_dr0( R_ECX, FRm );
  1891     load_dr1( R_EDX, FRm );
  1892     ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1893     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1894     JMP_TARGET(end);
  1896     sh4_x86.tstate = TSTATE_NONE;
  1897 :}
  1898 FMOV @Rm+, FRn {:
  1899     COUNT_INST(I_FMOV6);
  1900     check_fpuen();
  1901     load_reg( R_EAX, Rm );
  1902     load_spreg( R_EDX, R_FPSCR );
  1903     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1904     JNE_rel8(doublesize);
  1906     check_ralign32( R_EAX );
  1907     MMU_TRANSLATE_READ( R_EAX );
  1908     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1909     MEM_READ_LONG( R_EAX, R_EAX );
  1910     store_fr( R_EAX, FRn );
  1911     JMP_rel8(end);
  1913     JMP_TARGET(doublesize);
  1914     check_ralign64( R_EAX );
  1915     MMU_TRANSLATE_READ( R_EAX );
  1916     ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1917     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1918     store_dr0( R_ECX, FRn );
  1919     store_dr1( R_EAX, FRn );
  1920     JMP_TARGET(end);
  1922     sh4_x86.tstate = TSTATE_NONE;
  1923 :}
  1924 FMOV FRm, @(R0, Rn) {:  
  1925     COUNT_INST(I_FMOV4);
  1926     check_fpuen();
  1927     load_reg( R_EAX, Rn );
  1928     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1929     load_spreg( R_EDX, R_FPSCR );
  1930     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1931     JNE_rel8(doublesize);
  1933     check_walign32( R_EAX );
  1934     MMU_TRANSLATE_WRITE( R_EAX );
  1935     load_fr( R_ECX, FRm );
  1936     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1937     JMP_rel8(end);
  1939     JMP_TARGET(doublesize);
  1940     check_walign64( R_EAX );
  1941     MMU_TRANSLATE_WRITE( R_EAX );
  1942     load_dr0( R_ECX, FRm );
  1943     load_dr1( R_EDX, FRm );
  1944     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1945     JMP_TARGET(end);
  1947     sh4_x86.tstate = TSTATE_NONE;
  1948 :}
  1949 FMOV @(R0, Rm), FRn {:  
  1950     COUNT_INST(I_FMOV7);
  1951     check_fpuen();
  1952     load_reg( R_EAX, Rm );
  1953     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1954     load_spreg( R_EDX, R_FPSCR );
  1955     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1956     JNE_rel8(doublesize);
  1958     check_ralign32( R_EAX );
  1959     MMU_TRANSLATE_READ( R_EAX );
  1960     MEM_READ_LONG( R_EAX, R_EAX );
  1961     store_fr( R_EAX, FRn );
  1962     JMP_rel8(end);
  1964     JMP_TARGET(doublesize);
  1965     check_ralign64( R_EAX );
  1966     MMU_TRANSLATE_READ( R_EAX );
  1967     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1968     store_dr0( R_ECX, FRn );
  1969     store_dr1( R_EAX, FRn );
  1970     JMP_TARGET(end);
  1972     sh4_x86.tstate = TSTATE_NONE;
  1973 :}
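       /* FLDI0/FLDI1 are only defined for single precision, hence the
        * FPSCR.PR guard (they degrade to no-ops when PR=1). The immediates are
        * raw IEEE-754 single-precision bit patterns:
        *
        *     union { uint32_t i; float f; } u = { 0x3F800000 };  // u.f == 1.0f
        *
        * and +0.0f encodes as all-zero bits, produced here with XOR.
        */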
  1974 FLDI0 FRn {:  /* IFF PR=0 */
  1975     COUNT_INST(I_FLDI0);
  1976     check_fpuen();
  1977     load_spreg( R_ECX, R_FPSCR );
  1978     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1979     JNE_rel8(end);
  1980     XOR_r32_r32( R_EAX, R_EAX );
  1981     store_fr( R_EAX, FRn );
  1982     JMP_TARGET(end);
  1983     sh4_x86.tstate = TSTATE_NONE;
  1984 :}
  1985 FLDI1 FRn {:  /* IFF PR=0 */
  1986     COUNT_INST(I_FLDI1);
  1987     check_fpuen();
  1988     load_spreg( R_ECX, R_FPSCR );
  1989     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1990     JNE_rel8(end);
  1991     load_imm32(R_EAX, 0x3F800000);
  1992     store_fr( R_EAX, FRn );
  1993     JMP_TARGET(end);
  1994     sh4_x86.tstate = TSTATE_NONE;
  1995 :}
  1997 FLOAT FPUL, FRn {:  
  1998     COUNT_INST(I_FLOAT);
  1999     check_fpuen();
  2000     load_spreg( R_ECX, R_FPSCR );
  2001     FILD_sh4r(R_FPUL);
  2002     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2003     JNE_rel8(doubleprec);
  2004     pop_fr( FRn );
  2005     JMP_rel8(end);
  2006     JMP_TARGET(doubleprec);
  2007     pop_dr( FRn );
  2008     JMP_TARGET(end);
  2009     sh4_x86.tstate = TSTATE_NONE;
  2010 :}
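       /* FTRC must truncate toward zero regardless of the host rounding mode,
        * so the x87 control word is saved (FNSTCW), swapped for trunc_fcw and
        * restored around the FISTP. Out-of-range values saturate against
        * max_int/min_int first. A rough C sketch of the semantics, assuming
        * max_int/min_int (defined outside this excerpt) hold INT32_MAX and
        * INT32_MIN:
        *
        *     int32_t ftrc( double d ) {
        *         if( d >= (double)max_int ) return max_int;
        *         if( d <= (double)min_int ) return min_int;
        *         return (int32_t)d;   // a C cast truncates toward zero
        *     }
        */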
  2011 FTRC FRm, FPUL {:  
  2012     COUNT_INST(I_FTRC);
  2013     check_fpuen();
  2014     load_spreg( R_ECX, R_FPSCR );
  2015     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2016     JNE_rel8(doubleprec);
  2017     push_fr( FRm );
  2018     JMP_rel8(doop);
  2019     JMP_TARGET(doubleprec);
  2020     push_dr( FRm );
  2021     JMP_TARGET( doop );
  2022     load_imm32( R_ECX, (uint32_t)&max_int );
  2023     FILD_r32ind( R_ECX );
  2024     FCOMIP_st(1);
  2025     JNA_rel8( sat );
  2026     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  2027     FILD_r32ind( R_ECX );           // 2
  2028     FCOMIP_st(1);                   // 2
  2029     JAE_rel8( sat2 );            // 2
  2030     load_imm32( R_EAX, (uint32_t)&save_fcw );
  2031     FNSTCW_r32ind( R_EAX );
  2032     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  2033     FLDCW_r32ind( R_EDX );
  2034     FISTP_sh4r(R_FPUL);             // 3
  2035     FLDCW_r32ind( R_EAX );
  2036     JMP_rel8(end);             // 2
  2038     JMP_TARGET(sat);
  2039     JMP_TARGET(sat2);
  2040     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2041     store_spreg( R_ECX, R_FPUL );
  2042     FPOP_st();
  2043     JMP_TARGET(end);
  2044     sh4_x86.tstate = TSTATE_NONE;
  2045 :}
  2046 FLDS FRm, FPUL {:  
  2047     COUNT_INST(I_FLDS);
  2048     check_fpuen();
  2049     load_fr( R_EAX, FRm );
  2050     store_spreg( R_EAX, R_FPUL );
  2051     sh4_x86.tstate = TSTATE_NONE;
  2052 :}
  2053 FSTS FPUL, FRn {:  
  2054     COUNT_INST(I_FSTS);
  2055     check_fpuen();
  2056     load_spreg( R_EAX, R_FPUL );
  2057     store_fr( R_EAX, FRn );
  2058     sh4_x86.tstate = TSTATE_NONE;
  2059 :}
  2060 FCNVDS FRm, FPUL {:  
  2061     COUNT_INST(I_FCNVDS);
  2062     check_fpuen();
  2063     load_spreg( R_ECX, R_FPSCR );
  2064     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2065     JE_rel8(end); // only when PR=1
  2066     push_dr( FRm );
  2067     pop_fpul();
  2068     JMP_TARGET(end);
  2069     sh4_x86.tstate = TSTATE_NONE;
  2070 :}
  2071 FCNVSD FPUL, FRn {:  
  2072     COUNT_INST(I_FCNVSD);
  2073     check_fpuen();
  2074     load_spreg( R_ECX, R_FPSCR );
  2075     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2076     JE_rel8(end); // only when PR=1
  2077     push_fpul();
  2078     pop_dr( FRn );
  2079     JMP_TARGET(end);
  2080     sh4_x86.tstate = TSTATE_NONE;
  2081 :}
  2083 /* Floating point instructions */
  2084 FABS FRn {:  
  2085     COUNT_INST(I_FABS);
  2086     check_fpuen();
  2087     load_spreg( R_ECX, R_FPSCR );
  2088     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2089     JNE_rel8(doubleprec);
  2090     push_fr(FRn); // 6
  2091     FABS_st0(); // 2
  2092     pop_fr(FRn); //6
  2093     JMP_rel8(end); // 2
  2094     JMP_TARGET(doubleprec);
  2095     push_dr(FRn);
  2096     FABS_st0();
  2097     pop_dr(FRn);
  2098     JMP_TARGET(end);
  2099     sh4_x86.tstate = TSTATE_NONE;
  2100 :}
  2101 FADD FRm, FRn {:  
  2102     COUNT_INST(I_FADD);
  2103     check_fpuen();
  2104     load_spreg( R_ECX, R_FPSCR );
  2105     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2106     JNE_rel8(doubleprec);
  2107     push_fr(FRm);
  2108     push_fr(FRn);
  2109     FADDP_st(1);
  2110     pop_fr(FRn);
  2111     JMP_rel8(end);
  2112     JMP_TARGET(doubleprec);
  2113     push_dr(FRm);
  2114     push_dr(FRn);
  2115     FADDP_st(1);
  2116     pop_dr(FRn);
  2117     JMP_TARGET(end);
  2118     sh4_x86.tstate = TSTATE_NONE;
  2119 :}
  2120 FDIV FRm, FRn {:  
  2121     COUNT_INST(I_FDIV);
  2122     check_fpuen();
  2123     load_spreg( R_ECX, R_FPSCR );
  2124     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2125     JNE_rel8(doubleprec);
  2126     push_fr(FRn);
  2127     push_fr(FRm);
  2128     FDIVP_st(1);
  2129     pop_fr(FRn);
  2130     JMP_rel8(end);
  2131     JMP_TARGET(doubleprec);
  2132     push_dr(FRn);
  2133     push_dr(FRm);
  2134     FDIVP_st(1);
  2135     pop_dr(FRn);
  2136     JMP_TARGET(end);
  2137     sh4_x86.tstate = TSTATE_NONE;
  2138 :}
  2139 FMAC FR0, FRm, FRn {:  
  2140     COUNT_INST(I_FMAC);
  2141     check_fpuen();
  2142     load_spreg( R_ECX, R_FPSCR );
  2143     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2144     JNE_rel8(doubleprec);
  2145     push_fr( 0 );
  2146     push_fr( FRm );
  2147     FMULP_st(1);
  2148     push_fr( FRn );
  2149     FADDP_st(1);
  2150     pop_fr( FRn );
  2151     JMP_rel8(end);
  2152     JMP_TARGET(doubleprec);
  2153     push_dr( 0 );
  2154     push_dr( FRm );
  2155     FMULP_st(1);
  2156     push_dr( FRn );
  2157     FADDP_st(1);
  2158     pop_dr( FRn );
  2159     JMP_TARGET(end);
  2160     sh4_x86.tstate = TSTATE_NONE;
  2161 :}
  2163 FMUL FRm, FRn {:  
  2164     COUNT_INST(I_FMUL);
  2165     check_fpuen();
  2166     load_spreg( R_ECX, R_FPSCR );
  2167     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2168     JNE_rel8(doubleprec);
  2169     push_fr(FRm);
  2170     push_fr(FRn);
  2171     FMULP_st(1);
  2172     pop_fr(FRn);
  2173     JMP_rel8(end);
  2174     JMP_TARGET(doubleprec);
  2175     push_dr(FRm);
  2176     push_dr(FRn);
  2177     FMULP_st(1);
  2178     pop_dr(FRn);
  2179     JMP_TARGET(end);
  2180     sh4_x86.tstate = TSTATE_NONE;
  2181 :}
  2182 FNEG FRn {:  
  2183     COUNT_INST(I_FNEG);
  2184     check_fpuen();
  2185     load_spreg( R_ECX, R_FPSCR );
  2186     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2187     JNE_rel8(doubleprec);
  2188     push_fr(FRn);
  2189     FCHS_st0();
  2190     pop_fr(FRn);
  2191     JMP_rel8(end);
  2192     JMP_TARGET(doubleprec);
  2193     push_dr(FRn);
  2194     FCHS_st0();
  2195     pop_dr(FRn);
  2196     JMP_TARGET(end);
  2197     sh4_x86.tstate = TSTATE_NONE;
  2198 :}
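       /* FSRRA is defined as an approximation of 1/sqrt(FRn) on real hardware;
        * the translation below simply computes it exactly on the x87 stack:
        * FLD1 pushes 1.0, FSQRT takes the root, and FDIVP leaves
        * 1.0/sqrt(FRn) in st(0). It is a PR=0-only instruction, so the PR=1
        * case falls straight through to the end label.
        */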
  2199 FSRRA FRn {:  
  2200     COUNT_INST(I_FSRRA);
  2201     check_fpuen();
  2202     load_spreg( R_ECX, R_FPSCR );
  2203     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2204     JNE_rel8(end); // PR=0 only
  2205     FLD1_st0();
  2206     push_fr(FRn);
  2207     FSQRT_st0();
  2208     FDIVP_st(1);
  2209     pop_fr(FRn);
  2210     JMP_TARGET(end);
  2211     sh4_x86.tstate = TSTATE_NONE;
  2212 :}
  2213 FSQRT FRn {:  
  2214     COUNT_INST(I_FSQRT);
  2215     check_fpuen();
  2216     load_spreg( R_ECX, R_FPSCR );
  2217     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2218     JNE_rel8(doubleprec);
  2219     push_fr(FRn);
  2220     FSQRT_st0();
  2221     pop_fr(FRn);
  2222     JMP_rel8(end);
  2223     JMP_TARGET(doubleprec);
  2224     push_dr(FRn);
  2225     FSQRT_st0();
  2226     pop_dr(FRn);
  2227     JMP_TARGET(end);
  2228     sh4_x86.tstate = TSTATE_NONE;
  2229 :}
  2230 FSUB FRm, FRn {:  
  2231     COUNT_INST(I_FSUB);
  2232     check_fpuen();
  2233     load_spreg( R_ECX, R_FPSCR );
  2234     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2235     JNE_rel8(doubleprec);
  2236     push_fr(FRn);
  2237     push_fr(FRm);
  2238     FSUBP_st(1);
  2239     pop_fr(FRn);
  2240     JMP_rel8(end);
  2241     JMP_TARGET(doubleprec);
  2242     push_dr(FRn);
  2243     push_dr(FRm);
  2244     FSUBP_st(1);
  2245     pop_dr(FRn);
  2246     JMP_TARGET(end);
  2247     sh4_x86.tstate = TSTATE_NONE;
  2248 :}
  2250 FCMP/EQ FRm, FRn {:  
  2251     COUNT_INST(I_FCMPEQ);
  2252     check_fpuen();
  2253     load_spreg( R_ECX, R_FPSCR );
  2254     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2255     JNE_rel8(doubleprec);
  2256     push_fr(FRm);
  2257     push_fr(FRn);
  2258     JMP_rel8(end);
  2259     JMP_TARGET(doubleprec);
  2260     push_dr(FRm);
  2261     push_dr(FRn);
  2262     JMP_TARGET(end);
  2263     FCOMIP_st(1);
  2264     SETE_t();
  2265     FPOP_st();
  2266     sh4_x86.tstate = TSTATE_NONE;
  2267 :}
  2268 FCMP/GT FRm, FRn {:  
  2269     COUNT_INST(I_FCMPGT);
  2270     check_fpuen();
  2271     load_spreg( R_ECX, R_FPSCR );
  2272     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2273     JNE_rel8(doubleprec);
  2274     push_fr(FRm);
  2275     push_fr(FRn);
  2276     JMP_rel8(end);
  2277     JMP_TARGET(doubleprec);
  2278     push_dr(FRm);
  2279     push_dr(FRn);
  2280     JMP_TARGET(end);
  2281     FCOMIP_st(1);
  2282     SETA_t();
  2283     FPOP_st();
  2284     sh4_x86.tstate = TSTATE_NONE;
  2285 :}
  2287 FSCA FPUL, FRn {:  
  2288     COUNT_INST(I_FSCA);
  2289     check_fpuen();
  2290     load_spreg( R_ECX, R_FPSCR );
  2291     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2292     JNE_rel8(doubleprec );
  2293     LEA_sh4r_r32( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
  2294     load_spreg( R_EDX, R_FPUL );
  2295     call_func2( sh4_fsca, R_EDX, R_ECX );
  2296     JMP_TARGET(doubleprec);
  2297     sh4_x86.tstate = TSTATE_NONE;
  2298 :}
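       /* FIPR computes the four-component inner product FVn.FVm. FVm/FVn are
        * vector register numbers, so FVm<<2 is the first of the four
        * consecutive FR registers making up each vector; the partial products
        * are accumulated on the x87 stack and the scalar result lands in the
        * vector's last element, FR[(FVn<<2)+3], as the SH4 defines it.
        */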
  2299 FIPR FVm, FVn {:  
  2300     COUNT_INST(I_FIPR);
  2301     check_fpuen();
  2302     load_spreg( R_ECX, R_FPSCR );
  2303     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2304     JNE_rel8( doubleprec);
  2306     push_fr( FVm<<2 );
  2307     push_fr( FVn<<2 );
  2308     FMULP_st(1);
  2309     push_fr( (FVm<<2)+1);
  2310     push_fr( (FVn<<2)+1);
  2311     FMULP_st(1);
  2312     FADDP_st(1);
  2313     push_fr( (FVm<<2)+2);
  2314     push_fr( (FVn<<2)+2);
  2315     FMULP_st(1);
  2316     FADDP_st(1);
  2317     push_fr( (FVm<<2)+3);
  2318     push_fr( (FVn<<2)+3);
  2319     FMULP_st(1);
  2320     FADDP_st(1);
  2321     pop_fr( (FVn<<2)+3);
  2322     JMP_TARGET(doubleprec);
  2323     sh4_x86.tstate = TSTATE_NONE;
  2324 :}
  2325 FTRV XMTRX, FVn {:  
  2326     COUNT_INST(I_FTRV);
  2327     check_fpuen();
  2328     load_spreg( R_ECX, R_FPSCR );
  2329     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2330     JNE_rel8( doubleprec );
  2331     LEA_sh4r_r32( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
  2332     call_func1( sh4_ftrv, R_EDX );  // 12
  2333     JMP_TARGET(doubleprec);
  2334     sh4_x86.tstate = TSTATE_NONE;
  2335 :}
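       /* FRCHG and FSCHG each toggle a single FPSCR bit: FR, which swaps the
        * two floating-point register banks (hence the additional call to
        * sh4_switch_fr_banks()), and SZ, which switches FMOV between 32-bit
        * and 64-bit transfer mode.
        */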
  2337 FRCHG {:  
  2338     COUNT_INST(I_FRCHG);
  2339     check_fpuen();
  2340     load_spreg( R_ECX, R_FPSCR );
  2341     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2342     store_spreg( R_ECX, R_FPSCR );
  2343     call_func0( sh4_switch_fr_banks );
  2344     sh4_x86.tstate = TSTATE_NONE;
  2345 :}
  2346 FSCHG {:  
  2347     COUNT_INST(I_FSCHG);
  2348     check_fpuen();
  2349     load_spreg( R_ECX, R_FPSCR );
  2350     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2351     store_spreg( R_ECX, R_FPSCR );
  2352     sh4_x86.tstate = TSTATE_NONE;
  2353 :}
  2355 /* Processor control instructions */
  2356 LDC Rm, SR {:
  2357     COUNT_INST(I_LDCSR);
  2358     if( sh4_x86.in_delay_slot ) {
  2359 	SLOTILLEGAL();
  2360     } else {
  2361 	check_priv();
  2362 	load_reg( R_EAX, Rm );
  2363 	call_func1( sh4_write_sr, R_EAX );
  2364 	sh4_x86.priv_checked = FALSE;
  2365 	sh4_x86.fpuen_checked = FALSE;
  2366 	sh4_x86.tstate = TSTATE_NONE;
  2367     }
  2368 :}
  2369 LDC Rm, GBR {: 
  2370     COUNT_INST(I_LDC);
  2371     load_reg( R_EAX, Rm );
  2372     store_spreg( R_EAX, R_GBR );
  2373 :}
  2374 LDC Rm, VBR {:  
  2375     COUNT_INST(I_LDC);
  2376     check_priv();
  2377     load_reg( R_EAX, Rm );
  2378     store_spreg( R_EAX, R_VBR );
  2379     sh4_x86.tstate = TSTATE_NONE;
  2380 :}
  2381 LDC Rm, SSR {:  
  2382     COUNT_INST(I_LDC);
  2383     check_priv();
  2384     load_reg( R_EAX, Rm );
  2385     store_spreg( R_EAX, R_SSR );
  2386     sh4_x86.tstate = TSTATE_NONE;
  2387 :}
  2388 LDC Rm, SGR {:  
  2389     COUNT_INST(I_LDC);
  2390     check_priv();
  2391     load_reg( R_EAX, Rm );
  2392     store_spreg( R_EAX, R_SGR );
  2393     sh4_x86.tstate = TSTATE_NONE;
  2394 :}
  2395 LDC Rm, SPC {:  
  2396     COUNT_INST(I_LDC);
  2397     check_priv();
  2398     load_reg( R_EAX, Rm );
  2399     store_spreg( R_EAX, R_SPC );
  2400     sh4_x86.tstate = TSTATE_NONE;
  2401 :}
  2402 LDC Rm, DBR {:  
  2403     COUNT_INST(I_LDC);
  2404     check_priv();
  2405     load_reg( R_EAX, Rm );
  2406     store_spreg( R_EAX, R_DBR );
  2407     sh4_x86.tstate = TSTATE_NONE;
  2408 :}
  2409 LDC Rm, Rn_BANK {:  
  2410     COUNT_INST(I_LDC);
  2411     check_priv();
  2412     load_reg( R_EAX, Rm );
  2413     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2414     sh4_x86.tstate = TSTATE_NONE;
  2415 :}
  2416 LDC.L @Rm+, GBR {:  
  2417     COUNT_INST(I_LDCM);
  2418     load_reg( R_EAX, Rm );
  2419     check_ralign32( R_EAX );
  2420     MMU_TRANSLATE_READ( R_EAX );
  2421     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2422     MEM_READ_LONG( R_EAX, R_EAX );
  2423     store_spreg( R_EAX, R_GBR );
  2424     sh4_x86.tstate = TSTATE_NONE;
  2425 :}
  2426 LDC.L @Rm+, SR {:
  2427     COUNT_INST(I_LDCSRM);
  2428     if( sh4_x86.in_delay_slot ) {
  2429 	SLOTILLEGAL();
  2430     } else {
  2431 	check_priv();
  2432 	load_reg( R_EAX, Rm );
  2433 	check_ralign32( R_EAX );
  2434 	MMU_TRANSLATE_READ( R_EAX );
  2435 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2436 	MEM_READ_LONG( R_EAX, R_EAX );
  2437 	call_func1( sh4_write_sr, R_EAX );
  2438 	sh4_x86.priv_checked = FALSE;
  2439 	sh4_x86.fpuen_checked = FALSE;
  2440 	sh4_x86.tstate = TSTATE_NONE;
  2441     }
  2442 :}
  2443 LDC.L @Rm+, VBR {:  
  2444     COUNT_INST(I_LDCM);
  2445     check_priv();
  2446     load_reg( R_EAX, Rm );
  2447     check_ralign32( R_EAX );
  2448     MMU_TRANSLATE_READ( R_EAX );
  2449     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2450     MEM_READ_LONG( R_EAX, R_EAX );
  2451     store_spreg( R_EAX, R_VBR );
  2452     sh4_x86.tstate = TSTATE_NONE;
  2453 :}
  2454 LDC.L @Rm+, SSR {:
  2455     COUNT_INST(I_LDCM);
  2456     check_priv();
  2457     load_reg( R_EAX, Rm );
  2458     check_ralign32( R_EAX );
  2459     MMU_TRANSLATE_READ( R_EAX );
  2460     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2461     MEM_READ_LONG( R_EAX, R_EAX );
  2462     store_spreg( R_EAX, R_SSR );
  2463     sh4_x86.tstate = TSTATE_NONE;
  2464 :}
  2465 LDC.L @Rm+, SGR {:  
  2466     COUNT_INST(I_LDCM);
  2467     check_priv();
  2468     load_reg( R_EAX, Rm );
  2469     check_ralign32( R_EAX );
  2470     MMU_TRANSLATE_READ( R_EAX );
  2471     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2472     MEM_READ_LONG( R_EAX, R_EAX );
  2473     store_spreg( R_EAX, R_SGR );
  2474     sh4_x86.tstate = TSTATE_NONE;
  2475 :}
  2476 LDC.L @Rm+, SPC {:  
  2477     COUNT_INST(I_LDCM);
  2478     check_priv();
  2479     load_reg( R_EAX, Rm );
  2480     check_ralign32( R_EAX );
  2481     MMU_TRANSLATE_READ( R_EAX );
  2482     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2483     MEM_READ_LONG( R_EAX, R_EAX );
  2484     store_spreg( R_EAX, R_SPC );
  2485     sh4_x86.tstate = TSTATE_NONE;
  2486 :}
  2487 LDC.L @Rm+, DBR {:  
  2488     COUNT_INST(I_LDCM);
  2489     check_priv();
  2490     load_reg( R_EAX, Rm );
  2491     check_ralign32( R_EAX );
  2492     MMU_TRANSLATE_READ( R_EAX );
  2493     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2494     MEM_READ_LONG( R_EAX, R_EAX );
  2495     store_spreg( R_EAX, R_DBR );
  2496     sh4_x86.tstate = TSTATE_NONE;
  2497 :}
  2498 LDC.L @Rm+, Rn_BANK {:  
  2499     COUNT_INST(I_LDCM);
  2500     check_priv();
  2501     load_reg( R_EAX, Rm );
  2502     check_ralign32( R_EAX );
  2503     MMU_TRANSLATE_READ( R_EAX );
  2504     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2505     MEM_READ_LONG( R_EAX, R_EAX );
  2506     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2507     sh4_x86.tstate = TSTATE_NONE;
  2508 :}
  2509 LDS Rm, FPSCR {:
  2510     COUNT_INST(I_LDSFPSCR);
  2511     check_fpuen();
  2512     load_reg( R_EAX, Rm );
  2513     call_func1( sh4_write_fpscr, R_EAX );
  2514     sh4_x86.tstate = TSTATE_NONE;
  2515 :}
  2516 LDS.L @Rm+, FPSCR {:  
  2517     COUNT_INST(I_LDSFPSCRM);
  2518     check_fpuen();
  2519     load_reg( R_EAX, Rm );
  2520     check_ralign32( R_EAX );
  2521     MMU_TRANSLATE_READ( R_EAX );
  2522     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2523     MEM_READ_LONG( R_EAX, R_EAX );
  2524     call_func1( sh4_write_fpscr, R_EAX );
  2525     sh4_x86.tstate = TSTATE_NONE;
  2526 :}
  2527 LDS Rm, FPUL {:  
  2528     COUNT_INST(I_LDS);
  2529     check_fpuen();
  2530     load_reg( R_EAX, Rm );
  2531     store_spreg( R_EAX, R_FPUL );
  2532 :}
  2533 LDS.L @Rm+, FPUL {:  
  2534     COUNT_INST(I_LDSM);
  2535     check_fpuen();
  2536     load_reg( R_EAX, Rm );
  2537     check_ralign32( R_EAX );
  2538     MMU_TRANSLATE_READ( R_EAX );
  2539     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2540     MEM_READ_LONG( R_EAX, R_EAX );
  2541     store_spreg( R_EAX, R_FPUL );
  2542     sh4_x86.tstate = TSTATE_NONE;
  2543 :}
  2544 LDS Rm, MACH {: 
  2545     COUNT_INST(I_LDS);
  2546     load_reg( R_EAX, Rm );
  2547     store_spreg( R_EAX, R_MACH );
  2548 :}
  2549 LDS.L @Rm+, MACH {:  
  2550     COUNT_INST(I_LDSM);
  2551     load_reg( R_EAX, Rm );
  2552     check_ralign32( R_EAX );
  2553     MMU_TRANSLATE_READ( R_EAX );
  2554     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2555     MEM_READ_LONG( R_EAX, R_EAX );
  2556     store_spreg( R_EAX, R_MACH );
  2557     sh4_x86.tstate = TSTATE_NONE;
  2558 :}
  2559 LDS Rm, MACL {:  
  2560     COUNT_INST(I_LDS);
  2561     load_reg( R_EAX, Rm );
  2562     store_spreg( R_EAX, R_MACL );
  2563 :}
  2564 LDS.L @Rm+, MACL {:  
  2565     COUNT_INST(I_LDSM);
  2566     load_reg( R_EAX, Rm );
  2567     check_ralign32( R_EAX );
  2568     MMU_TRANSLATE_READ( R_EAX );
  2569     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2570     MEM_READ_LONG( R_EAX, R_EAX );
  2571     store_spreg( R_EAX, R_MACL );
  2572     sh4_x86.tstate = TSTATE_NONE;
  2573 :}
  2574 LDS Rm, PR {:  
  2575     COUNT_INST(I_LDS);
  2576     load_reg( R_EAX, Rm );
  2577     store_spreg( R_EAX, R_PR );
  2578 :}
  2579 LDS.L @Rm+, PR {:  
  2580     COUNT_INST(I_LDSM);
  2581     load_reg( R_EAX, Rm );
  2582     check_ralign32( R_EAX );
  2583     MMU_TRANSLATE_READ( R_EAX );
  2584     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2585     MEM_READ_LONG( R_EAX, R_EAX );
  2586     store_spreg( R_EAX, R_PR );
  2587     sh4_x86.tstate = TSTATE_NONE;
  2588 :}
  2589 LDTLB {:  
  2590     COUNT_INST(I_LDTLB);
  2591     call_func0( MMU_ldtlb );
  2592 :}
  2593 OCBI @Rn {:
  2594     COUNT_INST(I_OCBI);
  2595 :}
  2596 OCBP @Rn {:
  2597     COUNT_INST(I_OCBP);
  2598 :}
  2599 OCBWB @Rn {:
  2600     COUNT_INST(I_OCBWB);
  2601 :}
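       /* PREF only has an architecturally visible effect when the address
        * falls in the store-queue region: masking with 0xFC000000 and
        * comparing against 0xE0000000 selects 0xE0000000-0xE3FFFFFF. For those
        * addresses sh4_flush_store_queue() performs the write-back; a zero
        * return presumably signals that the flush raised an MMU exception,
        * which is propagated via JE_exc(-1). All other addresses fall through
        * as a no-op.
        */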
  2602 PREF @Rn {:
  2603     COUNT_INST(I_PREF);
  2604     load_reg( R_EAX, Rn );
  2605     MOV_r32_r32( R_EAX, R_ECX );
  2606     AND_imm32_r32( 0xFC000000, R_EAX );
  2607     CMP_imm32_r32( 0xE0000000, R_EAX );
  2608     JNE_rel8(end);
  2609     call_func1( sh4_flush_store_queue, R_ECX );
  2610     TEST_r32_r32( R_EAX, R_EAX );
  2611     JE_exc(-1);
  2612     JMP_TARGET(end);
  2613     sh4_x86.tstate = TSTATE_NONE;
  2614 :}
  2615 SLEEP {: 
  2616     COUNT_INST(I_SLEEP);
  2617     check_priv();
  2618     call_func0( sh4_sleep );
  2619     sh4_x86.tstate = TSTATE_NONE;
  2620     sh4_x86.in_delay_slot = DELAY_NONE;
  2621     return 2;
  2622 :}
  2623 STC SR, Rn {:
  2624     COUNT_INST(I_STCSR);
  2625     check_priv();
  2626     call_func0(sh4_read_sr);
  2627     store_reg( R_EAX, Rn );
  2628     sh4_x86.tstate = TSTATE_NONE;
  2629 :}
  2630 STC GBR, Rn {:  
  2631     COUNT_INST(I_STC);
  2632     load_spreg( R_EAX, R_GBR );
  2633     store_reg( R_EAX, Rn );
  2634 :}
  2635 STC VBR, Rn {:  
  2636     COUNT_INST(I_STC);
  2637     check_priv();
  2638     load_spreg( R_EAX, R_VBR );
  2639     store_reg( R_EAX, Rn );
  2640     sh4_x86.tstate = TSTATE_NONE;
  2641 :}
  2642 STC SSR, Rn {:  
  2643     COUNT_INST(I_STC);
  2644     check_priv();
  2645     load_spreg( R_EAX, R_SSR );
  2646     store_reg( R_EAX, Rn );
  2647     sh4_x86.tstate = TSTATE_NONE;
  2648 :}
  2649 STC SPC, Rn {:  
  2650     COUNT_INST(I_STC);
  2651     check_priv();
  2652     load_spreg( R_EAX, R_SPC );
  2653     store_reg( R_EAX, Rn );
  2654     sh4_x86.tstate = TSTATE_NONE;
  2655 :}
  2656 STC SGR, Rn {:  
  2657     COUNT_INST(I_STC);
  2658     check_priv();
  2659     load_spreg( R_EAX, R_SGR );
  2660     store_reg( R_EAX, Rn );
  2661     sh4_x86.tstate = TSTATE_NONE;
  2662 :}
  2663 STC DBR, Rn {:  
  2664     COUNT_INST(I_STC);
  2665     check_priv();
  2666     load_spreg( R_EAX, R_DBR );
  2667     store_reg( R_EAX, Rn );
  2668     sh4_x86.tstate = TSTATE_NONE;
  2669 :}
  2670 STC Rm_BANK, Rn {:
  2671     COUNT_INST(I_STC);
  2672     check_priv();
  2673     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2674     store_reg( R_EAX, Rn );
  2675     sh4_x86.tstate = TSTATE_NONE;
  2676 :}
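       /* STC.L SR is the one store below that must call out to sh4_read_sr()
        * to assemble the SR value, so the translated target address in EAX is
        * preserved across the call with PUSH_realigned_r32/POP_realigned_r32
        * (the "realigned" variants presumably keep the host stack aligned for
        * the call). The operand roles are therefore swapped relative to the
        * other STC.L forms: address in ECX, data in EAX.
        */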
  2677 STC.L SR, @-Rn {:
  2678     COUNT_INST(I_STCSRM);
  2679     check_priv();
  2680     load_reg( R_EAX, Rn );
  2681     check_walign32( R_EAX );
  2682     ADD_imm8s_r32( -4, R_EAX );
  2683     MMU_TRANSLATE_WRITE( R_EAX );
  2684     PUSH_realigned_r32( R_EAX );
  2685     call_func0( sh4_read_sr );
  2686     POP_realigned_r32( R_ECX );
  2687     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2688     MEM_WRITE_LONG( R_ECX, R_EAX );
  2689     sh4_x86.tstate = TSTATE_NONE;
  2690 :}
  2691 STC.L VBR, @-Rn {:  
  2692     COUNT_INST(I_STCM);
  2693     check_priv();
  2694     load_reg( R_EAX, Rn );
  2695     check_walign32( R_EAX );
  2696     ADD_imm8s_r32( -4, R_EAX );
  2697     MMU_TRANSLATE_WRITE( R_EAX );
  2698     load_spreg( R_EDX, R_VBR );
  2699     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2700     MEM_WRITE_LONG( R_EAX, R_EDX );
  2701     sh4_x86.tstate = TSTATE_NONE;
  2702 :}
  2703 STC.L SSR, @-Rn {:  
  2704     COUNT_INST(I_STCM);
  2705     check_priv();
  2706     load_reg( R_EAX, Rn );
  2707     check_walign32( R_EAX );
  2708     ADD_imm8s_r32( -4, R_EAX );
  2709     MMU_TRANSLATE_WRITE( R_EAX );
  2710     load_spreg( R_EDX, R_SSR );
  2711     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2712     MEM_WRITE_LONG( R_EAX, R_EDX );
  2713     sh4_x86.tstate = TSTATE_NONE;
  2714 :}
  2715 STC.L SPC, @-Rn {:
  2716     COUNT_INST(I_STCM);
  2717     check_priv();
  2718     load_reg( R_EAX, Rn );
  2719     check_walign32( R_EAX );
  2720     ADD_imm8s_r32( -4, R_EAX );
  2721     MMU_TRANSLATE_WRITE( R_EAX );
  2722     load_spreg( R_EDX, R_SPC );
  2723     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2724     MEM_WRITE_LONG( R_EAX, R_EDX );
  2725     sh4_x86.tstate = TSTATE_NONE;
  2726 :}
  2727 STC.L SGR, @-Rn {:  
  2728     COUNT_INST(I_STCM);
  2729     check_priv();
  2730     load_reg( R_EAX, Rn );
  2731     check_walign32( R_EAX );
  2732     ADD_imm8s_r32( -4, R_EAX );
  2733     MMU_TRANSLATE_WRITE( R_EAX );
  2734     load_spreg( R_EDX, R_SGR );
  2735     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2736     MEM_WRITE_LONG( R_EAX, R_EDX );
  2737     sh4_x86.tstate = TSTATE_NONE;
  2738 :}
  2739 STC.L DBR, @-Rn {:  
  2740     COUNT_INST(I_STCM);
  2741     check_priv();
  2742     load_reg( R_EAX, Rn );
  2743     check_walign32( R_EAX );
  2744     ADD_imm8s_r32( -4, R_EAX );
  2745     MMU_TRANSLATE_WRITE( R_EAX );
  2746     load_spreg( R_EDX, R_DBR );
  2747     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2748     MEM_WRITE_LONG( R_EAX, R_EDX );
  2749     sh4_x86.tstate = TSTATE_NONE;
  2750 :}
  2751 STC.L Rm_BANK, @-Rn {:  
  2752     COUNT_INST(I_STCM);
  2753     check_priv();
  2754     load_reg( R_EAX, Rn );
  2755     check_walign32( R_EAX );
  2756     ADD_imm8s_r32( -4, R_EAX );
  2757     MMU_TRANSLATE_WRITE( R_EAX );
  2758     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2759     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2760     MEM_WRITE_LONG( R_EAX, R_EDX );
  2761     sh4_x86.tstate = TSTATE_NONE;
  2762 :}
  2763 STC.L GBR, @-Rn {:  
  2764     COUNT_INST(I_STCM);
  2765     load_reg( R_EAX, Rn );
  2766     check_walign32( R_EAX );
  2767     ADD_imm8s_r32( -4, R_EAX );
  2768     MMU_TRANSLATE_WRITE( R_EAX );
  2769     load_spreg( R_EDX, R_GBR );
  2770     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2771     MEM_WRITE_LONG( R_EAX, R_EDX );
  2772     sh4_x86.tstate = TSTATE_NONE;
  2773 :}
  2774 STS FPSCR, Rn {:  
  2775     COUNT_INST(I_STSFPSCR);
  2776     check_fpuen();
  2777     load_spreg( R_EAX, R_FPSCR );
  2778     store_reg( R_EAX, Rn );
  2779 :}
  2780 STS.L FPSCR, @-Rn {:  
  2781     COUNT_INST(I_STSFPSCRM);
  2782     check_fpuen();
  2783     load_reg( R_EAX, Rn );
  2784     check_walign32( R_EAX );
  2785     ADD_imm8s_r32( -4, R_EAX );
  2786     MMU_TRANSLATE_WRITE( R_EAX );
  2787     load_spreg( R_EDX, R_FPSCR );
  2788     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2789     MEM_WRITE_LONG( R_EAX, R_EDX );
  2790     sh4_x86.tstate = TSTATE_NONE;
  2791 :}
  2792 STS FPUL, Rn {:  
  2793     COUNT_INST(I_STS);
  2794     check_fpuen();
  2795     load_spreg( R_EAX, R_FPUL );
  2796     store_reg( R_EAX, Rn );
  2797 :}
  2798 STS.L FPUL, @-Rn {:  
  2799     COUNT_INST(I_STSM);
  2800     check_fpuen();
  2801     load_reg( R_EAX, Rn );
  2802     check_walign32( R_EAX );
  2803     ADD_imm8s_r32( -4, R_EAX );
  2804     MMU_TRANSLATE_WRITE( R_EAX );
  2805     load_spreg( R_EDX, R_FPUL );
  2806     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2807     MEM_WRITE_LONG( R_EAX, R_EDX );
  2808     sh4_x86.tstate = TSTATE_NONE;
  2809 :}
  2810 STS MACH, Rn {:  
  2811     COUNT_INST(I_STS);
  2812     load_spreg( R_EAX, R_MACH );
  2813     store_reg( R_EAX, Rn );
  2814 :}
  2815 STS.L MACH, @-Rn {:  
  2816     COUNT_INST(I_STSM);
  2817     load_reg( R_EAX, Rn );
  2818     check_walign32( R_EAX );
  2819     ADD_imm8s_r32( -4, R_EAX );
  2820     MMU_TRANSLATE_WRITE( R_EAX );
  2821     load_spreg( R_EDX, R_MACH );
  2822     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2823     MEM_WRITE_LONG( R_EAX, R_EDX );
  2824     sh4_x86.tstate = TSTATE_NONE;
  2825 :}
  2826 STS MACL, Rn {:  
  2827     COUNT_INST(I_STS);
  2828     load_spreg( R_EAX, R_MACL );
  2829     store_reg( R_EAX, Rn );
  2830 :}
  2831 STS.L MACL, @-Rn {:  
  2832     COUNT_INST(I_STSM);
  2833     load_reg( R_EAX, Rn );
  2834     check_walign32( R_EAX );
  2835     ADD_imm8s_r32( -4, R_EAX );
  2836     MMU_TRANSLATE_WRITE( R_EAX );
  2837     load_spreg( R_EDX, R_MACL );
  2838     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2839     MEM_WRITE_LONG( R_EAX, R_EDX );
  2840     sh4_x86.tstate = TSTATE_NONE;
  2841 :}
  2842 STS PR, Rn {:  
  2843     COUNT_INST(I_STS);
  2844     load_spreg( R_EAX, R_PR );
  2845     store_reg( R_EAX, Rn );
  2846 :}
  2847 STS.L PR, @-Rn {:  
  2848     COUNT_INST(I_STSM);
  2849     load_reg( R_EAX, Rn );
  2850     check_walign32( R_EAX );
  2851     ADD_imm8s_r32( -4, R_EAX );
  2852     MMU_TRANSLATE_WRITE( R_EAX );
  2853     load_spreg( R_EDX, R_PR );
  2854     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2855     MEM_WRITE_LONG( R_EAX, R_EDX );
  2856     sh4_x86.tstate = TSTATE_NONE;
  2857 :}
  2859 NOP {: 
  2860     COUNT_INST(I_NOP);
  2861     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2862 :}
  2863 %%
  2864     sh4_x86.in_delay_slot = DELAY_NONE;
  2865     return 0;