lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 675:b97020f9af1c
prev 673:44c579439d73
next 732:f05753bbe723
author nkeynes
date Wed Jun 25 10:39:05 2008 +0000
permissions -rw-r--r--
last change Remove superfluous call to gdrom_get_native_devices()
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization; it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
#include <stdlib.h>	/* malloc/realloc for the backpatch list below */
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4stat.h"
    32 #include "sh4/sh4mmio.h"
    33 #include "sh4/x86op.h"
    34 #include "clock.h"
    36 #define DEFAULT_BACKPATCH_SIZE 4096
    38 struct backpatch_record {
    39     uint32_t fixup_offset;
    40     uint32_t fixup_icount;
    41     int32_t exc_code;
    42 };
    44 #define MAX_RECOVERY_SIZE 2048
    46 #define DELAY_NONE 0
    47 #define DELAY_PC 1
    48 #define DELAY_PC_PR 2
    50 /** 
    51  * Struct to manage internal translation state. This state is not saved -
    52  * it is only valid between calls to sh4_translate_begin_block() and
    53  * sh4_translate_end_block()
    54  */
    55 struct sh4_x86_state {
    56     int in_delay_slot;
    57     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    58     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    59     gboolean branch_taken; /* true if we branched unconditionally */
    60     uint32_t block_start_pc;
    61     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    62     int tstate;
    64     /* mode flags */
    65     gboolean tlb_on; /* True if tlb translation is active */
    67     /* Allocated memory for the (block-wide) back-patch list */
    68     struct backpatch_record *backpatch_list;
    69     uint32_t backpatch_posn;
    70     uint32_t backpatch_size;
    71 };
    73 #define TSTATE_NONE -1
    74 #define TSTATE_O    0
    75 #define TSTATE_C    2
    76 #define TSTATE_E    4
    77 #define TSTATE_NE   5
    78 #define TSTATE_G    0xF
    79 #define TSTATE_GE   0xD
    80 #define TSTATE_A    7
    81 #define TSTATE_AE   3
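/* These values are x86 condition-code numbers, chosen so that 0x70+tstate
 * is exactly the Jcc opcode that branches when the SH4 T bit would be set,
 * and 0x70+(tstate^1) is the inverted condition (flipping the low bit of
 * an x86 condition code inverts its sense). For example TSTATE_E == 4
 * gives 0x74 (JE), and 4^1 == 5 gives 0x75 (JNE). */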
    83 #ifdef ENABLE_SH4STATS
    84 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    85 #else
    86 #define COUNT_INST(id)
    87 #endif
    89 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    90 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    91 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    92     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    94 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    95 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    96 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    97     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
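/* Typical use, as in the BF/BT actions below: jump past the taken-branch
 * code when the condition fails, e.g.
 *
 *     JT_rel8( nottaken );
 *     exit_block_rel( target, pc+2 );
 *     JMP_TARGET( nottaken );
 */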
    99 static struct sh4_x86_state sh4_x86;
   101 static uint32_t max_int = 0x7FFFFFFF;
   102 static uint32_t min_int = 0x80000000;
   103 static uint32_t save_fcw; /* save value for fpu control word */
   104 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   106 void sh4_translate_init(void)
   107 {
   108     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   109     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   110 }
   113 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   114 {
   115     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   116 	sh4_x86.backpatch_size <<= 1;
   117 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   118 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   119 	assert( sh4_x86.backpatch_list != NULL );
   120     }
   121     if( sh4_x86.in_delay_slot ) {
   122 	fixup_pc -= 2;
   123     }
   124     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   125 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   126     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   127     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   128     sh4_x86.backpatch_posn++;
   129 }
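/* A sketch of the backpatch life-cycle (the emitter macros themselves live
 * in x86op.h): the *_exc macros (JE_exc, JNE_exc, JMP_exc) emit a forward
 * jump with a placeholder offset and record its location via
 * sh4_x86_add_backpatch(). When the block is finalized, the end-of-block
 * code walks backpatch_list, emits an out-of-line exception stub for each
 * record, and patches the jump to target it; fixup_icount lets the stub
 * wind PC and slice_cycle back to the faulting instruction. Judging from
 * MMU_TRANSLATE_READ below, exc_code == -1 means the callee has already
 * raised the exception. */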
   131 /**
   132  * Emit an instruction to load an SH4 reg into a real register
   133  */
   134 static inline void load_reg( int x86reg, int sh4reg ) 
   135 {
   136     /* mov [bp+n], reg */
   137     OP(0x8B);
   138     OP(0x45 + (x86reg<<3));
   139     OP(REG_OFFSET(r[sh4reg]));
   140 }
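/* Example encoding: load_reg( R_EAX, 3 ) with R_EAX == 0 emits
 *     8B 45 xx    mov eax, [ebp + REG_OFFSET(r[3])]
 * ModRM 0x45 is mod=01/rm=EBP with a disp8, so this short form relies on
 * the register file sitting within +/-128 bytes of EBP (sh4r). */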
   142 static inline void load_reg16s( int x86reg, int sh4reg )
   143 {
   144     OP(0x0F);
   145     OP(0xBF);
   146     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   147 }
   149 static inline void load_reg16u( int x86reg, int sh4reg )
   150 {
   151     OP(0x0F);
   152     OP(0xB7);
   153     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   155 }
   157 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   158 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   159 /**
   160  * Emit an instruction to load an immediate value into a register
   161  */
   162 static inline void load_imm32( int x86reg, uint32_t value ) {
   163     /* mov #value, reg */
   164     OP(0xB8 + x86reg);
   165     OP32(value);
   166 }
   168 /**
   169  * Load an immediate 64-bit quantity (note: x86-64 only)
   170  */
   171 static inline void load_imm64( int x86reg, uint64_t value ) {
   172     /* mov #value, reg */
   173     REXW();
   174     OP(0xB8 + x86reg);
   175     OP64(value);
   176 }
   178 /**
   179  * Emit an instruction to store an SH4 reg (RN)
   180  */
   181 static inline void store_reg( int x86reg, int sh4reg ) {
   182     /* mov reg, [bp+n] */
   183     OP(0x89);
   184     OP(0x45 + (x86reg<<3));
   185     OP(REG_OFFSET(r[sh4reg]));
   186 }
   188 /**
   189  * Load an FR register (single-precision floating point) into an integer x86
   190  * register (eg for register-to-register moves)
   191  */
   192 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   193 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   195 /**
   196  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   197  */
   198 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   199 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   201 /**
   202  * Store an FR register (single-precision floating point) from an integer x86
   203  * register (eg for register-to-register moves)
   204  */
   205 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   206 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   208 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   209 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   212 #define push_fpul()  FLDF_sh4r(R_FPUL)
   213 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   214 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   215 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   216 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   217 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   218 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   219 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   220 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   221 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
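/* Note the (frm)^1 in the single-word FR accessors versus (frm)&0x0E in
 * the double-word ones: the fr banks appear to be stored with each pair
 * word-swapped into host little-endian order, so that FLDD/FSTPD on the
 * even index see a correctly ordered 64-bit double, while 32-bit accesses
 * compensate by flipping the low bit of the index. */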
   225 /* Exception checks - Note that all exception checks will clobber EAX */
   227 #define check_priv( ) \
   228     if( !sh4_x86.priv_checked ) { \
   229 	sh4_x86.priv_checked = TRUE;\
   230 	load_spreg( R_EAX, R_SR );\
   231 	AND_imm32_r32( SR_MD, R_EAX );\
   232 	if( sh4_x86.in_delay_slot ) {\
   233 	    JE_exc( EXC_SLOT_ILLEGAL );\
   234 	} else {\
   235 	    JE_exc( EXC_ILLEGAL );\
   236 	}\
   237     }\
   239 #define check_fpuen( ) \
   240     if( !sh4_x86.fpuen_checked ) {\
   241 	sh4_x86.fpuen_checked = TRUE;\
   242 	load_spreg( R_EAX, R_SR );\
   243 	AND_imm32_r32( SR_FD, R_EAX );\
   244 	if( sh4_x86.in_delay_slot ) {\
   245 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   246 	} else {\
   247 	    JNE_exc(EXC_FPU_DISABLED);\
   248 	}\
   249     }
   251 #define check_ralign16( x86reg ) \
   252     TEST_imm32_r32( 0x00000001, x86reg ); \
   253     JNE_exc(EXC_DATA_ADDR_READ)
   255 #define check_walign16( x86reg ) \
   256     TEST_imm32_r32( 0x00000001, x86reg ); \
   257     JNE_exc(EXC_DATA_ADDR_WRITE);
   259 #define check_ralign32( x86reg ) \
   260     TEST_imm32_r32( 0x00000003, x86reg ); \
   261     JNE_exc(EXC_DATA_ADDR_READ)
   263 #define check_walign32( x86reg ) \
   264     TEST_imm32_r32( 0x00000003, x86reg ); \
   265     JNE_exc(EXC_DATA_ADDR_WRITE);
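/* Example: a longword store to 0x8C000002 fails check_walign32 -- the
 * TEST leaves ZF clear, the JNE is taken, and the backpatched stub raises
 * EXC_DATA_ADDR_WRITE attributed to the current instruction. */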
   267 #define UNDEF()
   268 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   269 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   270 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   271 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   272 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   273 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   274 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   276 /**
   277  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   278  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   279  */
   280 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   282 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
   283 /**
   284  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   285  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   286  */
   287 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   289 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
   290 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
   291 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
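/* Schematically, a guarded read (MMU_TRANSLATE_READ above) expands to:
 *     call mmu_vma_to_phys_read   ; EAX := phys addr, or MMU_VMA_ERROR
 *     cmp  eax, MMU_VMA_ERROR
 *     je   <exception stub>       ; exc_code -1: callee already raised it
 *     mov  addr_reg, eax          ; omitted when addr_reg is EAX
 * MMU_TRANSLATE_SIZE accounts for this fixed overhead so callers can
 * reserve space for it when the TLB is enabled. */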
   293 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   295 /****** Import appropriate calling conventions ******/
   296 #if SIZEOF_VOID_P == 8
   297 #include "sh4/ia64abi.h"
   298 #else /* 32-bit system */
   299 #ifdef APPLE_BUILD
   300 #include "sh4/ia32mac.h"
   301 #else
   302 #include "sh4/ia32abi.h"
   303 #endif
   304 #endif
   306 uint32_t sh4_translate_end_block_size()
   307 {
   308     if( sh4_x86.backpatch_posn <= 3 ) {
   309 	return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   310     } else {
   311 	return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   312     }
   313 }
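/* Worked example: with 2 backpatch records this reserves EPILOGUE_SIZE +
 * 24 bytes; with 5 records, EPILOGUE_SIZE + 48 + 2*15 = EPILOGUE_SIZE +
 * 78 bytes for the out-of-line exception stubs. */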
   316 /**
   317  * Embed a breakpoint into the generated code
   318  */
   319 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   320 {
   321     load_imm32( R_EAX, pc );
   322     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   323 }
   326 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   328 /**
   329  * Embed a call to sh4_execute_instruction for situations that we
   330  * can't translate (just page-crossing delay slots at the moment).
   331  * Caller is responsible for setting new_pc before calling this function.
   332  *
   333  * Performs:
   334  *   Set PC = endpc
   335  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   336  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   337  *   Call sh4_execute_instruction
   338  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   339  */
   340 void exit_block_emu( sh4vma_t endpc )
   341 {
   342     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   343     ADD_r32_sh4r( R_ECX, R_PC );
   345     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   346     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   347     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   348     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   350     call_func0( sh4_execute_instruction );    
   351     load_spreg( R_EAX, R_PC );
   352     if( sh4_x86.tlb_on ) {
   353 	call_func1(xlat_get_code_by_vma,R_EAX);
   354     } else {
   355 	call_func1(xlat_get_code,R_EAX);
   356     }
   357     AND_imm8s_rptr( 0xFC, R_EAX );
   358     POP_r32(R_EBP);
   359     RET();
   360 } 
   362 /**
   363  * Translate a single instruction. Delayed branches are handled specially
   364  * by translating both the branch and the delayed instruction as a single
   365  * unit (as the delay slot instruction executes before the branch takes
   366  * effect). The instruction MUST be in the icache (assert check).
   367  *
   368  * @return true if the instruction marks the end of a basic block
   369  * (eg a branch or an illegal instruction)
   370  */
   371 uint32_t sh4_translate_instruction( sh4vma_t pc )
   372 {
   373     uint32_t ir;
   374     /* Read instruction from icache */
   375     assert( IS_IN_ICACHE(pc) );
   376     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   378 	/* If PC were not in the current icache - which usually means running
   379 	 * with the MMU on, having gone past the end of the page - then, since
   380 	 * sh4_translate_block is pretty careful about this, we would almost
   381 	 * certainly be in a delay slot.
   382 	 *
   383 	 * Since we can't assume the page is present (and we can't fault it in
   384 	 * at this point), that case inlines a call to sh4_execute_instruction
   385 	 * instead, with a few small repairs to cope with the different
   386 	 * environment - see exit_block_emu above. */
   388     if( !sh4_x86.in_delay_slot ) {
   389 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   390     }
   391 %%
   392 /* ALU operations */
   393 ADD Rm, Rn {:
   394     COUNT_INST(I_ADD);
   395     load_reg( R_EAX, Rm );
   396     load_reg( R_ECX, Rn );
   397     ADD_r32_r32( R_EAX, R_ECX );
   398     store_reg( R_ECX, Rn );
   399     sh4_x86.tstate = TSTATE_NONE;
   400 :}
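/* The ALU actions all follow the shape above: load the SH4 operands from
 * sh4r (via EBP) into EAX/ECX, apply the x86 equivalent, store the result
 * back, and record in sh4_x86.tstate which x86 flag (if any) currently
 * mirrors the SH4 T bit, so that a following conditional branch can reuse
 * the live EFLAGS instead of re-testing sh4r.t. */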
   401 ADD #imm, Rn {:  
   402     COUNT_INST(I_ADDI);
   403     load_reg( R_EAX, Rn );
   404     ADD_imm8s_r32( imm, R_EAX );
   405     store_reg( R_EAX, Rn );
   406     sh4_x86.tstate = TSTATE_NONE;
   407 :}
   408 ADDC Rm, Rn {:
   409     COUNT_INST(I_ADDC);
   410     if( sh4_x86.tstate != TSTATE_C ) {
   411 	LDC_t();
   412     }
   413     load_reg( R_EAX, Rm );
   414     load_reg( R_ECX, Rn );
   415     ADC_r32_r32( R_EAX, R_ECX );
   416     store_reg( R_ECX, Rn );
   417     SETC_t();
   418     sh4_x86.tstate = TSTATE_C;
   419 :}
   420 ADDV Rm, Rn {:
   421     COUNT_INST(I_ADDV);
   422     load_reg( R_EAX, Rm );
   423     load_reg( R_ECX, Rn );
   424     ADD_r32_r32( R_EAX, R_ECX );
   425     store_reg( R_ECX, Rn );
   426     SETO_t();
   427     sh4_x86.tstate = TSTATE_O;
   428 :}
   429 AND Rm, Rn {:
   430     COUNT_INST(I_AND);
   431     load_reg( R_EAX, Rm );
   432     load_reg( R_ECX, Rn );
   433     AND_r32_r32( R_EAX, R_ECX );
   434     store_reg( R_ECX, Rn );
   435     sh4_x86.tstate = TSTATE_NONE;
   436 :}
   437 AND #imm, R0 {:  
   438     COUNT_INST(I_ANDI);
   439     load_reg( R_EAX, 0 );
   440     AND_imm32_r32(imm, R_EAX); 
   441     store_reg( R_EAX, 0 );
   442     sh4_x86.tstate = TSTATE_NONE;
   443 :}
   444 AND.B #imm, @(R0, GBR) {: 
   445     COUNT_INST(I_ANDB);
   446     load_reg( R_EAX, 0 );
   447     load_spreg( R_ECX, R_GBR );
   448     ADD_r32_r32( R_ECX, R_EAX );
   449     MMU_TRANSLATE_WRITE( R_EAX );
   450     PUSH_realigned_r32(R_EAX);
   451     MEM_READ_BYTE( R_EAX, R_EAX );
   452     POP_realigned_r32(R_ECX);
   453     AND_imm32_r32(imm, R_EAX );
   454     MEM_WRITE_BYTE( R_ECX, R_EAX );
   455     sh4_x86.tstate = TSTATE_NONE;
   456 :}
   457 CMP/EQ Rm, Rn {:  
   458     COUNT_INST(I_CMPEQ);
   459     load_reg( R_EAX, Rm );
   460     load_reg( R_ECX, Rn );
   461     CMP_r32_r32( R_EAX, R_ECX );
   462     SETE_t();
   463     sh4_x86.tstate = TSTATE_E;
   464 :}
   465 CMP/EQ #imm, R0 {:  
   466     COUNT_INST(I_CMPEQI);
   467     load_reg( R_EAX, 0 );
   468     CMP_imm8s_r32(imm, R_EAX);
   469     SETE_t();
   470     sh4_x86.tstate = TSTATE_E;
   471 :}
   472 CMP/GE Rm, Rn {:  
   473     COUNT_INST(I_CMPGE);
   474     load_reg( R_EAX, Rm );
   475     load_reg( R_ECX, Rn );
   476     CMP_r32_r32( R_EAX, R_ECX );
   477     SETGE_t();
   478     sh4_x86.tstate = TSTATE_GE;
   479 :}
   480 CMP/GT Rm, Rn {: 
   481     COUNT_INST(I_CMPGT);
   482     load_reg( R_EAX, Rm );
   483     load_reg( R_ECX, Rn );
   484     CMP_r32_r32( R_EAX, R_ECX );
   485     SETG_t();
   486     sh4_x86.tstate = TSTATE_G;
   487 :}
   488 CMP/HI Rm, Rn {:  
   489     COUNT_INST(I_CMPHI);
   490     load_reg( R_EAX, Rm );
   491     load_reg( R_ECX, Rn );
   492     CMP_r32_r32( R_EAX, R_ECX );
   493     SETA_t();
   494     sh4_x86.tstate = TSTATE_A;
   495 :}
   496 CMP/HS Rm, Rn {: 
   497     COUNT_INST(I_CMPHS);
   498     load_reg( R_EAX, Rm );
   499     load_reg( R_ECX, Rn );
   500     CMP_r32_r32( R_EAX, R_ECX );
   501     SETAE_t();
   502     sh4_x86.tstate = TSTATE_AE;
   503  :}
   504 CMP/PL Rn {: 
   505     COUNT_INST(I_CMPPL);
   506     load_reg( R_EAX, Rn );
   507     CMP_imm8s_r32( 0, R_EAX );
   508     SETG_t();
   509     sh4_x86.tstate = TSTATE_G;
   510 :}
   511 CMP/PZ Rn {:  
   512     COUNT_INST(I_CMPPZ);
   513     load_reg( R_EAX, Rn );
   514     CMP_imm8s_r32( 0, R_EAX );
   515     SETGE_t();
   516     sh4_x86.tstate = TSTATE_GE;
   517 :}
   518 CMP/STR Rm, Rn {:  
   519     COUNT_INST(I_CMPSTR);
   520     load_reg( R_EAX, Rm );
   521     load_reg( R_ECX, Rn );
   522     XOR_r32_r32( R_ECX, R_EAX );
   523     TEST_r8_r8( R_AL, R_AL );
   524     JE_rel8(target1);
   525     TEST_r8_r8( R_AH, R_AH );
   526     JE_rel8(target2);
   527     SHR_imm8_r32( 16, R_EAX );
   528     TEST_r8_r8( R_AL, R_AL );
   529     JE_rel8(target3);
   530     TEST_r8_r8( R_AH, R_AH );
   531     JMP_TARGET(target1);
   532     JMP_TARGET(target2);
   533     JMP_TARGET(target3);
   534     SETE_t();
   535     sh4_x86.tstate = TSTATE_E;
   536 :}
   537 DIV0S Rm, Rn {:
   538     COUNT_INST(I_DIV0S);
   539     load_reg( R_EAX, Rm );
   540     load_reg( R_ECX, Rn );
   541     SHR_imm8_r32( 31, R_EAX );
   542     SHR_imm8_r32( 31, R_ECX );
   543     store_spreg( R_EAX, R_M );
   544     store_spreg( R_ECX, R_Q );
   545     CMP_r32_r32( R_EAX, R_ECX );
   546     SETNE_t();
   547     sh4_x86.tstate = TSTATE_NE;
   548 :}
   549 DIV0U {:  
   550     COUNT_INST(I_DIV0U);
   551     XOR_r32_r32( R_EAX, R_EAX );
   552     store_spreg( R_EAX, R_Q );
   553     store_spreg( R_EAX, R_M );
   554     store_spreg( R_EAX, R_T );
   555     sh4_x86.tstate = TSTATE_C; // works for DIV1
   556 :}
   557 DIV1 Rm, Rn {:
   558     COUNT_INST(I_DIV1);
   559     load_spreg( R_ECX, R_M );
   560     load_reg( R_EAX, Rn );
   561     if( sh4_x86.tstate != TSTATE_C ) {
   562 	LDC_t();
   563     }
   564     RCL1_r32( R_EAX );
   565     SETC_r8( R_DL ); // Q'
   566     CMP_sh4r_r32( R_Q, R_ECX );
   567     JE_rel8(mqequal);
   568     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   569     JMP_rel8(end);
   570     JMP_TARGET(mqequal);
   571     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   572     JMP_TARGET(end);
   573     store_reg( R_EAX, Rn ); // Done with Rn now
   574     SETC_r8(R_AL); // tmp1
   575     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   576     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   577     store_spreg( R_ECX, R_Q );
   578     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   579     MOVZX_r8_r32( R_AL, R_EAX );
   580     store_spreg( R_EAX, R_T );
   581     sh4_x86.tstate = TSTATE_NONE;
   582 :}
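/* DIV1 performs one step of SH4's non-restoring division: the dividend in
 * Rn is rotated left through T, the divisor Rm is added or subtracted
 * according to whether Q matches M, and the new Q and T fall out of the
 * carry -- hence the RCL/SETC/XOR sequence above. Repeated (typically 32
 * times, after DIV0S or DIV0U) it yields a full quotient. */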
   583 DMULS.L Rm, Rn {:  
   584     COUNT_INST(I_DMULS);
   585     load_reg( R_EAX, Rm );
   586     load_reg( R_ECX, Rn );
   587     IMUL_r32(R_ECX);
   588     store_spreg( R_EDX, R_MACH );
   589     store_spreg( R_EAX, R_MACL );
   590     sh4_x86.tstate = TSTATE_NONE;
   591 :}
   592 DMULU.L Rm, Rn {:  
   593     COUNT_INST(I_DMULU);
   594     load_reg( R_EAX, Rm );
   595     load_reg( R_ECX, Rn );
   596     MUL_r32(R_ECX);
   597     store_spreg( R_EDX, R_MACH );
   598     store_spreg( R_EAX, R_MACL );    
   599     sh4_x86.tstate = TSTATE_NONE;
   600 :}
   601 DT Rn {:  
   602     COUNT_INST(I_DT);
   603     load_reg( R_EAX, Rn );
   604     ADD_imm8s_r32( -1, R_EAX );
   605     store_reg( R_EAX, Rn );
   606     SETE_t();
   607     sh4_x86.tstate = TSTATE_E;
   608 :}
   609 EXTS.B Rm, Rn {:  
   610     COUNT_INST(I_EXTSB);
   611     load_reg( R_EAX, Rm );
   612     MOVSX_r8_r32( R_EAX, R_EAX );
   613     store_reg( R_EAX, Rn );
   614 :}
   615 EXTS.W Rm, Rn {:  
   616     COUNT_INST(I_EXTSW);
   617     load_reg( R_EAX, Rm );
   618     MOVSX_r16_r32( R_EAX, R_EAX );
   619     store_reg( R_EAX, Rn );
   620 :}
   621 EXTU.B Rm, Rn {:  
   622     COUNT_INST(I_EXTUB);
   623     load_reg( R_EAX, Rm );
   624     MOVZX_r8_r32( R_EAX, R_EAX );
   625     store_reg( R_EAX, Rn );
   626 :}
   627 EXTU.W Rm, Rn {:  
   628     COUNT_INST(I_EXTUW);
   629     load_reg( R_EAX, Rm );
   630     MOVZX_r16_r32( R_EAX, R_EAX );
   631     store_reg( R_EAX, Rn );
   632 :}
   633 MAC.L @Rm+, @Rn+ {:
   634     COUNT_INST(I_MACL);
   635     if( Rm == Rn ) {
   636 	load_reg( R_EAX, Rm );
   637 	check_ralign32( R_EAX );
   638 	MMU_TRANSLATE_READ( R_EAX );
   639 	PUSH_realigned_r32( R_EAX );
   640 	load_reg( R_EAX, Rn );
   641 	ADD_imm8s_r32( 4, R_EAX );
   642 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   643 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   644 	// Note translate twice in case of page boundaries. Maybe worth
   645 	// adding a page-boundary check to skip the second translation
   646     } else {
   647 	load_reg( R_EAX, Rm );
   648 	check_ralign32( R_EAX );
   649 	MMU_TRANSLATE_READ( R_EAX );
   650 	load_reg( R_ECX, Rn );
   651 	check_ralign32( R_ECX );
   652 	PUSH_realigned_r32( R_EAX );
   653 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   654 	MOV_r32_r32( R_ECX, R_EAX );
   655 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   656 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   657     }
   658     MEM_READ_LONG( R_EAX, R_EAX );
   659     POP_r32( R_ECX );
   660     PUSH_r32( R_EAX );
   661     MEM_READ_LONG( R_ECX, R_EAX );
   662     POP_realigned_r32( R_ECX );
   664     IMUL_r32( R_ECX );
   665     ADD_r32_sh4r( R_EAX, R_MACL );
   666     ADC_r32_sh4r( R_EDX, R_MACH );
   668     load_spreg( R_ECX, R_S );
   669     TEST_r32_r32(R_ECX, R_ECX);
   670     JE_rel8( nosat );
   671     call_func0( signsat48 );
   672     JMP_TARGET( nosat );
   673     sh4_x86.tstate = TSTATE_NONE;
   674 :}
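/* MAC.L: signed 32x32 -> 64-bit multiply accumulated into MACH:MACL via
 * the ADD/ADC pair; when the S flag is set, saturation to 48 bits is
 * delegated to the out-of-line signsat48() helper. */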
   675 MAC.W @Rm+, @Rn+ {:  
   676     COUNT_INST(I_MACW);
   677     if( Rm == Rn ) {
   678 	load_reg( R_EAX, Rm );
   679 	check_ralign16( R_EAX );
   680 	MMU_TRANSLATE_READ( R_EAX );
   681 	PUSH_realigned_r32( R_EAX );
   682 	load_reg( R_EAX, Rn );
   683 	ADD_imm8s_r32( 2, R_EAX );
   684 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   685 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   686 	// Note translate twice in case of page boundaries. Maybe worth
   687 	// adding a page-boundary check to skip the second translation
   688     } else {
   689 	load_reg( R_EAX, Rm );
   690 	check_ralign16( R_EAX );
   691 	MMU_TRANSLATE_READ( R_EAX );
   692 	load_reg( R_ECX, Rn );
   693 	check_ralign16( R_ECX );
   694 	PUSH_realigned_r32( R_EAX );
   695 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   696 	MOV_r32_r32( R_ECX, R_EAX );
   697 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   698 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   699     }
   700     MEM_READ_WORD( R_EAX, R_EAX );
   701     POP_r32( R_ECX );
   702     PUSH_r32( R_EAX );
   703     MEM_READ_WORD( R_ECX, R_EAX );
   704     POP_realigned_r32( R_ECX );
   705     IMUL_r32( R_ECX );
   707     load_spreg( R_ECX, R_S );
   708     TEST_r32_r32( R_ECX, R_ECX );
   709     JE_rel8( nosat );
   711     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   712     JNO_rel8( end );            // 2
   713     load_imm32( R_EDX, 1 );         // 5
   714     store_spreg( R_EDX, R_MACH );   // 6
   715     JS_rel8( positive );        // 2
   716     load_imm32( R_EAX, 0x80000000 );// 5
   717     store_spreg( R_EAX, R_MACL );   // 6
   718     JMP_rel8(end2);           // 2
   720     JMP_TARGET(positive);
   721     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   722     store_spreg( R_EAX, R_MACL );   // 6
   723     JMP_rel8(end3);            // 2
   725     JMP_TARGET(nosat);
   726     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   727     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   728     JMP_TARGET(end);
   729     JMP_TARGET(end2);
   730     JMP_TARGET(end3);
   731     sh4_x86.tstate = TSTATE_NONE;
   732 :}
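/* MAC.W saturates differently from MAC.L: with S set, the 16x16 product
 * clamps MACL at 32 bits (0x80000000 / 0x7FFFFFFF as emitted above) and
 * MACH is set to 1 to flag that overflow occurred. */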
   733 MOVT Rn {:  
   734     COUNT_INST(I_MOVT);
   735     load_spreg( R_EAX, R_T );
   736     store_reg( R_EAX, Rn );
   737 :}
   738 MUL.L Rm, Rn {:  
   739     COUNT_INST(I_MULL);
   740     load_reg( R_EAX, Rm );
   741     load_reg( R_ECX, Rn );
   742     MUL_r32( R_ECX );
   743     store_spreg( R_EAX, R_MACL );
   744     sh4_x86.tstate = TSTATE_NONE;
   745 :}
   746 MULS.W Rm, Rn {:
   747     COUNT_INST(I_MULSW);
   748     load_reg16s( R_EAX, Rm );
   749     load_reg16s( R_ECX, Rn );
   750     MUL_r32( R_ECX );
   751     store_spreg( R_EAX, R_MACL );
   752     sh4_x86.tstate = TSTATE_NONE;
   753 :}
   754 MULU.W Rm, Rn {:  
   755     COUNT_INST(I_MULUW);
   756     load_reg16u( R_EAX, Rm );
   757     load_reg16u( R_ECX, Rn );
   758     MUL_r32( R_ECX );
   759     store_spreg( R_EAX, R_MACL );
   760     sh4_x86.tstate = TSTATE_NONE;
   761 :}
   762 NEG Rm, Rn {:
   763     COUNT_INST(I_NEG);
   764     load_reg( R_EAX, Rm );
   765     NEG_r32( R_EAX );
   766     store_reg( R_EAX, Rn );
   767     sh4_x86.tstate = TSTATE_NONE;
   768 :}
   769 NEGC Rm, Rn {:  
   770     COUNT_INST(I_NEGC);
   771     load_reg( R_EAX, Rm );
   772     XOR_r32_r32( R_ECX, R_ECX );
   773     LDC_t();
   774     SBB_r32_r32( R_EAX, R_ECX );
   775     store_reg( R_ECX, Rn );
   776     SETC_t();
   777     sh4_x86.tstate = TSTATE_C;
   778 :}
   779 NOT Rm, Rn {:  
   780     COUNT_INST(I_NOT);
   781     load_reg( R_EAX, Rm );
   782     NOT_r32( R_EAX );
   783     store_reg( R_EAX, Rn );
   784     sh4_x86.tstate = TSTATE_NONE;
   785 :}
   786 OR Rm, Rn {:  
   787     COUNT_INST(I_OR);
   788     load_reg( R_EAX, Rm );
   789     load_reg( R_ECX, Rn );
   790     OR_r32_r32( R_EAX, R_ECX );
   791     store_reg( R_ECX, Rn );
   792     sh4_x86.tstate = TSTATE_NONE;
   793 :}
   794 OR #imm, R0 {:
   795     COUNT_INST(I_ORI);
   796     load_reg( R_EAX, 0 );
   797     OR_imm32_r32(imm, R_EAX);
   798     store_reg( R_EAX, 0 );
   799     sh4_x86.tstate = TSTATE_NONE;
   800 :}
   801 OR.B #imm, @(R0, GBR) {:  
   802     COUNT_INST(I_ORB);
   803     load_reg( R_EAX, 0 );
   804     load_spreg( R_ECX, R_GBR );
   805     ADD_r32_r32( R_ECX, R_EAX );
   806     MMU_TRANSLATE_WRITE( R_EAX );
   807     PUSH_realigned_r32(R_EAX);
   808     MEM_READ_BYTE( R_EAX, R_EAX );
   809     POP_realigned_r32(R_ECX);
   810     OR_imm32_r32(imm, R_EAX );
   811     MEM_WRITE_BYTE( R_ECX, R_EAX );
   812     sh4_x86.tstate = TSTATE_NONE;
   813 :}
   814 ROTCL Rn {:
   815     COUNT_INST(I_ROTCL);
   816     load_reg( R_EAX, Rn );
   817     if( sh4_x86.tstate != TSTATE_C ) {
   818 	LDC_t();
   819     }
   820     RCL1_r32( R_EAX );
   821     store_reg( R_EAX, Rn );
   822     SETC_t();
   823     sh4_x86.tstate = TSTATE_C;
   824 :}
   825 ROTCR Rn {:  
   826     COUNT_INST(I_ROTCR);
   827     load_reg( R_EAX, Rn );
   828     if( sh4_x86.tstate != TSTATE_C ) {
   829 	LDC_t();
   830     }
   831     RCR1_r32( R_EAX );
   832     store_reg( R_EAX, Rn );
   833     SETC_t();
   834     sh4_x86.tstate = TSTATE_C;
   835 :}
   836 ROTL Rn {:  
   837     COUNT_INST(I_ROTL);
   838     load_reg( R_EAX, Rn );
   839     ROL1_r32( R_EAX );
   840     store_reg( R_EAX, Rn );
   841     SETC_t();
   842     sh4_x86.tstate = TSTATE_C;
   843 :}
   844 ROTR Rn {:  
   845     COUNT_INST(I_ROTR);
   846     load_reg( R_EAX, Rn );
   847     ROR1_r32( R_EAX );
   848     store_reg( R_EAX, Rn );
   849     SETC_t();
   850     sh4_x86.tstate = TSTATE_C;
   851 :}
   852 SHAD Rm, Rn {:
   853     COUNT_INST(I_SHAD);
   854     /* Annoyingly enough, not directly convertible */
   855     load_reg( R_EAX, Rn );
   856     load_reg( R_ECX, Rm );
   857     CMP_imm32_r32( 0, R_ECX );
   858     JGE_rel8(doshl);
   860     NEG_r32( R_ECX );      // 2
   861     AND_imm8_r8( 0x1F, R_CL ); // 3
   862     JE_rel8(emptysar);     // 2
   863     SAR_r32_CL( R_EAX );       // 2
   864     JMP_rel8(end);          // 2
   866     JMP_TARGET(emptysar);
   867     SAR_imm8_r32(31, R_EAX );  // 3
   868     JMP_rel8(end2);
   870     JMP_TARGET(doshl);
   871     AND_imm8_r8( 0x1F, R_CL ); // 3
   872     SHL_r32_CL( R_EAX );       // 2
   873     JMP_TARGET(end);
   874     JMP_TARGET(end2);
   875     store_reg( R_EAX, Rn );
   876     sh4_x86.tstate = TSTATE_NONE;
   877 :}
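/* SHAD/SHLD don't map directly onto x86 because x86 masks the CL shift
 * count to 5 bits, whereas SH4 treats a negative count whose masked value
 * is 0 as a full 32-bit right shift. The emptysar path above implements
 * that as SAR by 31 (all sign bits); SHLD below has the same shape, but
 * its empty case zeroes the register instead. */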
   878 SHLD Rm, Rn {:  
   879     COUNT_INST(I_SHLD);
   880     load_reg( R_EAX, Rn );
   881     load_reg( R_ECX, Rm );
   882     CMP_imm32_r32( 0, R_ECX );
   883     JGE_rel8(doshl);
   885     NEG_r32( R_ECX );      // 2
   886     AND_imm8_r8( 0x1F, R_CL ); // 3
   887     JE_rel8(emptyshr );
   888     SHR_r32_CL( R_EAX );       // 2
   889     JMP_rel8(end);          // 2
   891     JMP_TARGET(emptyshr);
   892     XOR_r32_r32( R_EAX, R_EAX );
   893     JMP_rel8(end2);
   895     JMP_TARGET(doshl);
   896     AND_imm8_r8( 0x1F, R_CL ); // 3
   897     SHL_r32_CL( R_EAX );       // 2
   898     JMP_TARGET(end);
   899     JMP_TARGET(end2);
   900     store_reg( R_EAX, Rn );
   901     sh4_x86.tstate = TSTATE_NONE;
   902 :}
   903 SHAL Rn {: 
   904     COUNT_INST(I_SHAL);
   905     load_reg( R_EAX, Rn );
   906     SHL1_r32( R_EAX );
   907     SETC_t();
   908     store_reg( R_EAX, Rn );
   909     sh4_x86.tstate = TSTATE_C;
   910 :}
   911 SHAR Rn {:  
   912     COUNT_INST(I_SHAR);
   913     load_reg( R_EAX, Rn );
   914     SAR1_r32( R_EAX );
   915     SETC_t();
   916     store_reg( R_EAX, Rn );
   917     sh4_x86.tstate = TSTATE_C;
   918 :}
   919 SHLL Rn {:  
   920     COUNT_INST(I_SHLL);
   921     load_reg( R_EAX, Rn );
   922     SHL1_r32( R_EAX );
   923     SETC_t();
   924     store_reg( R_EAX, Rn );
   925     sh4_x86.tstate = TSTATE_C;
   926 :}
   927 SHLL2 Rn {:
   928     COUNT_INST(I_SHLL);
   929     load_reg( R_EAX, Rn );
   930     SHL_imm8_r32( 2, R_EAX );
   931     store_reg( R_EAX, Rn );
   932     sh4_x86.tstate = TSTATE_NONE;
   933 :}
   934 SHLL8 Rn {:  
   935     COUNT_INST(I_SHLL);
   936     load_reg( R_EAX, Rn );
   937     SHL_imm8_r32( 8, R_EAX );
   938     store_reg( R_EAX, Rn );
   939     sh4_x86.tstate = TSTATE_NONE;
   940 :}
   941 SHLL16 Rn {:  
   942     COUNT_INST(I_SHLL);
   943     load_reg( R_EAX, Rn );
   944     SHL_imm8_r32( 16, R_EAX );
   945     store_reg( R_EAX, Rn );
   946     sh4_x86.tstate = TSTATE_NONE;
   947 :}
   948 SHLR Rn {:  
   949     COUNT_INST(I_SHLR);
   950     load_reg( R_EAX, Rn );
   951     SHR1_r32( R_EAX );
   952     SETC_t();
   953     store_reg( R_EAX, Rn );
   954     sh4_x86.tstate = TSTATE_C;
   955 :}
   956 SHLR2 Rn {:  
   957     COUNT_INST(I_SHLR);
   958     load_reg( R_EAX, Rn );
   959     SHR_imm8_r32( 2, R_EAX );
   960     store_reg( R_EAX, Rn );
   961     sh4_x86.tstate = TSTATE_NONE;
   962 :}
   963 SHLR8 Rn {:  
   964     COUNT_INST(I_SHLR);
   965     load_reg( R_EAX, Rn );
   966     SHR_imm8_r32( 8, R_EAX );
   967     store_reg( R_EAX, Rn );
   968     sh4_x86.tstate = TSTATE_NONE;
   969 :}
   970 SHLR16 Rn {:  
   971     COUNT_INST(I_SHLR);
   972     load_reg( R_EAX, Rn );
   973     SHR_imm8_r32( 16, R_EAX );
   974     store_reg( R_EAX, Rn );
   975     sh4_x86.tstate = TSTATE_NONE;
   976 :}
   977 SUB Rm, Rn {:  
   978     COUNT_INST(I_SUB);
   979     load_reg( R_EAX, Rm );
   980     load_reg( R_ECX, Rn );
   981     SUB_r32_r32( R_EAX, R_ECX );
   982     store_reg( R_ECX, Rn );
   983     sh4_x86.tstate = TSTATE_NONE;
   984 :}
   985 SUBC Rm, Rn {:  
   986     COUNT_INST(I_SUBC);
   987     load_reg( R_EAX, Rm );
   988     load_reg( R_ECX, Rn );
   989     if( sh4_x86.tstate != TSTATE_C ) {
   990 	LDC_t();
   991     }
   992     SBB_r32_r32( R_EAX, R_ECX );
   993     store_reg( R_ECX, Rn );
   994     SETC_t();
   995     sh4_x86.tstate = TSTATE_C;
   996 :}
   997 SUBV Rm, Rn {:  
   998     COUNT_INST(I_SUBV);
   999     load_reg( R_EAX, Rm );
  1000     load_reg( R_ECX, Rn );
  1001     SUB_r32_r32( R_EAX, R_ECX );
  1002     store_reg( R_ECX, Rn );
  1003     SETO_t();
  1004     sh4_x86.tstate = TSTATE_O;
  1005 :}
  1006 SWAP.B Rm, Rn {:  
  1007     COUNT_INST(I_SWAPB);
  1008     load_reg( R_EAX, Rm );
  1009     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1010     store_reg( R_EAX, Rn );
  1011 :}
  1012 SWAP.W Rm, Rn {:  
  1013     COUNT_INST(I_SWAPW);
  1014     load_reg( R_EAX, Rm );
  1015     MOV_r32_r32( R_EAX, R_ECX );
  1016     SHL_imm8_r32( 16, R_ECX );
  1017     SHR_imm8_r32( 16, R_EAX );
  1018     OR_r32_r32( R_EAX, R_ECX );
  1019     store_reg( R_ECX, Rn );
  1020     sh4_x86.tstate = TSTATE_NONE;
  1021 :}
  1022 TAS.B @Rn {:  
  1023     COUNT_INST(I_TASB);
  1024     load_reg( R_EAX, Rn );
  1025     MMU_TRANSLATE_WRITE( R_EAX );
  1026     PUSH_realigned_r32( R_EAX );
  1027     MEM_READ_BYTE( R_EAX, R_EAX );
  1028     TEST_r8_r8( R_AL, R_AL );
  1029     SETE_t();
  1030     OR_imm8_r8( 0x80, R_AL );
  1031     POP_realigned_r32( R_ECX );
  1032     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1033     sh4_x86.tstate = TSTATE_NONE;
  1034 :}
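/* TAS.B: read the byte, set T if it was zero, then write it back with bit
 * 7 set. Real hardware does this as a locked read-modify-write bus cycle;
 * emitting it as separate read and write calls is safe here since only
 * one SH4 core is being emulated. */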
  1035 TST Rm, Rn {:  
  1036     COUNT_INST(I_TST);
  1037     load_reg( R_EAX, Rm );
  1038     load_reg( R_ECX, Rn );
  1039     TEST_r32_r32( R_EAX, R_ECX );
  1040     SETE_t();
  1041     sh4_x86.tstate = TSTATE_E;
  1042 :}
  1043 TST #imm, R0 {:  
  1044     COUNT_INST(I_TSTI);
  1045     load_reg( R_EAX, 0 );
  1046     TEST_imm32_r32( imm, R_EAX );
  1047     SETE_t();
  1048     sh4_x86.tstate = TSTATE_E;
  1049 :}
  1050 TST.B #imm, @(R0, GBR) {:  
  1051     COUNT_INST(I_TSTB);
  1052     load_reg( R_EAX, 0);
  1053     load_spreg( R_ECX, R_GBR );
  1054     ADD_r32_r32( R_ECX, R_EAX );
  1055     MMU_TRANSLATE_READ( R_EAX );
  1056     MEM_READ_BYTE( R_EAX, R_EAX );
  1057     TEST_imm8_r8( imm, R_AL );
  1058     SETE_t();
  1059     sh4_x86.tstate = TSTATE_E;
  1060 :}
  1061 XOR Rm, Rn {:  
  1062     COUNT_INST(I_XOR);
  1063     load_reg( R_EAX, Rm );
  1064     load_reg( R_ECX, Rn );
  1065     XOR_r32_r32( R_EAX, R_ECX );
  1066     store_reg( R_ECX, Rn );
  1067     sh4_x86.tstate = TSTATE_NONE;
  1068 :}
  1069 XOR #imm, R0 {:  
  1070     COUNT_INST(I_XORI);
  1071     load_reg( R_EAX, 0 );
  1072     XOR_imm32_r32( imm, R_EAX );
  1073     store_reg( R_EAX, 0 );
  1074     sh4_x86.tstate = TSTATE_NONE;
  1075 :}
  1076 XOR.B #imm, @(R0, GBR) {:  
  1077     COUNT_INST(I_XORB);
  1078     load_reg( R_EAX, 0 );
  1079     load_spreg( R_ECX, R_GBR );
  1080     ADD_r32_r32( R_ECX, R_EAX );
  1081     MMU_TRANSLATE_WRITE( R_EAX );
  1082     PUSH_realigned_r32(R_EAX);
  1083     MEM_READ_BYTE(R_EAX, R_EAX);
  1084     POP_realigned_r32(R_ECX);
  1085     XOR_imm32_r32( imm, R_EAX );
  1086     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1087     sh4_x86.tstate = TSTATE_NONE;
  1088 :}
  1089 XTRCT Rm, Rn {:
  1090     COUNT_INST(I_XTRCT);
  1091     load_reg( R_EAX, Rm );
  1092     load_reg( R_ECX, Rn );
  1093     SHL_imm8_r32( 16, R_EAX );
  1094     SHR_imm8_r32( 16, R_ECX );
  1095     OR_r32_r32( R_EAX, R_ECX );
  1096     store_reg( R_ECX, Rn );
  1097     sh4_x86.tstate = TSTATE_NONE;
  1098 :}
  1100 /* Data move instructions */
  1101 MOV Rm, Rn {:  
  1102     COUNT_INST(I_MOV);
  1103     load_reg( R_EAX, Rm );
  1104     store_reg( R_EAX, Rn );
  1105 :}
  1106 MOV #imm, Rn {:  
  1107     COUNT_INST(I_MOVI);
  1108     load_imm32( R_EAX, imm );
  1109     store_reg( R_EAX, Rn );
  1110 :}
  1111 MOV.B Rm, @Rn {:  
  1112     COUNT_INST(I_MOVB);
  1113     load_reg( R_EAX, Rn );
  1114     MMU_TRANSLATE_WRITE( R_EAX );
  1115     load_reg( R_EDX, Rm );
  1116     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1117     sh4_x86.tstate = TSTATE_NONE;
  1118 :}
  1119 MOV.B Rm, @-Rn {:  
  1120     COUNT_INST(I_MOVB);
  1121     load_reg( R_EAX, Rn );
  1122     ADD_imm8s_r32( -1, R_EAX );
  1123     MMU_TRANSLATE_WRITE( R_EAX );
  1124     load_reg( R_EDX, Rm );
  1125     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1126     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1127     sh4_x86.tstate = TSTATE_NONE;
  1128 :}
  1129 MOV.B Rm, @(R0, Rn) {:  
  1130     COUNT_INST(I_MOVB);
  1131     load_reg( R_EAX, 0 );
  1132     load_reg( R_ECX, Rn );
  1133     ADD_r32_r32( R_ECX, R_EAX );
  1134     MMU_TRANSLATE_WRITE( R_EAX );
  1135     load_reg( R_EDX, Rm );
  1136     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1137     sh4_x86.tstate = TSTATE_NONE;
  1138 :}
  1139 MOV.B R0, @(disp, GBR) {:  
  1140     COUNT_INST(I_MOVB);
  1141     load_spreg( R_EAX, R_GBR );
  1142     ADD_imm32_r32( disp, R_EAX );
  1143     MMU_TRANSLATE_WRITE( R_EAX );
  1144     load_reg( R_EDX, 0 );
  1145     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1146     sh4_x86.tstate = TSTATE_NONE;
  1147 :}
  1148 MOV.B R0, @(disp, Rn) {:  
  1149     COUNT_INST(I_MOVB);
  1150     load_reg( R_EAX, Rn );
  1151     ADD_imm32_r32( disp, R_EAX );
  1152     MMU_TRANSLATE_WRITE( R_EAX );
  1153     load_reg( R_EDX, 0 );
  1154     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1155     sh4_x86.tstate = TSTATE_NONE;
  1156 :}
  1157 MOV.B @Rm, Rn {:  
  1158     COUNT_INST(I_MOVB);
  1159     load_reg( R_EAX, Rm );
  1160     MMU_TRANSLATE_READ( R_EAX );
  1161     MEM_READ_BYTE( R_EAX, R_EAX );
  1162     store_reg( R_EAX, Rn );
  1163     sh4_x86.tstate = TSTATE_NONE;
  1164 :}
  1165 MOV.B @Rm+, Rn {:  
  1166     COUNT_INST(I_MOVB);
  1167     load_reg( R_EAX, Rm );
  1168     MMU_TRANSLATE_READ( R_EAX );
  1169     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1170     MEM_READ_BYTE( R_EAX, R_EAX );
  1171     store_reg( R_EAX, Rn );
  1172     sh4_x86.tstate = TSTATE_NONE;
  1173 :}
  1174 MOV.B @(R0, Rm), Rn {:  
  1175     COUNT_INST(I_MOVB);
  1176     load_reg( R_EAX, 0 );
  1177     load_reg( R_ECX, Rm );
  1178     ADD_r32_r32( R_ECX, R_EAX );
  1179     MMU_TRANSLATE_READ( R_EAX );
  1180     MEM_READ_BYTE( R_EAX, R_EAX );
  1181     store_reg( R_EAX, Rn );
  1182     sh4_x86.tstate = TSTATE_NONE;
  1183 :}
  1184 MOV.B @(disp, GBR), R0 {:  
  1185     COUNT_INST(I_MOVB);
  1186     load_spreg( R_EAX, R_GBR );
  1187     ADD_imm32_r32( disp, R_EAX );
  1188     MMU_TRANSLATE_READ( R_EAX );
  1189     MEM_READ_BYTE( R_EAX, R_EAX );
  1190     store_reg( R_EAX, 0 );
  1191     sh4_x86.tstate = TSTATE_NONE;
  1192 :}
  1193 MOV.B @(disp, Rm), R0 {:  
  1194     COUNT_INST(I_MOVB);
  1195     load_reg( R_EAX, Rm );
  1196     ADD_imm32_r32( disp, R_EAX );
  1197     MMU_TRANSLATE_READ( R_EAX );
  1198     MEM_READ_BYTE( R_EAX, R_EAX );
  1199     store_reg( R_EAX, 0 );
  1200     sh4_x86.tstate = TSTATE_NONE;
  1201 :}
  1202 MOV.L Rm, @Rn {:
  1203     COUNT_INST(I_MOVL);
  1204     load_reg( R_EAX, Rn );
  1205     check_walign32(R_EAX);
  1206     MMU_TRANSLATE_WRITE( R_EAX );
  1207     load_reg( R_EDX, Rm );
  1208     MEM_WRITE_LONG( R_EAX, R_EDX );
  1209     sh4_x86.tstate = TSTATE_NONE;
  1210 :}
  1211 MOV.L Rm, @-Rn {:  
  1212     COUNT_INST(I_MOVL);
  1213     load_reg( R_EAX, Rn );
  1214     ADD_imm8s_r32( -4, R_EAX );
  1215     check_walign32( R_EAX );
  1216     MMU_TRANSLATE_WRITE( R_EAX );
  1217     load_reg( R_EDX, Rm );
  1218     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1219     MEM_WRITE_LONG( R_EAX, R_EDX );
  1220     sh4_x86.tstate = TSTATE_NONE;
  1221 :}
  1222 MOV.L Rm, @(R0, Rn) {:  
  1223     COUNT_INST(I_MOVL);
  1224     load_reg( R_EAX, 0 );
  1225     load_reg( R_ECX, Rn );
  1226     ADD_r32_r32( R_ECX, R_EAX );
  1227     check_walign32( R_EAX );
  1228     MMU_TRANSLATE_WRITE( R_EAX );
  1229     load_reg( R_EDX, Rm );
  1230     MEM_WRITE_LONG( R_EAX, R_EDX );
  1231     sh4_x86.tstate = TSTATE_NONE;
  1232 :}
  1233 MOV.L R0, @(disp, GBR) {:  
  1234     COUNT_INST(I_MOVL);
  1235     load_spreg( R_EAX, R_GBR );
  1236     ADD_imm32_r32( disp, R_EAX );
  1237     check_walign32( R_EAX );
  1238     MMU_TRANSLATE_WRITE( R_EAX );
  1239     load_reg( R_EDX, 0 );
  1240     MEM_WRITE_LONG( R_EAX, R_EDX );
  1241     sh4_x86.tstate = TSTATE_NONE;
  1242 :}
  1243 MOV.L Rm, @(disp, Rn) {:  
  1244     COUNT_INST(I_MOVL);
  1245     load_reg( R_EAX, Rn );
  1246     ADD_imm32_r32( disp, R_EAX );
  1247     check_walign32( R_EAX );
  1248     MMU_TRANSLATE_WRITE( R_EAX );
  1249     load_reg( R_EDX, Rm );
  1250     MEM_WRITE_LONG( R_EAX, R_EDX );
  1251     sh4_x86.tstate = TSTATE_NONE;
  1252 :}
  1253 MOV.L @Rm, Rn {:  
  1254     COUNT_INST(I_MOVL);
  1255     load_reg( R_EAX, Rm );
  1256     check_ralign32( R_EAX );
  1257     MMU_TRANSLATE_READ( R_EAX );
  1258     MEM_READ_LONG( R_EAX, R_EAX );
  1259     store_reg( R_EAX, Rn );
  1260     sh4_x86.tstate = TSTATE_NONE;
  1261 :}
  1262 MOV.L @Rm+, Rn {:  
  1263     COUNT_INST(I_MOVL);
  1264     load_reg( R_EAX, Rm );
  1265     check_ralign32( R_EAX );
  1266     MMU_TRANSLATE_READ( R_EAX );
  1267     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1268     MEM_READ_LONG( R_EAX, R_EAX );
  1269     store_reg( R_EAX, Rn );
  1270     sh4_x86.tstate = TSTATE_NONE;
  1271 :}
  1272 MOV.L @(R0, Rm), Rn {:  
  1273     COUNT_INST(I_MOVL);
  1274     load_reg( R_EAX, 0 );
  1275     load_reg( R_ECX, Rm );
  1276     ADD_r32_r32( R_ECX, R_EAX );
  1277     check_ralign32( R_EAX );
  1278     MMU_TRANSLATE_READ( R_EAX );
  1279     MEM_READ_LONG( R_EAX, R_EAX );
  1280     store_reg( R_EAX, Rn );
  1281     sh4_x86.tstate = TSTATE_NONE;
  1282 :}
  1283 MOV.L @(disp, GBR), R0 {:
  1284     COUNT_INST(I_MOVL);
  1285     load_spreg( R_EAX, R_GBR );
  1286     ADD_imm32_r32( disp, R_EAX );
  1287     check_ralign32( R_EAX );
  1288     MMU_TRANSLATE_READ( R_EAX );
  1289     MEM_READ_LONG( R_EAX, R_EAX );
  1290     store_reg( R_EAX, 0 );
  1291     sh4_x86.tstate = TSTATE_NONE;
  1292 :}
  1293 MOV.L @(disp, PC), Rn {:  
  1294     COUNT_INST(I_MOVLPC);
  1295     if( sh4_x86.in_delay_slot ) {
  1296 	SLOTILLEGAL();
  1297     } else {
  1298 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1299 	if( IS_IN_ICACHE(target) ) {
  1300 	    // If the target address is in the same page as the code, it's
  1301 	    // pretty safe to just ref it directly and circumvent the whole
  1302 	    // memory subsystem. (this is a big performance win)
  1304 	    // FIXME: There's a corner-case that's not handled here, when
  1305 	    // the current code-page is in the ITLB but not in the UTLB
  1306 	    // (this should generate a TLB miss, although SH4 behaviour
  1307 	    // needs testing to confirm). Unlikely that anyone depends on
  1308 	    // this behaviour though.
  1309 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1310 	    MOV_moff32_EAX( ptr );
  1311 	} else {
  1312 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1313 	    // different virtual address than the translation was done with,
  1314 	    // but we can safely assume that the low bits are the same.
  1315 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1316 	    ADD_sh4r_r32( R_PC, R_EAX );
  1317 	    MMU_TRANSLATE_READ( R_EAX );
  1318 	    MEM_READ_LONG( R_EAX, R_EAX );
  1319 	    sh4_x86.tstate = TSTATE_NONE;
  1320 	}
  1321 	store_reg( R_EAX, Rn );
  1322     }
  1323 :}
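/* Worked example of the PC-relative load: at pc = 0x8C001002 with disp = 8
 * the operand address is (pc & ~3) + disp + 4 = 0x8C00100C. When that
 * address is in the same icache page as the code being translated, the
 * constant is fetched with a single mov from host memory; otherwise the
 * full translate-and-read path runs at execution time. */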
  1324 MOV.L @(disp, Rm), Rn {:  
  1325     COUNT_INST(I_MOVL);
  1326     load_reg( R_EAX, Rm );
  1327     ADD_imm8s_r32( disp, R_EAX );
  1328     check_ralign32( R_EAX );
  1329     MMU_TRANSLATE_READ( R_EAX );
  1330     MEM_READ_LONG( R_EAX, R_EAX );
  1331     store_reg( R_EAX, Rn );
  1332     sh4_x86.tstate = TSTATE_NONE;
  1333 :}
  1334 MOV.W Rm, @Rn {:  
  1335     COUNT_INST(I_MOVW);
  1336     load_reg( R_EAX, Rn );
  1337     check_walign16( R_EAX );
  1338     MMU_TRANSLATE_WRITE( R_EAX );
  1339     load_reg( R_EDX, Rm );
  1340     MEM_WRITE_WORD( R_EAX, R_EDX );
  1341     sh4_x86.tstate = TSTATE_NONE;
  1342 :}
  1343 MOV.W Rm, @-Rn {:  
  1344     COUNT_INST(I_MOVW);
  1345     load_reg( R_EAX, Rn );
  1346     ADD_imm8s_r32( -2, R_EAX );
  1347     check_walign16( R_EAX );
  1348     MMU_TRANSLATE_WRITE( R_EAX );
  1349     load_reg( R_EDX, Rm );
  1350     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1351     MEM_WRITE_WORD( R_EAX, R_EDX );
  1352     sh4_x86.tstate = TSTATE_NONE;
  1353 :}
  1354 MOV.W Rm, @(R0, Rn) {:  
  1355     COUNT_INST(I_MOVW);
  1356     load_reg( R_EAX, 0 );
  1357     load_reg( R_ECX, Rn );
  1358     ADD_r32_r32( R_ECX, R_EAX );
  1359     check_walign16( R_EAX );
  1360     MMU_TRANSLATE_WRITE( R_EAX );
  1361     load_reg( R_EDX, Rm );
  1362     MEM_WRITE_WORD( R_EAX, R_EDX );
  1363     sh4_x86.tstate = TSTATE_NONE;
  1364 :}
  1365 MOV.W R0, @(disp, GBR) {:  
  1366     COUNT_INST(I_MOVW);
  1367     load_spreg( R_EAX, R_GBR );
  1368     ADD_imm32_r32( disp, R_EAX );
  1369     check_walign16( R_EAX );
  1370     MMU_TRANSLATE_WRITE( R_EAX );
  1371     load_reg( R_EDX, 0 );
  1372     MEM_WRITE_WORD( R_EAX, R_EDX );
  1373     sh4_x86.tstate = TSTATE_NONE;
  1374 :}
  1375 MOV.W R0, @(disp, Rn) {:  
  1376     COUNT_INST(I_MOVW);
  1377     load_reg( R_EAX, Rn );
  1378     ADD_imm32_r32( disp, R_EAX );
  1379     check_walign16( R_EAX );
  1380     MMU_TRANSLATE_WRITE( R_EAX );
  1381     load_reg( R_EDX, 0 );
  1382     MEM_WRITE_WORD( R_EAX, R_EDX );
  1383     sh4_x86.tstate = TSTATE_NONE;
  1384 :}
  1385 MOV.W @Rm, Rn {:  
  1386     COUNT_INST(I_MOVW);
  1387     load_reg( R_EAX, Rm );
  1388     check_ralign16( R_EAX );
  1389     MMU_TRANSLATE_READ( R_EAX );
  1390     MEM_READ_WORD( R_EAX, R_EAX );
  1391     store_reg( R_EAX, Rn );
  1392     sh4_x86.tstate = TSTATE_NONE;
  1393 :}
  1394 MOV.W @Rm+, Rn {:  
  1395     COUNT_INST(I_MOVW);
  1396     load_reg( R_EAX, Rm );
  1397     check_ralign16( R_EAX );
  1398     MMU_TRANSLATE_READ( R_EAX );
  1399     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1400     MEM_READ_WORD( R_EAX, R_EAX );
  1401     store_reg( R_EAX, Rn );
  1402     sh4_x86.tstate = TSTATE_NONE;
  1403 :}
  1404 MOV.W @(R0, Rm), Rn {:  
  1405     COUNT_INST(I_MOVW);
  1406     load_reg( R_EAX, 0 );
  1407     load_reg( R_ECX, Rm );
  1408     ADD_r32_r32( R_ECX, R_EAX );
  1409     check_ralign16( R_EAX );
  1410     MMU_TRANSLATE_READ( R_EAX );
  1411     MEM_READ_WORD( R_EAX, R_EAX );
  1412     store_reg( R_EAX, Rn );
  1413     sh4_x86.tstate = TSTATE_NONE;
  1414 :}
  1415 MOV.W @(disp, GBR), R0 {:  
  1416     COUNT_INST(I_MOVW);
  1417     load_spreg( R_EAX, R_GBR );
  1418     ADD_imm32_r32( disp, R_EAX );
  1419     check_ralign16( R_EAX );
  1420     MMU_TRANSLATE_READ( R_EAX );
  1421     MEM_READ_WORD( R_EAX, R_EAX );
  1422     store_reg( R_EAX, 0 );
  1423     sh4_x86.tstate = TSTATE_NONE;
  1424 :}
  1425 MOV.W @(disp, PC), Rn {:  
  1426     COUNT_INST(I_MOVW);
  1427     if( sh4_x86.in_delay_slot ) {
  1428 	SLOTILLEGAL();
  1429     } else {
  1430 	// See comments for MOV.L @(disp, PC), Rn
  1431 	uint32_t target = pc + disp + 4;
  1432 	if( IS_IN_ICACHE(target) ) {
  1433 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1434 	    MOV_moff32_EAX( ptr );
  1435 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1436 	} else {
  1437 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1438 	    ADD_sh4r_r32( R_PC, R_EAX );
  1439 	    MMU_TRANSLATE_READ( R_EAX );
  1440 	    MEM_READ_WORD( R_EAX, R_EAX );
  1441 	    sh4_x86.tstate = TSTATE_NONE;
  1442 	}
  1443 	store_reg( R_EAX, Rn );
  1444     }
  1445 :}
  1446 MOV.W @(disp, Rm), R0 {:  
  1447     COUNT_INST(I_MOVW);
  1448     load_reg( R_EAX, Rm );
  1449     ADD_imm32_r32( disp, R_EAX );
  1450     check_ralign16( R_EAX );
  1451     MMU_TRANSLATE_READ( R_EAX );
  1452     MEM_READ_WORD( R_EAX, R_EAX );
  1453     store_reg( R_EAX, 0 );
  1454     sh4_x86.tstate = TSTATE_NONE;
  1455 :}
  1456 MOVA @(disp, PC), R0 {:  
  1457     COUNT_INST(I_MOVA);
  1458     if( sh4_x86.in_delay_slot ) {
  1459 	SLOTILLEGAL();
  1460     } else {
  1461 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1462 	ADD_sh4r_r32( R_PC, R_ECX );
  1463 	store_reg( R_ECX, 0 );
  1464 	sh4_x86.tstate = TSTATE_NONE;
  1465     }
  1466 :}
  1467 MOVCA.L R0, @Rn {:  
  1468     COUNT_INST(I_MOVCA);
  1469     load_reg( R_EAX, Rn );
  1470     check_walign32( R_EAX );
  1471     MMU_TRANSLATE_WRITE( R_EAX );
  1472     load_reg( R_EDX, 0 );
  1473     MEM_WRITE_LONG( R_EAX, R_EDX );
  1474     sh4_x86.tstate = TSTATE_NONE;
  1475 :}
  1477 /* Control transfer instructions */
  1478 BF disp {:
  1479     COUNT_INST(I_BF);
  1480     if( sh4_x86.in_delay_slot ) {
  1481 	SLOTILLEGAL();
  1482     } else {
  1483 	sh4vma_t target = disp + pc + 4;
  1484 	JT_rel8( nottaken );
  1485 	exit_block_rel(target, pc+2 );
  1486 	JMP_TARGET(nottaken);
  1487 	return 2;
  1488     }
  1489 :}
  1490 BF/S disp {:
  1491     COUNT_INST(I_BFS);
  1492     if( sh4_x86.in_delay_slot ) {
  1493 	SLOTILLEGAL();
  1494     } else {
  1495 	sh4_x86.in_delay_slot = DELAY_PC;
  1496 	if( UNTRANSLATABLE(pc+2) ) {
  1497 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1498 	    JT_rel8(nottaken);
  1499 	    ADD_imm32_r32( disp, R_EAX );
  1500 	    JMP_TARGET(nottaken);
  1501 	    ADD_sh4r_r32( R_PC, R_EAX );
  1502 	    store_spreg( R_EAX, R_NEW_PC );
  1503 	    exit_block_emu(pc+2);
  1504 	    sh4_x86.branch_taken = TRUE;
  1505 	    return 2;
  1506 	} else {
  1507 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1508 		CMP_imm8s_sh4r( 1, R_T );
  1509 		sh4_x86.tstate = TSTATE_E;
  1510 	    }
  1511 	    sh4vma_t target = disp + pc + 4;
  1512 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1513 	    sh4_translate_instruction(pc+2);
  1514 	    exit_block_rel( target, pc+4 );
  1516 	    // not taken
  1517 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1518 	    sh4_translate_instruction(pc+2);
  1519 	    return 4;
  1520 	}
  1521     }
  1522 :}
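/* The conditional delayed branches (BF/S above, BT/S below) emit a rel32
 * Jcc with a zero placeholder, translate the delay slot plus the taken
 * exit, then patch the displacement so the not-taken case skips straight
 * past them. The delay slot instruction is deliberately translated twice,
 * once on each path, so each copy falls through to its own block exit. */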
  1523 BRA disp {:  
  1524     COUNT_INST(I_BRA);
  1525     if( sh4_x86.in_delay_slot ) {
  1526 	SLOTILLEGAL();
  1527     } else {
  1528 	sh4_x86.in_delay_slot = DELAY_PC;
  1529 	sh4_x86.branch_taken = TRUE;
  1530 	if( UNTRANSLATABLE(pc+2) ) {
  1531 	    load_spreg( R_EAX, R_PC );
  1532 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1533 	    store_spreg( R_EAX, R_NEW_PC );
  1534 	    exit_block_emu(pc+2);
  1535 	    return 2;
  1536 	} else {
  1537 	    sh4_translate_instruction( pc + 2 );
  1538 	    exit_block_rel( disp + pc + 4, pc+4 );
  1539 	    return 4;
  1540 	}
  1541     }
  1542 :}
  1543 BRAF Rn {:  
  1544     COUNT_INST(I_BRAF);
  1545     if( sh4_x86.in_delay_slot ) {
  1546 	SLOTILLEGAL();
  1547     } else {
  1548 	load_spreg( R_EAX, R_PC );
  1549 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1550 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1551 	store_spreg( R_EAX, R_NEW_PC );
  1552 	sh4_x86.in_delay_slot = DELAY_PC;
  1553 	sh4_x86.tstate = TSTATE_NONE;
  1554 	sh4_x86.branch_taken = TRUE;
  1555 	if( UNTRANSLATABLE(pc+2) ) {
  1556 	    exit_block_emu(pc+2);
  1557 	    return 2;
  1558 	} else {
  1559 	    sh4_translate_instruction( pc + 2 );
  1560 	    exit_block_newpcset(pc+2);
  1561 	    return 4;
  1562 	}
  1563     }
  1564 :}
  1565 BSR disp {:  
  1566     COUNT_INST(I_BSR);
  1567     if( sh4_x86.in_delay_slot ) {
  1568 	SLOTILLEGAL();
  1569     } else {
  1570 	load_spreg( R_EAX, R_PC );
  1571 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1572 	store_spreg( R_EAX, R_PR );
  1573 	sh4_x86.in_delay_slot = DELAY_PC;
  1574 	sh4_x86.branch_taken = TRUE;
  1575 	sh4_x86.tstate = TSTATE_NONE;
  1576 	if( UNTRANSLATABLE(pc+2) ) {
  1577 	    ADD_imm32_r32( disp, R_EAX );
  1578 	    store_spreg( R_EAX, R_NEW_PC );
  1579 	    exit_block_emu(pc+2);
  1580 	    return 2;
  1581 	} else {
  1582 	    sh4_translate_instruction( pc + 2 );
  1583 	    exit_block_rel( disp + pc + 4, pc+4 );
  1584 	    return 4;
  1585 	}
  1586     }
  1587 :}
  1588 BSRF Rn {:  
  1589     COUNT_INST(I_BSRF);
  1590     if( sh4_x86.in_delay_slot ) {
  1591 	SLOTILLEGAL();
  1592     } else {
  1593 	load_spreg( R_EAX, R_PC );
  1594 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1595 	store_spreg( R_EAX, R_PR );
  1596 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1597 	store_spreg( R_EAX, R_NEW_PC );
  1599 	sh4_x86.in_delay_slot = DELAY_PC;
  1600 	sh4_x86.tstate = TSTATE_NONE;
  1601 	sh4_x86.branch_taken = TRUE;
  1602 	if( UNTRANSLATABLE(pc+2) ) {
  1603 	    exit_block_emu(pc+2);
  1604 	    return 2;
  1605 	} else {
  1606 	    sh4_translate_instruction( pc + 2 );
  1607 	    exit_block_newpcset(pc+2);
  1608 	    return 4;
  1609 	}
  1610     }
  1611 :}
  1612 BT disp {:
  1613     COUNT_INST(I_BT);
  1614     if( sh4_x86.in_delay_slot ) {
  1615 	SLOTILLEGAL();
  1616     } else {
  1617 	sh4vma_t target = disp + pc + 4;
  1618 	JF_rel8( nottaken );
  1619 	exit_block_rel(target, pc+2 );
  1620 	JMP_TARGET(nottaken);
  1621 	return 2;
  1622     }
  1623 :}
  1624 BT/S disp {:
  1625     COUNT_INST(I_BTS);
  1626     if( sh4_x86.in_delay_slot ) {
  1627 	SLOTILLEGAL();
  1628     } else {
  1629 	sh4_x86.in_delay_slot = DELAY_PC;
  1630 	if( UNTRANSLATABLE(pc+2) ) {
  1631 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1632 	    JF_rel8(nottaken);
  1633 	    ADD_imm32_r32( disp, R_EAX );
  1634 	    JMP_TARGET(nottaken);
  1635 	    ADD_sh4r_r32( R_PC, R_EAX );
  1636 	    store_spreg( R_EAX, R_NEW_PC );
  1637 	    exit_block_emu(pc+2);
  1638 	    sh4_x86.branch_taken = TRUE;
  1639 	    return 2;
  1640 	} else {
  1641 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1642 		CMP_imm8s_sh4r( 1, R_T );
  1643 		sh4_x86.tstate = TSTATE_E;
  1644 	    }
  1645 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1646 	    sh4_translate_instruction(pc+2);
  1647 	    exit_block_rel( disp + pc + 4, pc+4 );
  1648 	    // not taken
  1649 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1650 	    sh4_translate_instruction(pc+2);
  1651 	    return 4;
  1652 	}
  1653     }
  1654 :}
  1655 JMP @Rn {:  
  1656     COUNT_INST(I_JMP);
  1657     if( sh4_x86.in_delay_slot ) {
  1658 	SLOTILLEGAL();
  1659     } else {
  1660 	load_reg( R_ECX, Rn );
  1661 	store_spreg( R_ECX, R_NEW_PC );
  1662 	sh4_x86.in_delay_slot = DELAY_PC;
  1663 	sh4_x86.branch_taken = TRUE;
  1664 	if( UNTRANSLATABLE(pc+2) ) {
  1665 	    exit_block_emu(pc+2);
  1666 	    return 2;
  1667 	} else {
  1668 	    sh4_translate_instruction(pc+2);
  1669 	    exit_block_newpcset(pc+2);
  1670 	    return 4;
  1671 	}
  1672     }
  1673 :}
  1674 JSR @Rn {:  
  1675     COUNT_INST(I_JSR);
  1676     if( sh4_x86.in_delay_slot ) {
  1677 	SLOTILLEGAL();
  1678     } else {
  1679 	load_spreg( R_EAX, R_PC );
  1680 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1681 	store_spreg( R_EAX, R_PR );
  1682 	load_reg( R_ECX, Rn );
  1683 	store_spreg( R_ECX, R_NEW_PC );
  1684 	sh4_x86.in_delay_slot = DELAY_PC;
  1685 	sh4_x86.branch_taken = TRUE;
  1686 	sh4_x86.tstate = TSTATE_NONE;
  1687 	if( UNTRANSLATABLE(pc+2) ) {
  1688 	    exit_block_emu(pc+2);
  1689 	    return 2;
  1690 	} else {
  1691 	    sh4_translate_instruction(pc+2);
  1692 	    exit_block_newpcset(pc+2);
  1693 	    return 4;
  1694 	}
  1695     }
  1696 :}
  1697 RTE {:  
  1698     COUNT_INST(I_RTE);
  1699     if( sh4_x86.in_delay_slot ) {
  1700 	SLOTILLEGAL();
  1701     } else {
  1702 	check_priv();
  1703 	load_spreg( R_ECX, R_SPC );
  1704 	store_spreg( R_ECX, R_NEW_PC );
  1705 	load_spreg( R_EAX, R_SSR );
  1706 	call_func1( sh4_write_sr, R_EAX );
  1707 	sh4_x86.in_delay_slot = DELAY_PC;
  1708 	sh4_x86.priv_checked = FALSE;
  1709 	sh4_x86.fpuen_checked = FALSE;
  1710 	sh4_x86.tstate = TSTATE_NONE;
  1711 	sh4_x86.branch_taken = TRUE;
  1712 	if( UNTRANSLATABLE(pc+2) ) {
  1713 	    exit_block_emu(pc+2);
  1714 	    return 2;
  1715 	} else {
  1716 	    sh4_translate_instruction(pc+2);
  1717 	    exit_block_newpcset(pc+2);
  1718 	    return 4;
  1719 	}
  1720     }
  1721 :}
  1722 RTS {:  
  1723     COUNT_INST(I_RTS);
  1724     if( sh4_x86.in_delay_slot ) {
  1725 	SLOTILLEGAL();
  1726     } else {
  1727 	load_spreg( R_ECX, R_PR );
  1728 	store_spreg( R_ECX, R_NEW_PC );
  1729 	sh4_x86.in_delay_slot = DELAY_PC;
  1730 	sh4_x86.branch_taken = TRUE;
  1731 	if( UNTRANSLATABLE(pc+2) ) {
  1732 	    exit_block_emu(pc+2);
  1733 	    return 2;
  1734 	} else {
  1735 	    sh4_translate_instruction(pc+2);
  1736 	    exit_block_newpcset(pc+2);
  1737 	    return 4;
  1738 	}
  1739     }
  1740 :}
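       /* TRAPA commits the updated PC before calling sh4_raise_trap(), so
        * the trap sees the correct return address; the block then exits with
        * the PC already set. */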
  1741 TRAPA #imm {:  
  1742     COUNT_INST(I_TRAPA);
  1743     if( sh4_x86.in_delay_slot ) {
  1744 	SLOTILLEGAL();
  1745     } else {
  1746 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1747 	ADD_r32_sh4r( R_ECX, R_PC );
  1748 	load_imm32( R_EAX, imm );
  1749 	call_func1( sh4_raise_trap, R_EAX );
  1750 	sh4_x86.tstate = TSTATE_NONE;
  1751 	exit_block_pcset(pc);
  1752 	sh4_x86.branch_taken = TRUE;
  1753 	return 2;
  1754     }
  1755 :}
  1756 UNDEF {:  
  1757     COUNT_INST(I_UNDEF);
  1758     if( sh4_x86.in_delay_slot ) {
  1759 	SLOTILLEGAL();
  1760     } else {
  1761 	JMP_exc(EXC_ILLEGAL);
  1762 	return 2;
  1763     }
  1764 :}
  1766 CLRMAC {:  
  1767     COUNT_INST(I_CLRMAC);
  1768     XOR_r32_r32(R_EAX, R_EAX);
  1769     store_spreg( R_EAX, R_MACL );
  1770     store_spreg( R_EAX, R_MACH );
  1771     sh4_x86.tstate = TSTATE_NONE;
  1772 :}
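       /* CLRS/CLRT/SETS/SETT write the flag bit by clearing or setting the
        * x86 carry and storing it via SETC; tstate is left as TSTATE_C so a
        * following conditional can reuse the live carry flag. */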
  1773 CLRS {:
  1774     COUNT_INST(I_CLRS);
  1775     CLC();
  1776     SETC_sh4r(R_S);
  1777     sh4_x86.tstate = TSTATE_C;
  1778 :}
  1779 CLRT {:  
  1780     COUNT_INST(I_CLRT);
  1781     CLC();
  1782     SETC_t();
  1783     sh4_x86.tstate = TSTATE_C;
  1784 :}
  1785 SETS {:  
  1786     COUNT_INST(I_SETS);
  1787     STC();
  1788     SETC_sh4r(R_S);
  1789     sh4_x86.tstate = TSTATE_C;
  1790 :}
  1791 SETT {:  
  1792     COUNT_INST(I_SETT);
  1793     STC();
  1794     SETC_t();
  1795     sh4_x86.tstate = TSTATE_C;
  1796 :}
  1798 /* Floating point moves */
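       /* Every FMOV variant below tests FPSCR.SZ at runtime to choose
        * between a single 32-bit transfer and a paired 64-bit (DR) transfer;
        * this baseline translator makes no attempt to track the FPU mode
        * statically. */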
  1799 FMOV FRm, FRn {:  
  1800     COUNT_INST(I_FMOV1);
  1801     check_fpuen();
  1802     load_spreg( R_ECX, R_FPSCR );
  1803     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1804     JNE_rel8(doublesize);
  1805     load_fr( R_EAX, FRm ); // SZ=0 branch
  1806     store_fr( R_EAX, FRn );
  1807     JMP_rel8(end);
  1808     JMP_TARGET(doublesize);
  1809     load_dr0( R_EAX, FRm );
  1810     load_dr1( R_ECX, FRm );
  1811     store_dr0( R_EAX, FRn );
  1812     store_dr1( R_ECX, FRn );
  1813     JMP_TARGET(end);
  1814     sh4_x86.tstate = TSTATE_NONE;
  1815 :}
  1816 FMOV FRm, @Rn {: 
  1817     COUNT_INST(I_FMOV2);
  1818     check_fpuen();
  1819     load_reg( R_EAX, Rn );
  1820     check_walign32( R_EAX );
  1821     MMU_TRANSLATE_WRITE( R_EAX );
  1822     load_spreg( R_EDX, R_FPSCR );
  1823     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1824     JNE_rel8(doublesize);
  1826     load_fr( R_ECX, FRm );
  1827     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1828     JMP_rel8(end);
  1830     JMP_TARGET(doublesize);
  1831     load_dr0( R_ECX, FRm );
  1832     load_dr1( R_EDX, FRm );
  1833     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1834     JMP_TARGET(end);
  1835     sh4_x86.tstate = TSTATE_NONE;
  1836 :}
  1837 FMOV @Rm, FRn {:  
  1838     COUNT_INST(I_FMOV5);
  1839     check_fpuen();
  1840     load_reg( R_EAX, Rm );
  1841     check_ralign32( R_EAX );
  1842     MMU_TRANSLATE_READ( R_EAX );
  1843     load_spreg( R_EDX, R_FPSCR );
  1844     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1845     JNE_rel8(doublesize);
  1847     MEM_READ_LONG( R_EAX, R_EAX );
  1848     store_fr( R_EAX, FRn );
  1849     JMP_rel8(end);
  1851     JMP_TARGET(doublesize);
  1852     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1853     store_dr0( R_ECX, FRn );
  1854     store_dr1( R_EAX, FRn );
  1855     JMP_TARGET(end);
  1856     sh4_x86.tstate = TSTATE_NONE;
  1857 :}
  1858 FMOV FRm, @-Rn {:  
  1859     COUNT_INST(I_FMOV3);
  1860     check_fpuen();
  1861     load_reg( R_EAX, Rn );
  1862     check_walign32( R_EAX );
  1863     load_spreg( R_EDX, R_FPSCR );
  1864     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1865     JNE_rel8(doublesize);
  1867     ADD_imm8s_r32( -4, R_EAX );
  1868     MMU_TRANSLATE_WRITE( R_EAX );
  1869     load_fr( R_ECX, FRm );
  1870     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1871     MEM_WRITE_LONG( R_EAX, R_ECX );
  1872     JMP_rel8(end);
  1874     JMP_TARGET(doublesize);
  1875     ADD_imm8s_r32(-8,R_EAX);
  1876     MMU_TRANSLATE_WRITE( R_EAX );
  1877     load_dr0( R_ECX, FRm );
  1878     load_dr1( R_EDX, FRm );
  1879     ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1880     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1881     JMP_TARGET(end);
  1883     sh4_x86.tstate = TSTATE_NONE;
  1884 :}
  1885 FMOV @Rm+, FRn {:
  1886     COUNT_INST(I_FMOV6);
  1887     check_fpuen();
  1888     load_reg( R_EAX, Rm );
  1889     check_ralign32( R_EAX );
  1890     MMU_TRANSLATE_READ( R_EAX );
  1891     load_spreg( R_EDX, R_FPSCR );
  1892     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1893     JNE_rel8(doublesize);
  1895     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1896     MEM_READ_LONG( R_EAX, R_EAX );
  1897     store_fr( R_EAX, FRn );
  1898     JMP_rel8(end);
  1900     JMP_TARGET(doublesize);
  1901     ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1902     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1903     store_dr0( R_ECX, FRn );
  1904     store_dr1( R_EAX, FRn );
  1905     JMP_TARGET(end);
  1907     sh4_x86.tstate = TSTATE_NONE;
  1908 :}
  1909 FMOV FRm, @(R0, Rn) {:  
  1910     COUNT_INST(I_FMOV4);
  1911     check_fpuen();
  1912     load_reg( R_EAX, Rn );
  1913     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1914     check_walign32( R_EAX );
  1915     MMU_TRANSLATE_WRITE( R_EAX );
  1916     load_spreg( R_EDX, R_FPSCR );
  1917     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1918     JNE_rel8(doublesize);
  1920     load_fr( R_ECX, FRm );
  1921     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1922     JMP_rel8(end);
  1924     JMP_TARGET(doublesize);
  1925     load_dr0( R_ECX, FRm );
  1926     load_dr1( R_EDX, FRm );
  1927     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1928     JMP_TARGET(end);
  1930     sh4_x86.tstate = TSTATE_NONE;
  1931 :}
  1932 FMOV @(R0, Rm), FRn {:  
  1933     COUNT_INST(I_FMOV7);
  1934     check_fpuen();
  1935     load_reg( R_EAX, Rm );
  1936     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1937     check_ralign32( R_EAX );
  1938     MMU_TRANSLATE_READ( R_EAX );
  1939     load_spreg( R_EDX, R_FPSCR );
  1940     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1941     JNE_rel8(doublesize);
  1943     MEM_READ_LONG( R_EAX, R_EAX );
  1944     store_fr( R_EAX, FRn );
  1945     JMP_rel8(end);
  1947     JMP_TARGET(doublesize);
  1948     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1949     store_dr0( R_ECX, FRn );
  1950     store_dr1( R_EAX, FRn );
  1951     JMP_TARGET(end);
  1953     sh4_x86.tstate = TSTATE_NONE;
  1954 :}
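       /* FLDI0/FLDI1 store the raw single-precision bit patterns directly
        * (0x00000000 == 0.0f, 0x3F800000 == 1.0f). Both are defined only
        * for PR=0, hence the FPSCR.PR guard. */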
  1955 FLDI0 FRn {:  /* IFF PR=0 */
  1956     COUNT_INST(I_FLDI0);
  1957     check_fpuen();
  1958     load_spreg( R_ECX, R_FPSCR );
  1959     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1960     JNE_rel8(end);
  1961     XOR_r32_r32( R_EAX, R_EAX );
  1962     store_fr( R_EAX, FRn );
  1963     JMP_TARGET(end);
  1964     sh4_x86.tstate = TSTATE_NONE;
  1965 :}
  1966 FLDI1 FRn {:  /* IFF PR=0 */
  1967     COUNT_INST(I_FLDI1);
  1968     check_fpuen();
  1969     load_spreg( R_ECX, R_FPSCR );
  1970     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1971     JNE_rel8(end);
  1972     load_imm32(R_EAX, 0x3F800000);
  1973     store_fr( R_EAX, FRn );
  1974     JMP_TARGET(end);
  1975     sh4_x86.tstate = TSTATE_NONE;
  1976 :}
  1978 FLOAT FPUL, FRn {:  
  1979     COUNT_INST(I_FLOAT);
  1980     check_fpuen();
  1981     load_spreg( R_ECX, R_FPSCR );
  1982     FILD_sh4r(R_FPUL);
  1983     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1984     JNE_rel8(doubleprec);
  1985     pop_fr( FRn );
  1986     JMP_rel8(end);
  1987     JMP_TARGET(doubleprec);
  1988     pop_dr( FRn );
  1989     JMP_TARGET(end);
  1990     sh4_x86.tstate = TSTATE_NONE;
  1991 :}
  1992 FTRC FRm, FPUL {:  
  1993     COUNT_INST(I_FTRC);
  1994     check_fpuen();
  1995     load_spreg( R_ECX, R_FPSCR );
  1996     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1997     JNE_rel8(doubleprec);
  1998     push_fr( FRm );
  1999     JMP_rel8(doop);
  2000     JMP_TARGET(doubleprec);
  2001     push_dr( FRm );
  2002     JMP_TARGET( doop );
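           /* Clamp to the 32-bit integer range first: FTRC saturates to
            * 0x7FFFFFFF / 0x80000000 on overflow, which a bare x87 FISTP
            * would not. The control word is then swapped to trunc_fcw to
            * force the round-toward-zero conversion, and restored after. */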
  2003     load_imm32( R_ECX, (uint32_t)&max_int );
  2004     FILD_r32ind( R_ECX );
  2005     FCOMIP_st(1);
  2006     JNA_rel8( sat );
  2007     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  2008     FILD_r32ind( R_ECX );           // 2
  2009     FCOMIP_st(1);                   // 2
  2010     JAE_rel8( sat2 );            // 2
  2011     load_imm32( R_EAX, (uint32_t)&save_fcw );
  2012     FNSTCW_r32ind( R_EAX );
  2013     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  2014     FLDCW_r32ind( R_EDX );
  2015     FISTP_sh4r(R_FPUL);             // 3
  2016     FLDCW_r32ind( R_EAX );
  2017     JMP_rel8(end);             // 2
  2019     JMP_TARGET(sat);
  2020     JMP_TARGET(sat2);
  2021     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2022     store_spreg( R_ECX, R_FPUL );
  2023     FPOP_st();
  2024     JMP_TARGET(end);
  2025     sh4_x86.tstate = TSTATE_NONE;
  2026 :}
  2027 FLDS FRm, FPUL {:  
  2028     COUNT_INST(I_FLDS);
  2029     check_fpuen();
  2030     load_fr( R_EAX, FRm );
  2031     store_spreg( R_EAX, R_FPUL );
  2032     sh4_x86.tstate = TSTATE_NONE;
  2033 :}
  2034 FSTS FPUL, FRn {:  
  2035     COUNT_INST(I_FSTS);
  2036     check_fpuen();
  2037     load_spreg( R_EAX, R_FPUL );
  2038     store_fr( R_EAX, FRn );
  2039     sh4_x86.tstate = TSTATE_NONE;
  2040 :}
  2041 FCNVDS FRm, FPUL {:  
  2042     COUNT_INST(I_FCNVDS);
  2043     check_fpuen();
  2044     load_spreg( R_ECX, R_FPSCR );
  2045     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2046     JE_rel8(end); // only when PR=1
  2047     push_dr( FRm );
  2048     pop_fpul();
  2049     JMP_TARGET(end);
  2050     sh4_x86.tstate = TSTATE_NONE;
  2051 :}
  2052 FCNVSD FPUL, FRn {:  
  2053     COUNT_INST(I_FCNVSD);
  2054     check_fpuen();
  2055     load_spreg( R_ECX, R_FPSCR );
  2056     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2057     JE_rel8(end); // only when PR=1
  2058     push_fpul();
  2059     pop_dr( FRn );
  2060     JMP_TARGET(end);
  2061     sh4_x86.tstate = TSTATE_NONE;
  2062 :}
  2064 /* Floating point instructions */
  2065 FABS FRn {:  
  2066     COUNT_INST(I_FABS);
  2067     check_fpuen();
  2068     load_spreg( R_ECX, R_FPSCR );
  2069     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2070     JNE_rel8(doubleprec);
  2071     push_fr(FRn); // 6
  2072     FABS_st0(); // 2
  2073     pop_fr(FRn); //6
  2074     JMP_rel8(end); // 2
  2075     JMP_TARGET(doubleprec);
  2076     push_dr(FRn);
  2077     FABS_st0();
  2078     pop_dr(FRn);
  2079     JMP_TARGET(end);
  2080     sh4_x86.tstate = TSTATE_NONE;
  2081 :}
  2082 FADD FRm, FRn {:  
  2083     COUNT_INST(I_FADD);
  2084     check_fpuen();
  2085     load_spreg( R_ECX, R_FPSCR );
  2086     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2087     JNE_rel8(doubleprec);
  2088     push_fr(FRm);
  2089     push_fr(FRn);
  2090     FADDP_st(1);
  2091     pop_fr(FRn);
  2092     JMP_rel8(end);
  2093     JMP_TARGET(doubleprec);
  2094     push_dr(FRm);
  2095     push_dr(FRn);
  2096     FADDP_st(1);
  2097     pop_dr(FRn);
  2098     JMP_TARGET(end);
  2099     sh4_x86.tstate = TSTATE_NONE;
  2100 :}
  2101 FDIV FRm, FRn {:  
  2102     COUNT_INST(I_FDIV);
  2103     check_fpuen();
  2104     load_spreg( R_ECX, R_FPSCR );
  2105     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2106     JNE_rel8(doubleprec);
  2107     push_fr(FRn);
  2108     push_fr(FRm);
  2109     FDIVP_st(1);
  2110     pop_fr(FRn);
  2111     JMP_rel8(end);
  2112     JMP_TARGET(doubleprec);
  2113     push_dr(FRn);
  2114     push_dr(FRm);
  2115     FDIVP_st(1);
  2116     pop_dr(FRn);
  2117     JMP_TARGET(end);
  2118     sh4_x86.tstate = TSTATE_NONE;
  2119 :}
  2120 FMAC FR0, FRm, FRn {:  
  2121     COUNT_INST(I_FMAC);
  2122     check_fpuen();
  2123     load_spreg( R_ECX, R_FPSCR );
  2124     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2125     JNE_rel8(doubleprec);
  2126     push_fr( 0 );
  2127     push_fr( FRm );
  2128     FMULP_st(1);
  2129     push_fr( FRn );
  2130     FADDP_st(1);
  2131     pop_fr( FRn );
  2132     JMP_rel8(end);
  2133     JMP_TARGET(doubleprec);
  2134     push_dr( 0 );
  2135     push_dr( FRm );
  2136     FMULP_st(1);
  2137     push_dr( FRn );
  2138     FADDP_st(1);
  2139     pop_dr( FRn );
  2140     JMP_TARGET(end);
  2141     sh4_x86.tstate = TSTATE_NONE;
  2142 :}
  2144 FMUL FRm, FRn {:  
  2145     COUNT_INST(I_FMUL);
  2146     check_fpuen();
  2147     load_spreg( R_ECX, R_FPSCR );
  2148     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2149     JNE_rel8(doubleprec);
  2150     push_fr(FRm);
  2151     push_fr(FRn);
  2152     FMULP_st(1);
  2153     pop_fr(FRn);
  2154     JMP_rel8(end);
  2155     JMP_TARGET(doubleprec);
  2156     push_dr(FRm);
  2157     push_dr(FRn);
  2158     FMULP_st(1);
  2159     pop_dr(FRn);
  2160     JMP_TARGET(end);
  2161     sh4_x86.tstate = TSTATE_NONE;
  2162 :}
  2163 FNEG FRn {:  
  2164     COUNT_INST(I_FNEG);
  2165     check_fpuen();
  2166     load_spreg( R_ECX, R_FPSCR );
  2167     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2168     JNE_rel8(doubleprec);
  2169     push_fr(FRn);
  2170     FCHS_st0();
  2171     pop_fr(FRn);
  2172     JMP_rel8(end);
  2173     JMP_TARGET(doubleprec);
  2174     push_dr(FRn);
  2175     FCHS_st0();
  2176     pop_dr(FRn);
  2177     JMP_TARGET(end);
  2178     sh4_x86.tstate = TSTATE_NONE;
  2179 :}
  2180 FSRRA FRn {:  
  2181     COUNT_INST(I_FSRRA);
  2182     check_fpuen();
  2183     load_spreg( R_ECX, R_FPSCR );
  2184     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2185     JNE_rel8(end); // PR=0 only
  2186     FLD1_st0();
  2187     push_fr(FRn);
  2188     FSQRT_st0();
  2189     FDIVP_st(1);
  2190     pop_fr(FRn);
  2191     JMP_TARGET(end);
  2192     sh4_x86.tstate = TSTATE_NONE;
  2193 :}
  2194 FSQRT FRn {:  
  2195     COUNT_INST(I_FSQRT);
  2196     check_fpuen();
  2197     load_spreg( R_ECX, R_FPSCR );
  2198     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2199     JNE_rel8(doubleprec);
  2200     push_fr(FRn);
  2201     FSQRT_st0();
  2202     pop_fr(FRn);
  2203     JMP_rel8(end);
  2204     JMP_TARGET(doubleprec);
  2205     push_dr(FRn);
  2206     FSQRT_st0();
  2207     pop_dr(FRn);
  2208     JMP_TARGET(end);
  2209     sh4_x86.tstate = TSTATE_NONE;
  2210 :}
  2211 FSUB FRm, FRn {:  
  2212     COUNT_INST(I_FSUB);
  2213     check_fpuen();
  2214     load_spreg( R_ECX, R_FPSCR );
  2215     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2216     JNE_rel8(doubleprec);
  2217     push_fr(FRn);
  2218     push_fr(FRm);
  2219     FSUBP_st(1);
  2220     pop_fr(FRn);
  2221     JMP_rel8(end);
  2222     JMP_TARGET(doubleprec);
  2223     push_dr(FRn);
  2224     push_dr(FRm);
  2225     FSUBP_st(1);
  2226     pop_dr(FRn);
  2227     JMP_TARGET(end);
  2228     sh4_x86.tstate = TSTATE_NONE;
  2229 :}
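       /* The FCMP emitters push both operands on whichever precision path
        * applies and then converge: one FCOMIP/SETcc sequence after the join
        * point performs the compare and writes the result into T. */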
  2231 FCMP/EQ FRm, FRn {:  
  2232     COUNT_INST(I_FCMPEQ);
  2233     check_fpuen();
  2234     load_spreg( R_ECX, R_FPSCR );
  2235     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2236     JNE_rel8(doubleprec);
  2237     push_fr(FRm);
  2238     push_fr(FRn);
  2239     JMP_rel8(end);
  2240     JMP_TARGET(doubleprec);
  2241     push_dr(FRm);
  2242     push_dr(FRn);
  2243     JMP_TARGET(end);
  2244     FCOMIP_st(1);
  2245     SETE_t();
  2246     FPOP_st();
  2247     sh4_x86.tstate = TSTATE_NONE;
  2248 :}
  2249 FCMP/GT FRm, FRn {:  
  2250     COUNT_INST(I_FCMPGT);
  2251     check_fpuen();
  2252     load_spreg( R_ECX, R_FPSCR );
  2253     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2254     JNE_rel8(doubleprec);
  2255     push_fr(FRm);
  2256     push_fr(FRn);
  2257     JMP_rel8(end);
  2258     JMP_TARGET(doubleprec);
  2259     push_dr(FRm);
  2260     push_dr(FRn);
  2261     JMP_TARGET(end);
  2262     FCOMIP_st(1);
  2263     SETA_t();
  2264     FPOP_st();
  2265     sh4_x86.tstate = TSTATE_NONE;
  2266 :}
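       /* FSCA, FIPR and FTRV are defined only for PR=0, so each is guarded
        * by a test on FPSCR.PR. FSCA and FTRV call out to C helpers
        * (sh4_fsca, sh4_ftrv); FIPR is emitted inline as an x87 dot
        * product. */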
  2268 FSCA FPUL, FRn {:  
  2269     COUNT_INST(I_FSCA);
  2270     check_fpuen();
  2271     load_spreg( R_ECX, R_FPSCR );
  2272     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2273     JNE_rel8(doubleprec );
  2274     LEA_sh4r_r32( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
  2275     load_spreg( R_EDX, R_FPUL );
  2276     call_func2( sh4_fsca, R_EDX, R_ECX );
  2277     JMP_TARGET(doubleprec);
  2278     sh4_x86.tstate = TSTATE_NONE;
  2279 :}
  2280 FIPR FVm, FVn {:  
  2281     COUNT_INST(I_FIPR);
  2282     check_fpuen();
  2283     load_spreg( R_ECX, R_FPSCR );
  2284     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2285     JNE_rel8( doubleprec);
  2287     push_fr( FVm<<2 );
  2288     push_fr( FVn<<2 );
  2289     FMULP_st(1);
  2290     push_fr( (FVm<<2)+1);
  2291     push_fr( (FVn<<2)+1);
  2292     FMULP_st(1);
  2293     FADDP_st(1);
  2294     push_fr( (FVm<<2)+2);
  2295     push_fr( (FVn<<2)+2);
  2296     FMULP_st(1);
  2297     FADDP_st(1);
  2298     push_fr( (FVm<<2)+3);
  2299     push_fr( (FVn<<2)+3);
  2300     FMULP_st(1);
  2301     FADDP_st(1);
  2302     pop_fr( (FVn<<2)+3);
  2303     JMP_TARGET(doubleprec);
  2304     sh4_x86.tstate = TSTATE_NONE;
  2305 :}
  2306 FTRV XMTRX, FVn {:  
  2307     COUNT_INST(I_FTRV);
  2308     check_fpuen();
  2309     load_spreg( R_ECX, R_FPSCR );
  2310     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2311     JNE_rel8( doubleprec );
  2312     LEA_sh4r_r32( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
  2313     call_func1( sh4_ftrv, R_EDX );  // 12
  2314     JMP_TARGET(doubleprec);
  2315     sh4_x86.tstate = TSTATE_NONE;
  2316 :}
  2318 FRCHG {:  
  2319     COUNT_INST(I_FRCHG);
  2320     check_fpuen();
  2321     load_spreg( R_ECX, R_FPSCR );
  2322     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2323     store_spreg( R_ECX, R_FPSCR );
  2324     call_func0( sh4_switch_fr_banks );
  2325     sh4_x86.tstate = TSTATE_NONE;
  2326 :}
  2327 FSCHG {:  
  2328     COUNT_INST(I_FSCHG);
  2329     check_fpuen();
  2330     load_spreg( R_ECX, R_FPSCR );
  2331     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2332     store_spreg( R_ECX, R_FPSCR );
  2333     sh4_x86.tstate = TSTATE_NONE;
  2334 :}
  2336 /* Processor control instructions */
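       /* The privileged LDC/STC forms below emit check_priv() first, and
        * any write to SR goes through sh4_write_sr(), after which the
        * cached priv/fpuen test results are discarded. */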
  2337 LDC Rm, SR {:
  2338     COUNT_INST(I_LDCSR);
  2339     if( sh4_x86.in_delay_slot ) {
  2340 	SLOTILLEGAL();
  2341     } else {
  2342 	check_priv();
  2343 	load_reg( R_EAX, Rm );
  2344 	call_func1( sh4_write_sr, R_EAX );
  2345 	sh4_x86.priv_checked = FALSE;
  2346 	sh4_x86.fpuen_checked = FALSE;
  2347 	sh4_x86.tstate = TSTATE_NONE;
  2348     }
  2349 :}
  2350 LDC Rm, GBR {: 
  2351     COUNT_INST(I_LDC);
  2352     load_reg( R_EAX, Rm );
  2353     store_spreg( R_EAX, R_GBR );
  2354 :}
  2355 LDC Rm, VBR {:  
  2356     COUNT_INST(I_LDC);
  2357     check_priv();
  2358     load_reg( R_EAX, Rm );
  2359     store_spreg( R_EAX, R_VBR );
  2360     sh4_x86.tstate = TSTATE_NONE;
  2361 :}
  2362 LDC Rm, SSR {:  
  2363     COUNT_INST(I_LDC);
  2364     check_priv();
  2365     load_reg( R_EAX, Rm );
  2366     store_spreg( R_EAX, R_SSR );
  2367     sh4_x86.tstate = TSTATE_NONE;
  2368 :}
  2369 LDC Rm, SGR {:  
  2370     COUNT_INST(I_LDC);
  2371     check_priv();
  2372     load_reg( R_EAX, Rm );
  2373     store_spreg( R_EAX, R_SGR );
  2374     sh4_x86.tstate = TSTATE_NONE;
  2375 :}
  2376 LDC Rm, SPC {:  
  2377     COUNT_INST(I_LDC);
  2378     check_priv();
  2379     load_reg( R_EAX, Rm );
  2380     store_spreg( R_EAX, R_SPC );
  2381     sh4_x86.tstate = TSTATE_NONE;
  2382 :}
  2383 LDC Rm, DBR {:  
  2384     COUNT_INST(I_LDC);
  2385     check_priv();
  2386     load_reg( R_EAX, Rm );
  2387     store_spreg( R_EAX, R_DBR );
  2388     sh4_x86.tstate = TSTATE_NONE;
  2389 :}
  2390 LDC Rm, Rn_BANK {:  
  2391     COUNT_INST(I_LDC);
  2392     check_priv();
  2393     load_reg( R_EAX, Rm );
  2394     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2395     sh4_x86.tstate = TSTATE_NONE;
  2396 :}
  2397 LDC.L @Rm+, GBR {:  
  2398     COUNT_INST(I_LDCM);
  2399     load_reg( R_EAX, Rm );
  2400     check_ralign32( R_EAX );
  2401     MMU_TRANSLATE_READ( R_EAX );
  2402     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2403     MEM_READ_LONG( R_EAX, R_EAX );
  2404     store_spreg( R_EAX, R_GBR );
  2405     sh4_x86.tstate = TSTATE_NONE;
  2406 :}
  2407 LDC.L @Rm+, SR {:
  2408     COUNT_INST(I_LDCSRM);
  2409     if( sh4_x86.in_delay_slot ) {
  2410 	SLOTILLEGAL();
  2411     } else {
  2412 	check_priv();
  2413 	load_reg( R_EAX, Rm );
  2414 	check_ralign32( R_EAX );
  2415 	MMU_TRANSLATE_READ( R_EAX );
  2416 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2417 	MEM_READ_LONG( R_EAX, R_EAX );
  2418 	call_func1( sh4_write_sr, R_EAX );
  2419 	sh4_x86.priv_checked = FALSE;
  2420 	sh4_x86.fpuen_checked = FALSE;
  2421 	sh4_x86.tstate = TSTATE_NONE;
  2422     }
  2423 :}
  2424 LDC.L @Rm+, VBR {:  
  2425     COUNT_INST(I_LDCM);
  2426     check_priv();
  2427     load_reg( R_EAX, Rm );
  2428     check_ralign32( R_EAX );
  2429     MMU_TRANSLATE_READ( R_EAX );
  2430     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2431     MEM_READ_LONG( R_EAX, R_EAX );
  2432     store_spreg( R_EAX, R_VBR );
  2433     sh4_x86.tstate = TSTATE_NONE;
  2434 :}
  2435 LDC.L @Rm+, SSR {:
  2436     COUNT_INST(I_LDCM);
  2437     check_priv();
  2438     load_reg( R_EAX, Rm );
  2439     check_ralign32( R_EAX );
  2440     MMU_TRANSLATE_READ( R_EAX );
  2441     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2442     MEM_READ_LONG( R_EAX, R_EAX );
  2443     store_spreg( R_EAX, R_SSR );
  2444     sh4_x86.tstate = TSTATE_NONE;
  2445 :}
  2446 LDC.L @Rm+, SGR {:  
  2447     COUNT_INST(I_LDCM);
  2448     check_priv();
  2449     load_reg( R_EAX, Rm );
  2450     check_ralign32( R_EAX );
  2451     MMU_TRANSLATE_READ( R_EAX );
  2452     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2453     MEM_READ_LONG( R_EAX, R_EAX );
  2454     store_spreg( R_EAX, R_SGR );
  2455     sh4_x86.tstate = TSTATE_NONE;
  2456 :}
  2457 LDC.L @Rm+, SPC {:  
  2458     COUNT_INST(I_LDCM);
  2459     check_priv();
  2460     load_reg( R_EAX, Rm );
  2461     check_ralign32( R_EAX );
  2462     MMU_TRANSLATE_READ( R_EAX );
  2463     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2464     MEM_READ_LONG( R_EAX, R_EAX );
  2465     store_spreg( R_EAX, R_SPC );
  2466     sh4_x86.tstate = TSTATE_NONE;
  2467 :}
  2468 LDC.L @Rm+, DBR {:  
  2469     COUNT_INST(I_LDCM);
  2470     check_priv();
  2471     load_reg( R_EAX, Rm );
  2472     check_ralign32( R_EAX );
  2473     MMU_TRANSLATE_READ( R_EAX );
  2474     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2475     MEM_READ_LONG( R_EAX, R_EAX );
  2476     store_spreg( R_EAX, R_DBR );
  2477     sh4_x86.tstate = TSTATE_NONE;
  2478 :}
  2479 LDC.L @Rm+, Rn_BANK {:  
  2480     COUNT_INST(I_LDCM);
  2481     check_priv();
  2482     load_reg( R_EAX, Rm );
  2483     check_ralign32( R_EAX );
  2484     MMU_TRANSLATE_READ( R_EAX );
  2485     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2486     MEM_READ_LONG( R_EAX, R_EAX );
  2487     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2488     sh4_x86.tstate = TSTATE_NONE;
  2489 :}
  2490 LDS Rm, FPSCR {:
  2491     COUNT_INST(I_LDSFPSCR);
  2492     check_fpuen();
  2493     load_reg( R_EAX, Rm );
  2494     call_func1( sh4_write_fpscr, R_EAX );
  2495     sh4_x86.tstate = TSTATE_NONE;
  2496 :}
  2497 LDS.L @Rm+, FPSCR {:  
  2498     COUNT_INST(I_LDSFPSCRM);
  2499     check_fpuen();
  2500     load_reg( R_EAX, Rm );
  2501     check_ralign32( R_EAX );
  2502     MMU_TRANSLATE_READ( R_EAX );
  2503     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2504     MEM_READ_LONG( R_EAX, R_EAX );
  2505     call_func1( sh4_write_fpscr, R_EAX );
  2506     sh4_x86.tstate = TSTATE_NONE;
  2507 :}
  2508 LDS Rm, FPUL {:  
  2509     COUNT_INST(I_LDS);
  2510     check_fpuen();
  2511     load_reg( R_EAX, Rm );
  2512     store_spreg( R_EAX, R_FPUL );
  2513 :}
  2514 LDS.L @Rm+, FPUL {:  
  2515     COUNT_INST(I_LDSM);
  2516     check_fpuen();
  2517     load_reg( R_EAX, Rm );
  2518     check_ralign32( R_EAX );
  2519     MMU_TRANSLATE_READ( R_EAX );
  2520     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2521     MEM_READ_LONG( R_EAX, R_EAX );
  2522     store_spreg( R_EAX, R_FPUL );
  2523     sh4_x86.tstate = TSTATE_NONE;
  2524 :}
  2525 LDS Rm, MACH {: 
  2526     COUNT_INST(I_LDS);
  2527     load_reg( R_EAX, Rm );
  2528     store_spreg( R_EAX, R_MACH );
  2529 :}
  2530 LDS.L @Rm+, MACH {:  
  2531     COUNT_INST(I_LDSM);
  2532     load_reg( R_EAX, Rm );
  2533     check_ralign32( R_EAX );
  2534     MMU_TRANSLATE_READ( R_EAX );
  2535     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2536     MEM_READ_LONG( R_EAX, R_EAX );
  2537     store_spreg( R_EAX, R_MACH );
  2538     sh4_x86.tstate = TSTATE_NONE;
  2539 :}
  2540 LDS Rm, MACL {:  
  2541     COUNT_INST(I_LDS);
  2542     load_reg( R_EAX, Rm );
  2543     store_spreg( R_EAX, R_MACL );
  2544 :}
  2545 LDS.L @Rm+, MACL {:  
  2546     COUNT_INST(I_LDSM);
  2547     load_reg( R_EAX, Rm );
  2548     check_ralign32( R_EAX );
  2549     MMU_TRANSLATE_READ( R_EAX );
  2550     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2551     MEM_READ_LONG( R_EAX, R_EAX );
  2552     store_spreg( R_EAX, R_MACL );
  2553     sh4_x86.tstate = TSTATE_NONE;
  2554 :}
  2555 LDS Rm, PR {:  
  2556     COUNT_INST(I_LDS);
  2557     load_reg( R_EAX, Rm );
  2558     store_spreg( R_EAX, R_PR );
  2559 :}
  2560 LDS.L @Rm+, PR {:  
  2561     COUNT_INST(I_LDSM);
  2562     load_reg( R_EAX, Rm );
  2563     check_ralign32( R_EAX );
  2564     MMU_TRANSLATE_READ( R_EAX );
  2565     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2566     MEM_READ_LONG( R_EAX, R_EAX );
  2567     store_spreg( R_EAX, R_PR );
  2568     sh4_x86.tstate = TSTATE_NONE;
  2569 :}
  2570 LDTLB {:  
  2571     COUNT_INST(I_LDTLB);
  2572     call_func0( MMU_ldtlb );
  2573 :}
  2574 OCBI @Rn {:
  2575     COUNT_INST(I_OCBI);
  2576 :}
  2577 OCBP @Rn {:
  2578     COUNT_INST(I_OCBP);
  2579 :}
  2580 OCBWB @Rn {:
  2581     COUNT_INST(I_OCBWB);
  2582 :}
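       /* The cache-control ops above are no-ops under emulation. PREF only
        * has a visible effect for store-queue addresses (0xE0000000 through
        * 0xE3FFFFFF, selected by the 0xFC000000 mask below), where it calls
        * sh4_flush_store_queue(). */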
  2583 PREF @Rn {:
  2584     COUNT_INST(I_PREF);
  2585     load_reg( R_EAX, Rn );
  2586     MOV_r32_r32( R_EAX, R_ECX );
  2587     AND_imm32_r32( 0xFC000000, R_EAX );
  2588     CMP_imm32_r32( 0xE0000000, R_EAX );
  2589     JNE_rel8(end);
  2590     call_func1( sh4_flush_store_queue, R_ECX );
  2591     TEST_r32_r32( R_EAX, R_EAX );
  2592     JE_exc(-1);
  2593     JMP_TARGET(end);
  2594     sh4_x86.tstate = TSTATE_NONE;
  2595 :}
  2596 SLEEP {: 
  2597     COUNT_INST(I_SLEEP);
  2598     check_priv();
  2599     call_func0( sh4_sleep );
  2600     sh4_x86.tstate = TSTATE_NONE;
  2601     sh4_x86.in_delay_slot = DELAY_NONE;
  2602     return 2;
  2603 :}
  2604 STC SR, Rn {:
  2605     COUNT_INST(I_STCSR);
  2606     check_priv();
  2607     call_func0(sh4_read_sr);
  2608     store_reg( R_EAX, Rn );
  2609     sh4_x86.tstate = TSTATE_NONE;
  2610 :}
  2611 STC GBR, Rn {:  
  2612     COUNT_INST(I_STC);
  2613     load_spreg( R_EAX, R_GBR );
  2614     store_reg( R_EAX, Rn );
  2615 :}
  2616 STC VBR, Rn {:  
  2617     COUNT_INST(I_STC);
  2618     check_priv();
  2619     load_spreg( R_EAX, R_VBR );
  2620     store_reg( R_EAX, Rn );
  2621     sh4_x86.tstate = TSTATE_NONE;
  2622 :}
  2623 STC SSR, Rn {:  
  2624     COUNT_INST(I_STC);
  2625     check_priv();
  2626     load_spreg( R_EAX, R_SSR );
  2627     store_reg( R_EAX, Rn );
  2628     sh4_x86.tstate = TSTATE_NONE;
  2629 :}
  2630 STC SPC, Rn {:  
  2631     COUNT_INST(I_STC);
  2632     check_priv();
  2633     load_spreg( R_EAX, R_SPC );
  2634     store_reg( R_EAX, Rn );
  2635     sh4_x86.tstate = TSTATE_NONE;
  2636 :}
  2637 STC SGR, Rn {:  
  2638     COUNT_INST(I_STC);
  2639     check_priv();
  2640     load_spreg( R_EAX, R_SGR );
  2641     store_reg( R_EAX, Rn );
  2642     sh4_x86.tstate = TSTATE_NONE;
  2643 :}
  2644 STC DBR, Rn {:  
  2645     COUNT_INST(I_STC);
  2646     check_priv();
  2647     load_spreg( R_EAX, R_DBR );
  2648     store_reg( R_EAX, Rn );
  2649     sh4_x86.tstate = TSTATE_NONE;
  2650 :}
  2651 STC Rm_BANK, Rn {:
  2652     COUNT_INST(I_STC);
  2653     check_priv();
  2654     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2655     store_reg( R_EAX, Rn );
  2656     sh4_x86.tstate = TSTATE_NONE;
  2657 :}
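       /* sh4_read_sr is a real call, so the translated store address is
        * pushed around it (PUSH/POP_realigned keep the stack aligned for
        * the call) and recovered into ECX before the write. */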
  2658 STC.L SR, @-Rn {:
  2659     COUNT_INST(I_STCSRM);
  2660     check_priv();
  2661     load_reg( R_EAX, Rn );
  2662     check_walign32( R_EAX );
  2663     ADD_imm8s_r32( -4, R_EAX );
  2664     MMU_TRANSLATE_WRITE( R_EAX );
  2665     PUSH_realigned_r32( R_EAX );
  2666     call_func0( sh4_read_sr );
  2667     POP_realigned_r32( R_ECX );
  2668     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2669     MEM_WRITE_LONG( R_ECX, R_EAX );
  2670     sh4_x86.tstate = TSTATE_NONE;
  2671 :}
  2672 STC.L VBR, @-Rn {:  
  2673     COUNT_INST(I_STCM);
  2674     check_priv();
  2675     load_reg( R_EAX, Rn );
  2676     check_walign32( R_EAX );
  2677     ADD_imm8s_r32( -4, R_EAX );
  2678     MMU_TRANSLATE_WRITE( R_EAX );
  2679     load_spreg( R_EDX, R_VBR );
  2680     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2681     MEM_WRITE_LONG( R_EAX, R_EDX );
  2682     sh4_x86.tstate = TSTATE_NONE;
  2683 :}
  2684 STC.L SSR, @-Rn {:  
  2685     COUNT_INST(I_STCM);
  2686     check_priv();
  2687     load_reg( R_EAX, Rn );
  2688     check_walign32( R_EAX );
  2689     ADD_imm8s_r32( -4, R_EAX );
  2690     MMU_TRANSLATE_WRITE( R_EAX );
  2691     load_spreg( R_EDX, R_SSR );
  2692     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2693     MEM_WRITE_LONG( R_EAX, R_EDX );
  2694     sh4_x86.tstate = TSTATE_NONE;
  2695 :}
  2696 STC.L SPC, @-Rn {:
  2697     COUNT_INST(I_STCM);
  2698     check_priv();
  2699     load_reg( R_EAX, Rn );
  2700     check_walign32( R_EAX );
  2701     ADD_imm8s_r32( -4, R_EAX );
  2702     MMU_TRANSLATE_WRITE( R_EAX );
  2703     load_spreg( R_EDX, R_SPC );
  2704     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2705     MEM_WRITE_LONG( R_EAX, R_EDX );
  2706     sh4_x86.tstate = TSTATE_NONE;
  2707 :}
  2708 STC.L SGR, @-Rn {:  
  2709     COUNT_INST(I_STCM);
  2710     check_priv();
  2711     load_reg( R_EAX, Rn );
  2712     check_walign32( R_EAX );
  2713     ADD_imm8s_r32( -4, R_EAX );
  2714     MMU_TRANSLATE_WRITE( R_EAX );
  2715     load_spreg( R_EDX, R_SGR );
  2716     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2717     MEM_WRITE_LONG( R_EAX, R_EDX );
  2718     sh4_x86.tstate = TSTATE_NONE;
  2719 :}
  2720 STC.L DBR, @-Rn {:  
  2721     COUNT_INST(I_STCM);
  2722     check_priv();
  2723     load_reg( R_EAX, Rn );
  2724     check_walign32( R_EAX );
  2725     ADD_imm8s_r32( -4, R_EAX );
  2726     MMU_TRANSLATE_WRITE( R_EAX );
  2727     load_spreg( R_EDX, R_DBR );
  2728     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2729     MEM_WRITE_LONG( R_EAX, R_EDX );
  2730     sh4_x86.tstate = TSTATE_NONE;
  2731 :}
  2732 STC.L Rm_BANK, @-Rn {:  
  2733     COUNT_INST(I_STCM);
  2734     check_priv();
  2735     load_reg( R_EAX, Rn );
  2736     check_walign32( R_EAX );
  2737     ADD_imm8s_r32( -4, R_EAX );
  2738     MMU_TRANSLATE_WRITE( R_EAX );
  2739     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2740     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2741     MEM_WRITE_LONG( R_EAX, R_EDX );
  2742     sh4_x86.tstate = TSTATE_NONE;
  2743 :}
  2744 STC.L GBR, @-Rn {:  
  2745     COUNT_INST(I_STCM);
  2746     load_reg( R_EAX, Rn );
  2747     check_walign32( R_EAX );
  2748     ADD_imm8s_r32( -4, R_EAX );
  2749     MMU_TRANSLATE_WRITE( R_EAX );
  2750     load_spreg( R_EDX, R_GBR );
  2751     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2752     MEM_WRITE_LONG( R_EAX, R_EDX );
  2753     sh4_x86.tstate = TSTATE_NONE;
  2754 :}
  2755 STS FPSCR, Rn {:  
  2756     COUNT_INST(I_STSFPSCR);
  2757     check_fpuen();
  2758     load_spreg( R_EAX, R_FPSCR );
  2759     store_reg( R_EAX, Rn );
  2760 :}
  2761 STS.L FPSCR, @-Rn {:  
  2762     COUNT_INST(I_STSFPSCRM);
  2763     check_fpuen();
  2764     load_reg( R_EAX, Rn );
  2765     check_walign32( R_EAX );
  2766     ADD_imm8s_r32( -4, R_EAX );
  2767     MMU_TRANSLATE_WRITE( R_EAX );
  2768     load_spreg( R_EDX, R_FPSCR );
  2769     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2770     MEM_WRITE_LONG( R_EAX, R_EDX );
  2771     sh4_x86.tstate = TSTATE_NONE;
  2772 :}
  2773 STS FPUL, Rn {:  
  2774     COUNT_INST(I_STS);
  2775     check_fpuen();
  2776     load_spreg( R_EAX, R_FPUL );
  2777     store_reg( R_EAX, Rn );
  2778 :}
  2779 STS.L FPUL, @-Rn {:  
  2780     COUNT_INST(I_STSM);
  2781     check_fpuen();
  2782     load_reg( R_EAX, Rn );
  2783     check_walign32( R_EAX );
  2784     ADD_imm8s_r32( -4, R_EAX );
  2785     MMU_TRANSLATE_WRITE( R_EAX );
  2786     load_spreg( R_EDX, R_FPUL );
  2787     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2788     MEM_WRITE_LONG( R_EAX, R_EDX );
  2789     sh4_x86.tstate = TSTATE_NONE;
  2790 :}
  2791 STS MACH, Rn {:  
  2792     COUNT_INST(I_STS);
  2793     load_spreg( R_EAX, R_MACH );
  2794     store_reg( R_EAX, Rn );
  2795 :}
  2796 STS.L MACH, @-Rn {:  
  2797     COUNT_INST(I_STSM);
  2798     load_reg( R_EAX, Rn );
  2799     check_walign32( R_EAX );
  2800     ADD_imm8s_r32( -4, R_EAX );
  2801     MMU_TRANSLATE_WRITE( R_EAX );
  2802     load_spreg( R_EDX, R_MACH );
  2803     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2804     MEM_WRITE_LONG( R_EAX, R_EDX );
  2805     sh4_x86.tstate = TSTATE_NONE;
  2806 :}
  2807 STS MACL, Rn {:  
  2808     COUNT_INST(I_STS);
  2809     load_spreg( R_EAX, R_MACL );
  2810     store_reg( R_EAX, Rn );
  2811 :}
  2812 STS.L MACL, @-Rn {:  
  2813     COUNT_INST(I_STSM);
  2814     load_reg( R_EAX, Rn );
  2815     check_walign32( R_EAX );
  2816     ADD_imm8s_r32( -4, R_EAX );
  2817     MMU_TRANSLATE_WRITE( R_EAX );
  2818     load_spreg( R_EDX, R_MACL );
  2819     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2820     MEM_WRITE_LONG( R_EAX, R_EDX );
  2821     sh4_x86.tstate = TSTATE_NONE;
  2822 :}
  2823 STS PR, Rn {:  
  2824     COUNT_INST(I_STS);
  2825     load_spreg( R_EAX, R_PR );
  2826     store_reg( R_EAX, Rn );
  2827 :}
  2828 STS.L PR, @-Rn {:  
  2829     COUNT_INST(I_STSM);
  2830     load_reg( R_EAX, Rn );
  2831     check_walign32( R_EAX );
  2832     ADD_imm8s_r32( -4, R_EAX );
  2833     MMU_TRANSLATE_WRITE( R_EAX );
  2834     load_spreg( R_EDX, R_PR );
  2835     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2836     MEM_WRITE_LONG( R_EAX, R_EDX );
  2837     sh4_x86.tstate = TSTATE_NONE;
  2838 :}
  2840 NOP {: 
  2841     COUNT_INST(I_NOP);
  2842     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2843 :}
  2844 %%
  2845     sh4_x86.in_delay_slot = DELAY_NONE;
  2846     return 0;