filename src/sh4/sh4x86.in
changeset 927:17b6b9e245d8
prev 926:68f3e0fe02f1
next 929:fd8cb0c82f5f
next 953:f4a156508ad1
author nkeynes
date Mon Dec 15 10:44:56 2008 +0000
permissions -rw-r--r--
last change Add return-address-modifying exception return code to mmu TLB lookups (a little bit faster)
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization; it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/xltcache.h"
    30 #include "sh4/sh4core.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/x86op.h"
    35 #include "clock.h"
    37 #define DEFAULT_BACKPATCH_SIZE 4096
    39 struct backpatch_record {
    40     uint32_t fixup_offset;
    41     uint32_t fixup_icount;
    42     int32_t exc_code;
    43 };
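/* A backpatch record remembers one forward reference to the block's exception
 * epilogue. As far as the code below shows: fixup_offset is the byte offset of
 * the patch site within the translated block, fixup_icount is the SH4
 * instruction count from the block start (used to recover the guest PC), and
 * exc_code is the exception to raise -- or -1, as the MMU fallback path uses,
 * meaning the exception code was already determined at runtime. */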
    45 #define DELAY_NONE 0
    46 #define DELAY_PC 1
    47 #define DELAY_PC_PR 2
    49 /** 
    50  * Struct to manage internal translation state. This state is not saved -
    51  * it is only valid between calls to sh4_translate_begin_block() and
    52  * sh4_translate_end_block()
    53  */
    54 struct sh4_x86_state {
    55     int in_delay_slot;
    56     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    57     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    58     gboolean branch_taken; /* true if we branched unconditionally */
    59     gboolean double_prec; /* true if FPU is in double-precision mode */
    60     gboolean double_size; /* true if FPU is in double-size mode */
    61     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    62     uint32_t block_start_pc;
    63     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    64     int tstate;
    66     /* mode flags */
    67     gboolean tlb_on; /* True if tlb translation is active */
    69     /* Allocated memory for the (block-wide) back-patch list */
    70     struct backpatch_record *backpatch_list;
    71     uint32_t backpatch_posn;
    72     uint32_t backpatch_size;
    73 };
    75 #define TSTATE_NONE -1
    76 #define TSTATE_O    0
    77 #define TSTATE_C    2
    78 #define TSTATE_E    4
    79 #define TSTATE_NE   5
    80 #define TSTATE_G    0xF
    81 #define TSTATE_GE   0xD
    82 #define TSTATE_A    7
    83 #define TSTATE_AE   3
    85 #ifdef ENABLE_SH4STATS
    86 #define COUNT_INST(id) load_imm32(R_EAX,id); call_func1(sh4_stats_add, R_EAX); sh4_x86.tstate = TSTATE_NONE
    87 #else
    88 #define COUNT_INST(id)
    89 #endif
    91 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    92 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    93 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    94     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    96 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    97 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    98 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    99     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
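/* Encoding note: the TSTATE_* values above are exactly the x86 condition-code
 * nibbles, so OP(0x70+tstate) emits the matching short-form Jcc: TSTATE_E (4)
 * gives 0x74 (JE), TSTATE_G (0xF) gives 0x7F (JG), TSTATE_A (7) gives 0x77
 * (JA), and so on. Flipping the low bit (tstate^1, as JF_rel8 does) selects
 * the inverted condition, since x86 encodes each condition and its negation
 * as an adjacent opcode pair (JE/JNE, JB/JAE, ...). */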
   101 static struct sh4_x86_state sh4_x86;
   103 static uint32_t max_int = 0x7FFFFFFF;
   104 static uint32_t min_int = 0x80000000;
   105 static uint32_t save_fcw; /* save value for fpu control word */
   106 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   108 gboolean is_sse3_supported()
   109 {
   110     uint32_t features;
   112     __asm__ __volatile__(
   113         "mov $0x01, %%eax\n\t"
   114         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   115     return (features & 1) ? TRUE : FALSE;
   116 }
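/* CPUID leaf 1 returns the feature flags in ECX; bit 0 is SSE3 ("PNI"), the
 * only bit tested here. EAX, EBX and EDX are declared as clobbers because
 * CPUID overwrites all four general registers. */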
   118 void sh4_translate_init(void)
   119 {
   120     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   121     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   122     sh4_x86.sse3_enabled = is_sse3_supported();
   123 }
   126 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   127 {
   128     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   129 	sh4_x86.backpatch_size <<= 1;
   130 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   131 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   132 	assert( sh4_x86.backpatch_list != NULL );
   133     }
   134     if( sh4_x86.in_delay_slot ) {
   135 	fixup_pc -= 2;
   136     }
   137     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   138 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   139     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   140     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   141     sh4_x86.backpatch_posn++;
   142 }
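/* Capacity arithmetic: backpatch_size counts records, not bytes. With the
 * 12-byte record above and DEFAULT_BACKPATCH_SIZE of 4096, the initial list
 * holds 4096/12 = 341 records; each time it fills, the record capacity is
 * doubled and the list realloc'd to match. */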
   144 /**
   145  * Emit an instruction to load an SH4 reg into a real register
   146  */
   147 static inline void load_reg( int x86reg, int sh4reg ) 
   148 {
   149     /* mov [bp+n], reg */
   150     OP(0x8B);
   151     OP(0x45 + (x86reg<<3));
   152     OP(REG_OFFSET(r[sh4reg]));
   153 }
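/* Encoding note: 0x8B is MOV r32,r/m32; the ModRM byte 0x45+(x86reg<<3)
 * selects [EBP+disp8] with x86reg as the destination, and the final byte is
 * the signed 8-bit offset of r[sh4reg] within the sh4r block, which the
 * generated code assumes EBP points at. */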
   155 static inline void load_reg16s( int x86reg, int sh4reg )
   156 {
   157     OP(0x0F);
   158     OP(0xBF);
   159     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   160 }
   162 static inline void load_reg16u( int x86reg, int sh4reg )
   163 {
   164     OP(0x0F);
   165     OP(0xB7);
   166     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   168 }
   170 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   171 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   172 /**
   173  * Emit an instruction to load an immediate value into a register
   174  */
   175 static inline void load_imm32( int x86reg, uint32_t value ) {
   176     /* mov #value, reg */
   177     OP(0xB8 + x86reg);
   178     OP32(value);
   179 }
   181 /**
   182  * Load an immediate 64-bit quantity (note: x86-64 only)
   183  */
   184 static inline void load_imm64( int x86reg, uint64_t value ) {
   185     /* mov #value, reg */
   186     REXW();
   187     OP(0xB8 + x86reg);
   188     OP64(value);
   189 }
   191 /**
   192  * Emit an instruction to store an SH4 reg (RN)
   193  */
    194 static inline void store_reg( int x86reg, int sh4reg ) {
   195     /* mov reg, [bp+n] */
   196     OP(0x89);
   197     OP(0x45 + (x86reg<<3));
   198     OP(REG_OFFSET(r[sh4reg]));
   199 }
   201 /**
   202  * Load an FR register (single-precision floating point) into an integer x86
   203  * register (eg for register-to-register moves)
   204  */
   205 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   206 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   208 /**
   209  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   210  */
   211 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   212 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   214 /**
    215  * Store an FR register (single-precision floating point) from an integer x86
   216  * register (eg for register-to-register moves)
   217  */
   218 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   219 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   221 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   222 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   225 #define push_fpul()  FLDF_sh4r(R_FPUL)
   226 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   227 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   228 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   229 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   230 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   231 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   232 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   233 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   234 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
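/* The (frm)^1 index on all single-precision accesses apparently compensates
 * for the FP banks being stored as host-endian 64-bit doubles: on a
 * little-endian host the two 32-bit words of each register pair sit in
 * swapped order, so FRn lives at fr[bank][n^1], while the halves of a
 * double-precision register are addressed with frm|0x01 and frm&0x0E
 * respectively. */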
   238 /* Exception checks - Note that all exception checks will clobber EAX */
   240 #define check_priv( ) \
   241     if( !sh4_x86.priv_checked ) { \
   242 	sh4_x86.priv_checked = TRUE;\
   243 	load_spreg( R_EAX, R_SR );\
   244 	AND_imm32_r32( SR_MD, R_EAX );\
   245 	if( sh4_x86.in_delay_slot ) {\
   246 	    JE_exc( EXC_SLOT_ILLEGAL );\
   247 	} else {\
   248 	    JE_exc( EXC_ILLEGAL );\
   249 	}\
   250 	sh4_x86.tstate = TSTATE_NONE; \
   251     }\
   253 #define check_fpuen( ) \
   254     if( !sh4_x86.fpuen_checked ) {\
   255 	sh4_x86.fpuen_checked = TRUE;\
   256 	load_spreg( R_EAX, R_SR );\
   257 	AND_imm32_r32( SR_FD, R_EAX );\
   258 	if( sh4_x86.in_delay_slot ) {\
   259 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   260 	} else {\
   261 	    JNE_exc(EXC_FPU_DISABLED);\
   262 	}\
   263 	sh4_x86.tstate = TSTATE_NONE; \
   264     }
   266 #define check_ralign16( x86reg ) \
   267     TEST_imm32_r32( 0x00000001, x86reg ); \
   268     JNE_exc(EXC_DATA_ADDR_READ)
   270 #define check_walign16( x86reg ) \
   271     TEST_imm32_r32( 0x00000001, x86reg ); \
   272     JNE_exc(EXC_DATA_ADDR_WRITE);
   274 #define check_ralign32( x86reg ) \
   275     TEST_imm32_r32( 0x00000003, x86reg ); \
   276     JNE_exc(EXC_DATA_ADDR_READ)
   278 #define check_walign32( x86reg ) \
   279     TEST_imm32_r32( 0x00000003, x86reg ); \
   280     JNE_exc(EXC_DATA_ADDR_WRITE);
   282 #define check_ralign64( x86reg ) \
   283     TEST_imm32_r32( 0x00000007, x86reg ); \
   284     JNE_exc(EXC_DATA_ADDR_READ)
   286 #define check_walign64( x86reg ) \
   287     TEST_imm32_r32( 0x00000007, x86reg ); \
   288     JNE_exc(EXC_DATA_ADDR_WRITE);
   290 #define UNDEF(ir)
   291 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   292 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   293 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   294 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   295 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   296 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   297 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   299 #ifdef HAVE_FRAME_ADDRESS
   300 /**
   301  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   302  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   303  */
   304 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) {  call_func1_exc(mmu_vma_to_phys_read, addr_reg, pc); MEM_RESULT(addr_reg); }
   306 /**
   307  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   308  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   309  */
   310 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1_exc(mmu_vma_to_phys_write, addr_reg, pc); MEM_RESULT(addr_reg); }
   311 #else
   312 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   313 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   314 #endif
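/* Two fallback strategies here: with HAVE_FRAME_ADDRESS, call_func1_exc
 * passes the current pc so the MMU helper can return through a modified
 * return address on failure (see the changeset note about return-address-
 * modifying exception returns). Without it, the helper returns MMU_VMA_ERROR
 * in EAX and the emitted CMP/JE_exc(-1) pair branches to the exception
 * epilogue with the code taken from the call's result. */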
   316 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   318 /****** Import appropriate calling conventions ******/
   319 #if SIZEOF_VOID_P == 8
   320 #include "sh4/ia64abi.h"
   321 #else /* 32-bit system */
   322 #include "sh4/ia32abi.h"
   323 #endif
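/* The ABI header selected above is expected to supply the host-specific glue
 * used throughout this file: enter_block()/exit_block*(), the call_func0/1/2
 * wrappers, and the MOV_r32_esp8/MOV_esp8_r32 scratch-slot helpers, in
 * x86-64 or plain ia32 flavours. */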
   325 void sh4_translate_begin_block( sh4addr_t pc ) 
   326 {
   327     enter_block();
   328     sh4_x86.in_delay_slot = FALSE;
   329     sh4_x86.priv_checked = FALSE;
   330     sh4_x86.fpuen_checked = FALSE;
   331     sh4_x86.branch_taken = FALSE;
   332     sh4_x86.backpatch_posn = 0;
   333     sh4_x86.block_start_pc = pc;
   334     sh4_x86.tlb_on = IS_MMU_ENABLED();
   335     sh4_x86.tstate = TSTATE_NONE;
   336     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   337     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   338 }
   341 uint32_t sh4_translate_end_block_size()
   342 {
   343     if( sh4_x86.backpatch_posn <= 3 ) {
   344         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   345     } else {
   346         return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   347     }
   348 }
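/* A worst-case size bound, presumably mirroring the stub sequences the
 * epilogue generator emits: up to three backpatch stubs fit a cheap 12-byte
 * form, while larger counts switch to a longer encoding costed at a 48-byte
 * base plus 15 bytes per additional record. */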
   351 /**
   352  * Embed a breakpoint into the generated code
   353  */
   354 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   355 {
   356     load_imm32( R_EAX, pc );
   357     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   358     sh4_x86.tstate = TSTATE_NONE;
   359 }
   362 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   364 /**
   365  * Embed a call to sh4_execute_instruction for situations that we
   366  * can't translate (just page-crossing delay slots at the moment).
   367  * Caller is responsible for setting new_pc before calling this function.
   368  *
   369  * Performs:
   370  *   Set PC = endpc
   371  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   372  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   373  *   Call sh4_execute_instruction
   374  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   375  */
   376 void exit_block_emu( sh4vma_t endpc )
   377 {
   378     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   379     ADD_r32_sh4r( R_ECX, R_PC );
   381     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   382     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   383     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   384     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   386     call_func0( sh4_execute_instruction );    
   387     load_spreg( R_EAX, R_PC );
   388     if( sh4_x86.tlb_on ) {
   389 	call_func1(xlat_get_code_by_vma,R_EAX);
   390     } else {
   391 	call_func1(xlat_get_code,R_EAX);
   392     }
   393     exit_block();
   394 } 
   396 /**
    397  * Translate a single instruction. Delayed branches are handled specially
    398  * by translating both the branch and its delay-slot instruction as a single
    399  * unit (as the delay-slot instruction executes before the branch takes effect).
    400  * The instruction MUST be in the icache (assert check)
    401  *
    402  * @return nonzero if the instruction marks the end of a basic block
    403  * (eg a branch, trap, or illegal instruction)
   404  */
   405 uint32_t sh4_translate_instruction( sh4vma_t pc )
   406 {
   407     uint32_t ir;
   408     /* Read instruction from icache */
   409     assert( IS_IN_ICACHE(pc) );
   410     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   412     if( !sh4_x86.in_delay_slot ) {
   413 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   414     }
   415 %%
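/* Everything below the %% marker is the instruction-pattern section,
 * apparently consumed by lxdream's decoder generator: each
 * "MNEMONIC operands {: ... :}" entry pairs an SH4 instruction form with the
 * C action that emits x86 code for it, with the decoded fields (Rm, Rn, imm,
 * disp) substituted in by the generator. */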
   416 /* ALU operations */
   417 ADD Rm, Rn {:
   418     COUNT_INST(I_ADD);
   419     load_reg( R_EAX, Rm );
   420     load_reg( R_ECX, Rn );
   421     ADD_r32_r32( R_EAX, R_ECX );
   422     store_reg( R_ECX, Rn );
   423     sh4_x86.tstate = TSTATE_NONE;
   424 :}
   425 ADD #imm, Rn {:  
   426     COUNT_INST(I_ADDI);
   427     load_reg( R_EAX, Rn );
   428     ADD_imm8s_r32( imm, R_EAX );
   429     store_reg( R_EAX, Rn );
   430     sh4_x86.tstate = TSTATE_NONE;
   431 :}
   432 ADDC Rm, Rn {:
   433     COUNT_INST(I_ADDC);
   434     if( sh4_x86.tstate != TSTATE_C ) {
   435         LDC_t();
   436     }
   437     load_reg( R_EAX, Rm );
   438     load_reg( R_ECX, Rn );
   439     ADC_r32_r32( R_EAX, R_ECX );
   440     store_reg( R_ECX, Rn );
   441     SETC_t();
   442     sh4_x86.tstate = TSTATE_C;
   443 :}
   444 ADDV Rm, Rn {:
   445     COUNT_INST(I_ADDV);
   446     load_reg( R_EAX, Rm );
   447     load_reg( R_ECX, Rn );
   448     ADD_r32_r32( R_EAX, R_ECX );
   449     store_reg( R_ECX, Rn );
   450     SETO_t();
   451     sh4_x86.tstate = TSTATE_O;
   452 :}
   453 AND Rm, Rn {:
   454     COUNT_INST(I_AND);
   455     load_reg( R_EAX, Rm );
   456     load_reg( R_ECX, Rn );
   457     AND_r32_r32( R_EAX, R_ECX );
   458     store_reg( R_ECX, Rn );
   459     sh4_x86.tstate = TSTATE_NONE;
   460 :}
   461 AND #imm, R0 {:  
   462     COUNT_INST(I_ANDI);
   463     load_reg( R_EAX, 0 );
   464     AND_imm32_r32(imm, R_EAX); 
   465     store_reg( R_EAX, 0 );
   466     sh4_x86.tstate = TSTATE_NONE;
   467 :}
   468 AND.B #imm, @(R0, GBR) {: 
   469     COUNT_INST(I_ANDB);
   470     load_reg( R_EAX, 0 );
   471     load_spreg( R_ECX, R_GBR );
   472     ADD_r32_r32( R_ECX, R_EAX );
   473     MMU_TRANSLATE_WRITE( R_EAX );
   474     MOV_r32_esp8(R_EAX, 0);
   475     MEM_READ_BYTE( R_EAX, R_EDX );
   476     MOV_esp8_r32(0, R_EAX);
   477     AND_imm32_r32(imm, R_EDX );
   478     MEM_WRITE_BYTE( R_EAX, R_EDX );
   479     sh4_x86.tstate = TSTATE_NONE;
   480 :}
   481 CMP/EQ Rm, Rn {:  
   482     COUNT_INST(I_CMPEQ);
   483     load_reg( R_EAX, Rm );
   484     load_reg( R_ECX, Rn );
   485     CMP_r32_r32( R_EAX, R_ECX );
   486     SETE_t();
   487     sh4_x86.tstate = TSTATE_E;
   488 :}
   489 CMP/EQ #imm, R0 {:  
   490     COUNT_INST(I_CMPEQI);
   491     load_reg( R_EAX, 0 );
   492     CMP_imm8s_r32(imm, R_EAX);
   493     SETE_t();
   494     sh4_x86.tstate = TSTATE_E;
   495 :}
   496 CMP/GE Rm, Rn {:  
   497     COUNT_INST(I_CMPGE);
   498     load_reg( R_EAX, Rm );
   499     load_reg( R_ECX, Rn );
   500     CMP_r32_r32( R_EAX, R_ECX );
   501     SETGE_t();
   502     sh4_x86.tstate = TSTATE_GE;
   503 :}
   504 CMP/GT Rm, Rn {: 
   505     COUNT_INST(I_CMPGT);
   506     load_reg( R_EAX, Rm );
   507     load_reg( R_ECX, Rn );
   508     CMP_r32_r32( R_EAX, R_ECX );
   509     SETG_t();
   510     sh4_x86.tstate = TSTATE_G;
   511 :}
   512 CMP/HI Rm, Rn {:  
   513     COUNT_INST(I_CMPHI);
   514     load_reg( R_EAX, Rm );
   515     load_reg( R_ECX, Rn );
   516     CMP_r32_r32( R_EAX, R_ECX );
   517     SETA_t();
   518     sh4_x86.tstate = TSTATE_A;
   519 :}
   520 CMP/HS Rm, Rn {: 
   521     COUNT_INST(I_CMPHS);
   522     load_reg( R_EAX, Rm );
   523     load_reg( R_ECX, Rn );
   524     CMP_r32_r32( R_EAX, R_ECX );
   525     SETAE_t();
   526     sh4_x86.tstate = TSTATE_AE;
   527  :}
   528 CMP/PL Rn {: 
   529     COUNT_INST(I_CMPPL);
   530     load_reg( R_EAX, Rn );
   531     CMP_imm8s_r32( 0, R_EAX );
   532     SETG_t();
   533     sh4_x86.tstate = TSTATE_G;
   534 :}
   535 CMP/PZ Rn {:  
   536     COUNT_INST(I_CMPPZ);
   537     load_reg( R_EAX, Rn );
   538     CMP_imm8s_r32( 0, R_EAX );
   539     SETGE_t();
   540     sh4_x86.tstate = TSTATE_GE;
   541 :}
   542 CMP/STR Rm, Rn {:  
   543     COUNT_INST(I_CMPSTR);
   544     load_reg( R_EAX, Rm );
   545     load_reg( R_ECX, Rn );
   546     XOR_r32_r32( R_ECX, R_EAX );
   547     TEST_r8_r8( R_AL, R_AL );
   548     JE_rel8(target1);
   549     TEST_r8_r8( R_AH, R_AH );
   550     JE_rel8(target2);
   551     SHR_imm8_r32( 16, R_EAX );
   552     TEST_r8_r8( R_AL, R_AL );
   553     JE_rel8(target3);
   554     TEST_r8_r8( R_AH, R_AH );
   555     JMP_TARGET(target1);
   556     JMP_TARGET(target2);
   557     JMP_TARGET(target3);
   558     SETE_t();
   559     sh4_x86.tstate = TSTATE_E;
   560 :}
   561 DIV0S Rm, Rn {:
   562     COUNT_INST(I_DIV0S);
   563     load_reg( R_EAX, Rm );
   564     load_reg( R_ECX, Rn );
   565     SHR_imm8_r32( 31, R_EAX );
   566     SHR_imm8_r32( 31, R_ECX );
   567     store_spreg( R_EAX, R_M );
   568     store_spreg( R_ECX, R_Q );
   569     CMP_r32_r32( R_EAX, R_ECX );
   570     SETNE_t();
   571     sh4_x86.tstate = TSTATE_NE;
   572 :}
   573 DIV0U {:  
   574     COUNT_INST(I_DIV0U);
   575     XOR_r32_r32( R_EAX, R_EAX );
   576     store_spreg( R_EAX, R_Q );
   577     store_spreg( R_EAX, R_M );
   578     store_spreg( R_EAX, R_T );
   579     sh4_x86.tstate = TSTATE_C; // works for DIV1
   580 :}
   581 DIV1 Rm, Rn {:
   582     COUNT_INST(I_DIV1);
   583     load_spreg( R_ECX, R_M );
   584     load_reg( R_EAX, Rn );
   585     if( sh4_x86.tstate != TSTATE_C ) {
   586 	LDC_t();
   587     }
   588     RCL1_r32( R_EAX );
   589     SETC_r8( R_DL ); // Q'
   590     CMP_sh4r_r32( R_Q, R_ECX );
   591     JE_rel8(mqequal);
   592     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   593     JMP_rel8(end);
   594     JMP_TARGET(mqequal);
   595     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   596     JMP_TARGET(end);
   597     store_reg( R_EAX, Rn ); // Done with Rn now
   598     SETC_r8(R_AL); // tmp1
   599     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   600     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   601     store_spreg( R_ECX, R_Q );
   602     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   603     MOVZX_r8_r32( R_AL, R_EAX );
   604     store_spreg( R_EAX, R_T );
   605     sh4_x86.tstate = TSTATE_NONE;
   606 :}
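/* DIV1 is one step of the SH4's non-restoring division: the next dividend
 * bit is rotated in through T, Rm is added or subtracted depending on whether
 * the saved Q bit matches M, and the new Q and T = (Q == M) are derived from
 * the resulting carry -- hence the Q'/Q'' juggling through DL, AL and CL
 * above. */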
   607 DMULS.L Rm, Rn {:  
   608     COUNT_INST(I_DMULS);
   609     load_reg( R_EAX, Rm );
   610     load_reg( R_ECX, Rn );
   611     IMUL_r32(R_ECX);
   612     store_spreg( R_EDX, R_MACH );
   613     store_spreg( R_EAX, R_MACL );
   614     sh4_x86.tstate = TSTATE_NONE;
   615 :}
   616 DMULU.L Rm, Rn {:  
   617     COUNT_INST(I_DMULU);
   618     load_reg( R_EAX, Rm );
   619     load_reg( R_ECX, Rn );
   620     MUL_r32(R_ECX);
   621     store_spreg( R_EDX, R_MACH );
   622     store_spreg( R_EAX, R_MACL );    
   623     sh4_x86.tstate = TSTATE_NONE;
   624 :}
   625 DT Rn {:  
   626     COUNT_INST(I_DT);
   627     load_reg( R_EAX, Rn );
   628     ADD_imm8s_r32( -1, R_EAX );
   629     store_reg( R_EAX, Rn );
   630     SETE_t();
   631     sh4_x86.tstate = TSTATE_E;
   632 :}
   633 EXTS.B Rm, Rn {:  
   634     COUNT_INST(I_EXTSB);
   635     load_reg( R_EAX, Rm );
   636     MOVSX_r8_r32( R_EAX, R_EAX );
   637     store_reg( R_EAX, Rn );
   638 :}
   639 EXTS.W Rm, Rn {:  
   640     COUNT_INST(I_EXTSW);
   641     load_reg( R_EAX, Rm );
   642     MOVSX_r16_r32( R_EAX, R_EAX );
   643     store_reg( R_EAX, Rn );
   644 :}
   645 EXTU.B Rm, Rn {:  
   646     COUNT_INST(I_EXTUB);
   647     load_reg( R_EAX, Rm );
   648     MOVZX_r8_r32( R_EAX, R_EAX );
   649     store_reg( R_EAX, Rn );
   650 :}
   651 EXTU.W Rm, Rn {:  
   652     COUNT_INST(I_EXTUW);
   653     load_reg( R_EAX, Rm );
   654     MOVZX_r16_r32( R_EAX, R_EAX );
   655     store_reg( R_EAX, Rn );
   656 :}
   657 MAC.L @Rm+, @Rn+ {:
   658     COUNT_INST(I_MACL);
   659     if( Rm == Rn ) {
   660 	load_reg( R_EAX, Rm );
   661 	check_ralign32( R_EAX );
   662 	MMU_TRANSLATE_READ( R_EAX );
   663 	MOV_r32_esp8(R_EAX, 0);
   664 	load_reg( R_EAX, Rn );
   665 	ADD_imm8s_r32( 4, R_EAX );
   666 	MMU_TRANSLATE_READ( R_EAX );
   667 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
   668 	// Note translate twice in case of page boundaries. Maybe worth
   669 	// adding a page-boundary check to skip the second translation
   670     } else {
   671 	load_reg( R_EAX, Rm );
   672 	check_ralign32( R_EAX );
   673 	MMU_TRANSLATE_READ( R_EAX );
   674 	MOV_r32_esp8( R_EAX, 0 );
   675 	load_reg( R_EAX, Rn );
   676 	check_ralign32( R_EAX );
   677 	MMU_TRANSLATE_READ( R_EAX );
   678 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   679 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   680     }
   681     MEM_READ_LONG( R_EAX, R_EAX );
   682     MOV_r32_esp8( R_EAX, 4 );
   683     MOV_esp8_r32( 0, R_EAX );
   684     MEM_READ_LONG( R_EAX, R_EAX );
   685     MOV_esp8_r32( 4, R_ECX );
   687     IMUL_r32( R_ECX );
   688     ADD_r32_sh4r( R_EAX, R_MACL );
   689     ADC_r32_sh4r( R_EDX, R_MACH );
   691     load_spreg( R_ECX, R_S );
   692     TEST_r32_r32(R_ECX, R_ECX);
   693     JE_rel8( nosat );
   694     call_func0( signsat48 );
   695     JMP_TARGET( nosat );
   696     sh4_x86.tstate = TSTATE_NONE;
   697 :}
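/* With the S flag set, MAC.L saturates the accumulator to 48 bits
 * (sign-extended through MACH:MACL), delegated to the signsat48() helper;
 * with S clear the full 64-bit multiply-accumulate above stands as-is. */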
   698 MAC.W @Rm+, @Rn+ {:  
   699     COUNT_INST(I_MACW);
   700     if( Rm == Rn ) {
   701 	load_reg( R_EAX, Rm );
   702 	check_ralign16( R_EAX );
   703 	MMU_TRANSLATE_READ( R_EAX );
   704         MOV_r32_esp8( R_EAX, 0 );
   705 	load_reg( R_EAX, Rn );
   706 	ADD_imm8s_r32( 2, R_EAX );
   707 	MMU_TRANSLATE_READ( R_EAX );
   708 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   709 	// Note translate twice in case of page boundaries. Maybe worth
   710 	// adding a page-boundary check to skip the second translation
   711     } else {
   712 	load_reg( R_EAX, Rm );
   713 	check_ralign16( R_EAX );
   714 	MMU_TRANSLATE_READ( R_EAX );
   715         MOV_r32_esp8( R_EAX, 0 );
   716 	load_reg( R_EAX, Rn );
   717 	check_ralign16( R_EAX );
   718 	MMU_TRANSLATE_READ( R_EAX );
   719 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   720 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   721     }
   722     MEM_READ_WORD( R_EAX, R_EAX );
   723     MOV_r32_esp8( R_EAX, 4 );
   724     MOV_esp8_r32( 0, R_EAX );
   725     MEM_READ_WORD( R_EAX, R_EAX );
   726     MOV_esp8_r32( 4, R_ECX );
   728     IMUL_r32( R_ECX );
   729     load_spreg( R_ECX, R_S );
   730     TEST_r32_r32( R_ECX, R_ECX );
   731     JE_rel8( nosat );
   733     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   734     JNO_rel8( end );            // 2
   735     load_imm32( R_EDX, 1 );         // 5
   736     store_spreg( R_EDX, R_MACH );   // 6
   737     JS_rel8( positive );        // 2
   738     load_imm32( R_EAX, 0x80000000 );// 5
   739     store_spreg( R_EAX, R_MACL );   // 6
   740     JMP_rel8(end2);           // 2
   742     JMP_TARGET(positive);
   743     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   744     store_spreg( R_EAX, R_MACL );   // 6
   745     JMP_rel8(end3);            // 2
   747     JMP_TARGET(nosat);
   748     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   749     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   750     JMP_TARGET(end);
   751     JMP_TARGET(end2);
   752     JMP_TARGET(end3);
   753     sh4_x86.tstate = TSTATE_NONE;
   754 :}
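/* MAC.W's saturating mode (S=1) clamps MACL to the 32-bit range and flags
 * the overflow by writing 1 to MACH, per the SH4 spec -- which is what the
 * inline sequence above does, picking 0x80000000 or 0x7FFFFFFF from the sign
 * of the overflowing add. */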
   755 MOVT Rn {:  
   756     COUNT_INST(I_MOVT);
   757     load_spreg( R_EAX, R_T );
   758     store_reg( R_EAX, Rn );
   759 :}
   760 MUL.L Rm, Rn {:  
   761     COUNT_INST(I_MULL);
   762     load_reg( R_EAX, Rm );
   763     load_reg( R_ECX, Rn );
   764     MUL_r32( R_ECX );
   765     store_spreg( R_EAX, R_MACL );
   766     sh4_x86.tstate = TSTATE_NONE;
   767 :}
   768 MULS.W Rm, Rn {:
   769     COUNT_INST(I_MULSW);
   770     load_reg16s( R_EAX, Rm );
   771     load_reg16s( R_ECX, Rn );
   772     MUL_r32( R_ECX );
   773     store_spreg( R_EAX, R_MACL );
   774     sh4_x86.tstate = TSTATE_NONE;
   775 :}
   776 MULU.W Rm, Rn {:  
   777     COUNT_INST(I_MULUW);
   778     load_reg16u( R_EAX, Rm );
   779     load_reg16u( R_ECX, Rn );
   780     MUL_r32( R_ECX );
   781     store_spreg( R_EAX, R_MACL );
   782     sh4_x86.tstate = TSTATE_NONE;
   783 :}
   784 NEG Rm, Rn {:
   785     COUNT_INST(I_NEG);
   786     load_reg( R_EAX, Rm );
   787     NEG_r32( R_EAX );
   788     store_reg( R_EAX, Rn );
   789     sh4_x86.tstate = TSTATE_NONE;
   790 :}
   791 NEGC Rm, Rn {:  
   792     COUNT_INST(I_NEGC);
   793     load_reg( R_EAX, Rm );
   794     XOR_r32_r32( R_ECX, R_ECX );
   795     LDC_t();
   796     SBB_r32_r32( R_EAX, R_ECX );
   797     store_reg( R_ECX, Rn );
   798     SETC_t();
   799     sh4_x86.tstate = TSTATE_C;
   800 :}
   801 NOT Rm, Rn {:  
   802     COUNT_INST(I_NOT);
   803     load_reg( R_EAX, Rm );
   804     NOT_r32( R_EAX );
   805     store_reg( R_EAX, Rn );
   806     sh4_x86.tstate = TSTATE_NONE;
   807 :}
   808 OR Rm, Rn {:  
   809     COUNT_INST(I_OR);
   810     load_reg( R_EAX, Rm );
   811     load_reg( R_ECX, Rn );
   812     OR_r32_r32( R_EAX, R_ECX );
   813     store_reg( R_ECX, Rn );
   814     sh4_x86.tstate = TSTATE_NONE;
   815 :}
   816 OR #imm, R0 {:
   817     COUNT_INST(I_ORI);
   818     load_reg( R_EAX, 0 );
   819     OR_imm32_r32(imm, R_EAX);
   820     store_reg( R_EAX, 0 );
   821     sh4_x86.tstate = TSTATE_NONE;
   822 :}
   823 OR.B #imm, @(R0, GBR) {:  
   824     COUNT_INST(I_ORB);
   825     load_reg( R_EAX, 0 );
   826     load_spreg( R_ECX, R_GBR );
   827     ADD_r32_r32( R_ECX, R_EAX );
   828     MMU_TRANSLATE_WRITE( R_EAX );
   829     MOV_r32_esp8( R_EAX, 0 );
   830     MEM_READ_BYTE( R_EAX, R_EDX );
   831     MOV_esp8_r32( 0, R_EAX );
   832     OR_imm32_r32(imm, R_EDX );
   833     MEM_WRITE_BYTE( R_EAX, R_EDX );
   834     sh4_x86.tstate = TSTATE_NONE;
   835 :}
   836 ROTCL Rn {:
   837     COUNT_INST(I_ROTCL);
   838     load_reg( R_EAX, Rn );
   839     if( sh4_x86.tstate != TSTATE_C ) {
   840 	LDC_t();
   841     }
   842     RCL1_r32( R_EAX );
   843     store_reg( R_EAX, Rn );
   844     SETC_t();
   845     sh4_x86.tstate = TSTATE_C;
   846 :}
   847 ROTCR Rn {:  
   848     COUNT_INST(I_ROTCR);
   849     load_reg( R_EAX, Rn );
   850     if( sh4_x86.tstate != TSTATE_C ) {
   851 	LDC_t();
   852     }
   853     RCR1_r32( R_EAX );
   854     store_reg( R_EAX, Rn );
   855     SETC_t();
   856     sh4_x86.tstate = TSTATE_C;
   857 :}
   858 ROTL Rn {:  
   859     COUNT_INST(I_ROTL);
   860     load_reg( R_EAX, Rn );
   861     ROL1_r32( R_EAX );
   862     store_reg( R_EAX, Rn );
   863     SETC_t();
   864     sh4_x86.tstate = TSTATE_C;
   865 :}
   866 ROTR Rn {:  
   867     COUNT_INST(I_ROTR);
   868     load_reg( R_EAX, Rn );
   869     ROR1_r32( R_EAX );
   870     store_reg( R_EAX, Rn );
   871     SETC_t();
   872     sh4_x86.tstate = TSTATE_C;
   873 :}
   874 SHAD Rm, Rn {:
   875     COUNT_INST(I_SHAD);
   876     /* Annoyingly enough, not directly convertible */
   877     load_reg( R_EAX, Rn );
   878     load_reg( R_ECX, Rm );
   879     CMP_imm32_r32( 0, R_ECX );
   880     JGE_rel8(doshl);
   882     NEG_r32( R_ECX );      // 2
   883     AND_imm8_r8( 0x1F, R_CL ); // 3
   884     JE_rel8(emptysar);     // 2
   885     SAR_r32_CL( R_EAX );       // 2
   886     JMP_rel8(end);          // 2
   888     JMP_TARGET(emptysar);
   889     SAR_imm8_r32(31, R_EAX );  // 3
   890     JMP_rel8(end2);
   892     JMP_TARGET(doshl);
   893     AND_imm8_r8( 0x1F, R_CL ); // 3
   894     SHL_r32_CL( R_EAX );       // 2
   895     JMP_TARGET(end);
   896     JMP_TARGET(end2);
   897     store_reg( R_EAX, Rn );
   898     sh4_x86.tstate = TSTATE_NONE;
   899 :}
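/* SHAD semantics: a non-negative Rm shifts Rn left by Rm&31; a negative Rm
 * shifts right arithmetically by (-Rm)&31, except that a zero count in that
 * case (Rm a multiple of 32) must flood Rn with its sign bit. x86's
 * SAR r32,CL masks the count to 0..31 and so cannot express the flood
 * directly -- that is the "emptysar" special case above. */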
   900 SHLD Rm, Rn {:  
   901     COUNT_INST(I_SHLD);
   902     load_reg( R_EAX, Rn );
   903     load_reg( R_ECX, Rm );
   904     CMP_imm32_r32( 0, R_ECX );
   905     JGE_rel8(doshl);
   907     NEG_r32( R_ECX );      // 2
   908     AND_imm8_r8( 0x1F, R_CL ); // 3
   909     JE_rel8(emptyshr );
   910     SHR_r32_CL( R_EAX );       // 2
   911     JMP_rel8(end);          // 2
   913     JMP_TARGET(emptyshr);
   914     XOR_r32_r32( R_EAX, R_EAX );
   915     JMP_rel8(end2);
   917     JMP_TARGET(doshl);
   918     AND_imm8_r8( 0x1F, R_CL ); // 3
   919     SHL_r32_CL( R_EAX );       // 2
   920     JMP_TARGET(end);
   921     JMP_TARGET(end2);
   922     store_reg( R_EAX, Rn );
   923     sh4_x86.tstate = TSTATE_NONE;
   924 :}
   925 SHAL Rn {: 
   926     COUNT_INST(I_SHAL);
   927     load_reg( R_EAX, Rn );
   928     SHL1_r32( R_EAX );
   929     SETC_t();
   930     store_reg( R_EAX, Rn );
   931     sh4_x86.tstate = TSTATE_C;
   932 :}
   933 SHAR Rn {:  
   934     COUNT_INST(I_SHAR);
   935     load_reg( R_EAX, Rn );
   936     SAR1_r32( R_EAX );
   937     SETC_t();
   938     store_reg( R_EAX, Rn );
   939     sh4_x86.tstate = TSTATE_C;
   940 :}
   941 SHLL Rn {:  
   942     COUNT_INST(I_SHLL);
   943     load_reg( R_EAX, Rn );
   944     SHL1_r32( R_EAX );
   945     SETC_t();
   946     store_reg( R_EAX, Rn );
   947     sh4_x86.tstate = TSTATE_C;
   948 :}
   949 SHLL2 Rn {:
   950     COUNT_INST(I_SHLL);
   951     load_reg( R_EAX, Rn );
   952     SHL_imm8_r32( 2, R_EAX );
   953     store_reg( R_EAX, Rn );
   954     sh4_x86.tstate = TSTATE_NONE;
   955 :}
   956 SHLL8 Rn {:  
   957     COUNT_INST(I_SHLL);
   958     load_reg( R_EAX, Rn );
   959     SHL_imm8_r32( 8, R_EAX );
   960     store_reg( R_EAX, Rn );
   961     sh4_x86.tstate = TSTATE_NONE;
   962 :}
   963 SHLL16 Rn {:  
   964     COUNT_INST(I_SHLL);
   965     load_reg( R_EAX, Rn );
   966     SHL_imm8_r32( 16, R_EAX );
   967     store_reg( R_EAX, Rn );
   968     sh4_x86.tstate = TSTATE_NONE;
   969 :}
   970 SHLR Rn {:  
   971     COUNT_INST(I_SHLR);
   972     load_reg( R_EAX, Rn );
   973     SHR1_r32( R_EAX );
   974     SETC_t();
   975     store_reg( R_EAX, Rn );
   976     sh4_x86.tstate = TSTATE_C;
   977 :}
   978 SHLR2 Rn {:  
   979     COUNT_INST(I_SHLR);
   980     load_reg( R_EAX, Rn );
   981     SHR_imm8_r32( 2, R_EAX );
   982     store_reg( R_EAX, Rn );
   983     sh4_x86.tstate = TSTATE_NONE;
   984 :}
   985 SHLR8 Rn {:  
   986     COUNT_INST(I_SHLR);
   987     load_reg( R_EAX, Rn );
   988     SHR_imm8_r32( 8, R_EAX );
   989     store_reg( R_EAX, Rn );
   990     sh4_x86.tstate = TSTATE_NONE;
   991 :}
   992 SHLR16 Rn {:  
   993     COUNT_INST(I_SHLR);
   994     load_reg( R_EAX, Rn );
   995     SHR_imm8_r32( 16, R_EAX );
   996     store_reg( R_EAX, Rn );
   997     sh4_x86.tstate = TSTATE_NONE;
   998 :}
   999 SUB Rm, Rn {:  
  1000     COUNT_INST(I_SUB);
  1001     load_reg( R_EAX, Rm );
  1002     load_reg( R_ECX, Rn );
  1003     SUB_r32_r32( R_EAX, R_ECX );
  1004     store_reg( R_ECX, Rn );
  1005     sh4_x86.tstate = TSTATE_NONE;
  1006 :}
  1007 SUBC Rm, Rn {:  
  1008     COUNT_INST(I_SUBC);
  1009     load_reg( R_EAX, Rm );
  1010     load_reg( R_ECX, Rn );
  1011     if( sh4_x86.tstate != TSTATE_C ) {
   1012 	LDC_t();
   1013     }
  1014     SBB_r32_r32( R_EAX, R_ECX );
  1015     store_reg( R_ECX, Rn );
  1016     SETC_t();
  1017     sh4_x86.tstate = TSTATE_C;
  1018 :}
  1019 SUBV Rm, Rn {:  
  1020     COUNT_INST(I_SUBV);
  1021     load_reg( R_EAX, Rm );
  1022     load_reg( R_ECX, Rn );
  1023     SUB_r32_r32( R_EAX, R_ECX );
  1024     store_reg( R_ECX, Rn );
  1025     SETO_t();
  1026     sh4_x86.tstate = TSTATE_O;
  1027 :}
  1028 SWAP.B Rm, Rn {:  
  1029     COUNT_INST(I_SWAPB);
  1030     load_reg( R_EAX, Rm );
  1031     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
  1032     store_reg( R_EAX, Rn );
  1033 :}
  1034 SWAP.W Rm, Rn {:  
  1035     COUNT_INST(I_SWAPB);
  1036     load_reg( R_EAX, Rm );
  1037     MOV_r32_r32( R_EAX, R_ECX );
  1038     SHL_imm8_r32( 16, R_ECX );
  1039     SHR_imm8_r32( 16, R_EAX );
  1040     OR_r32_r32( R_EAX, R_ECX );
  1041     store_reg( R_ECX, Rn );
  1042     sh4_x86.tstate = TSTATE_NONE;
  1043 :}
  1044 TAS.B @Rn {:  
  1045     COUNT_INST(I_TASB);
  1046     load_reg( R_EAX, Rn );
  1047     MMU_TRANSLATE_WRITE( R_EAX );
  1048     MOV_r32_esp8( R_EAX, 0 );
  1049     MEM_READ_BYTE( R_EAX, R_EDX );
  1050     TEST_r8_r8( R_DL, R_DL );
  1051     SETE_t();
  1052     OR_imm8_r8( 0x80, R_DL );
  1053     MOV_esp8_r32( 0, R_EAX );
  1054     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1055     sh4_x86.tstate = TSTATE_NONE;
  1056 :}
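/* TAS.B reads the byte, sets T if it was zero, then writes it back with bit
 * 7 set. On hardware this is a bus-locked read-modify-write; the translated
 * sequence is not atomic, which only matters if another agent could touch
 * the byte between the two accesses. */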
  1057 TST Rm, Rn {:  
  1058     COUNT_INST(I_TST);
  1059     load_reg( R_EAX, Rm );
  1060     load_reg( R_ECX, Rn );
  1061     TEST_r32_r32( R_EAX, R_ECX );
  1062     SETE_t();
  1063     sh4_x86.tstate = TSTATE_E;
  1064 :}
  1065 TST #imm, R0 {:  
  1066     COUNT_INST(I_TSTI);
  1067     load_reg( R_EAX, 0 );
  1068     TEST_imm32_r32( imm, R_EAX );
  1069     SETE_t();
  1070     sh4_x86.tstate = TSTATE_E;
  1071 :}
  1072 TST.B #imm, @(R0, GBR) {:  
  1073     COUNT_INST(I_TSTB);
  1074     load_reg( R_EAX, 0);
   1075     load_spreg( R_ECX, R_GBR );  /* was load_reg: GBR is a control reg, not r[n] (cf. AND.B/OR.B/XOR.B) */
  1076     ADD_r32_r32( R_ECX, R_EAX );
  1077     MMU_TRANSLATE_READ( R_EAX );
  1078     MEM_READ_BYTE( R_EAX, R_EAX );
  1079     TEST_imm8_r8( imm, R_AL );
  1080     SETE_t();
  1081     sh4_x86.tstate = TSTATE_E;
  1082 :}
  1083 XOR Rm, Rn {:  
  1084     COUNT_INST(I_XOR);
  1085     load_reg( R_EAX, Rm );
  1086     load_reg( R_ECX, Rn );
  1087     XOR_r32_r32( R_EAX, R_ECX );
  1088     store_reg( R_ECX, Rn );
  1089     sh4_x86.tstate = TSTATE_NONE;
  1090 :}
  1091 XOR #imm, R0 {:  
  1092     COUNT_INST(I_XORI);
  1093     load_reg( R_EAX, 0 );
  1094     XOR_imm32_r32( imm, R_EAX );
  1095     store_reg( R_EAX, 0 );
  1096     sh4_x86.tstate = TSTATE_NONE;
  1097 :}
  1098 XOR.B #imm, @(R0, GBR) {:  
  1099     COUNT_INST(I_XORB);
  1100     load_reg( R_EAX, 0 );
  1101     load_spreg( R_ECX, R_GBR );
  1102     ADD_r32_r32( R_ECX, R_EAX );
  1103     MMU_TRANSLATE_WRITE( R_EAX );
  1104     MOV_r32_esp8( R_EAX, 0 );
  1105     MEM_READ_BYTE(R_EAX, R_EDX);
  1106     MOV_esp8_r32( 0, R_EAX );
  1107     XOR_imm32_r32( imm, R_EDX );
  1108     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1109     sh4_x86.tstate = TSTATE_NONE;
  1110 :}
  1111 XTRCT Rm, Rn {:
  1112     COUNT_INST(I_XTRCT);
  1113     load_reg( R_EAX, Rm );
  1114     load_reg( R_ECX, Rn );
  1115     SHL_imm8_r32( 16, R_EAX );
  1116     SHR_imm8_r32( 16, R_ECX );
  1117     OR_r32_r32( R_EAX, R_ECX );
  1118     store_reg( R_ECX, Rn );
  1119     sh4_x86.tstate = TSTATE_NONE;
  1120 :}
  1122 /* Data move instructions */
  1123 MOV Rm, Rn {:  
  1124     COUNT_INST(I_MOV);
  1125     load_reg( R_EAX, Rm );
  1126     store_reg( R_EAX, Rn );
  1127 :}
  1128 MOV #imm, Rn {:  
  1129     COUNT_INST(I_MOVI);
  1130     load_imm32( R_EAX, imm );
  1131     store_reg( R_EAX, Rn );
  1132 :}
  1133 MOV.B Rm, @Rn {:  
  1134     COUNT_INST(I_MOVB);
  1135     load_reg( R_EAX, Rn );
  1136     MMU_TRANSLATE_WRITE( R_EAX );
  1137     load_reg( R_EDX, Rm );
  1138     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1139     sh4_x86.tstate = TSTATE_NONE;
  1140 :}
  1141 MOV.B Rm, @-Rn {:  
  1142     COUNT_INST(I_MOVB);
  1143     load_reg( R_EAX, Rn );
  1144     ADD_imm8s_r32( -1, R_EAX );
  1145     MMU_TRANSLATE_WRITE( R_EAX );
  1146     load_reg( R_EDX, Rm );
  1147     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1148     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1149     sh4_x86.tstate = TSTATE_NONE;
  1150 :}
  1151 MOV.B Rm, @(R0, Rn) {:  
  1152     COUNT_INST(I_MOVB);
  1153     load_reg( R_EAX, 0 );
  1154     load_reg( R_ECX, Rn );
  1155     ADD_r32_r32( R_ECX, R_EAX );
  1156     MMU_TRANSLATE_WRITE( R_EAX );
  1157     load_reg( R_EDX, Rm );
  1158     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1159     sh4_x86.tstate = TSTATE_NONE;
  1160 :}
  1161 MOV.B R0, @(disp, GBR) {:  
  1162     COUNT_INST(I_MOVB);
  1163     load_spreg( R_EAX, R_GBR );
  1164     ADD_imm32_r32( disp, R_EAX );
  1165     MMU_TRANSLATE_WRITE( R_EAX );
  1166     load_reg( R_EDX, 0 );
  1167     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1168     sh4_x86.tstate = TSTATE_NONE;
  1169 :}
  1170 MOV.B R0, @(disp, Rn) {:  
  1171     COUNT_INST(I_MOVB);
  1172     load_reg( R_EAX, Rn );
  1173     ADD_imm32_r32( disp, R_EAX );
  1174     MMU_TRANSLATE_WRITE( R_EAX );
  1175     load_reg( R_EDX, 0 );
  1176     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1177     sh4_x86.tstate = TSTATE_NONE;
  1178 :}
  1179 MOV.B @Rm, Rn {:  
  1180     COUNT_INST(I_MOVB);
  1181     load_reg( R_EAX, Rm );
  1182     MMU_TRANSLATE_READ( R_EAX );
  1183     MEM_READ_BYTE( R_EAX, R_EAX );
  1184     store_reg( R_EAX, Rn );
  1185     sh4_x86.tstate = TSTATE_NONE;
  1186 :}
  1187 MOV.B @Rm+, Rn {:  
  1188     COUNT_INST(I_MOVB);
  1189     load_reg( R_EAX, Rm );
  1190     MMU_TRANSLATE_READ( R_EAX );
  1191     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1192     MEM_READ_BYTE( R_EAX, R_EAX );
  1193     store_reg( R_EAX, Rn );
  1194     sh4_x86.tstate = TSTATE_NONE;
  1195 :}
  1196 MOV.B @(R0, Rm), Rn {:  
  1197     COUNT_INST(I_MOVB);
  1198     load_reg( R_EAX, 0 );
  1199     load_reg( R_ECX, Rm );
  1200     ADD_r32_r32( R_ECX, R_EAX );
   1201     MMU_TRANSLATE_READ( R_EAX );
  1202     MEM_READ_BYTE( R_EAX, R_EAX );
  1203     store_reg( R_EAX, Rn );
  1204     sh4_x86.tstate = TSTATE_NONE;
  1205 :}
  1206 MOV.B @(disp, GBR), R0 {:  
  1207     COUNT_INST(I_MOVB);
  1208     load_spreg( R_EAX, R_GBR );
  1209     ADD_imm32_r32( disp, R_EAX );
  1210     MMU_TRANSLATE_READ( R_EAX );
  1211     MEM_READ_BYTE( R_EAX, R_EAX );
  1212     store_reg( R_EAX, 0 );
  1213     sh4_x86.tstate = TSTATE_NONE;
  1214 :}
  1215 MOV.B @(disp, Rm), R0 {:  
  1216     COUNT_INST(I_MOVB);
  1217     load_reg( R_EAX, Rm );
  1218     ADD_imm32_r32( disp, R_EAX );
  1219     MMU_TRANSLATE_READ( R_EAX );
  1220     MEM_READ_BYTE( R_EAX, R_EAX );
  1221     store_reg( R_EAX, 0 );
  1222     sh4_x86.tstate = TSTATE_NONE;
  1223 :}
  1224 MOV.L Rm, @Rn {:
  1225     COUNT_INST(I_MOVL);
  1226     load_reg( R_EAX, Rn );
  1227     check_walign32(R_EAX);
  1228     MMU_TRANSLATE_WRITE( R_EAX );
  1229     load_reg( R_EDX, Rm );
  1230     MEM_WRITE_LONG( R_EAX, R_EDX );
  1231     sh4_x86.tstate = TSTATE_NONE;
  1232 :}
  1233 MOV.L Rm, @-Rn {:  
  1234     COUNT_INST(I_MOVL);
  1235     load_reg( R_EAX, Rn );
  1236     ADD_imm8s_r32( -4, R_EAX );
  1237     check_walign32( R_EAX );
  1238     MMU_TRANSLATE_WRITE( R_EAX );
  1239     load_reg( R_EDX, Rm );
  1240     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1241     MEM_WRITE_LONG( R_EAX, R_EDX );
  1242     sh4_x86.tstate = TSTATE_NONE;
  1243 :}
  1244 MOV.L Rm, @(R0, Rn) {:  
  1245     COUNT_INST(I_MOVL);
  1246     load_reg( R_EAX, 0 );
  1247     load_reg( R_ECX, Rn );
  1248     ADD_r32_r32( R_ECX, R_EAX );
  1249     check_walign32( R_EAX );
  1250     MMU_TRANSLATE_WRITE( R_EAX );
  1251     load_reg( R_EDX, Rm );
  1252     MEM_WRITE_LONG( R_EAX, R_EDX );
  1253     sh4_x86.tstate = TSTATE_NONE;
  1254 :}
  1255 MOV.L R0, @(disp, GBR) {:  
  1256     COUNT_INST(I_MOVL);
  1257     load_spreg( R_EAX, R_GBR );
  1258     ADD_imm32_r32( disp, R_EAX );
  1259     check_walign32( R_EAX );
  1260     MMU_TRANSLATE_WRITE( R_EAX );
  1261     load_reg( R_EDX, 0 );
  1262     MEM_WRITE_LONG( R_EAX, R_EDX );
  1263     sh4_x86.tstate = TSTATE_NONE;
  1264 :}
  1265 MOV.L Rm, @(disp, Rn) {:  
  1266     COUNT_INST(I_MOVL);
  1267     load_reg( R_EAX, Rn );
  1268     ADD_imm32_r32( disp, R_EAX );
  1269     check_walign32( R_EAX );
  1270     MMU_TRANSLATE_WRITE( R_EAX );
  1271     load_reg( R_EDX, Rm );
  1272     MEM_WRITE_LONG( R_EAX, R_EDX );
  1273     sh4_x86.tstate = TSTATE_NONE;
  1274 :}
  1275 MOV.L @Rm, Rn {:  
  1276     COUNT_INST(I_MOVL);
  1277     load_reg( R_EAX, Rm );
  1278     check_ralign32( R_EAX );
  1279     MMU_TRANSLATE_READ( R_EAX );
  1280     MEM_READ_LONG( R_EAX, R_EAX );
  1281     store_reg( R_EAX, Rn );
  1282     sh4_x86.tstate = TSTATE_NONE;
  1283 :}
  1284 MOV.L @Rm+, Rn {:  
  1285     COUNT_INST(I_MOVL);
  1286     load_reg( R_EAX, Rm );
  1287     check_ralign32( R_EAX );
  1288     MMU_TRANSLATE_READ( R_EAX );
  1289     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1290     MEM_READ_LONG( R_EAX, R_EAX );
  1291     store_reg( R_EAX, Rn );
  1292     sh4_x86.tstate = TSTATE_NONE;
  1293 :}
  1294 MOV.L @(R0, Rm), Rn {:  
  1295     COUNT_INST(I_MOVL);
  1296     load_reg( R_EAX, 0 );
  1297     load_reg( R_ECX, Rm );
  1298     ADD_r32_r32( R_ECX, R_EAX );
  1299     check_ralign32( R_EAX );
  1300     MMU_TRANSLATE_READ( R_EAX );
  1301     MEM_READ_LONG( R_EAX, R_EAX );
  1302     store_reg( R_EAX, Rn );
  1303     sh4_x86.tstate = TSTATE_NONE;
  1304 :}
  1305 MOV.L @(disp, GBR), R0 {:
  1306     COUNT_INST(I_MOVL);
  1307     load_spreg( R_EAX, R_GBR );
  1308     ADD_imm32_r32( disp, R_EAX );
  1309     check_ralign32( R_EAX );
  1310     MMU_TRANSLATE_READ( R_EAX );
  1311     MEM_READ_LONG( R_EAX, R_EAX );
  1312     store_reg( R_EAX, 0 );
  1313     sh4_x86.tstate = TSTATE_NONE;
  1314 :}
  1315 MOV.L @(disp, PC), Rn {:  
  1316     COUNT_INST(I_MOVLPC);
  1317     if( sh4_x86.in_delay_slot ) {
  1318 	SLOTILLEGAL();
  1319     } else {
  1320 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1321 	if( IS_IN_ICACHE(target) ) {
  1322 	    // If the target address is in the same page as the code, it's
  1323 	    // pretty safe to just ref it directly and circumvent the whole
  1324 	    // memory subsystem. (this is a big performance win)
  1326 	    // FIXME: There's a corner-case that's not handled here when
  1327 	    // the current code-page is in the ITLB but not in the UTLB.
  1328 	    // (should generate a TLB miss although need to test SH4 
  1329 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1330 	    // behaviour though.
  1331 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1332 	    MOV_moff32_EAX( ptr );
  1333 	} else {
  1334 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1335 	    // different virtual address than the translation was done with,
  1336 	    // but we can safely assume that the low bits are the same.
  1337 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1338 	    ADD_sh4r_r32( R_PC, R_EAX );
  1339 	    MMU_TRANSLATE_READ( R_EAX );
  1340 	    MEM_READ_LONG( R_EAX, R_EAX );
  1341 	    sh4_x86.tstate = TSTATE_NONE;
   1342 	}
   1343 	store_reg( R_EAX, Rn );
   1344     }
   1345 :}
  1346 MOV.L @(disp, Rm), Rn {:  
  1347     COUNT_INST(I_MOVL);
  1348     load_reg( R_EAX, Rm );
  1349     ADD_imm8s_r32( disp, R_EAX );
  1350     check_ralign32( R_EAX );
  1351     MMU_TRANSLATE_READ( R_EAX );
  1352     MEM_READ_LONG( R_EAX, R_EAX );
  1353     store_reg( R_EAX, Rn );
  1354     sh4_x86.tstate = TSTATE_NONE;
  1355 :}
  1356 MOV.W Rm, @Rn {:  
  1357     COUNT_INST(I_MOVW);
  1358     load_reg( R_EAX, Rn );
  1359     check_walign16( R_EAX );
   1360     MMU_TRANSLATE_WRITE( R_EAX );
  1361     load_reg( R_EDX, Rm );
  1362     MEM_WRITE_WORD( R_EAX, R_EDX );
  1363     sh4_x86.tstate = TSTATE_NONE;
  1364 :}
  1365 MOV.W Rm, @-Rn {:  
  1366     COUNT_INST(I_MOVW);
  1367     load_reg( R_EAX, Rn );
  1368     ADD_imm8s_r32( -2, R_EAX );
  1369     check_walign16( R_EAX );
  1370     MMU_TRANSLATE_WRITE( R_EAX );
  1371     load_reg( R_EDX, Rm );
  1372     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1373     MEM_WRITE_WORD( R_EAX, R_EDX );
  1374     sh4_x86.tstate = TSTATE_NONE;
  1375 :}
  1376 MOV.W Rm, @(R0, Rn) {:  
  1377     COUNT_INST(I_MOVW);
  1378     load_reg( R_EAX, 0 );
  1379     load_reg( R_ECX, Rn );
  1380     ADD_r32_r32( R_ECX, R_EAX );
  1381     check_walign16( R_EAX );
  1382     MMU_TRANSLATE_WRITE( R_EAX );
  1383     load_reg( R_EDX, Rm );
  1384     MEM_WRITE_WORD( R_EAX, R_EDX );
  1385     sh4_x86.tstate = TSTATE_NONE;
  1386 :}
  1387 MOV.W R0, @(disp, GBR) {:  
  1388     COUNT_INST(I_MOVW);
  1389     load_spreg( R_EAX, R_GBR );
  1390     ADD_imm32_r32( disp, R_EAX );
  1391     check_walign16( R_EAX );
  1392     MMU_TRANSLATE_WRITE( R_EAX );
  1393     load_reg( R_EDX, 0 );
  1394     MEM_WRITE_WORD( R_EAX, R_EDX );
  1395     sh4_x86.tstate = TSTATE_NONE;
  1396 :}
  1397 MOV.W R0, @(disp, Rn) {:  
  1398     COUNT_INST(I_MOVW);
  1399     load_reg( R_EAX, Rn );
  1400     ADD_imm32_r32( disp, R_EAX );
  1401     check_walign16( R_EAX );
  1402     MMU_TRANSLATE_WRITE( R_EAX );
  1403     load_reg( R_EDX, 0 );
  1404     MEM_WRITE_WORD( R_EAX, R_EDX );
  1405     sh4_x86.tstate = TSTATE_NONE;
  1406 :}
  1407 MOV.W @Rm, Rn {:  
  1408     COUNT_INST(I_MOVW);
  1409     load_reg( R_EAX, Rm );
  1410     check_ralign16( R_EAX );
  1411     MMU_TRANSLATE_READ( R_EAX );
  1412     MEM_READ_WORD( R_EAX, R_EAX );
  1413     store_reg( R_EAX, Rn );
  1414     sh4_x86.tstate = TSTATE_NONE;
  1415 :}
  1416 MOV.W @Rm+, Rn {:  
  1417     COUNT_INST(I_MOVW);
  1418     load_reg( R_EAX, Rm );
  1419     check_ralign16( R_EAX );
  1420     MMU_TRANSLATE_READ( R_EAX );
  1421     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1422     MEM_READ_WORD( R_EAX, R_EAX );
  1423     store_reg( R_EAX, Rn );
  1424     sh4_x86.tstate = TSTATE_NONE;
  1425 :}
  1426 MOV.W @(R0, Rm), Rn {:  
  1427     COUNT_INST(I_MOVW);
  1428     load_reg( R_EAX, 0 );
  1429     load_reg( R_ECX, Rm );
  1430     ADD_r32_r32( R_ECX, R_EAX );
  1431     check_ralign16( R_EAX );
  1432     MMU_TRANSLATE_READ( R_EAX );
  1433     MEM_READ_WORD( R_EAX, R_EAX );
  1434     store_reg( R_EAX, Rn );
  1435     sh4_x86.tstate = TSTATE_NONE;
  1436 :}
  1437 MOV.W @(disp, GBR), R0 {:  
  1438     COUNT_INST(I_MOVW);
  1439     load_spreg( R_EAX, R_GBR );
  1440     ADD_imm32_r32( disp, R_EAX );
  1441     check_ralign16( R_EAX );
  1442     MMU_TRANSLATE_READ( R_EAX );
  1443     MEM_READ_WORD( R_EAX, R_EAX );
  1444     store_reg( R_EAX, 0 );
  1445     sh4_x86.tstate = TSTATE_NONE;
  1446 :}
  1447 MOV.W @(disp, PC), Rn {:  
  1448     COUNT_INST(I_MOVW);
  1449     if( sh4_x86.in_delay_slot ) {
  1450 	SLOTILLEGAL();
  1451     } else {
  1452 	// See comments for MOV.L @(disp, PC), Rn
  1453 	uint32_t target = pc + disp + 4;
  1454 	if( IS_IN_ICACHE(target) ) {
  1455 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1456 	    MOV_moff32_EAX( ptr );
  1457 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1458 	} else {
  1459 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1460 	    ADD_sh4r_r32( R_PC, R_EAX );
  1461 	    MMU_TRANSLATE_READ( R_EAX );
  1462 	    MEM_READ_WORD( R_EAX, R_EAX );
  1463 	    sh4_x86.tstate = TSTATE_NONE;
   1464 	}
   1465 	store_reg( R_EAX, Rn );
   1466     }
   1467 :}
  1468 MOV.W @(disp, Rm), R0 {:  
  1469     COUNT_INST(I_MOVW);
  1470     load_reg( R_EAX, Rm );
  1471     ADD_imm32_r32( disp, R_EAX );
  1472     check_ralign16( R_EAX );
  1473     MMU_TRANSLATE_READ( R_EAX );
  1474     MEM_READ_WORD( R_EAX, R_EAX );
  1475     store_reg( R_EAX, 0 );
  1476     sh4_x86.tstate = TSTATE_NONE;
  1477 :}
  1478 MOVA @(disp, PC), R0 {:  
  1479     COUNT_INST(I_MOVA);
  1480     if( sh4_x86.in_delay_slot ) {
  1481 	SLOTILLEGAL();
  1482     } else {
  1483 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1484 	ADD_sh4r_r32( R_PC, R_ECX );
  1485 	store_reg( R_ECX, 0 );
  1486 	sh4_x86.tstate = TSTATE_NONE;
   1487     }
   1488 :}
  1489 MOVCA.L R0, @Rn {:  
  1490     COUNT_INST(I_MOVCA);
  1491     load_reg( R_EAX, Rn );
  1492     check_walign32( R_EAX );
  1493     MMU_TRANSLATE_WRITE( R_EAX );
  1494     load_reg( R_EDX, 0 );
  1495     MEM_WRITE_LONG( R_EAX, R_EDX );
  1496     sh4_x86.tstate = TSTATE_NONE;
  1497 :}
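/* MOVCA.L nominally just allocates an operand-cache line for R0's store
 * without fetching the old contents; since the translator does not model the
 * operand cache, it is treated as an ordinary aligned long store. */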
  1499 /* Control transfer instructions */
  1500 BF disp {:
  1501     COUNT_INST(I_BF);
  1502     if( sh4_x86.in_delay_slot ) {
  1503 	SLOTILLEGAL();
  1504     } else {
  1505 	sh4vma_t target = disp + pc + 4;
  1506 	JT_rel8( nottaken );
  1507 	exit_block_rel(target, pc+2 );
  1508 	JMP_TARGET(nottaken);
  1509 	return 2;
   1510     }
   1511 :}
  1512 BF/S disp {:
  1513     COUNT_INST(I_BFS);
  1514     if( sh4_x86.in_delay_slot ) {
  1515 	SLOTILLEGAL();
  1516     } else {
  1517 	sh4_x86.in_delay_slot = DELAY_PC;
  1518 	if( UNTRANSLATABLE(pc+2) ) {
  1519 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1520 	    JT_rel8(nottaken);
  1521 	    ADD_imm32_r32( disp, R_EAX );
  1522 	    JMP_TARGET(nottaken);
  1523 	    ADD_sh4r_r32( R_PC, R_EAX );
  1524 	    store_spreg( R_EAX, R_NEW_PC );
  1525 	    exit_block_emu(pc+2);
  1526 	    sh4_x86.branch_taken = TRUE;
  1527 	    return 2;
  1528 	} else {
  1529 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1530 		CMP_imm8s_sh4r( 1, R_T );
  1531 		sh4_x86.tstate = TSTATE_E;
   1532 	    }
   1533 	    sh4vma_t target = disp + pc + 4;
  1534 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1535 	    int save_tstate = sh4_x86.tstate;
  1536 	    sh4_translate_instruction(pc+2);
  1537 	    exit_block_rel( target, pc+4 );
  1539 	    // not taken
  1540 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1541 	    sh4_x86.tstate = save_tstate;
  1542 	    sh4_translate_instruction(pc+2);
  1543 	    return 4;
   1544 	}
   1545     }
   1546 :}
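/* Conditional delay-slot trick (shared with BT/S below): the delay-slot
 * instruction is translated twice, once on the taken path ahead of
 * exit_block_rel and once on the fall-through path, with a rel32 Jcc whose
 * displacement is backpatched (*patch = ...) once the taken path's length is
 * known. save_tstate restores the flag-cache state so the second translation
 * sees the same conditions as the first. */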
  1547 BRA disp {:  
  1548     COUNT_INST(I_BRA);
  1549     if( sh4_x86.in_delay_slot ) {
  1550 	SLOTILLEGAL();
  1551     } else {
  1552 	sh4_x86.in_delay_slot = DELAY_PC;
  1553 	sh4_x86.branch_taken = TRUE;
  1554 	if( UNTRANSLATABLE(pc+2) ) {
  1555 	    load_spreg( R_EAX, R_PC );
  1556 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1557 	    store_spreg( R_EAX, R_NEW_PC );
  1558 	    exit_block_emu(pc+2);
  1559 	    return 2;
  1560 	} else {
  1561 	    sh4_translate_instruction( pc + 2 );
  1562 	    exit_block_rel( disp + pc + 4, pc+4 );
  1563 	    return 4;
   1564 	}
   1565     }
   1566 :}
  1567 BRAF Rn {:  
  1568     COUNT_INST(I_BRAF);
  1569     if( sh4_x86.in_delay_slot ) {
  1570 	SLOTILLEGAL();
  1571     } else {
  1572 	load_spreg( R_EAX, R_PC );
  1573 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1574 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1575 	store_spreg( R_EAX, R_NEW_PC );
  1576 	sh4_x86.in_delay_slot = DELAY_PC;
  1577 	sh4_x86.tstate = TSTATE_NONE;
  1578 	sh4_x86.branch_taken = TRUE;
  1579 	if( UNTRANSLATABLE(pc+2) ) {
  1580 	    exit_block_emu(pc+2);
  1581 	    return 2;
  1582 	} else {
  1583 	    sh4_translate_instruction( pc + 2 );
  1584 	    exit_block_newpcset(pc+2);
  1585 	    return 4;
   1586 	}
   1587     }
   1588 :}
  1589 BSR disp {:  
  1590     COUNT_INST(I_BSR);
  1591     if( sh4_x86.in_delay_slot ) {
  1592 	SLOTILLEGAL();
  1593     } else {
  1594 	load_spreg( R_EAX, R_PC );
  1595 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1596 	store_spreg( R_EAX, R_PR );
  1597 	sh4_x86.in_delay_slot = DELAY_PC;
  1598 	sh4_x86.branch_taken = TRUE;
  1599 	sh4_x86.tstate = TSTATE_NONE;
  1600 	if( UNTRANSLATABLE(pc+2) ) {
  1601 	    ADD_imm32_r32( disp, R_EAX );
  1602 	    store_spreg( R_EAX, R_NEW_PC );
  1603 	    exit_block_emu(pc+2);
  1604 	    return 2;
  1605 	} else {
  1606 	    sh4_translate_instruction( pc + 2 );
  1607 	    exit_block_rel( disp + pc + 4, pc+4 );
  1608 	    return 4;
   1609 	}
   1610     }
   1611 :}
  1612 BSRF Rn {:  
  1613     COUNT_INST(I_BSRF);
  1614     if( sh4_x86.in_delay_slot ) {
  1615 	SLOTILLEGAL();
  1616     } else {
  1617 	load_spreg( R_EAX, R_PC );
  1618 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1619 	store_spreg( R_EAX, R_PR );
  1620 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1621 	store_spreg( R_EAX, R_NEW_PC );
  1623 	sh4_x86.in_delay_slot = DELAY_PC;
  1624 	sh4_x86.tstate = TSTATE_NONE;
  1625 	sh4_x86.branch_taken = TRUE;
  1626 	if( UNTRANSLATABLE(pc+2) ) {
  1627 	    exit_block_emu(pc+2);
  1628 	    return 2;
  1629 	} else {
  1630 	    sh4_translate_instruction( pc + 2 );
  1631 	    exit_block_newpcset(pc+2);
  1632 	    return 4;
   1633 	}
   1634     }
   1635 :}
  1636 BT disp {:
  1637     COUNT_INST(I_BT);
  1638     if( sh4_x86.in_delay_slot ) {
  1639 	SLOTILLEGAL();
  1640     } else {
  1641 	sh4vma_t target = disp + pc + 4;
  1642 	JF_rel8( nottaken );
  1643 	exit_block_rel(target, pc+2 );
  1644 	JMP_TARGET(nottaken);
  1645 	return 2;
   1646     }
   1647 :}
  1648 BT/S disp {:
  1649     COUNT_INST(I_BTS);
  1650     if( sh4_x86.in_delay_slot ) {
  1651 	SLOTILLEGAL();
  1652     } else {
  1653 	sh4_x86.in_delay_slot = DELAY_PC;
  1654 	if( UNTRANSLATABLE(pc+2) ) {
  1655 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1656 	    JF_rel8(nottaken);
  1657 	    ADD_imm32_r32( disp, R_EAX );
  1658 	    JMP_TARGET(nottaken);
  1659 	    ADD_sh4r_r32( R_PC, R_EAX );
  1660 	    store_spreg( R_EAX, R_NEW_PC );
  1661 	    exit_block_emu(pc+2);
  1662 	    sh4_x86.branch_taken = TRUE;
  1663 	    return 2;
  1664 	} else {
  1665 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1666 		CMP_imm8s_sh4r( 1, R_T );
  1667 		sh4_x86.tstate = TSTATE_E;
   1668 	    }
   1669 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1670 	    int save_tstate = sh4_x86.tstate;
  1671 	    sh4_translate_instruction(pc+2);
  1672 	    exit_block_rel( disp + pc + 4, pc+4 );
  1673 	    // not taken
  1674 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1675 	    sh4_x86.tstate = save_tstate;
  1676 	    sh4_translate_instruction(pc+2);
  1677 	    return 4;
  1678 	}
  1679     }
  1680 :}
  1681 JMP @Rn {:  
  1682     COUNT_INST(I_JMP);
  1683     if( sh4_x86.in_delay_slot ) {
  1684 	SLOTILLEGAL();
  1685     } else {
  1686 	load_reg( R_ECX, Rn );
  1687 	store_spreg( R_ECX, R_NEW_PC );
  1688 	sh4_x86.in_delay_slot = DELAY_PC;
  1689 	sh4_x86.branch_taken = TRUE;
  1690 	if( UNTRANSLATABLE(pc+2) ) {
  1691 	    exit_block_emu(pc+2);
  1692 	    return 2;
  1693 	} else {
  1694 	    sh4_translate_instruction(pc+2);
  1695 	    exit_block_newpcset(pc+2);
  1696 	    return 4;
  1697 	}
  1698     }
  1699 :}
  1700 JSR @Rn {:  
  1701     COUNT_INST(I_JSR);
  1702     if( sh4_x86.in_delay_slot ) {
  1703 	SLOTILLEGAL();
  1704     } else {
  1705 	load_spreg( R_EAX, R_PC );
  1706 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1707 	store_spreg( R_EAX, R_PR );
  1708 	load_reg( R_ECX, Rn );
  1709 	store_spreg( R_ECX, R_NEW_PC );
  1710 	sh4_x86.in_delay_slot = DELAY_PC;
  1711 	sh4_x86.branch_taken = TRUE;
  1712 	sh4_x86.tstate = TSTATE_NONE;
  1713 	if( UNTRANSLATABLE(pc+2) ) {
  1714 	    exit_block_emu(pc+2);
  1715 	    return 2;
  1716 	} else {
  1717 	    sh4_translate_instruction(pc+2);
  1718 	    exit_block_newpcset(pc+2);
  1719 	    return 4;
  1720 	}
  1721     }
  1722 :}
  1723 RTE {:  
  1724     COUNT_INST(I_RTE);
  1725     if( sh4_x86.in_delay_slot ) {
  1726 	SLOTILLEGAL();
  1727     } else {
  1728 	check_priv();
  1729 	load_spreg( R_ECX, R_SPC );
  1730 	store_spreg( R_ECX, R_NEW_PC );
  1731 	load_spreg( R_EAX, R_SSR );
  1732 	call_func1( sh4_write_sr, R_EAX );
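       	/* Restoring SR from SSR may flip the privilege and FPU-enable bits,
       	 * so the cached results of check_priv()/check_fpuen() are
       	 * invalidated below. */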
  1733 	sh4_x86.in_delay_slot = DELAY_PC;
  1734 	sh4_x86.priv_checked = FALSE;
  1735 	sh4_x86.fpuen_checked = FALSE;
  1736 	sh4_x86.tstate = TSTATE_NONE;
  1737 	sh4_x86.branch_taken = TRUE;
  1738 	if( UNTRANSLATABLE(pc+2) ) {
  1739 	    exit_block_emu(pc+2);
  1740 	    return 2;
  1741 	} else {
  1742 	    sh4_translate_instruction(pc+2);
  1743 	    exit_block_newpcset(pc+2);
  1744 	    return 4;
  1745 	}
  1746     }
  1747 :}
  1748 RTS {:  
  1749     COUNT_INST(I_RTS);
  1750     if( sh4_x86.in_delay_slot ) {
  1751 	SLOTILLEGAL();
  1752     } else {
  1753 	load_spreg( R_ECX, R_PR );
  1754 	store_spreg( R_ECX, R_NEW_PC );
  1755 	sh4_x86.in_delay_slot = DELAY_PC;
  1756 	sh4_x86.branch_taken = TRUE;
  1757 	if( UNTRANSLATABLE(pc+2) ) {
  1758 	    exit_block_emu(pc+2);
  1759 	    return 2;
  1760 	} else {
  1761 	    sh4_translate_instruction(pc+2);
  1762 	    exit_block_newpcset(pc+2);
  1763 	    return 4;
  1764 	}
  1765     }
  1766 :}
  1767 TRAPA #imm {:  
  1768     COUNT_INST(I_TRAPA);
  1769     if( sh4_x86.in_delay_slot ) {
  1770 	SLOTILLEGAL();
  1771     } else {
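       	/* Commit the PC of the next instruction to the CPU state before
       	 * raising the trap, so the exception code saves the correct
       	 * return address. */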
  1772 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1773 	ADD_r32_sh4r( R_ECX, R_PC );
  1774 	load_imm32( R_EAX, imm );
  1775 	call_func1( sh4_raise_trap, R_EAX );
  1776 	sh4_x86.tstate = TSTATE_NONE;
  1777 	exit_block_pcset(pc);
  1778 	sh4_x86.branch_taken = TRUE;
  1779 	return 2;
  1780     }
  1781 :}
  1782 UNDEF {:  
  1783     COUNT_INST(I_UNDEF);
  1784     if( sh4_x86.in_delay_slot ) {
  1785 	SLOTILLEGAL();
  1786     } else {
  1787 	JMP_exc(EXC_ILLEGAL);
  1788 	return 2;
  1789     }
  1790 :}
  1792 CLRMAC {:  
  1793     COUNT_INST(I_CLRMAC);
  1794     XOR_r32_r32(R_EAX, R_EAX);
  1795     store_spreg( R_EAX, R_MACL );
  1796     store_spreg( R_EAX, R_MACH );
  1797     sh4_x86.tstate = TSTATE_NONE;
  1798 :}
  1799 CLRS {:
  1800     COUNT_INST(I_CLRS);
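           /* The host carry flag is used as a shuttle: CLC/STC loads it and
            * SETC_sh4r/SETC_t stores it into the SH4 flag. The same trick
            * implements CLRT/SETS/SETT below. */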
  1801     CLC();
  1802     SETC_sh4r(R_S);
  1803     sh4_x86.tstate = TSTATE_NONE;
  1804 :}
  1805 CLRT {:  
  1806     COUNT_INST(I_CLRT);
  1807     CLC();
  1808     SETC_t();
  1809     sh4_x86.tstate = TSTATE_C;
  1810 :}
  1811 SETS {:  
  1812     COUNT_INST(I_SETS);
  1813     STC();
  1814     SETC_sh4r(R_S);
  1815     sh4_x86.tstate = TSTATE_NONE;
  1816 :}
  1817 SETT {:  
  1818     COUNT_INST(I_SETT);
  1819     STC();
  1820     SETC_t();
  1821     sh4_x86.tstate = TSTATE_C;
  1822 :}
  1824 /* Floating point moves */
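       /* When FPSCR.SZ=1 (tracked statically in sh4_x86.double_size and
        * toggled by FSCHG below), FMOV transfers 64-bit register pairs
        * rather than single 32-bit registers, hence the separate SZ=0 and
        * SZ=1 code paths in each form. */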
  1825 FMOV FRm, FRn {:  
  1826     COUNT_INST(I_FMOV1);
  1827     check_fpuen();
  1828     if( sh4_x86.double_size ) {
  1829         load_dr0( R_EAX, FRm );
  1830         load_dr1( R_ECX, FRm );
  1831         store_dr0( R_EAX, FRn );
  1832         store_dr1( R_ECX, FRn );
  1833     } else {
  1834         load_fr( R_EAX, FRm ); // SZ=0 branch
  1835         store_fr( R_EAX, FRn );
  1836     }
  1837 :}
  1838 FMOV FRm, @Rn {: 
  1839     COUNT_INST(I_FMOV2);
  1840     check_fpuen();
  1841     load_reg( R_EAX, Rn );
  1842     if( sh4_x86.double_size ) {
  1843         check_walign64( R_EAX );
  1844         MMU_TRANSLATE_WRITE( R_EAX );
  1845         load_dr0( R_EDX, FRm );
  1846         load_dr1( R_ECX, FRm );
  1847         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1848     } else {
  1849         check_walign32( R_EAX );
  1850         MMU_TRANSLATE_WRITE( R_EAX );
  1851         load_fr( R_EDX, FRm );
  1852         MEM_WRITE_LONG( R_EAX, R_EDX );
  1853     }
  1854     sh4_x86.tstate = TSTATE_NONE;
  1855 :}
  1856 FMOV @Rm, FRn {:  
  1857     COUNT_INST(I_FMOV5);
  1858     check_fpuen();
  1859     load_reg( R_EAX, Rm );
  1860     if( sh4_x86.double_size ) {
  1861         check_ralign64( R_EAX );
  1862         MMU_TRANSLATE_READ( R_EAX );
  1863         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1864         store_dr0( R_EDX, FRn );
  1865         store_dr1( R_EAX, FRn );    
  1866     } else {
  1867         check_ralign32( R_EAX );
  1868         MMU_TRANSLATE_READ( R_EAX );
  1869         MEM_READ_LONG( R_EAX, R_EAX );
  1870         store_fr( R_EAX, FRn );
  1871     }
  1872     sh4_x86.tstate = TSTATE_NONE;
  1873 :}
  1874 FMOV FRm, @-Rn {:  
  1875     COUNT_INST(I_FMOV3);
  1876     check_fpuen();
  1877     load_reg( R_EAX, Rn );
  1878     if( sh4_x86.double_size ) {
  1879         check_walign64( R_EAX );
  1880         ADD_imm8s_r32(-8,R_EAX);
  1881         MMU_TRANSLATE_WRITE( R_EAX );
  1882         load_dr0( R_EDX, FRm );
  1883         load_dr1( R_ECX, FRm );
  1884         ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1885         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1886     } else {
  1887         check_walign32( R_EAX );
  1888         ADD_imm8s_r32( -4, R_EAX );
  1889         MMU_TRANSLATE_WRITE( R_EAX );
  1890         load_fr( R_EDX, FRm );
  1891         ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1892         MEM_WRITE_LONG( R_EAX, R_EDX );
  1893     }
  1894     sh4_x86.tstate = TSTATE_NONE;
  1895 :}
  1896 FMOV @Rm+, FRn {:
  1897     COUNT_INST(I_FMOV6);
  1898     check_fpuen();
  1899     load_reg( R_EAX, Rm );
  1900     if( sh4_x86.double_size ) {
  1901         check_ralign64( R_EAX );
  1902         MMU_TRANSLATE_READ( R_EAX );
  1903         ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1904         MEM_READ_DOUBLE( R_EAX, R_EDX, R_EAX );
  1905         store_dr0( R_EDX, FRn );
  1906         store_dr1( R_EAX, FRn );
  1907     } else {
  1908         check_ralign32( R_EAX );
  1909         MMU_TRANSLATE_READ( R_EAX );
  1910         ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1911         MEM_READ_LONG( R_EAX, R_EAX );
  1912         store_fr( R_EAX, FRn );
  1913     }
  1914     sh4_x86.tstate = TSTATE_NONE;
  1915 :}
  1916 FMOV FRm, @(R0, Rn) {:  
  1917     COUNT_INST(I_FMOV4);
  1918     check_fpuen();
  1919     load_reg( R_EAX, Rn );
  1920     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1921     if( sh4_x86.double_size ) {
  1922         check_walign64( R_EAX );
  1923         MMU_TRANSLATE_WRITE( R_EAX );
  1924         load_dr0( R_EDX, FRm );
  1925         load_dr1( R_ECX, FRm );
  1926         MEM_WRITE_DOUBLE( R_EAX, R_EDX, R_ECX );
  1927     } else {
  1928         check_walign32( R_EAX );
  1929         MMU_TRANSLATE_WRITE( R_EAX );
  1930         load_fr( R_EDX, FRm );
  1931         MEM_WRITE_LONG( R_EAX, R_EDX ); // 12
  1932     }
  1933     sh4_x86.tstate = TSTATE_NONE;
  1934 :}
  1935 FMOV @(R0, Rm), FRn {:  
  1936     COUNT_INST(I_FMOV7);
  1937     check_fpuen();
  1938     load_reg( R_EAX, Rm );
  1939     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1940     if( sh4_x86.double_size ) {
  1941         check_ralign64( R_EAX );
  1942         MMU_TRANSLATE_READ( R_EAX );
  1943         MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1944         store_dr0( R_ECX, FRn );
  1945         store_dr1( R_EAX, FRn );
  1946     } else {
  1947         check_ralign32( R_EAX );
  1948         MMU_TRANSLATE_READ( R_EAX );
  1949         MEM_READ_LONG( R_EAX, R_EAX );
  1950         store_fr( R_EAX, FRn );
  1951     }
  1952     sh4_x86.tstate = TSTATE_NONE;
  1953 :}
  1954 FLDI0 FRn {:  /* IFF PR=0 */
  1955     COUNT_INST(I_FLDI0);
  1956     check_fpuen();
  1957     if( sh4_x86.double_prec == 0 ) {
  1958         XOR_r32_r32( R_EAX, R_EAX );
  1959         store_fr( R_EAX, FRn );
  1960     }
  1961     sh4_x86.tstate = TSTATE_NONE;
  1962 :}
  1963 FLDI1 FRn {:  /* IFF PR=0 */
  1964     COUNT_INST(I_FLDI1);
  1965     check_fpuen();
  1966     if( sh4_x86.double_prec == 0 ) {
  1967         load_imm32(R_EAX, 0x3F800000);
  1968         store_fr( R_EAX, FRn );
  1969     }
  1970 :}
  1972 FLOAT FPUL, FRn {:  
  1973     COUNT_INST(I_FLOAT);
  1974     check_fpuen();
  1975     FILD_sh4r(R_FPUL);
  1976     if( sh4_x86.double_prec ) {
  1977         pop_dr( FRn );
  1978     } else {
  1979         pop_fr( FRn );
  1980     }
  1981 :}
  1982 FTRC FRm, FPUL {:  
  1983     COUNT_INST(I_FTRC);
  1984     check_fpuen();
  1985     if( sh4_x86.double_prec ) {
  1986         push_dr( FRm );
  1987     } else {
  1988         push_fr( FRm );
  1989     }
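           /* SH4 FTRC saturates on overflow, while x87 FISTP would store the
            * "integer indefinite" value instead. Compare against max_int and
            * min_int first and store the bound directly when out of range;
            * otherwise load trunc_fcw to force round-toward-zero around the
            * FISTP, since FTRC truncates regardless of the rounding mode. */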
  1990     load_ptr( R_ECX, &max_int );
  1991     FILD_r32ind( R_ECX );
  1992     FCOMIP_st(1);
  1993     JNA_rel8( sat );
  1994     load_ptr( R_ECX, &min_int );  // 5
  1995     FILD_r32ind( R_ECX );           // 2
  1996     FCOMIP_st(1);                   // 2
  1997     JAE_rel8( sat2 );            // 2
  1998     load_ptr( R_EAX, &save_fcw );
  1999     FNSTCW_r32ind( R_EAX );
  2000     load_ptr( R_EDX, &trunc_fcw );
  2001     FLDCW_r32ind( R_EDX );
  2002     FISTP_sh4r(R_FPUL);             // 3
  2003     FLDCW_r32ind( R_EAX );
  2004     JMP_rel8(end);             // 2
  2006     JMP_TARGET(sat);
  2007     JMP_TARGET(sat2);
  2008     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2009     store_spreg( R_ECX, R_FPUL );
  2010     FPOP_st();
  2011     JMP_TARGET(end);
  2012     sh4_x86.tstate = TSTATE_NONE;
  2013 :}
  2014 FLDS FRm, FPUL {:  
  2015     COUNT_INST(I_FLDS);
  2016     check_fpuen();
  2017     load_fr( R_EAX, FRm );
  2018     store_spreg( R_EAX, R_FPUL );
  2019 :}
  2020 FSTS FPUL, FRn {:  
  2021     COUNT_INST(I_FSTS);
  2022     check_fpuen();
  2023     load_spreg( R_EAX, R_FPUL );
  2024     store_fr( R_EAX, FRn );
  2025 :}
  2026 FCNVDS FRm, FPUL {:  
  2027     COUNT_INST(I_FCNVDS);
  2028     check_fpuen();
  2029     if( sh4_x86.double_prec ) {
  2030         push_dr( FRm );
  2031         pop_fpul();
  2032     }
  2033 :}
  2034 FCNVSD FPUL, FRn {:  
  2035     COUNT_INST(I_FCNVSD);
  2036     check_fpuen();
  2037     if( sh4_x86.double_prec ) {
  2038         push_fpul();
  2039         pop_dr( FRn );
  2040     }
  2041 :}
  2043 /* Floating point instructions */
  2044 FABS FRn {:  
  2045     COUNT_INST(I_FABS);
  2046     check_fpuen();
  2047     if( sh4_x86.double_prec ) {
  2048         push_dr(FRn);
  2049         FABS_st0();
  2050         pop_dr(FRn);
  2051     } else {
  2052         push_fr(FRn);
  2053         FABS_st0();
  2054         pop_fr(FRn);
  2055     }
  2056 :}
  2057 FADD FRm, FRn {:  
  2058     COUNT_INST(I_FADD);
  2059     check_fpuen();
  2060     if( sh4_x86.double_prec ) {
  2061         push_dr(FRm);
  2062         push_dr(FRn);
  2063         FADDP_st(1);
  2064         pop_dr(FRn);
  2065     } else {
  2066         push_fr(FRm);
  2067         push_fr(FRn);
  2068         FADDP_st(1);
  2069         pop_fr(FRn);
  2070     }
  2071 :}
  2072 FDIV FRm, FRn {:  
  2073     COUNT_INST(I_FDIV);
  2074     check_fpuen();
  2075     if( sh4_x86.double_prec ) {
  2076         push_dr(FRn);
  2077         push_dr(FRm);
  2078         FDIVP_st(1);
  2079         pop_dr(FRn);
  2080     } else {
  2081         push_fr(FRn);
  2082         push_fr(FRm);
  2083         FDIVP_st(1);
  2084         pop_fr(FRn);
  2085     }
  2086 :}
  2087 FMAC FR0, FRm, FRn {:  
  2088     COUNT_INST(I_FMAC);
  2089     check_fpuen();
  2090     if( sh4_x86.double_prec ) {
  2091         push_dr( 0 );
  2092         push_dr( FRm );
  2093         FMULP_st(1);
  2094         push_dr( FRn );
  2095         FADDP_st(1);
  2096         pop_dr( FRn );
  2097     } else {
  2098         push_fr( 0 );
  2099         push_fr( FRm );
  2100         FMULP_st(1);
  2101         push_fr( FRn );
  2102         FADDP_st(1);
  2103         pop_fr( FRn );
  2104     }
  2105 :}
  2107 FMUL FRm, FRn {:  
  2108     COUNT_INST(I_FMUL);
  2109     check_fpuen();
  2110     if( sh4_x86.double_prec ) {
  2111         push_dr(FRm);
  2112         push_dr(FRn);
  2113         FMULP_st(1);
  2114         pop_dr(FRn);
  2115     } else {
  2116         push_fr(FRm);
  2117         push_fr(FRn);
  2118         FMULP_st(1);
  2119         pop_fr(FRn);
  2120     }
  2121 :}
  2122 FNEG FRn {:  
  2123     COUNT_INST(I_FNEG);
  2124     check_fpuen();
  2125     if( sh4_x86.double_prec ) {
  2126         push_dr(FRn);
  2127         FCHS_st0();
  2128         pop_dr(FRn);
  2129     } else {
  2130         push_fr(FRn);
  2131         FCHS_st0();
  2132         pop_fr(FRn);
  2133     }
  2134 :}
  2135 FSRRA FRn {:  
  2136     COUNT_INST(I_FSRRA);
  2137     check_fpuen();
  2138     if( sh4_x86.double_prec == 0 ) {
  2139         FLD1_st0();
  2140         push_fr(FRn);
  2141         FSQRT_st0();
  2142         FDIVP_st(1);
  2143         pop_fr(FRn);
  2144     }
  2145 :}
  2146 FSQRT FRn {:  
  2147     COUNT_INST(I_FSQRT);
  2148     check_fpuen();
  2149     if( sh4_x86.double_prec ) {
  2150         push_dr(FRn);
  2151         FSQRT_st0();
  2152         pop_dr(FRn);
  2153     } else {
  2154         push_fr(FRn);
  2155         FSQRT_st0();
  2156         pop_fr(FRn);
  2157     }
  2158 :}
  2159 FSUB FRm, FRn {:  
  2160     COUNT_INST(I_FSUB);
  2161     check_fpuen();
  2162     if( sh4_x86.double_prec ) {
  2163         push_dr(FRn);
  2164         push_dr(FRm);
  2165         FSUBP_st(1);
  2166         pop_dr(FRn);
  2167     } else {
  2168         push_fr(FRn);
  2169         push_fr(FRm);
  2170         FSUBP_st(1);
  2171         pop_fr(FRn);
  2172     }
  2173 :}
  2175 FCMP/EQ FRm, FRn {:  
  2176     COUNT_INST(I_FCMPEQ);
  2177     check_fpuen();
  2178     if( sh4_x86.double_prec ) {
  2179         push_dr(FRm);
  2180         push_dr(FRn);
  2181     } else {
  2182         push_fr(FRm);
  2183         push_fr(FRn);
  2184     }
  2185     FCOMIP_st(1);
  2186     SETE_t();
  2187     FPOP_st();
  2188     sh4_x86.tstate = TSTATE_E;
  2189 :}
  2190 FCMP/GT FRm, FRn {:  
  2191     COUNT_INST(I_FCMPGT);
  2192     check_fpuen();
  2193     if( sh4_x86.double_prec ) {
  2194         push_dr(FRm);
  2195         push_dr(FRn);
  2196     } else {
  2197         push_fr(FRm);
  2198         push_fr(FRn);
  2199     }
  2200     FCOMIP_st(1);
  2201     SETA_t();
  2202     FPOP_st();
  2203     sh4_x86.tstate = TSTATE_A;
  2204 :}
  2206 FSCA FPUL, FRn {:  
  2207     COUNT_INST(I_FSCA);
  2208     check_fpuen();
  2209     if( sh4_x86.double_prec == 0 ) {
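               /* sh4_fsca expands the 16-bit fixed-point angle in FPUL into a
                * sin/cos pair written to FRn/FRn+1 (FRn&0x0E selects the even
                * register of the pair). */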
  2210         LEA_sh4r_rptr( REG_OFFSET(fr[0][FRn&0x0E]), R_EDX );
  2211         load_spreg( R_EAX, R_FPUL );
  2212         call_func2( sh4_fsca, R_EAX, R_EDX );
  2213     }
  2214     sh4_x86.tstate = TSTATE_NONE;
  2215 :}
  2216 FIPR FVm, FVn {:  
  2217     COUNT_INST(I_FIPR);
  2218     check_fpuen();
  2219     if( sh4_x86.double_prec == 0 ) {
  2220         if( sh4_x86.sse3_enabled ) {
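                   /* Dot product FVm.FVn: the four products land in xmm4 and
                    * two HADDPS passes fold the sum into every lane. The scalar
                    * result belongs in FR[(FVn<<2)+3], which appears to live at
                    * in-memory offset (FVn<<2)+2 because the FP banks store each
                    * register pair word-swapped (compare pop_fr below). */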
  2221             MOVAPS_sh4r_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2222             MULPS_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2223             HADDPS_xmm_xmm( 4, 4 ); 
  2224             HADDPS_xmm_xmm( 4, 4 );
  2225             MOVSS_xmm_sh4r( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2226         } else {
  2227             push_fr( FVm<<2 );
  2228             push_fr( FVn<<2 );
  2229             FMULP_st(1);
  2230             push_fr( (FVm<<2)+1);
  2231             push_fr( (FVn<<2)+1);
  2232             FMULP_st(1);
  2233             FADDP_st(1);
  2234             push_fr( (FVm<<2)+2);
  2235             push_fr( (FVn<<2)+2);
  2236             FMULP_st(1);
  2237             FADDP_st(1);
  2238             push_fr( (FVm<<2)+3);
  2239             push_fr( (FVn<<2)+3);
  2240             FMULP_st(1);
  2241             FADDP_st(1);
  2242             pop_fr( (FVn<<2)+3);
  2243         }
  2244     }
  2245 :}
  2246 FTRV XMTRX, FVn {:  
  2247     COUNT_INST(I_FTRV);
  2248     check_fpuen();
  2249     if( sh4_x86.double_prec == 0 ) {
  2250         if( sh4_x86.sse3_enabled ) {
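                   /* 4x4 matrix * vector: the XMTRX columns are loaded into
                    * xmm0-xmm3, each element of FVn is broadcast across its own
                    * register (MOVSLDUP/MOVSHDUP then MOVLHPS/MOVHLPS), and the
                    * four scaled columns are summed into xmm4. */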
  2251             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2252             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2253             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2254             MOVAPS_sh4r_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2256             MOVSLDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2257             MOVSHDUP_sh4r_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2258             MOVAPS_xmm_xmm( 4, 6 );
  2259             MOVAPS_xmm_xmm( 5, 7 );
  2260             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2261             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2262             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2263             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2264             MULPS_xmm_xmm( 0, 4 );
  2265             MULPS_xmm_xmm( 1, 5 );
  2266             MULPS_xmm_xmm( 2, 6 );
  2267             MULPS_xmm_xmm( 3, 7 );
  2268             ADDPS_xmm_xmm( 5, 4 );
  2269             ADDPS_xmm_xmm( 7, 6 );
  2270             ADDPS_xmm_xmm( 6, 4 );
  2271             MOVAPS_xmm_sh4r( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2272         } else {
  2273             LEA_sh4r_rptr( REG_OFFSET(fr[0][FVn<<2]), R_EAX );
  2274             call_func1( sh4_ftrv, R_EAX );
  2275         }
  2276     }
  2277     sh4_x86.tstate = TSTATE_NONE;
  2278 :}
  2280 FRCHG {:  
  2281     COUNT_INST(I_FRCHG);
  2282     check_fpuen();
  2283     load_spreg( R_ECX, R_FPSCR );
  2284     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2285     store_spreg( R_ECX, R_FPSCR );
  2286     call_func0( sh4_switch_fr_banks );
  2287     sh4_x86.tstate = TSTATE_NONE;
  2288 :}
  2289 FSCHG {:  
  2290     COUNT_INST(I_FSCHG);
  2291     check_fpuen();
  2292     load_spreg( R_ECX, R_FPSCR );
  2293     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2294     store_spreg( R_ECX, R_FPSCR );
  2295     sh4_x86.tstate = TSTATE_NONE;
  2296     sh4_x86.double_size = !sh4_x86.double_size;
  2297 :}
  2299 /* Processor control instructions */
  2300 LDC Rm, SR {:
  2301     COUNT_INST(I_LDCSR);
  2302     if( sh4_x86.in_delay_slot ) {
  2303 	SLOTILLEGAL();
  2304     } else {
  2305 	check_priv();
  2306 	load_reg( R_EAX, Rm );
  2307 	call_func1( sh4_write_sr, R_EAX );
  2308 	sh4_x86.priv_checked = FALSE;
  2309 	sh4_x86.fpuen_checked = FALSE;
  2310 	sh4_x86.tstate = TSTATE_NONE;
  2311     }
  2312 :}
  2313 LDC Rm, GBR {: 
  2314     COUNT_INST(I_LDC);
  2315     load_reg( R_EAX, Rm );
  2316     store_spreg( R_EAX, R_GBR );
  2317 :}
  2318 LDC Rm, VBR {:  
  2319     COUNT_INST(I_LDC);
  2320     check_priv();
  2321     load_reg( R_EAX, Rm );
  2322     store_spreg( R_EAX, R_VBR );
  2323     sh4_x86.tstate = TSTATE_NONE;
  2324 :}
  2325 LDC Rm, SSR {:  
  2326     COUNT_INST(I_LDC);
  2327     check_priv();
  2328     load_reg( R_EAX, Rm );
  2329     store_spreg( R_EAX, R_SSR );
  2330     sh4_x86.tstate = TSTATE_NONE;
  2331 :}
  2332 LDC Rm, SGR {:  
  2333     COUNT_INST(I_LDC);
  2334     check_priv();
  2335     load_reg( R_EAX, Rm );
  2336     store_spreg( R_EAX, R_SGR );
  2337     sh4_x86.tstate = TSTATE_NONE;
  2338 :}
  2339 LDC Rm, SPC {:  
  2340     COUNT_INST(I_LDC);
  2341     check_priv();
  2342     load_reg( R_EAX, Rm );
  2343     store_spreg( R_EAX, R_SPC );
  2344     sh4_x86.tstate = TSTATE_NONE;
  2345 :}
  2346 LDC Rm, DBR {:  
  2347     COUNT_INST(I_LDC);
  2348     check_priv();
  2349     load_reg( R_EAX, Rm );
  2350     store_spreg( R_EAX, R_DBR );
  2351     sh4_x86.tstate = TSTATE_NONE;
  2352 :}
  2353 LDC Rm, Rn_BANK {:  
  2354     COUNT_INST(I_LDC);
  2355     check_priv();
  2356     load_reg( R_EAX, Rm );
  2357     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2358     sh4_x86.tstate = TSTATE_NONE;
  2359 :}
  2360 LDC.L @Rm+, GBR {:  
  2361     COUNT_INST(I_LDCM);
  2362     load_reg( R_EAX, Rm );
  2363     check_ralign32( R_EAX );
  2364     MMU_TRANSLATE_READ( R_EAX );
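           /* Note the ordering: Rm is only post-incremented once address
            * translation has succeeded, so a TLB miss leaves the register
            * file untouched and the instruction can be cleanly restarted.
            * All of the LDC.L/LDS.L forms below follow the same pattern. */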
  2365     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2366     MEM_READ_LONG( R_EAX, R_EAX );
  2367     store_spreg( R_EAX, R_GBR );
  2368     sh4_x86.tstate = TSTATE_NONE;
  2369 :}
  2370 LDC.L @Rm+, SR {:
  2371     COUNT_INST(I_LDCSRM);
  2372     if( sh4_x86.in_delay_slot ) {
  2373 	SLOTILLEGAL();
  2374     } else {
  2375 	check_priv();
  2376 	load_reg( R_EAX, Rm );
  2377 	check_ralign32( R_EAX );
  2378 	MMU_TRANSLATE_READ( R_EAX );
  2379 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2380 	MEM_READ_LONG( R_EAX, R_EAX );
  2381 	call_func1( sh4_write_sr, R_EAX );
  2382 	sh4_x86.priv_checked = FALSE;
  2383 	sh4_x86.fpuen_checked = FALSE;
  2384 	sh4_x86.tstate = TSTATE_NONE;
  2385     }
  2386 :}
  2387 LDC.L @Rm+, VBR {:  
  2388     COUNT_INST(I_LDCM);
  2389     check_priv();
  2390     load_reg( R_EAX, Rm );
  2391     check_ralign32( R_EAX );
  2392     MMU_TRANSLATE_READ( R_EAX );
  2393     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2394     MEM_READ_LONG( R_EAX, R_EAX );
  2395     store_spreg( R_EAX, R_VBR );
  2396     sh4_x86.tstate = TSTATE_NONE;
  2397 :}
  2398 LDC.L @Rm+, SSR {:
  2399     COUNT_INST(I_LDCM);
  2400     check_priv();
  2401     load_reg( R_EAX, Rm );
  2402     check_ralign32( R_EAX );
  2403     MMU_TRANSLATE_READ( R_EAX );
  2404     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2405     MEM_READ_LONG( R_EAX, R_EAX );
  2406     store_spreg( R_EAX, R_SSR );
  2407     sh4_x86.tstate = TSTATE_NONE;
  2408 :}
  2409 LDC.L @Rm+, SGR {:  
  2410     COUNT_INST(I_LDCM);
  2411     check_priv();
  2412     load_reg( R_EAX, Rm );
  2413     check_ralign32( R_EAX );
  2414     MMU_TRANSLATE_READ( R_EAX );
  2415     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2416     MEM_READ_LONG( R_EAX, R_EAX );
  2417     store_spreg( R_EAX, R_SGR );
  2418     sh4_x86.tstate = TSTATE_NONE;
  2419 :}
  2420 LDC.L @Rm+, SPC {:  
  2421     COUNT_INST(I_LDCM);
  2422     check_priv();
  2423     load_reg( R_EAX, Rm );
  2424     check_ralign32( R_EAX );
  2425     MMU_TRANSLATE_READ( R_EAX );
  2426     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2427     MEM_READ_LONG( R_EAX, R_EAX );
  2428     store_spreg( R_EAX, R_SPC );
  2429     sh4_x86.tstate = TSTATE_NONE;
  2430 :}
  2431 LDC.L @Rm+, DBR {:  
  2432     COUNT_INST(I_LDCM);
  2433     check_priv();
  2434     load_reg( R_EAX, Rm );
  2435     check_ralign32( R_EAX );
  2436     MMU_TRANSLATE_READ( R_EAX );
  2437     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2438     MEM_READ_LONG( R_EAX, R_EAX );
  2439     store_spreg( R_EAX, R_DBR );
  2440     sh4_x86.tstate = TSTATE_NONE;
  2441 :}
  2442 LDC.L @Rm+, Rn_BANK {:  
  2443     COUNT_INST(I_LDCM);
  2444     check_priv();
  2445     load_reg( R_EAX, Rm );
  2446     check_ralign32( R_EAX );
  2447     MMU_TRANSLATE_READ( R_EAX );
  2448     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2449     MEM_READ_LONG( R_EAX, R_EAX );
  2450     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2451     sh4_x86.tstate = TSTATE_NONE;
  2452 :}
  2453 LDS Rm, FPSCR {:
  2454     COUNT_INST(I_LDSFPSCR);
  2455     check_fpuen();
  2456     load_reg( R_EAX, Rm );
  2457     call_func1( sh4_write_fpscr, R_EAX );
  2458     sh4_x86.tstate = TSTATE_NONE;
  2459     return 2;
  2460 :}
  2461 LDS.L @Rm+, FPSCR {:  
  2462     COUNT_INST(I_LDSFPSCRM);
  2463     check_fpuen();
  2464     load_reg( R_EAX, Rm );
  2465     check_ralign32( R_EAX );
  2466     MMU_TRANSLATE_READ( R_EAX );
  2467     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2468     MEM_READ_LONG( R_EAX, R_EAX );
  2469     call_func1( sh4_write_fpscr, R_EAX );
  2470     sh4_x86.tstate = TSTATE_NONE;
  2471     return 2;
  2472 :}
  2473 LDS Rm, FPUL {:  
  2474     COUNT_INST(I_LDS);
  2475     check_fpuen();
  2476     load_reg( R_EAX, Rm );
  2477     store_spreg( R_EAX, R_FPUL );
  2478 :}
  2479 LDS.L @Rm+, FPUL {:  
  2480     COUNT_INST(I_LDSM);
  2481     check_fpuen();
  2482     load_reg( R_EAX, Rm );
  2483     check_ralign32( R_EAX );
  2484     MMU_TRANSLATE_READ( R_EAX );
  2485     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2486     MEM_READ_LONG( R_EAX, R_EAX );
  2487     store_spreg( R_EAX, R_FPUL );
  2488     sh4_x86.tstate = TSTATE_NONE;
  2489 :}
  2490 LDS Rm, MACH {: 
  2491     COUNT_INST(I_LDS);
  2492     load_reg( R_EAX, Rm );
  2493     store_spreg( R_EAX, R_MACH );
  2494 :}
  2495 LDS.L @Rm+, MACH {:  
  2496     COUNT_INST(I_LDSM);
  2497     load_reg( R_EAX, Rm );
  2498     check_ralign32( R_EAX );
  2499     MMU_TRANSLATE_READ( R_EAX );
  2500     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2501     MEM_READ_LONG( R_EAX, R_EAX );
  2502     store_spreg( R_EAX, R_MACH );
  2503     sh4_x86.tstate = TSTATE_NONE;
  2504 :}
  2505 LDS Rm, MACL {:  
  2506     COUNT_INST(I_LDS);
  2507     load_reg( R_EAX, Rm );
  2508     store_spreg( R_EAX, R_MACL );
  2509 :}
  2510 LDS.L @Rm+, MACL {:  
  2511     COUNT_INST(I_LDSM);
  2512     load_reg( R_EAX, Rm );
  2513     check_ralign32( R_EAX );
  2514     MMU_TRANSLATE_READ( R_EAX );
  2515     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2516     MEM_READ_LONG( R_EAX, R_EAX );
  2517     store_spreg( R_EAX, R_MACL );
  2518     sh4_x86.tstate = TSTATE_NONE;
  2519 :}
  2520 LDS Rm, PR {:  
  2521     COUNT_INST(I_LDS);
  2522     load_reg( R_EAX, Rm );
  2523     store_spreg( R_EAX, R_PR );
  2524 :}
  2525 LDS.L @Rm+, PR {:  
  2526     COUNT_INST(I_LDSM);
  2527     load_reg( R_EAX, Rm );
  2528     check_ralign32( R_EAX );
  2529     MMU_TRANSLATE_READ( R_EAX );
  2530     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2531     MEM_READ_LONG( R_EAX, R_EAX );
  2532     store_spreg( R_EAX, R_PR );
  2533     sh4_x86.tstate = TSTATE_NONE;
  2534 :}
  2535 LDTLB {:  
  2536     COUNT_INST(I_LDTLB);
  2537     call_func0( MMU_ldtlb );
  2538     sh4_x86.tstate = TSTATE_NONE;
  2539 :}
  2540 OCBI @Rn {:
  2541     COUNT_INST(I_OCBI);
  2542 :}
  2543 OCBP @Rn {:
  2544     COUNT_INST(I_OCBP);
  2545 :}
  2546 OCBWB @Rn {:
  2547     COUNT_INST(I_OCBWB);
  2548 :}
  2549 PREF @Rn {:
  2550     COUNT_INST(I_PREF);
  2551     load_reg( R_EAX, Rn );
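           /* PREF only has an architectural side effect for store-queue
            * addresses (0xE0000000-0xE3FFFFFF); mask the top bits and skip
            * the flush entirely for any other region. */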
  2552     MOV_r32_r32( R_EAX, R_ECX );
  2553     AND_imm32_r32( 0xFC000000, R_ECX );
  2554     CMP_imm32_r32( 0xE0000000, R_ECX );
  2555     JNE_rel8(end);
  2556     if( sh4_x86.tlb_on ) {
  2557     	call_func1( sh4_flush_store_queue_mmu, R_EAX );
  2558         TEST_r32_r32( R_EAX, R_EAX );
  2559         JE_exc(-1);
  2560     } else {
  2561     	call_func1( sh4_flush_store_queue, R_EAX );
  2562     }
  2563     JMP_TARGET(end);
  2564     sh4_x86.tstate = TSTATE_NONE;
  2565 :}
  2566 SLEEP {: 
  2567     COUNT_INST(I_SLEEP);
  2568     check_priv();
  2569     call_func0( sh4_sleep );
  2570     sh4_x86.tstate = TSTATE_NONE;
  2571     sh4_x86.in_delay_slot = DELAY_NONE;
  2572     return 2;
  2573 :}
  2574 STC SR, Rn {:
  2575     COUNT_INST(I_STCSR);
  2576     check_priv();
  2577     call_func0(sh4_read_sr);
  2578     store_reg( R_EAX, Rn );
  2579     sh4_x86.tstate = TSTATE_NONE;
  2580 :}
  2581 STC GBR, Rn {:  
  2582     COUNT_INST(I_STC);
  2583     load_spreg( R_EAX, R_GBR );
  2584     store_reg( R_EAX, Rn );
  2585 :}
  2586 STC VBR, Rn {:  
  2587     COUNT_INST(I_STC);
  2588     check_priv();
  2589     load_spreg( R_EAX, R_VBR );
  2590     store_reg( R_EAX, Rn );
  2591     sh4_x86.tstate = TSTATE_NONE;
  2592 :}
  2593 STC SSR, Rn {:  
  2594     COUNT_INST(I_STC);
  2595     check_priv();
  2596     load_spreg( R_EAX, R_SSR );
  2597     store_reg( R_EAX, Rn );
  2598     sh4_x86.tstate = TSTATE_NONE;
  2599 :}
  2600 STC SPC, Rn {:  
  2601     COUNT_INST(I_STC);
  2602     check_priv();
  2603     load_spreg( R_EAX, R_SPC );
  2604     store_reg( R_EAX, Rn );
  2605     sh4_x86.tstate = TSTATE_NONE;
  2606 :}
  2607 STC SGR, Rn {:  
  2608     COUNT_INST(I_STC);
  2609     check_priv();
  2610     load_spreg( R_EAX, R_SGR );
  2611     store_reg( R_EAX, Rn );
  2612     sh4_x86.tstate = TSTATE_NONE;
  2613 :}
  2614 STC DBR, Rn {:  
  2615     COUNT_INST(I_STC);
  2616     check_priv();
  2617     load_spreg( R_EAX, R_DBR );
  2618     store_reg( R_EAX, Rn );
  2619     sh4_x86.tstate = TSTATE_NONE;
  2620 :}
  2621 STC Rm_BANK, Rn {:
  2622     COUNT_INST(I_STC);
  2623     check_priv();
  2624     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2625     store_reg( R_EAX, Rn );
  2626     sh4_x86.tstate = TSTATE_NONE;
  2627 :}
  2628 STC.L SR, @-Rn {:
  2629     COUNT_INST(I_STCSRM);
  2630     check_priv();
  2631     load_reg( R_EAX, Rn );
  2632     check_walign32( R_EAX );
  2633     ADD_imm8s_r32( -4, R_EAX );
  2634     MMU_TRANSLATE_WRITE( R_EAX );
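           /* sh4_read_sr returns SR in EAX, clobbering the translated target
            * address, so the address is parked in a stack temporary across
            * the call. */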
  2635     MOV_r32_esp8( R_EAX, 0 );
  2636     call_func0( sh4_read_sr );
  2637     MOV_r32_r32( R_EAX, R_EDX );
  2638     MOV_esp8_r32( 0, R_EAX );
  2639     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2640     MEM_WRITE_LONG( R_EAX, R_EDX );
  2641     sh4_x86.tstate = TSTATE_NONE;
  2642 :}
  2643 STC.L VBR, @-Rn {:  
  2644     COUNT_INST(I_STCM);
  2645     check_priv();
  2646     load_reg( R_EAX, Rn );
  2647     check_walign32( R_EAX );
  2648     ADD_imm8s_r32( -4, R_EAX );
  2649     MMU_TRANSLATE_WRITE( R_EAX );
  2650     load_spreg( R_EDX, R_VBR );
  2651     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2652     MEM_WRITE_LONG( R_EAX, R_EDX );
  2653     sh4_x86.tstate = TSTATE_NONE;
  2654 :}
  2655 STC.L SSR, @-Rn {:  
  2656     COUNT_INST(I_STCM);
  2657     check_priv();
  2658     load_reg( R_EAX, Rn );
  2659     check_walign32( R_EAX );
  2660     ADD_imm8s_r32( -4, R_EAX );
  2661     MMU_TRANSLATE_WRITE( R_EAX );
  2662     load_spreg( R_EDX, R_SSR );
  2663     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2664     MEM_WRITE_LONG( R_EAX, R_EDX );
  2665     sh4_x86.tstate = TSTATE_NONE;
  2666 :}
  2667 STC.L SPC, @-Rn {:
  2668     COUNT_INST(I_STCM);
  2669     check_priv();
  2670     load_reg( R_EAX, Rn );
  2671     check_walign32( R_EAX );
  2672     ADD_imm8s_r32( -4, R_EAX );
  2673     MMU_TRANSLATE_WRITE( R_EAX );
  2674     load_spreg( R_EDX, R_SPC );
  2675     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2676     MEM_WRITE_LONG( R_EAX, R_EDX );
  2677     sh4_x86.tstate = TSTATE_NONE;
  2678 :}
  2679 STC.L SGR, @-Rn {:  
  2680     COUNT_INST(I_STCM);
  2681     check_priv();
  2682     load_reg( R_EAX, Rn );
  2683     check_walign32( R_EAX );
  2684     ADD_imm8s_r32( -4, R_EAX );
  2685     MMU_TRANSLATE_WRITE( R_EAX );
  2686     load_spreg( R_EDX, R_SGR );
  2687     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2688     MEM_WRITE_LONG( R_EAX, R_EDX );
  2689     sh4_x86.tstate = TSTATE_NONE;
  2690 :}
  2691 STC.L DBR, @-Rn {:  
  2692     COUNT_INST(I_STCM);
  2693     check_priv();
  2694     load_reg( R_EAX, Rn );
  2695     check_walign32( R_EAX );
  2696     ADD_imm8s_r32( -4, R_EAX );
  2697     MMU_TRANSLATE_WRITE( R_EAX );
  2698     load_spreg( R_EDX, R_DBR );
  2699     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2700     MEM_WRITE_LONG( R_EAX, R_EDX );
  2701     sh4_x86.tstate = TSTATE_NONE;
  2702 :}
  2703 STC.L Rm_BANK, @-Rn {:  
  2704     COUNT_INST(I_STCM);
  2705     check_priv();
  2706     load_reg( R_EAX, Rn );
  2707     check_walign32( R_EAX );
  2708     ADD_imm8s_r32( -4, R_EAX );
  2709     MMU_TRANSLATE_WRITE( R_EAX );
  2710     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2711     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2712     MEM_WRITE_LONG( R_EAX, R_EDX );
  2713     sh4_x86.tstate = TSTATE_NONE;
  2714 :}
  2715 STC.L GBR, @-Rn {:  
  2716     COUNT_INST(I_STCM);
  2717     load_reg( R_EAX, Rn );
  2718     check_walign32( R_EAX );
  2719     ADD_imm8s_r32( -4, R_EAX );
  2720     MMU_TRANSLATE_WRITE( R_EAX );
  2721     load_spreg( R_EDX, R_GBR );
  2722     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2723     MEM_WRITE_LONG( R_EAX, R_EDX );
  2724     sh4_x86.tstate = TSTATE_NONE;
  2725 :}
  2726 STS FPSCR, Rn {:  
  2727     COUNT_INST(I_STSFPSCR);
  2728     check_fpuen();
  2729     load_spreg( R_EAX, R_FPSCR );
  2730     store_reg( R_EAX, Rn );
  2731 :}
  2732 STS.L FPSCR, @-Rn {:  
  2733     COUNT_INST(I_STSFPSCRM);
  2734     check_fpuen();
  2735     load_reg( R_EAX, Rn );
  2736     check_walign32( R_EAX );
  2737     ADD_imm8s_r32( -4, R_EAX );
  2738     MMU_TRANSLATE_WRITE( R_EAX );
  2739     load_spreg( R_EDX, R_FPSCR );
  2740     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2741     MEM_WRITE_LONG( R_EAX, R_EDX );
  2742     sh4_x86.tstate = TSTATE_NONE;
  2743 :}
  2744 STS FPUL, Rn {:  
  2745     COUNT_INST(I_STS);
  2746     check_fpuen();
  2747     load_spreg( R_EAX, R_FPUL );
  2748     store_reg( R_EAX, Rn );
  2749 :}
  2750 STS.L FPUL, @-Rn {:  
  2751     COUNT_INST(I_STSM);
  2752     check_fpuen();
  2753     load_reg( R_EAX, Rn );
  2754     check_walign32( R_EAX );
  2755     ADD_imm8s_r32( -4, R_EAX );
  2756     MMU_TRANSLATE_WRITE( R_EAX );
  2757     load_spreg( R_EDX, R_FPUL );
  2758     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2759     MEM_WRITE_LONG( R_EAX, R_EDX );
  2760     sh4_x86.tstate = TSTATE_NONE;
  2761 :}
  2762 STS MACH, Rn {:  
  2763     COUNT_INST(I_STS);
  2764     load_spreg( R_EAX, R_MACH );
  2765     store_reg( R_EAX, Rn );
  2766 :}
  2767 STS.L MACH, @-Rn {:  
  2768     COUNT_INST(I_STSM);
  2769     load_reg( R_EAX, Rn );
  2770     check_walign32( R_EAX );
  2771     ADD_imm8s_r32( -4, R_EAX );
  2772     MMU_TRANSLATE_WRITE( R_EAX );
  2773     load_spreg( R_EDX, R_MACH );
  2774     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2775     MEM_WRITE_LONG( R_EAX, R_EDX );
  2776     sh4_x86.tstate = TSTATE_NONE;
  2777 :}
  2778 STS MACL, Rn {:  
  2779     COUNT_INST(I_STS);
  2780     load_spreg( R_EAX, R_MACL );
  2781     store_reg( R_EAX, Rn );
  2782 :}
  2783 STS.L MACL, @-Rn {:  
  2784     COUNT_INST(I_STSM);
  2785     load_reg( R_EAX, Rn );
  2786     check_walign32( R_EAX );
  2787     ADD_imm8s_r32( -4, R_EAX );
  2788     MMU_TRANSLATE_WRITE( R_EAX );
  2789     load_spreg( R_EDX, R_MACL );
  2790     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2791     MEM_WRITE_LONG( R_EAX, R_EDX );
  2792     sh4_x86.tstate = TSTATE_NONE;
  2793 :}
  2794 STS PR, Rn {:  
  2795     COUNT_INST(I_STS);
  2796     load_spreg( R_EAX, R_PR );
  2797     store_reg( R_EAX, Rn );
  2798 :}
  2799 STS.L PR, @-Rn {:  
  2800     COUNT_INST(I_STSM);
  2801     load_reg( R_EAX, Rn );
  2802     check_walign32( R_EAX );
  2803     ADD_imm8s_r32( -4, R_EAX );
  2804     MMU_TRANSLATE_WRITE( R_EAX );
  2805     load_spreg( R_EDX, R_PR );
  2806     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2807     MEM_WRITE_LONG( R_EAX, R_EDX );
  2808     sh4_x86.tstate = TSTATE_NONE;
  2809 :}
  2811 NOP {: 
  2812     COUNT_INST(I_NOP);
  2813     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  2814 :}
  2815 %%
  2816     sh4_x86.in_delay_slot = DELAY_NONE;
  2817     return 0;