lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 669:ab344e42bca9
prev 626:a010e30a30e9
next 671:a530ea88eebd
author nkeynes
date Mon May 12 10:00:13 2008 +0000
permissions -rw-r--r--
last change Cleanup most of the -Wall warnings (getting a bit sloppy...)
Convert FP code to use fixed banks rather than indirect pointer
(3-4% faster this way now)
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
    37 struct backpatch_record {
    38     uint32_t fixup_offset;
    39     uint32_t fixup_icount;
    40     int32_t exc_code;
    41 };
    43 #define MAX_RECOVERY_SIZE 2048
    45 #define DELAY_NONE 0
    46 #define DELAY_PC 1
    47 #define DELAY_PC_PR 2
    49 /** 
    50  * Struct to manage internal translation state. This state is not saved -
    51  * it is only valid between calls to sh4_translate_begin_block() and
    52  * sh4_translate_end_block()
    53  */
    54 struct sh4_x86_state {
    55     int in_delay_slot;
    56     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    57     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    58     gboolean branch_taken; /* true if we branched unconditionally */
    59     uint32_t block_start_pc;
    60     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    61     int tstate;
    63     /* mode flags */
    64     gboolean tlb_on; /* True if tlb translation is active */
    66     /* Allocated memory for the (block-wide) back-patch list */
    67     struct backpatch_record *backpatch_list;
    68     uint32_t backpatch_posn;
    69     uint32_t backpatch_size;
    70 };
    72 #define TSTATE_NONE -1
    73 #define TSTATE_O    0
    74 #define TSTATE_C    2
    75 #define TSTATE_E    4
    76 #define TSTATE_NE   5
    77 #define TSTATE_G    0xF
    78 #define TSTATE_GE   0xD
    79 #define TSTATE_A    7
    80 #define TSTATE_AE   3
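These tstate values are the x86 condition-code numbers themselves, which is what lets the macros below build a conditional jump arithmetically; a worked example of the encoding:

    /* Jcc rel8 is opcode 0x70 + condition code, and XOR-ing the low bit
     * of the code negates the condition. With tstate == TSTATE_E (4):
     *   OP(0x70 + sh4_x86.tstate);         // emits 0x74 = JE/JZ rel8
     *   OP(0x70 + (sh4_x86.tstate ^ 1));   // emits 0x75 = JNE/JNZ rel8
     */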
    82 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    83 #define JT_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    84 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    85     OP(0x70+sh4_x86.tstate); MARK_JMP8(label); OP(-1)
    87 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    88 #define JF_rel8(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    89 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    90     OP(0x70+ (sh4_x86.tstate^1)); MARK_JMP8(label); OP(-1)
    92 static struct sh4_x86_state sh4_x86;
    94 static uint32_t max_int = 0x7FFFFFFF;
    95 static uint32_t min_int = 0x80000000;
    96 static uint32_t save_fcw; /* save value for fpu control word */
    97 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    99 void sh4_translate_init(void)
   100 {
   101     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   102     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   103 }
   106 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   107 {
   108     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   109 	sh4_x86.backpatch_size <<= 1;
   110 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   111 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   112 	assert( sh4_x86.backpatch_list != NULL );
   113     }
   114     if( sh4_x86.in_delay_slot ) {
   115 	fixup_pc -= 2;
   116     }
   117     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   118 	((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code);
   119     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   120     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   121     sh4_x86.backpatch_posn++;
   122 }
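The records collected here are replayed when the block is finished; the real consumer lives elsewhere in the translator, so the following is only a sketch of the idea (function name illustrative):

    /* Sketch: point each recorded rel32 jump at an exception stub emitted
     * after the block body. */
    static void backpatch_sketch( uint8_t *code_base )
    {
        unsigned i;
        for( i = 0; i < sh4_x86.backpatch_posn; i++ ) {
            struct backpatch_record *r = &sh4_x86.backpatch_list[i];
            uint32_t *fixup = (uint32_t *)(code_base + r->fixup_offset);
            /* rel32 is relative to the end of the 4-byte displacement */
            *fixup = (uint32_t)(xlat_output - (code_base + r->fixup_offset + 4));
            /* ... emit a stub here that winds PC back by fixup_icount
             * instructions and raises exc_code (-1 meaning re-raise the
             * pending TLB exception, per the MMU_TRANSLATE macros below) ... */
        }
    }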
   124 /**
   125  * Emit an instruction to load an SH4 reg into a real register
   126  */
   127 static inline void load_reg( int x86reg, int sh4reg ) 
   128 {
   129     /* mov [bp+n], reg */
   130     OP(0x8B);
   131     OP(0x45 + (x86reg<<3));
   132     OP(REG_OFFSET(r[sh4reg]));
   133 }
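Decoding the three bytes: 0x8B is MOV r32, r/m32, and 0x45+(x86reg<<3) is a ModRM byte with mod=01 (8-bit displacement) and base=EBP; for example, taking R_EAX as encoding 0:

    /* load_reg( R_EAX, 1 )  emits  8B 45 <REG_OFFSET(r[1])>
     *   i.e.  mov eax, [ebp + offset of r[1]]
     * This relies on the register-file offsets fitting in a signed byte. */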
   135 static inline void load_reg16s( int x86reg, int sh4reg )
   136 {
   137     OP(0x0F);
   138     OP(0xBF);
   139     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   140 }
   142 static inline void load_reg16u( int x86reg, int sh4reg )
   143 {
   144     OP(0x0F);
   145     OP(0xB7);
   146     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   148 }
   150 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   151 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   152 /**
   153  * Emit an instruction to load an immediate value into a register
   154  */
   155 static inline void load_imm32( int x86reg, uint32_t value ) {
   156     /* mov #value, reg */
   157     OP(0xB8 + x86reg);
   158     OP32(value);
   159 }
   161 /**
   162  * Load an immediate 64-bit quantity (note: x86-64 only)
   163  */
    164 static inline void load_imm64( int x86reg, uint64_t value ) {
   165     /* mov #value, reg */
   166     REXW();
   167     OP(0xB8 + x86reg);
   168     OP64(value);
   169 }
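For reference, the REX.W prefix widens the B8+r move to take a full 8-byte immediate (hence the 64-bit value parameter):

    /* load_imm64( 0, v )  emits  48 B8 <8-byte v>,  i.e.  mov rax, v */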
   171 /**
   172  * Emit an instruction to store an SH4 reg (RN)
   173  */
    174 static inline void store_reg( int x86reg, int sh4reg ) {
   175     /* mov reg, [bp+n] */
   176     OP(0x89);
   177     OP(0x45 + (x86reg<<3));
   178     OP(REG_OFFSET(r[sh4reg]));
   179 }
   181 /**
   182  * Load an FR register (single-precision floating point) into an integer x86
   183  * register (eg for register-to-register moves)
   184  */
   185 #define load_fr(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[0][(frm)^1]) )
   186 #define load_xf(reg,frm)  OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[1][(frm)^1]) )
   188 /**
   189  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   190  */
   191 #define load_dr0(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   192 #define load_dr1(reg,frm) OP(0x8B); MODRM_r32_ebp32(reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   194 /**
    195  * Store an FR register (single-precision floating point) from an integer x86
   196  * register (eg for register-to-register moves)
   197  */
   198 #define store_fr(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[0][(frm)^1]) )
   199 #define store_xf(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[1][(frm)^1]) )
   201 #define store_dr0(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   202 #define store_dr1(reg,frm) OP(0x89); MODRM_r32_ebp32( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   205 #define push_fpul()  FLDF_sh4r(R_FPUL)
   206 #define pop_fpul()   FSTPF_sh4r(R_FPUL)
   207 #define push_fr(frm) FLDF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   208 #define pop_fr(frm)  FSTPF_sh4r( REG_OFFSET(fr[0][(frm)^1]) )
   209 #define push_xf(frm) FLDF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   210 #define pop_xf(frm)  FSTPF_sh4r( REG_OFFSET(fr[1][(frm)^1]) )
   211 #define push_dr(frm) FLDD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   212 #define pop_dr(frm)  FSTPD_sh4r( REG_OFFSET(fr[0][(frm)&0x0E]) )
   213 #define push_xdr(frm) FLDD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
   214 #define pop_xdr(frm)  FSTPD_sh4r( REG_OFFSET(fr[1][(frm)&0x0E]) )
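The index arithmetic above appears to rely on the FP register file being stored word-swapped within each even/odd pair, so that a pair reads as one native little-endian double; roughly:

    /* fr[bank][n^1]    : single-precision word FRn (low index bit flipped)
     * fr[bank][n|0x01] : low 32 bits of double DRn on a little-endian host
     * fr[bank][n&0x0E] : high 32 bits of DRn
     * The x87 FLDD/FSTPD forms address the same even-indexed pair. */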
   218 /* Exception checks - Note that all exception checks will clobber EAX */
   220 #define check_priv( ) \
   221     if( !sh4_x86.priv_checked ) { \
   222 	sh4_x86.priv_checked = TRUE;\
   223 	load_spreg( R_EAX, R_SR );\
   224 	AND_imm32_r32( SR_MD, R_EAX );\
   225 	if( sh4_x86.in_delay_slot ) {\
   226 	    JE_exc( EXC_SLOT_ILLEGAL );\
   227 	} else {\
   228 	    JE_exc( EXC_ILLEGAL );\
   229 	}\
   230     }\
   232 #define check_fpuen( ) \
   233     if( !sh4_x86.fpuen_checked ) {\
   234 	sh4_x86.fpuen_checked = TRUE;\
   235 	load_spreg( R_EAX, R_SR );\
   236 	AND_imm32_r32( SR_FD, R_EAX );\
   237 	if( sh4_x86.in_delay_slot ) {\
   238 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   239 	} else {\
   240 	    JNE_exc(EXC_FPU_DISABLED);\
   241 	}\
   242     }
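Both checks are deliberately once-per-block: the result is cached in sh4_x86 so a run of privileged or FP instructions tests SR only once (RTE below resets the cached flags because it rewrites SR).

    /* e.g. two consecutive FP ops emit the SR_FD test once; the second
     * check_fpuen() expands to nothing. */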
   244 #define check_ralign16( x86reg ) \
   245     TEST_imm32_r32( 0x00000001, x86reg ); \
   246     JNE_exc(EXC_DATA_ADDR_READ)
   248 #define check_walign16( x86reg ) \
   249     TEST_imm32_r32( 0x00000001, x86reg ); \
   250     JNE_exc(EXC_DATA_ADDR_WRITE);
   252 #define check_ralign32( x86reg ) \
   253     TEST_imm32_r32( 0x00000003, x86reg ); \
   254     JNE_exc(EXC_DATA_ADDR_READ)
   256 #define check_walign32( x86reg ) \
   257     TEST_imm32_r32( 0x00000003, x86reg ); \
   258     JNE_exc(EXC_DATA_ADDR_WRITE);
   260 #define UNDEF()
   261 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   262 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   263 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   264 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   265 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   266 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   267 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
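These expand to plain calls into the memory subsystem, with MEM_RESULT copying the return value out of EAX only when a different destination was asked for:

    /* MEM_READ_BYTE( R_ECX, R_EDX ) becomes roughly:
     *   call_func1( sh4_read_byte, R_ECX );   // result arrives in EAX
     *   MOV_r32_r32( R_EAX, R_EDX );          // MEM_RESULT move
     * With R_EAX as the destination the move is omitted. */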
   269 /**
   270  * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned 
   271  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   272  */
   273 #define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   275 #define MMU_TRANSLATE_READ_EXC( addr_reg, exc_code ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(exc_code); MEM_RESULT(addr_reg) }
   276 /**
   277  * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned 
   278  * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
   279  */
   280 #define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }
   282 #define MEM_READ_SIZE (CALL_FUNC1_SIZE)
   283 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
   284 #define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
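The extra 12 bytes in MMU_TRANSLATE_SIZE presumably cover the error check on the translation path, assuming addr_reg is EAX so MEM_RESULT emits nothing:

    /* cmp r32, imm32   ; 81 /7 id  -> 6 bytes
     * je  <exc stub>   ; 0F 84 cd  -> 6 bytes */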
   286 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
   288 /****** Import appropriate calling conventions ******/
   289 #if SH4_TRANSLATOR == TARGET_X86_64
   290 #include "sh4/ia64abi.h"
   291 #else /* SH4_TRANSLATOR == TARGET_X86 */
   292 #ifdef APPLE_BUILD
   293 #include "sh4/ia32mac.h"
   294 #else
   295 #include "sh4/ia32abi.h"
   296 #endif
   297 #endif
   299 uint32_t sh4_translate_end_block_size()
   300 {
   301     if( sh4_x86.backpatch_posn <= 3 ) {
   302 	return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*12);
   303     } else {
   304 	return EPILOGUE_SIZE + 48 + (sh4_x86.backpatch_posn-3)*15;
   305     }
   306 }
   309 /**
   310  * Embed a breakpoint into the generated code
   311  */
   312 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   313 {
   314     load_imm32( R_EAX, pc );
   315     call_func1( sh4_translate_breakpoint_hit, R_EAX );
   316 }
    319 #define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))
   321 /**
   322  * Embed a call to sh4_execute_instruction for situations that we
   323  * can't translate (just page-crossing delay slots at the moment).
   324  * Caller is responsible for setting new_pc before calling this function.
   325  *
   326  * Performs:
   327  *   Set PC = endpc
   328  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   329  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   330  *   Call sh4_execute_instruction
   331  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   332  */
   333 void exit_block_emu( sh4vma_t endpc )
   334 {
   335     load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
   336     ADD_r32_sh4r( R_ECX, R_PC );
   338     load_imm32( R_ECX, (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period ); // 5
   339     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   340     load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
   341     store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );
   343     call_func0( sh4_execute_instruction );    
   344     load_spreg( R_EAX, R_PC );
   345     if( sh4_x86.tlb_on ) {
   346 	call_func1(xlat_get_code_by_vma,R_EAX);
   347     } else {
   348 	call_func1(xlat_get_code,R_EAX);
   349     }
   350     AND_imm8s_rptr( 0xFC, R_EAX );
   351     POP_r32(R_EBP);
   352     RET();
   353 } 
   355 /**
   356  * Translate a single instruction. Delayed branches are handled specially
    357  * by translating both branch and delayed instruction as a single unit (as
    358  * the delay slot instruction cannot be executed independently of its branch).
   359  * The instruction MUST be in the icache (assert check)
   360  *
   361  * @return true if the instruction marks the end of a basic block
    362  * (eg a branch or an illegal instruction)
   363  */
   364 uint32_t sh4_translate_instruction( sh4vma_t pc )
   365 {
   366     uint32_t ir;
   367     /* Read instruction from icache */
   368     assert( IS_IN_ICACHE(pc) );
   369     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   371 	/* PC is not in the current icache - this usually means we're running
   372 	 * with MMU on, and we've gone past the end of the page. And since 
   373 	 * sh4_translate_block is pretty careful about this, it means we're
   374 	 * almost certainly in a delay slot.
   375 	 *
   376 	 * Since we can't assume the page is present (and we can't fault it in
    377 	 * at this point), inline a call to sh4_execute_instruction (with a few
   378 	 * small repairs to cope with the different environment).
   379 	 */
   381     if( !sh4_x86.in_delay_slot ) {
   382 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   383     }
   384 %%
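Everything below the %% separator is input to the build's decoder generator rather than plain C: each `MNEMONIC operands {: ... :}` block is an action template whose operand names (Rm, Rn, imm, disp) and the current pc are substituted with decoded instruction fields. A hypothetical shape of the generated code for the first template (decode details illustrative only):

    /* switch( ir >> 12 ) { ...
     *   case 0x3:  // 0011nnnnmmmm1100 = ADD Rm, Rn
     *     if( (ir & 0xF) == 0xC ) {
     *         int Rm = (ir >> 4) & 0xF, Rn = (ir >> 8) & 0xF;
     *         ... body of the ADD Rm, Rn block below ...
     *     } */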
   385 /* ALU operations */
   386 ADD Rm, Rn {:
   387     load_reg( R_EAX, Rm );
   388     load_reg( R_ECX, Rn );
   389     ADD_r32_r32( R_EAX, R_ECX );
   390     store_reg( R_ECX, Rn );
   391     sh4_x86.tstate = TSTATE_NONE;
   392 :}
   393 ADD #imm, Rn {:  
   394     load_reg( R_EAX, Rn );
   395     ADD_imm8s_r32( imm, R_EAX );
   396     store_reg( R_EAX, Rn );
   397     sh4_x86.tstate = TSTATE_NONE;
   398 :}
   399 ADDC Rm, Rn {:
   400     if( sh4_x86.tstate != TSTATE_C ) {
   401 	LDC_t();
   402     }
   403     load_reg( R_EAX, Rm );
   404     load_reg( R_ECX, Rn );
   405     ADC_r32_r32( R_EAX, R_ECX );
   406     store_reg( R_ECX, Rn );
   407     SETC_t();
   408     sh4_x86.tstate = TSTATE_C;
   409 :}
   410 ADDV Rm, Rn {:
   411     load_reg( R_EAX, Rm );
   412     load_reg( R_ECX, Rn );
   413     ADD_r32_r32( R_EAX, R_ECX );
   414     store_reg( R_ECX, Rn );
   415     SETO_t();
   416     sh4_x86.tstate = TSTATE_O;
   417 :}
   418 AND Rm, Rn {:
   419     load_reg( R_EAX, Rm );
   420     load_reg( R_ECX, Rn );
   421     AND_r32_r32( R_EAX, R_ECX );
   422     store_reg( R_ECX, Rn );
   423     sh4_x86.tstate = TSTATE_NONE;
   424 :}
   425 AND #imm, R0 {:  
   426     load_reg( R_EAX, 0 );
   427     AND_imm32_r32(imm, R_EAX); 
   428     store_reg( R_EAX, 0 );
   429     sh4_x86.tstate = TSTATE_NONE;
   430 :}
   431 AND.B #imm, @(R0, GBR) {: 
   432     load_reg( R_EAX, 0 );
   433     load_spreg( R_ECX, R_GBR );
   434     ADD_r32_r32( R_ECX, R_EAX );
   435     MMU_TRANSLATE_WRITE( R_EAX );
   436     PUSH_realigned_r32(R_EAX);
   437     MEM_READ_BYTE( R_EAX, R_EAX );
   438     POP_realigned_r32(R_ECX);
   439     AND_imm32_r32(imm, R_EAX );
   440     MEM_WRITE_BYTE( R_ECX, R_EAX );
   441     sh4_x86.tstate = TSTATE_NONE;
   442 :}
   443 CMP/EQ Rm, Rn {:  
   444     load_reg( R_EAX, Rm );
   445     load_reg( R_ECX, Rn );
   446     CMP_r32_r32( R_EAX, R_ECX );
   447     SETE_t();
   448     sh4_x86.tstate = TSTATE_E;
   449 :}
   450 CMP/EQ #imm, R0 {:  
   451     load_reg( R_EAX, 0 );
   452     CMP_imm8s_r32(imm, R_EAX);
   453     SETE_t();
   454     sh4_x86.tstate = TSTATE_E;
   455 :}
   456 CMP/GE Rm, Rn {:  
   457     load_reg( R_EAX, Rm );
   458     load_reg( R_ECX, Rn );
   459     CMP_r32_r32( R_EAX, R_ECX );
   460     SETGE_t();
   461     sh4_x86.tstate = TSTATE_GE;
   462 :}
   463 CMP/GT Rm, Rn {: 
   464     load_reg( R_EAX, Rm );
   465     load_reg( R_ECX, Rn );
   466     CMP_r32_r32( R_EAX, R_ECX );
   467     SETG_t();
   468     sh4_x86.tstate = TSTATE_G;
   469 :}
   470 CMP/HI Rm, Rn {:  
   471     load_reg( R_EAX, Rm );
   472     load_reg( R_ECX, Rn );
   473     CMP_r32_r32( R_EAX, R_ECX );
   474     SETA_t();
   475     sh4_x86.tstate = TSTATE_A;
   476 :}
   477 CMP/HS Rm, Rn {: 
   478     load_reg( R_EAX, Rm );
   479     load_reg( R_ECX, Rn );
   480     CMP_r32_r32( R_EAX, R_ECX );
   481     SETAE_t();
   482     sh4_x86.tstate = TSTATE_AE;
   483  :}
   484 CMP/PL Rn {: 
   485     load_reg( R_EAX, Rn );
   486     CMP_imm8s_r32( 0, R_EAX );
   487     SETG_t();
   488     sh4_x86.tstate = TSTATE_G;
   489 :}
   490 CMP/PZ Rn {:  
   491     load_reg( R_EAX, Rn );
   492     CMP_imm8s_r32( 0, R_EAX );
   493     SETGE_t();
   494     sh4_x86.tstate = TSTATE_GE;
   495 :}
   496 CMP/STR Rm, Rn {:  
   497     load_reg( R_EAX, Rm );
   498     load_reg( R_ECX, Rn );
   499     XOR_r32_r32( R_ECX, R_EAX );
   500     TEST_r8_r8( R_AL, R_AL );
   501     JE_rel8(target1);
   502     TEST_r8_r8( R_AH, R_AH );
   503     JE_rel8(target2);
   504     SHR_imm8_r32( 16, R_EAX );
   505     TEST_r8_r8( R_AL, R_AL );
   506     JE_rel8(target3);
   507     TEST_r8_r8( R_AH, R_AH );
   508     JMP_TARGET(target1);
   509     JMP_TARGET(target2);
   510     JMP_TARGET(target3);
   511     SETE_t();
   512     sh4_x86.tstate = TSTATE_E;
   513 :}
   514 DIV0S Rm, Rn {:
   515     load_reg( R_EAX, Rm );
   516     load_reg( R_ECX, Rn );
   517     SHR_imm8_r32( 31, R_EAX );
   518     SHR_imm8_r32( 31, R_ECX );
   519     store_spreg( R_EAX, R_M );
   520     store_spreg( R_ECX, R_Q );
   521     CMP_r32_r32( R_EAX, R_ECX );
   522     SETNE_t();
   523     sh4_x86.tstate = TSTATE_NE;
   524 :}
   525 DIV0U {:  
   526     XOR_r32_r32( R_EAX, R_EAX );
   527     store_spreg( R_EAX, R_Q );
   528     store_spreg( R_EAX, R_M );
   529     store_spreg( R_EAX, R_T );
   530     sh4_x86.tstate = TSTATE_C; // works for DIV1
   531 :}
   532 DIV1 Rm, Rn {:
   533     load_spreg( R_ECX, R_M );
   534     load_reg( R_EAX, Rn );
   535     if( sh4_x86.tstate != TSTATE_C ) {
   536 	LDC_t();
   537     }
   538     RCL1_r32( R_EAX );
   539     SETC_r8( R_DL ); // Q'
   540     CMP_sh4r_r32( R_Q, R_ECX );
   541     JE_rel8(mqequal);
   542     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   543     JMP_rel8(end);
   544     JMP_TARGET(mqequal);
   545     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   546     JMP_TARGET(end);
   547     store_reg( R_EAX, Rn ); // Done with Rn now
   548     SETC_r8(R_AL); // tmp1
   549     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   550     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   551     store_spreg( R_ECX, R_Q );
   552     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   553     MOVZX_r8_r32( R_AL, R_EAX );
   554     store_spreg( R_EAX, R_T );
   555     sh4_x86.tstate = TSTATE_NONE;
   556 :}
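For readability, what the DIV1 sequence above computes (one step of SH4 non-restoring division, with ECX holding M and DL capturing the bit shifted out of Rn):

    /* rn    = (rn << 1) | T;   qbit = old bit 31      (RCL1 / SETC DL)
     * if( Q == M )  carry = borrow from  rn -= r[Rm];
     * else          carry = carry  from  rn += r[Rm];
     * Q = M ^ qbit ^ carry;                           (the two byte XORs)
     * T = !(qbit ^ carry);    // equivalently, T = (new Q == M) */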
   557 DMULS.L Rm, Rn {:  
   558     load_reg( R_EAX, Rm );
   559     load_reg( R_ECX, Rn );
   560     IMUL_r32(R_ECX);
   561     store_spreg( R_EDX, R_MACH );
   562     store_spreg( R_EAX, R_MACL );
   563     sh4_x86.tstate = TSTATE_NONE;
   564 :}
   565 DMULU.L Rm, Rn {:  
   566     load_reg( R_EAX, Rm );
   567     load_reg( R_ECX, Rn );
   568     MUL_r32(R_ECX);
   569     store_spreg( R_EDX, R_MACH );
   570     store_spreg( R_EAX, R_MACL );    
   571     sh4_x86.tstate = TSTATE_NONE;
   572 :}
   573 DT Rn {:  
   574     load_reg( R_EAX, Rn );
   575     ADD_imm8s_r32( -1, R_EAX );
   576     store_reg( R_EAX, Rn );
   577     SETE_t();
   578     sh4_x86.tstate = TSTATE_E;
   579 :}
   580 EXTS.B Rm, Rn {:  
   581     load_reg( R_EAX, Rm );
   582     MOVSX_r8_r32( R_EAX, R_EAX );
   583     store_reg( R_EAX, Rn );
   584 :}
   585 EXTS.W Rm, Rn {:  
   586     load_reg( R_EAX, Rm );
   587     MOVSX_r16_r32( R_EAX, R_EAX );
   588     store_reg( R_EAX, Rn );
   589 :}
   590 EXTU.B Rm, Rn {:  
   591     load_reg( R_EAX, Rm );
   592     MOVZX_r8_r32( R_EAX, R_EAX );
   593     store_reg( R_EAX, Rn );
   594 :}
   595 EXTU.W Rm, Rn {:  
   596     load_reg( R_EAX, Rm );
   597     MOVZX_r16_r32( R_EAX, R_EAX );
   598     store_reg( R_EAX, Rn );
   599 :}
   600 MAC.L @Rm+, @Rn+ {:
   601     if( Rm == Rn ) {
   602 	load_reg( R_EAX, Rm );
   603 	check_ralign32( R_EAX );
   604 	MMU_TRANSLATE_READ( R_EAX );
   605 	PUSH_realigned_r32( R_EAX );
   606 	load_reg( R_EAX, Rn );
   607 	ADD_imm8s_r32( 4, R_EAX );
   608 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   609 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
    610 	// Note: we translate both addresses in case they lie on different pages.
    611 	// Maybe worth adding a page-boundary check to skip the second translation.
   612     } else {
   613 	load_reg( R_EAX, Rm );
   614 	check_ralign32( R_EAX );
   615 	MMU_TRANSLATE_READ( R_EAX );
   616 	load_reg( R_ECX, Rn );
   617 	check_ralign32( R_ECX );
   618 	PUSH_realigned_r32( R_EAX );
   619 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   620 	MOV_r32_r32( R_ECX, R_EAX );
   621 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   622 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   623     }
   624     MEM_READ_LONG( R_EAX, R_EAX );
   625     POP_r32( R_ECX );
   626     PUSH_r32( R_EAX );
   627     MEM_READ_LONG( R_ECX, R_EAX );
   628     POP_realigned_r32( R_ECX );
   630     IMUL_r32( R_ECX );
   631     ADD_r32_sh4r( R_EAX, R_MACL );
   632     ADC_r32_sh4r( R_EDX, R_MACH );
   634     load_spreg( R_ECX, R_S );
   635     TEST_r32_r32(R_ECX, R_ECX);
   636     JE_rel8( nosat );
   637     call_func0( signsat48 );
   638     JMP_TARGET( nosat );
   639     sh4_x86.tstate = TSTATE_NONE;
   640 :}
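IMUL leaves the signed 64-bit product in EDX:EAX, which the ADD/ADC pair folds into MACH:MACL; with the S flag set, signsat48 clamps the accumulator to the SH4's saturating MAC range:

    /* S=1: MACH:MACL is limited to the signed 48-bit range
     *   0xFFFF800000000000 .. 0x00007FFFFFFFFFFF */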
   641 MAC.W @Rm+, @Rn+ {:  
   642     if( Rm == Rn ) {
   643 	load_reg( R_EAX, Rm );
   644 	check_ralign16( R_EAX );
   645 	MMU_TRANSLATE_READ( R_EAX );
   646 	PUSH_realigned_r32( R_EAX );
   647 	load_reg( R_EAX, Rn );
   648 	ADD_imm8s_r32( 2, R_EAX );
   649 	MMU_TRANSLATE_READ_EXC( R_EAX, -5 );
   650 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
    651 	// Note: we translate both addresses in case they lie on different pages.
    652 	// Maybe worth adding a page-boundary check to skip the second translation.
   653     } else {
   654 	load_reg( R_EAX, Rm );
   655 	check_ralign16( R_EAX );
   656 	MMU_TRANSLATE_READ( R_EAX );
   657 	load_reg( R_ECX, Rn );
   658 	check_ralign16( R_ECX );
   659 	PUSH_realigned_r32( R_EAX );
   660 	MMU_TRANSLATE_READ_EXC( R_ECX, -5 );
   661 	MOV_r32_r32( R_ECX, R_EAX );
   662 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   663 	ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   664     }
   665     MEM_READ_WORD( R_EAX, R_EAX );
   666     POP_r32( R_ECX );
   667     PUSH_r32( R_EAX );
   668     MEM_READ_WORD( R_ECX, R_EAX );
   669     POP_realigned_r32( R_ECX );
   670     IMUL_r32( R_ECX );
   672     load_spreg( R_ECX, R_S );
   673     TEST_r32_r32( R_ECX, R_ECX );
   674     JE_rel8( nosat );
   676     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   677     JNO_rel8( end );            // 2
   678     load_imm32( R_EDX, 1 );         // 5
   679     store_spreg( R_EDX, R_MACH );   // 6
   680     JS_rel8( positive );        // 2
   681     load_imm32( R_EAX, 0x80000000 );// 5
   682     store_spreg( R_EAX, R_MACL );   // 6
   683     JMP_rel8(end2);           // 2
   685     JMP_TARGET(positive);
   686     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   687     store_spreg( R_EAX, R_MACL );   // 6
   688     JMP_rel8(end3);            // 2
   690     JMP_TARGET(nosat);
   691     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   692     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   693     JMP_TARGET(end);
   694     JMP_TARGET(end2);
   695     JMP_TARGET(end3);
   696     sh4_x86.tstate = TSTATE_NONE;
   697 :}
   698 MOVT Rn {:  
   699     load_spreg( R_EAX, R_T );
   700     store_reg( R_EAX, Rn );
   701 :}
   702 MUL.L Rm, Rn {:  
   703     load_reg( R_EAX, Rm );
   704     load_reg( R_ECX, Rn );
   705     MUL_r32( R_ECX );
   706     store_spreg( R_EAX, R_MACL );
   707     sh4_x86.tstate = TSTATE_NONE;
   708 :}
   709 MULS.W Rm, Rn {:
   710     load_reg16s( R_EAX, Rm );
   711     load_reg16s( R_ECX, Rn );
   712     MUL_r32( R_ECX );
   713     store_spreg( R_EAX, R_MACL );
   714     sh4_x86.tstate = TSTATE_NONE;
   715 :}
   716 MULU.W Rm, Rn {:  
   717     load_reg16u( R_EAX, Rm );
   718     load_reg16u( R_ECX, Rn );
   719     MUL_r32( R_ECX );
   720     store_spreg( R_EAX, R_MACL );
   721     sh4_x86.tstate = TSTATE_NONE;
   722 :}
   723 NEG Rm, Rn {:
   724     load_reg( R_EAX, Rm );
   725     NEG_r32( R_EAX );
   726     store_reg( R_EAX, Rn );
   727     sh4_x86.tstate = TSTATE_NONE;
   728 :}
   729 NEGC Rm, Rn {:  
   730     load_reg( R_EAX, Rm );
   731     XOR_r32_r32( R_ECX, R_ECX );
   732     LDC_t();
   733     SBB_r32_r32( R_EAX, R_ECX );
   734     store_reg( R_ECX, Rn );
   735     SETC_t();
   736     sh4_x86.tstate = TSTATE_C;
   737 :}
   738 NOT Rm, Rn {:  
   739     load_reg( R_EAX, Rm );
   740     NOT_r32( R_EAX );
   741     store_reg( R_EAX, Rn );
   742     sh4_x86.tstate = TSTATE_NONE;
   743 :}
   744 OR Rm, Rn {:  
   745     load_reg( R_EAX, Rm );
   746     load_reg( R_ECX, Rn );
   747     OR_r32_r32( R_EAX, R_ECX );
   748     store_reg( R_ECX, Rn );
   749     sh4_x86.tstate = TSTATE_NONE;
   750 :}
   751 OR #imm, R0 {:
   752     load_reg( R_EAX, 0 );
   753     OR_imm32_r32(imm, R_EAX);
   754     store_reg( R_EAX, 0 );
   755     sh4_x86.tstate = TSTATE_NONE;
   756 :}
   757 OR.B #imm, @(R0, GBR) {:  
   758     load_reg( R_EAX, 0 );
   759     load_spreg( R_ECX, R_GBR );
   760     ADD_r32_r32( R_ECX, R_EAX );
   761     MMU_TRANSLATE_WRITE( R_EAX );
   762     PUSH_realigned_r32(R_EAX);
   763     MEM_READ_BYTE( R_EAX, R_EAX );
   764     POP_realigned_r32(R_ECX);
   765     OR_imm32_r32(imm, R_EAX );
   766     MEM_WRITE_BYTE( R_ECX, R_EAX );
   767     sh4_x86.tstate = TSTATE_NONE;
   768 :}
   769 ROTCL Rn {:
   770     load_reg( R_EAX, Rn );
   771     if( sh4_x86.tstate != TSTATE_C ) {
   772 	LDC_t();
   773     }
   774     RCL1_r32( R_EAX );
   775     store_reg( R_EAX, Rn );
   776     SETC_t();
   777     sh4_x86.tstate = TSTATE_C;
   778 :}
   779 ROTCR Rn {:  
   780     load_reg( R_EAX, Rn );
   781     if( sh4_x86.tstate != TSTATE_C ) {
   782 	LDC_t();
   783     }
   784     RCR1_r32( R_EAX );
   785     store_reg( R_EAX, Rn );
   786     SETC_t();
   787     sh4_x86.tstate = TSTATE_C;
   788 :}
   789 ROTL Rn {:  
   790     load_reg( R_EAX, Rn );
   791     ROL1_r32( R_EAX );
   792     store_reg( R_EAX, Rn );
   793     SETC_t();
   794     sh4_x86.tstate = TSTATE_C;
   795 :}
   796 ROTR Rn {:  
   797     load_reg( R_EAX, Rn );
   798     ROR1_r32( R_EAX );
   799     store_reg( R_EAX, Rn );
   800     SETC_t();
   801     sh4_x86.tstate = TSTATE_C;
   802 :}
   803 SHAD Rm, Rn {:
   804     /* Annoyingly enough, not directly convertible */
   805     load_reg( R_EAX, Rn );
   806     load_reg( R_ECX, Rm );
   807     CMP_imm32_r32( 0, R_ECX );
   808     JGE_rel8(doshl);
   810     NEG_r32( R_ECX );      // 2
   811     AND_imm8_r8( 0x1F, R_CL ); // 3
   812     JE_rel8(emptysar);     // 2
   813     SAR_r32_CL( R_EAX );       // 2
   814     JMP_rel8(end);          // 2
   816     JMP_TARGET(emptysar);
   817     SAR_imm8_r32(31, R_EAX );  // 3
   818     JMP_rel8(end2);
   820     JMP_TARGET(doshl);
   821     AND_imm8_r8( 0x1F, R_CL ); // 3
   822     SHL_r32_CL( R_EAX );       // 2
   823     JMP_TARGET(end);
   824     JMP_TARGET(end2);
   825     store_reg( R_EAX, Rn );
   826     sh4_x86.tstate = TSTATE_NONE;
   827 :}
   828 SHLD Rm, Rn {:  
   829     load_reg( R_EAX, Rn );
   830     load_reg( R_ECX, Rm );
   831     CMP_imm32_r32( 0, R_ECX );
   832     JGE_rel8(doshl);
   834     NEG_r32( R_ECX );      // 2
   835     AND_imm8_r8( 0x1F, R_CL ); // 3
   836     JE_rel8(emptyshr );
   837     SHR_r32_CL( R_EAX );       // 2
   838     JMP_rel8(end);          // 2
   840     JMP_TARGET(emptyshr);
   841     XOR_r32_r32( R_EAX, R_EAX );
   842     JMP_rel8(end2);
   844     JMP_TARGET(doshl);
   845     AND_imm8_r8( 0x1F, R_CL ); // 3
   846     SHL_r32_CL( R_EAX );       // 2
   847     JMP_TARGET(end);
   848     JMP_TARGET(end2);
   849     store_reg( R_EAX, Rn );
   850     sh4_x86.tstate = TSTATE_NONE;
   851 :}
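The emptysar/emptyshr paths in SHAD/SHLD exist because x86 masks shift counts to 5 bits, so a computed count of 0 with Rm negative really means a shift right by 32; in C terms:

    /* Rm >= 0 : Rn <<= (Rm & 31);
     * Rm <  0 : n = (-Rm) & 31;
     *   SHAD: Rn = n ? ((int32_t)Rn >> n) : ((int32_t)Rn >> 31);  // sign fill
     *   SHLD: Rn = n ? (Rn >> n) : 0;                             // zero fill */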
   852 SHAL Rn {: 
   853     load_reg( R_EAX, Rn );
   854     SHL1_r32( R_EAX );
   855     SETC_t();
   856     store_reg( R_EAX, Rn );
   857     sh4_x86.tstate = TSTATE_C;
   858 :}
   859 SHAR Rn {:  
   860     load_reg( R_EAX, Rn );
   861     SAR1_r32( R_EAX );
   862     SETC_t();
   863     store_reg( R_EAX, Rn );
   864     sh4_x86.tstate = TSTATE_C;
   865 :}
   866 SHLL Rn {:  
   867     load_reg( R_EAX, Rn );
   868     SHL1_r32( R_EAX );
   869     SETC_t();
   870     store_reg( R_EAX, Rn );
   871     sh4_x86.tstate = TSTATE_C;
   872 :}
   873 SHLL2 Rn {:
   874     load_reg( R_EAX, Rn );
   875     SHL_imm8_r32( 2, R_EAX );
   876     store_reg( R_EAX, Rn );
   877     sh4_x86.tstate = TSTATE_NONE;
   878 :}
   879 SHLL8 Rn {:  
   880     load_reg( R_EAX, Rn );
   881     SHL_imm8_r32( 8, R_EAX );
   882     store_reg( R_EAX, Rn );
   883     sh4_x86.tstate = TSTATE_NONE;
   884 :}
   885 SHLL16 Rn {:  
   886     load_reg( R_EAX, Rn );
   887     SHL_imm8_r32( 16, R_EAX );
   888     store_reg( R_EAX, Rn );
   889     sh4_x86.tstate = TSTATE_NONE;
   890 :}
   891 SHLR Rn {:  
   892     load_reg( R_EAX, Rn );
   893     SHR1_r32( R_EAX );
   894     SETC_t();
   895     store_reg( R_EAX, Rn );
   896     sh4_x86.tstate = TSTATE_C;
   897 :}
   898 SHLR2 Rn {:  
   899     load_reg( R_EAX, Rn );
   900     SHR_imm8_r32( 2, R_EAX );
   901     store_reg( R_EAX, Rn );
   902     sh4_x86.tstate = TSTATE_NONE;
   903 :}
   904 SHLR8 Rn {:  
   905     load_reg( R_EAX, Rn );
   906     SHR_imm8_r32( 8, R_EAX );
   907     store_reg( R_EAX, Rn );
   908     sh4_x86.tstate = TSTATE_NONE;
   909 :}
   910 SHLR16 Rn {:  
   911     load_reg( R_EAX, Rn );
   912     SHR_imm8_r32( 16, R_EAX );
   913     store_reg( R_EAX, Rn );
   914     sh4_x86.tstate = TSTATE_NONE;
   915 :}
   916 SUB Rm, Rn {:  
   917     load_reg( R_EAX, Rm );
   918     load_reg( R_ECX, Rn );
   919     SUB_r32_r32( R_EAX, R_ECX );
   920     store_reg( R_ECX, Rn );
   921     sh4_x86.tstate = TSTATE_NONE;
   922 :}
   923 SUBC Rm, Rn {:  
   924     load_reg( R_EAX, Rm );
   925     load_reg( R_ECX, Rn );
   926     if( sh4_x86.tstate != TSTATE_C ) {
   927 	LDC_t();
   928     }
   929     SBB_r32_r32( R_EAX, R_ECX );
   930     store_reg( R_ECX, Rn );
   931     SETC_t();
   932     sh4_x86.tstate = TSTATE_C;
   933 :}
   934 SUBV Rm, Rn {:  
   935     load_reg( R_EAX, Rm );
   936     load_reg( R_ECX, Rn );
   937     SUB_r32_r32( R_EAX, R_ECX );
   938     store_reg( R_ECX, Rn );
   939     SETO_t();
   940     sh4_x86.tstate = TSTATE_O;
   941 :}
   942 SWAP.B Rm, Rn {:  
   943     load_reg( R_EAX, Rm );
   944     XCHG_r8_r8( R_AL, R_AH ); // NB: does not touch EFLAGS
   945     store_reg( R_EAX, Rn );
   946 :}
   947 SWAP.W Rm, Rn {:  
   948     load_reg( R_EAX, Rm );
   949     MOV_r32_r32( R_EAX, R_ECX );
   950     SHL_imm8_r32( 16, R_ECX );
   951     SHR_imm8_r32( 16, R_EAX );
   952     OR_r32_r32( R_EAX, R_ECX );
   953     store_reg( R_ECX, Rn );
   954     sh4_x86.tstate = TSTATE_NONE;
   955 :}
   956 TAS.B @Rn {:  
   957     load_reg( R_EAX, Rn );
   958     MMU_TRANSLATE_WRITE( R_EAX );
   959     PUSH_realigned_r32( R_EAX );
   960     MEM_READ_BYTE( R_EAX, R_EAX );
   961     TEST_r8_r8( R_AL, R_AL );
   962     SETE_t();
   963     OR_imm8_r8( 0x80, R_AL );
   964     POP_realigned_r32( R_ECX );
   965     MEM_WRITE_BYTE( R_ECX, R_EAX );
   966     sh4_x86.tstate = TSTATE_NONE;
   967 :}
   968 TST Rm, Rn {:  
   969     load_reg( R_EAX, Rm );
   970     load_reg( R_ECX, Rn );
   971     TEST_r32_r32( R_EAX, R_ECX );
   972     SETE_t();
   973     sh4_x86.tstate = TSTATE_E;
   974 :}
   975 TST #imm, R0 {:  
   976     load_reg( R_EAX, 0 );
   977     TEST_imm32_r32( imm, R_EAX );
   978     SETE_t();
   979     sh4_x86.tstate = TSTATE_E;
   980 :}
   981 TST.B #imm, @(R0, GBR) {:  
   982     load_reg( R_EAX, 0);
    983     load_spreg( R_ECX, R_GBR );
   984     ADD_r32_r32( R_ECX, R_EAX );
   985     MMU_TRANSLATE_READ( R_EAX );
   986     MEM_READ_BYTE( R_EAX, R_EAX );
   987     TEST_imm8_r8( imm, R_AL );
   988     SETE_t();
   989     sh4_x86.tstate = TSTATE_E;
   990 :}
   991 XOR Rm, Rn {:  
   992     load_reg( R_EAX, Rm );
   993     load_reg( R_ECX, Rn );
   994     XOR_r32_r32( R_EAX, R_ECX );
   995     store_reg( R_ECX, Rn );
   996     sh4_x86.tstate = TSTATE_NONE;
   997 :}
   998 XOR #imm, R0 {:  
   999     load_reg( R_EAX, 0 );
  1000     XOR_imm32_r32( imm, R_EAX );
  1001     store_reg( R_EAX, 0 );
  1002     sh4_x86.tstate = TSTATE_NONE;
  1003 :}
  1004 XOR.B #imm, @(R0, GBR) {:  
  1005     load_reg( R_EAX, 0 );
  1006     load_spreg( R_ECX, R_GBR );
  1007     ADD_r32_r32( R_ECX, R_EAX );
  1008     MMU_TRANSLATE_WRITE( R_EAX );
  1009     PUSH_realigned_r32(R_EAX);
  1010     MEM_READ_BYTE(R_EAX, R_EAX);
  1011     POP_realigned_r32(R_ECX);
  1012     XOR_imm32_r32( imm, R_EAX );
  1013     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1014     sh4_x86.tstate = TSTATE_NONE;
  1015 :}
  1016 XTRCT Rm, Rn {:
  1017     load_reg( R_EAX, Rm );
  1018     load_reg( R_ECX, Rn );
  1019     SHL_imm8_r32( 16, R_EAX );
  1020     SHR_imm8_r32( 16, R_ECX );
  1021     OR_r32_r32( R_EAX, R_ECX );
  1022     store_reg( R_ECX, Rn );
  1023     sh4_x86.tstate = TSTATE_NONE;
  1024 :}
  1026 /* Data move instructions */
  1027 MOV Rm, Rn {:  
  1028     load_reg( R_EAX, Rm );
  1029     store_reg( R_EAX, Rn );
  1030 :}
  1031 MOV #imm, Rn {:  
  1032     load_imm32( R_EAX, imm );
  1033     store_reg( R_EAX, Rn );
  1034 :}
  1035 MOV.B Rm, @Rn {:  
  1036     load_reg( R_EAX, Rn );
  1037     MMU_TRANSLATE_WRITE( R_EAX );
  1038     load_reg( R_EDX, Rm );
  1039     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1040     sh4_x86.tstate = TSTATE_NONE;
  1041 :}
  1042 MOV.B Rm, @-Rn {:  
  1043     load_reg( R_EAX, Rn );
  1044     ADD_imm8s_r32( -1, R_EAX );
  1045     MMU_TRANSLATE_WRITE( R_EAX );
  1046     load_reg( R_EDX, Rm );
  1047     ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
  1048     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1049     sh4_x86.tstate = TSTATE_NONE;
  1050 :}
  1051 MOV.B Rm, @(R0, Rn) {:  
  1052     load_reg( R_EAX, 0 );
  1053     load_reg( R_ECX, Rn );
  1054     ADD_r32_r32( R_ECX, R_EAX );
  1055     MMU_TRANSLATE_WRITE( R_EAX );
  1056     load_reg( R_EDX, Rm );
  1057     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1058     sh4_x86.tstate = TSTATE_NONE;
  1059 :}
  1060 MOV.B R0, @(disp, GBR) {:  
  1061     load_spreg( R_EAX, R_GBR );
  1062     ADD_imm32_r32( disp, R_EAX );
  1063     MMU_TRANSLATE_WRITE( R_EAX );
  1064     load_reg( R_EDX, 0 );
  1065     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1066     sh4_x86.tstate = TSTATE_NONE;
  1067 :}
  1068 MOV.B R0, @(disp, Rn) {:  
  1069     load_reg( R_EAX, Rn );
  1070     ADD_imm32_r32( disp, R_EAX );
  1071     MMU_TRANSLATE_WRITE( R_EAX );
  1072     load_reg( R_EDX, 0 );
  1073     MEM_WRITE_BYTE( R_EAX, R_EDX );
  1074     sh4_x86.tstate = TSTATE_NONE;
  1075 :}
  1076 MOV.B @Rm, Rn {:  
  1077     load_reg( R_EAX, Rm );
  1078     MMU_TRANSLATE_READ( R_EAX );
  1079     MEM_READ_BYTE( R_EAX, R_EAX );
  1080     store_reg( R_EAX, Rn );
  1081     sh4_x86.tstate = TSTATE_NONE;
  1082 :}
  1083 MOV.B @Rm+, Rn {:  
  1084     load_reg( R_EAX, Rm );
  1085     MMU_TRANSLATE_READ( R_EAX );
  1086     ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
  1087     MEM_READ_BYTE( R_EAX, R_EAX );
  1088     store_reg( R_EAX, Rn );
  1089     sh4_x86.tstate = TSTATE_NONE;
  1090 :}
  1091 MOV.B @(R0, Rm), Rn {:  
  1092     load_reg( R_EAX, 0 );
  1093     load_reg( R_ECX, Rm );
  1094     ADD_r32_r32( R_ECX, R_EAX );
   1095     MMU_TRANSLATE_READ( R_EAX );
  1096     MEM_READ_BYTE( R_EAX, R_EAX );
  1097     store_reg( R_EAX, Rn );
  1098     sh4_x86.tstate = TSTATE_NONE;
  1099 :}
  1100 MOV.B @(disp, GBR), R0 {:  
  1101     load_spreg( R_EAX, R_GBR );
  1102     ADD_imm32_r32( disp, R_EAX );
  1103     MMU_TRANSLATE_READ( R_EAX );
  1104     MEM_READ_BYTE( R_EAX, R_EAX );
  1105     store_reg( R_EAX, 0 );
  1106     sh4_x86.tstate = TSTATE_NONE;
  1107 :}
  1108 MOV.B @(disp, Rm), R0 {:  
  1109     load_reg( R_EAX, Rm );
  1110     ADD_imm32_r32( disp, R_EAX );
  1111     MMU_TRANSLATE_READ( R_EAX );
  1112     MEM_READ_BYTE( R_EAX, R_EAX );
  1113     store_reg( R_EAX, 0 );
  1114     sh4_x86.tstate = TSTATE_NONE;
  1115 :}
  1116 MOV.L Rm, @Rn {:
  1117     load_reg( R_EAX, Rn );
  1118     check_walign32(R_EAX);
  1119     MMU_TRANSLATE_WRITE( R_EAX );
  1120     load_reg( R_EDX, Rm );
  1121     MEM_WRITE_LONG( R_EAX, R_EDX );
  1122     sh4_x86.tstate = TSTATE_NONE;
  1123 :}
  1124 MOV.L Rm, @-Rn {:  
  1125     load_reg( R_EAX, Rn );
  1126     ADD_imm8s_r32( -4, R_EAX );
  1127     check_walign32( R_EAX );
  1128     MMU_TRANSLATE_WRITE( R_EAX );
  1129     load_reg( R_EDX, Rm );
  1130     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  1131     MEM_WRITE_LONG( R_EAX, R_EDX );
  1132     sh4_x86.tstate = TSTATE_NONE;
  1133 :}
  1134 MOV.L Rm, @(R0, Rn) {:  
  1135     load_reg( R_EAX, 0 );
  1136     load_reg( R_ECX, Rn );
  1137     ADD_r32_r32( R_ECX, R_EAX );
  1138     check_walign32( R_EAX );
  1139     MMU_TRANSLATE_WRITE( R_EAX );
  1140     load_reg( R_EDX, Rm );
  1141     MEM_WRITE_LONG( R_EAX, R_EDX );
  1142     sh4_x86.tstate = TSTATE_NONE;
  1143 :}
  1144 MOV.L R0, @(disp, GBR) {:  
  1145     load_spreg( R_EAX, R_GBR );
  1146     ADD_imm32_r32( disp, R_EAX );
  1147     check_walign32( R_EAX );
  1148     MMU_TRANSLATE_WRITE( R_EAX );
  1149     load_reg( R_EDX, 0 );
  1150     MEM_WRITE_LONG( R_EAX, R_EDX );
  1151     sh4_x86.tstate = TSTATE_NONE;
  1152 :}
  1153 MOV.L Rm, @(disp, Rn) {:  
  1154     load_reg( R_EAX, Rn );
  1155     ADD_imm32_r32( disp, R_EAX );
  1156     check_walign32( R_EAX );
  1157     MMU_TRANSLATE_WRITE( R_EAX );
  1158     load_reg( R_EDX, Rm );
  1159     MEM_WRITE_LONG( R_EAX, R_EDX );
  1160     sh4_x86.tstate = TSTATE_NONE;
  1161 :}
  1162 MOV.L @Rm, Rn {:  
  1163     load_reg( R_EAX, Rm );
  1164     check_ralign32( R_EAX );
  1165     MMU_TRANSLATE_READ( R_EAX );
  1166     MEM_READ_LONG( R_EAX, R_EAX );
  1167     store_reg( R_EAX, Rn );
  1168     sh4_x86.tstate = TSTATE_NONE;
  1169 :}
  1170 MOV.L @Rm+, Rn {:  
  1171     load_reg( R_EAX, Rm );
  1172     check_ralign32( R_EAX );
  1173     MMU_TRANSLATE_READ( R_EAX );
  1174     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1175     MEM_READ_LONG( R_EAX, R_EAX );
  1176     store_reg( R_EAX, Rn );
  1177     sh4_x86.tstate = TSTATE_NONE;
  1178 :}
  1179 MOV.L @(R0, Rm), Rn {:  
  1180     load_reg( R_EAX, 0 );
  1181     load_reg( R_ECX, Rm );
  1182     ADD_r32_r32( R_ECX, R_EAX );
  1183     check_ralign32( R_EAX );
  1184     MMU_TRANSLATE_READ( R_EAX );
  1185     MEM_READ_LONG( R_EAX, R_EAX );
  1186     store_reg( R_EAX, Rn );
  1187     sh4_x86.tstate = TSTATE_NONE;
  1188 :}
  1189 MOV.L @(disp, GBR), R0 {:
  1190     load_spreg( R_EAX, R_GBR );
  1191     ADD_imm32_r32( disp, R_EAX );
  1192     check_ralign32( R_EAX );
  1193     MMU_TRANSLATE_READ( R_EAX );
  1194     MEM_READ_LONG( R_EAX, R_EAX );
  1195     store_reg( R_EAX, 0 );
  1196     sh4_x86.tstate = TSTATE_NONE;
  1197 :}
  1198 MOV.L @(disp, PC), Rn {:  
  1199     if( sh4_x86.in_delay_slot ) {
  1200 	SLOTILLEGAL();
  1201     } else {
  1202 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1203 	if( IS_IN_ICACHE(target) ) {
  1204 	    // If the target address is in the same page as the code, it's
  1205 	    // pretty safe to just ref it directly and circumvent the whole
  1206 	    // memory subsystem. (this is a big performance win)
  1208 	    // FIXME: There's a corner-case that's not handled here when
  1209 	    // the current code-page is in the ITLB but not in the UTLB.
  1210 	    // (should generate a TLB miss although need to test SH4 
  1211 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1212 	    // behaviour though.
  1213 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1214 	    MOV_moff32_EAX( ptr );
  1215 	} else {
  1216 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1217 	    // different virtual address than the translation was done with,
  1218 	    // but we can safely assume that the low bits are the same.
  1219 	    load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1220 	    ADD_sh4r_r32( R_PC, R_EAX );
  1221 	    MMU_TRANSLATE_READ( R_EAX );
  1222 	    MEM_READ_LONG( R_EAX, R_EAX );
   1223 	    sh4_x86.tstate = TSTATE_NONE;
   1224 	}
   1225 	store_reg( R_EAX, Rn );
   1226     }
  1227 :}
  1228 MOV.L @(disp, Rm), Rn {:  
  1229     load_reg( R_EAX, Rm );
  1230     ADD_imm8s_r32( disp, R_EAX );
  1231     check_ralign32( R_EAX );
  1232     MMU_TRANSLATE_READ( R_EAX );
  1233     MEM_READ_LONG( R_EAX, R_EAX );
  1234     store_reg( R_EAX, Rn );
  1235     sh4_x86.tstate = TSTATE_NONE;
  1236 :}
  1237 MOV.W Rm, @Rn {:  
  1238     load_reg( R_EAX, Rn );
  1239     check_walign16( R_EAX );
   1240     MMU_TRANSLATE_WRITE( R_EAX );
  1241     load_reg( R_EDX, Rm );
  1242     MEM_WRITE_WORD( R_EAX, R_EDX );
  1243     sh4_x86.tstate = TSTATE_NONE;
  1244 :}
  1245 MOV.W Rm, @-Rn {:  
  1246     load_reg( R_EAX, Rn );
  1247     ADD_imm8s_r32( -2, R_EAX );
  1248     check_walign16( R_EAX );
  1249     MMU_TRANSLATE_WRITE( R_EAX );
  1250     load_reg( R_EDX, Rm );
  1251     ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
  1252     MEM_WRITE_WORD( R_EAX, R_EDX );
  1253     sh4_x86.tstate = TSTATE_NONE;
  1254 :}
  1255 MOV.W Rm, @(R0, Rn) {:  
  1256     load_reg( R_EAX, 0 );
  1257     load_reg( R_ECX, Rn );
  1258     ADD_r32_r32( R_ECX, R_EAX );
  1259     check_walign16( R_EAX );
  1260     MMU_TRANSLATE_WRITE( R_EAX );
  1261     load_reg( R_EDX, Rm );
  1262     MEM_WRITE_WORD( R_EAX, R_EDX );
  1263     sh4_x86.tstate = TSTATE_NONE;
  1264 :}
  1265 MOV.W R0, @(disp, GBR) {:  
  1266     load_spreg( R_EAX, R_GBR );
  1267     ADD_imm32_r32( disp, R_EAX );
  1268     check_walign16( R_EAX );
  1269     MMU_TRANSLATE_WRITE( R_EAX );
  1270     load_reg( R_EDX, 0 );
  1271     MEM_WRITE_WORD( R_EAX, R_EDX );
  1272     sh4_x86.tstate = TSTATE_NONE;
  1273 :}
  1274 MOV.W R0, @(disp, Rn) {:  
  1275     load_reg( R_EAX, Rn );
  1276     ADD_imm32_r32( disp, R_EAX );
  1277     check_walign16( R_EAX );
  1278     MMU_TRANSLATE_WRITE( R_EAX );
  1279     load_reg( R_EDX, 0 );
  1280     MEM_WRITE_WORD( R_EAX, R_EDX );
  1281     sh4_x86.tstate = TSTATE_NONE;
  1282 :}
  1283 MOV.W @Rm, Rn {:  
  1284     load_reg( R_EAX, Rm );
  1285     check_ralign16( R_EAX );
  1286     MMU_TRANSLATE_READ( R_EAX );
  1287     MEM_READ_WORD( R_EAX, R_EAX );
  1288     store_reg( R_EAX, Rn );
  1289     sh4_x86.tstate = TSTATE_NONE;
  1290 :}
  1291 MOV.W @Rm+, Rn {:  
  1292     load_reg( R_EAX, Rm );
  1293     check_ralign16( R_EAX );
  1294     MMU_TRANSLATE_READ( R_EAX );
  1295     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
  1296     MEM_READ_WORD( R_EAX, R_EAX );
  1297     store_reg( R_EAX, Rn );
  1298     sh4_x86.tstate = TSTATE_NONE;
  1299 :}
  1300 MOV.W @(R0, Rm), Rn {:  
  1301     load_reg( R_EAX, 0 );
  1302     load_reg( R_ECX, Rm );
  1303     ADD_r32_r32( R_ECX, R_EAX );
  1304     check_ralign16( R_EAX );
  1305     MMU_TRANSLATE_READ( R_EAX );
  1306     MEM_READ_WORD( R_EAX, R_EAX );
  1307     store_reg( R_EAX, Rn );
  1308     sh4_x86.tstate = TSTATE_NONE;
  1309 :}
  1310 MOV.W @(disp, GBR), R0 {:  
  1311     load_spreg( R_EAX, R_GBR );
  1312     ADD_imm32_r32( disp, R_EAX );
  1313     check_ralign16( R_EAX );
  1314     MMU_TRANSLATE_READ( R_EAX );
  1315     MEM_READ_WORD( R_EAX, R_EAX );
  1316     store_reg( R_EAX, 0 );
  1317     sh4_x86.tstate = TSTATE_NONE;
  1318 :}
  1319 MOV.W @(disp, PC), Rn {:  
  1320     if( sh4_x86.in_delay_slot ) {
  1321 	SLOTILLEGAL();
  1322     } else {
  1323 	// See comments for MOV.L @(disp, PC), Rn
  1324 	uint32_t target = pc + disp + 4;
  1325 	if( IS_IN_ICACHE(target) ) {
  1326 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1327 	    MOV_moff32_EAX( ptr );
  1328 	    MOVSX_r16_r32( R_EAX, R_EAX );
  1329 	} else {
  1330 	    load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1331 	    ADD_sh4r_r32( R_PC, R_EAX );
  1332 	    MMU_TRANSLATE_READ( R_EAX );
  1333 	    MEM_READ_WORD( R_EAX, R_EAX );
   1334 	    sh4_x86.tstate = TSTATE_NONE;
   1335 	}
   1336 	store_reg( R_EAX, Rn );
   1337     }
  1338 :}
  1339 MOV.W @(disp, Rm), R0 {:  
  1340     load_reg( R_EAX, Rm );
  1341     ADD_imm32_r32( disp, R_EAX );
  1342     check_ralign16( R_EAX );
  1343     MMU_TRANSLATE_READ( R_EAX );
  1344     MEM_READ_WORD( R_EAX, R_EAX );
  1345     store_reg( R_EAX, 0 );
  1346     sh4_x86.tstate = TSTATE_NONE;
  1347 :}
  1348 MOVA @(disp, PC), R0 {:  
  1349     if( sh4_x86.in_delay_slot ) {
  1350 	SLOTILLEGAL();
  1351     } else {
  1352 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1353 	ADD_sh4r_r32( R_PC, R_ECX );
  1354 	store_reg( R_ECX, 0 );
   1355 	sh4_x86.tstate = TSTATE_NONE;
   1356     }
  1357 :}
  1358 MOVCA.L R0, @Rn {:  
  1359     load_reg( R_EAX, Rn );
  1360     check_walign32( R_EAX );
  1361     MMU_TRANSLATE_WRITE( R_EAX );
  1362     load_reg( R_EDX, 0 );
  1363     MEM_WRITE_LONG( R_EAX, R_EDX );
  1364     sh4_x86.tstate = TSTATE_NONE;
  1365 :}
  1367 /* Control transfer instructions */
  1368 BF disp {:
  1369     if( sh4_x86.in_delay_slot ) {
  1370 	SLOTILLEGAL();
  1371     } else {
  1372 	sh4vma_t target = disp + pc + 4;
  1373 	JT_rel8( nottaken );
  1374 	exit_block_rel(target, pc+2 );
  1375 	JMP_TARGET(nottaken);
  1376 	return 2;
  1378 :}
  1379 BF/S disp {:
  1380     if( sh4_x86.in_delay_slot ) {
  1381 	SLOTILLEGAL();
  1382     } else {
  1383 	sh4_x86.in_delay_slot = DELAY_PC;
  1384 	if( UNTRANSLATABLE(pc+2) ) {
  1385 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1386 	    JT_rel8(nottaken);
  1387 	    ADD_imm32_r32( disp, R_EAX );
  1388 	    JMP_TARGET(nottaken);
  1389 	    ADD_sh4r_r32( R_PC, R_EAX );
  1390 	    store_spreg( R_EAX, R_NEW_PC );
  1391 	    exit_block_emu(pc+2);
  1392 	    sh4_x86.branch_taken = TRUE;
  1393 	    return 2;
  1394 	} else {
  1395 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1396 		CMP_imm8s_sh4r( 1, R_T );
   1397 		sh4_x86.tstate = TSTATE_E;
   1398 	    }
  1399 	    sh4vma_t target = disp + pc + 4;
  1400 	    OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JT rel32
  1401 	    sh4_translate_instruction(pc+2);
  1402 	    exit_block_rel( target, pc+4 );
  1404 	    // not taken
  1405 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1406 	    sh4_translate_instruction(pc+2);
   1407 	    return 4;
   1408 	}
   1409     }
  1410 :}
  1411 BRA disp {:  
  1412     if( sh4_x86.in_delay_slot ) {
  1413 	SLOTILLEGAL();
  1414     } else {
  1415 	sh4_x86.in_delay_slot = DELAY_PC;
  1416 	sh4_x86.branch_taken = TRUE;
  1417 	if( UNTRANSLATABLE(pc+2) ) {
  1418 	    load_spreg( R_EAX, R_PC );
  1419 	    ADD_imm32_r32( pc + disp + 4 - sh4_x86.block_start_pc, R_EAX );
  1420 	    store_spreg( R_EAX, R_NEW_PC );
  1421 	    exit_block_emu(pc+2);
  1422 	    return 2;
  1423 	} else {
  1424 	    sh4_translate_instruction( pc + 2 );
  1425 	    exit_block_rel( disp + pc + 4, pc+4 );
   1426 	    return 4;
   1427 	}
   1428     }
  1429 :}
  1430 BRAF Rn {:  
  1431     if( sh4_x86.in_delay_slot ) {
  1432 	SLOTILLEGAL();
  1433     } else {
  1434 	load_spreg( R_EAX, R_PC );
  1435 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1436 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1437 	store_spreg( R_EAX, R_NEW_PC );
  1438 	sh4_x86.in_delay_slot = DELAY_PC;
  1439 	sh4_x86.tstate = TSTATE_NONE;
  1440 	sh4_x86.branch_taken = TRUE;
  1441 	if( UNTRANSLATABLE(pc+2) ) {
  1442 	    exit_block_emu(pc+2);
  1443 	    return 2;
  1444 	} else {
  1445 	    sh4_translate_instruction( pc + 2 );
  1446 	    exit_block_newpcset(pc+2);
   1447 	    return 4;
   1448 	}
   1449     }
  1450 :}
  1451 BSR disp {:  
  1452     if( sh4_x86.in_delay_slot ) {
  1453 	SLOTILLEGAL();
  1454     } else {
  1455 	load_spreg( R_EAX, R_PC );
  1456 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1457 	store_spreg( R_EAX, R_PR );
  1458 	sh4_x86.in_delay_slot = DELAY_PC;
  1459 	sh4_x86.branch_taken = TRUE;
  1460 	sh4_x86.tstate = TSTATE_NONE;
  1461 	if( UNTRANSLATABLE(pc+2) ) {
  1462 	    ADD_imm32_r32( disp, R_EAX );
  1463 	    store_spreg( R_EAX, R_NEW_PC );
  1464 	    exit_block_emu(pc+2);
  1465 	    return 2;
  1466 	} else {
  1467 	    sh4_translate_instruction( pc + 2 );
  1468 	    exit_block_rel( disp + pc + 4, pc+4 );
   1469 	    return 4;
   1470 	}
   1471     }
  1472 :}
  1473 BSRF Rn {:  
  1474     if( sh4_x86.in_delay_slot ) {
  1475 	SLOTILLEGAL();
  1476     } else {
  1477 	load_spreg( R_EAX, R_PC );
  1478 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1479 	store_spreg( R_EAX, R_PR );
  1480 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
  1481 	store_spreg( R_EAX, R_NEW_PC );
  1483 	sh4_x86.in_delay_slot = DELAY_PC;
  1484 	sh4_x86.tstate = TSTATE_NONE;
  1485 	sh4_x86.branch_taken = TRUE;
  1486 	if( UNTRANSLATABLE(pc+2) ) {
  1487 	    exit_block_emu(pc+2);
  1488 	    return 2;
  1489 	} else {
  1490 	    sh4_translate_instruction( pc + 2 );
  1491 	    exit_block_newpcset(pc+2);
   1492 	    return 4;
   1493 	}
   1494     }
  1495 :}
  1496 BT disp {:
  1497     if( sh4_x86.in_delay_slot ) {
  1498 	SLOTILLEGAL();
  1499     } else {
  1500 	sh4vma_t target = disp + pc + 4;
  1501 	JF_rel8( nottaken );
  1502 	exit_block_rel(target, pc+2 );
  1503 	JMP_TARGET(nottaken);
   1504 	return 2;
   1505     }
  1506 :}
  1507 BT/S disp {:
  1508     if( sh4_x86.in_delay_slot ) {
  1509 	SLOTILLEGAL();
  1510     } else {
  1511 	sh4_x86.in_delay_slot = DELAY_PC;
  1512 	if( UNTRANSLATABLE(pc+2) ) {
  1513 	    load_imm32( R_EAX, pc + 4 - sh4_x86.block_start_pc );
  1514 	    JF_rel8(nottaken);
  1515 	    ADD_imm32_r32( disp, R_EAX );
  1516 	    JMP_TARGET(nottaken);
  1517 	    ADD_sh4r_r32( R_PC, R_EAX );
  1518 	    store_spreg( R_EAX, R_NEW_PC );
  1519 	    exit_block_emu(pc+2);
  1520 	    sh4_x86.branch_taken = TRUE;
  1521 	    return 2;
  1522 	} else {
  1523 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1524 		CMP_imm8s_sh4r( 1, R_T );
   1525 		sh4_x86.tstate = TSTATE_E;
   1526 	    }
  1527 	    OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JF rel32
  1528 	    sh4_translate_instruction(pc+2);
  1529 	    exit_block_rel( disp + pc + 4, pc+4 );
  1530 	    // not taken
  1531 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1532 	    sh4_translate_instruction(pc+2);
   1533 	    return 4;
   1534 	}
   1535     }
  1536 :}
  1537 JMP @Rn {:  
  1538     if( sh4_x86.in_delay_slot ) {
  1539 	SLOTILLEGAL();
  1540     } else {
  1541 	load_reg( R_ECX, Rn );
  1542 	store_spreg( R_ECX, R_NEW_PC );
  1543 	sh4_x86.in_delay_slot = DELAY_PC;
  1544 	sh4_x86.branch_taken = TRUE;
  1545 	if( UNTRANSLATABLE(pc+2) ) {
  1546 	    exit_block_emu(pc+2);
  1547 	    return 2;
  1548 	} else {
  1549 	    sh4_translate_instruction(pc+2);
  1550 	    exit_block_newpcset(pc+2);
   1551 	    return 4;
   1552 	}
   1553     }
  1554 :}
  1555 JSR @Rn {:  
  1556     if( sh4_x86.in_delay_slot ) {
  1557 	SLOTILLEGAL();
  1558     } else {
  1559 	load_spreg( R_EAX, R_PC );
  1560 	ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
  1561 	store_spreg( R_EAX, R_PR );
  1562 	load_reg( R_ECX, Rn );
  1563 	store_spreg( R_ECX, R_NEW_PC );
  1564 	sh4_x86.in_delay_slot = DELAY_PC;
  1565 	sh4_x86.branch_taken = TRUE;
  1566 	sh4_x86.tstate = TSTATE_NONE;
  1567 	if( UNTRANSLATABLE(pc+2) ) {
  1568 	    exit_block_emu(pc+2);
  1569 	    return 2;
  1570 	} else {
  1571 	    sh4_translate_instruction(pc+2);
  1572 	    exit_block_newpcset(pc+2);
   1573 	    return 4;
   1574 	}
   1575     }
  1576 :}
  1577 RTE {:  
  1578     if( sh4_x86.in_delay_slot ) {
  1579 	SLOTILLEGAL();
  1580     } else {
  1581 	check_priv();
  1582 	load_spreg( R_ECX, R_SPC );
  1583 	store_spreg( R_ECX, R_NEW_PC );
  1584 	load_spreg( R_EAX, R_SSR );
  1585 	call_func1( sh4_write_sr, R_EAX );
  1586 	sh4_x86.in_delay_slot = DELAY_PC;
  1587 	sh4_x86.priv_checked = FALSE;
  1588 	sh4_x86.fpuen_checked = FALSE;
  1589 	sh4_x86.tstate = TSTATE_NONE;
  1590 	sh4_x86.branch_taken = TRUE;
  1591 	if( UNTRANSLATABLE(pc+2) ) {
  1592 	    exit_block_emu(pc+2);
  1593 	    return 2;
  1594 	} else {
  1595 	    sh4_translate_instruction(pc+2);
  1596 	    exit_block_newpcset(pc+2);
   1597 	    return 4;
   1598 	}
   1599     }
  1600 :}
  1601 RTS {:  
  1602     if( sh4_x86.in_delay_slot ) {
  1603 	SLOTILLEGAL();
  1604     } else {
  1605 	load_spreg( R_ECX, R_PR );
  1606 	store_spreg( R_ECX, R_NEW_PC );
  1607 	sh4_x86.in_delay_slot = DELAY_PC;
  1608 	sh4_x86.branch_taken = TRUE;
  1609 	if( UNTRANSLATABLE(pc+2) ) {
  1610 	    exit_block_emu(pc+2);
  1611 	    return 2;
  1612 	} else {
  1613 	    sh4_translate_instruction(pc+2);
  1614 	    exit_block_newpcset(pc+2);
   1615 	    return 4;
   1616 	}
   1617     }
  1618 :}
  1619 TRAPA #imm {:  
  1620     if( sh4_x86.in_delay_slot ) {
  1621 	SLOTILLEGAL();
  1622     } else {
  1623 	load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
  1624 	ADD_r32_sh4r( R_ECX, R_PC );
  1625 	load_imm32( R_EAX, imm );
  1626 	call_func1( sh4_raise_trap, R_EAX );
  1627 	sh4_x86.tstate = TSTATE_NONE;
  1628 	exit_block_pcset(pc);
  1629 	sh4_x86.branch_taken = TRUE;
   1630 	return 2;
   1631     }
  1632 :}
  1633 UNDEF {:  
  1634     if( sh4_x86.in_delay_slot ) {
  1635 	SLOTILLEGAL();
  1636     } else {
  1637 	JMP_exc(EXC_ILLEGAL);
   1638 	return 2;
   1639     }
  1640 :}
  1642 CLRMAC {:  
  1643     XOR_r32_r32(R_EAX, R_EAX);
  1644     store_spreg( R_EAX, R_MACL );
  1645     store_spreg( R_EAX, R_MACH );
  1646     sh4_x86.tstate = TSTATE_NONE;
  1647 :}
  1648 CLRS {:
  1649     CLC();
  1650     SETC_sh4r(R_S);
   1651     sh4_x86.tstate = TSTATE_NONE; // CLC clobbers any carry-cached T; only S was set
  1652 :}
  1653 CLRT {:  
  1654     CLC();
  1655     SETC_t();
  1656     sh4_x86.tstate = TSTATE_C;
  1657 :}
  1658 SETS {:  
  1659     STC();
  1660     SETC_sh4r(R_S);
   1661     sh4_x86.tstate = TSTATE_NONE; // STC clobbers any carry-cached T; only S was set
  1662 :}
  1663 SETT {:  
  1664     STC();
  1665     SETC_t();
  1666     sh4_x86.tstate = TSTATE_C;
  1667 :}
  1669 /* Floating point moves */
  1670 FMOV FRm, FRn {:  
  1671     /* As horrible as this looks, it's actually covering 5 separate cases:
  1672      * 1. 32-bit fr-to-fr (PR=0)
  1673      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1674      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1675      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1676      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1677      */
  1678     check_fpuen();
  1679     load_spreg( R_ECX, R_FPSCR );
  1680     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1681     JNE_rel8(doublesize);
  1682     load_fr( R_EAX, FRm ); // PR=0 branch
  1683     store_fr( R_EAX, FRn );
  1684     JMP_rel8(end);
  1685     JMP_TARGET(doublesize);
  1686     load_dr0( R_EAX, FRm );
  1687     load_dr1( R_ECX, FRm );
  1688     store_dr0( R_EAX, FRn );
  1689     store_dr1( R_ECX, FRn );
  1690     JMP_TARGET(end);
  1691     sh4_x86.tstate = TSTATE_NONE;
  1692 :}
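/* Roughly, in C terms (FR/DR64 are illustrative accessors, not macros
 * defined in this file; the FR/XF bank choice for cases 2-5 happens inside
 * load_dr0/load_dr1 based on the low bit of the register number):
 *
 *     if( !(FPSCR & FPSCR_SZ) )
 *         FR(FRn) = FR(FRm);        // case 1: 32-bit move
 *     else
 *         DR64(FRn) = DR64(FRm);    // cases 2-5: 64-bit pair move
 */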
  1693 FMOV FRm, @Rn {: 
  1694     check_fpuen();
  1695     load_reg( R_EAX, Rn );
  1696     check_walign32( R_EAX );
  1697     MMU_TRANSLATE_WRITE( R_EAX );
  1698     load_spreg( R_EDX, R_FPSCR );
  1699     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1700     JNE_rel8(doublesize);
  1702     load_fr( R_ECX, FRm );
  1703     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1704     JMP_rel8(end);
  1706     JMP_TARGET(doublesize);
  1707     load_dr0( R_ECX, FRm );
  1708     load_dr1( R_EDX, FRm );
  1709     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1710     JMP_TARGET(end);
  1711     sh4_x86.tstate = TSTATE_NONE;
  1712 :}
  1713 FMOV @Rm, FRn {:  
  1714     check_fpuen();
  1715     load_reg( R_EAX, Rm );
  1716     check_ralign32( R_EAX );
  1717     MMU_TRANSLATE_READ( R_EAX );
  1718     load_spreg( R_EDX, R_FPSCR );
  1719     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1720     JNE_rel8(doublesize);
  1722     MEM_READ_LONG( R_EAX, R_EAX );
  1723     store_fr( R_EAX, FRn );
  1724     JMP_rel8(end);
  1726     JMP_TARGET(doublesize);
  1727     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1728     store_dr0( R_ECX, FRn );
  1729     store_dr1( R_EAX, FRn );
  1730     JMP_TARGET(end);
  1731     sh4_x86.tstate = TSTATE_NONE;
  1732 :}
  1733 FMOV FRm, @-Rn {:  
  1734     check_fpuen();
  1735     load_reg( R_EAX, Rn );
  1736     check_walign32( R_EAX );
  1737     load_spreg( R_EDX, R_FPSCR );
  1738     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1739     JNE_rel8(doublesize);
  1741     ADD_imm8s_r32( -4, R_EAX );
  1742     MMU_TRANSLATE_WRITE( R_EAX );
  1743     load_fr( R_ECX, FRm );
  1744     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1745     MEM_WRITE_LONG( R_EAX, R_ECX );
  1746     JMP_rel8(end);
  1748     JMP_TARGET(doublesize);
  1749     ADD_imm8s_r32(-8,R_EAX);
  1750     MMU_TRANSLATE_WRITE( R_EAX );
  1751     load_dr0( R_ECX, FRm );
  1752     load_dr1( R_EDX, FRm );
  1753     ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1754     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1755     JMP_TARGET(end);
  1757     sh4_x86.tstate = TSTATE_NONE;
  1758 :}
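/* Note the ordering above: the decremented address is translated (and may
 * raise a TLB or alignment exception) before the -4/-8 is committed to
 * r[Rn], so a faulting FMOV leaves Rn unmodified and the instruction can
 * be restarted cleanly. */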
  1759 FMOV @Rm+, FRn {:
  1760     check_fpuen();
  1761     load_reg( R_EAX, Rm );
  1762     check_ralign32( R_EAX );
  1763     MMU_TRANSLATE_READ( R_EAX );
  1764     load_spreg( R_EDX, R_FPSCR );
  1765     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1766     JNE_rel8(doublesize);
  1768     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1769     MEM_READ_LONG( R_EAX, R_EAX );
  1770     store_fr( R_EAX, FRn );
  1771     JMP_rel8(end);
  1773     JMP_TARGET(doublesize);
  1774     ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1775     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1776     store_dr0( R_ECX, FRn );
  1777     store_dr1( R_EAX, FRn );
  1778     JMP_TARGET(end);
  1780     sh4_x86.tstate = TSTATE_NONE;
  1781 :}
  1782 FMOV FRm, @(R0, Rn) {:  
  1783     check_fpuen();
  1784     load_reg( R_EAX, Rn );
  1785     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1786     check_walign32( R_EAX );
  1787     MMU_TRANSLATE_WRITE( R_EAX );
  1788     load_spreg( R_EDX, R_FPSCR );
  1789     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1790     JNE_rel8(doublesize);
  1792     load_fr( R_ECX, FRm );
  1793     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1794     JMP_rel8(end);
  1796     JMP_TARGET(doublesize);
  1797     load_dr0( R_ECX, FRm );
  1798     load_dr1( R_EDX, FRm );
  1799     MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1800     JMP_TARGET(end);
  1802     sh4_x86.tstate = TSTATE_NONE;
  1803 :}
  1804 FMOV @(R0, Rm), FRn {:  
  1805     check_fpuen();
  1806     load_reg( R_EAX, Rm );
  1807     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1808     check_ralign32( R_EAX );
  1809     MMU_TRANSLATE_READ( R_EAX );
  1810     load_spreg( R_EDX, R_FPSCR );
  1811     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1812     JNE_rel8(doublesize);
  1814     MEM_READ_LONG( R_EAX, R_EAX );
  1815     store_fr( R_EAX, FRn );
  1816     JMP_rel8(end);
  1818     JMP_TARGET(doublesize);
  1819     MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1820     store_dr0( R_ECX, FRn );
  1821     store_dr1( R_EAX, FRn );
  1822     JMP_TARGET(end);
  1824     sh4_x86.tstate = TSTATE_NONE;
  1825 :}
  1826 FLDI0 FRn {:  /* IFF PR=0 */
  1827     check_fpuen();
  1828     load_spreg( R_ECX, R_FPSCR );
  1829     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1830     JNE_rel8(end);
  1831     XOR_r32_r32( R_EAX, R_EAX ); /* +0.0f */
  1832     store_fr( R_EAX, FRn );
  1833     JMP_TARGET(end);
  1834     sh4_x86.tstate = TSTATE_NONE;
  1835 :}
  1836 FLDI1 FRn {:  /* IFF PR=0 */
  1837     check_fpuen();
  1838     load_spreg( R_ECX, R_FPSCR );
  1839     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1840     JNE_rel8(end);
  1841     load_imm32(R_EAX, 0x3F800000); /* IEEE754 single-precision 1.0 */
  1842     store_fr( R_EAX, FRn );
  1843     JMP_TARGET(end);
  1844     sh4_x86.tstate = TSTATE_NONE;
  1845 :}
  1847 FLOAT FPUL, FRn {:  
  1848     check_fpuen();
  1849     load_spreg( R_ECX, R_FPSCR );
  1850     FILD_sh4r(R_FPUL);
  1851     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1852     JNE_rel8(doubleprec);
  1853     pop_fr( FRn );
  1854     JMP_rel8(end);
  1855     JMP_TARGET(doubleprec);
  1856     pop_dr( FRn );
  1857     JMP_TARGET(end);
  1858     sh4_x86.tstate = TSTATE_NONE;
  1859 :}
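/* FLOAT sketch - integer-to-float conversion via x87 FILD, popped at the
 * precision selected by FPSCR.PR (FR/DR are illustrative accessors):
 *
 *     if( !(FPSCR & FPSCR_PR) ) FR(FRn) = (float)(int32_t)FPUL;
 *     else                      DR(FRn) = (double)(int32_t)FPUL;
 */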
  1860 FTRC FRm, FPUL {:  
  1861     check_fpuen();
  1862     load_spreg( R_ECX, R_FPSCR );
  1863     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1864     JNE_rel8(doubleprec);
  1865     push_fr( FRm );
  1866     JMP_rel8(doop);
  1867     JMP_TARGET(doubleprec);
  1868     push_dr( FRm );
  1869     JMP_TARGET( doop );
  1870     load_imm32( R_ECX, (uint32_t)&max_int );
  1871     FILD_r32ind( R_ECX );
  1872     FCOMIP_st(1);
  1873     JNA_rel8( sat );
  1874     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1875     FILD_r32ind( R_ECX );           // 2
  1876     FCOMIP_st(1);                   // 2
  1877     JAE_rel8( sat2 );            // 2
  1878     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1879     FNSTCW_r32ind( R_EAX );
  1880     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1881     FLDCW_r32ind( R_EDX );
  1882     FISTP_sh4r(R_FPUL);             // 3
  1883     FLDCW_r32ind( R_EAX );
  1884     JMP_rel8(end);             // 2
  1886     JMP_TARGET(sat);
  1887     JMP_TARGET(sat2);
  1888     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1889     store_spreg( R_ECX, R_FPUL );
  1890     FPOP_st();
  1891     JMP_TARGET(end);
  1892     sh4_x86.tstate = TSTATE_NONE;
  1893 :}
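/* FTRC sketch - saturating truncation to a 32-bit integer (FR/DR are
 * illustrative accessors; max_int/min_int are the globals used above):
 *
 *     double d = (FPSCR & FPSCR_PR) ? DR(FRm) : FR(FRm);
 *     if( d >= (double)max_int )      FPUL = max_int;    // JNA path
 *     else if( d <= (double)min_int ) FPUL = min_int;    // JAE path
 *     else                            FPUL = (int32_t)d; // truncation
 *
 * The FNSTCW/FLDCW pair temporarily forces the x87 rounding mode to
 * round-toward-zero (trunc_fcw) for the FISTP, then restores the saved
 * control word. The trailing "// N" comments here and elsewhere appear to
 * be byte counts of the emitted x86 sequences, presumably kept to verify
 * that the rel8 jumps stay in range. */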
  1894 FLDS FRm, FPUL {:  
  1895     check_fpuen();
  1896     load_fr( R_EAX, FRm );
  1897     store_spreg( R_EAX, R_FPUL );
  1898     sh4_x86.tstate = TSTATE_NONE;
  1899 :}
  1900 FSTS FPUL, FRn {:  
  1901     check_fpuen();
  1902     load_spreg( R_EAX, R_FPUL );
  1903     store_fr( R_EAX, FRn );
  1904     sh4_x86.tstate = TSTATE_NONE;
  1905 :}
  1906 FCNVDS FRm, FPUL {:  
  1907     check_fpuen();
  1908     load_spreg( R_ECX, R_FPSCR );
  1909     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1910     JE_rel8(end); // only when PR=1
  1911     push_dr( FRm );
  1912     pop_fpul();
  1913     JMP_TARGET(end);
  1914     sh4_x86.tstate = TSTATE_NONE;
  1915 :}
  1916 FCNVSD FPUL, FRn {:  
  1917     check_fpuen();
  1918     load_spreg( R_ECX, R_FPSCR );
  1919     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1920     JE_rel8(end); // only when PR=1
  1921     push_fpul();
  1922     pop_dr( FRn );
  1923     JMP_TARGET(end);
  1924     sh4_x86.tstate = TSTATE_NONE;
  1925 :}
  1927 /* Floating point instructions */
  1928 FABS FRn {:  
  1929     check_fpuen();
  1930     load_spreg( R_ECX, R_FPSCR );
  1931     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1932     JNE_rel8(doubleprec);
  1933     push_fr(FRn); // 6
  1934     FABS_st0(); // 2
  1935     pop_fr(FRn); //6
  1936     JMP_rel8(end); // 2
  1937     JMP_TARGET(doubleprec);
  1938     push_dr(FRn);
  1939     FABS_st0();
  1940     pop_dr(FRn);
  1941     JMP_TARGET(end);
  1942     sh4_x86.tstate = TSTATE_NONE;
  1943 :}
  1944 FADD FRm, FRn {:  
  1945     check_fpuen();
  1946     load_spreg( R_ECX, R_FPSCR );
  1947     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1948     JNE_rel8(doubleprec);
  1949     push_fr(FRm);
  1950     push_fr(FRn);
  1951     FADDP_st(1);
  1952     pop_fr(FRn);
  1953     JMP_rel8(end);
  1954     JMP_TARGET(doubleprec);
  1955     push_dr(FRm);
  1956     push_dr(FRn);
  1957     FADDP_st(1);
  1958     pop_dr(FRn);
  1959     JMP_TARGET(end);
  1960     sh4_x86.tstate = TSTATE_NONE;
  1961 :}
  1962 FDIV FRm, FRn {:  
  1963     check_fpuen();
  1964     load_spreg( R_ECX, R_FPSCR );
  1965     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1966     JNE_rel8(doubleprec);
  1967     push_fr(FRn);
  1968     push_fr(FRm);
  1969     FDIVP_st(1);
  1970     pop_fr(FRn);
  1971     JMP_rel8(end);
  1972     JMP_TARGET(doubleprec);
  1973     push_dr(FRn);
  1974     push_dr(FRm);
  1975     FDIVP_st(1);
  1976     pop_dr(FRn);
  1977     JMP_TARGET(end);
  1978     sh4_x86.tstate = TSTATE_NONE;
  1979 :}
  1980 FMAC FR0, FRm, FRn {:  
  1981     check_fpuen();
  1982     load_spreg( R_ECX, R_FPSCR );
  1983     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1984     JNE_rel8(doubleprec);
  1985     push_fr( 0 );
  1986     push_fr( FRm );
  1987     FMULP_st(1);
  1988     push_fr( FRn );
  1989     FADDP_st(1);
  1990     pop_fr( FRn );
  1991     JMP_rel8(end);
  1992     JMP_TARGET(doubleprec);
  1993     push_dr( 0 );
  1994     push_dr( FRm );
  1995     FMULP_st(1);
  1996     push_dr( FRn );
  1997     FADDP_st(1);
  1998     pop_dr( FRn );
  1999     JMP_TARGET(end);
  2000     sh4_x86.tstate = TSTATE_NONE;
  2001 :}
  2003 FMUL FRm, FRn {:  
  2004     check_fpuen();
  2005     load_spreg( R_ECX, R_FPSCR );
  2006     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2007     JNE_rel8(doubleprec);
  2008     push_fr(FRm);
  2009     push_fr(FRn);
  2010     FMULP_st(1);
  2011     pop_fr(FRn);
  2012     JMP_rel8(end);
  2013     JMP_TARGET(doubleprec);
  2014     push_dr(FRm);
  2015     push_dr(FRn);
  2016     FMULP_st(1);
  2017     pop_dr(FRn);
  2018     JMP_TARGET(end);
  2019     sh4_x86.tstate = TSTATE_NONE;
  2020 :}
  2021 FNEG FRn {:  
  2022     check_fpuen();
  2023     load_spreg( R_ECX, R_FPSCR );
  2024     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2025     JNE_rel8(doubleprec);
  2026     push_fr(FRn);
  2027     FCHS_st0();
  2028     pop_fr(FRn);
  2029     JMP_rel8(end);
  2030     JMP_TARGET(doubleprec);
  2031     push_dr(FRn);
  2032     FCHS_st0();
  2033     pop_dr(FRn);
  2034     JMP_TARGET(end);
  2035     sh4_x86.tstate = TSTATE_NONE;
  2036 :}
  2037 FSRRA FRn {:  
  2038     check_fpuen();
  2039     load_spreg( R_ECX, R_FPSCR );
  2040     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2041     JNE_rel8(end); // PR=0 only
  2042     FLD1_st0();
  2043     push_fr(FRn);
  2044     FSQRT_st0();
  2045     FDIVP_st(1);
  2046     pop_fr(FRn);
  2047     JMP_TARGET(end);
  2048     sh4_x86.tstate = TSTATE_NONE;
  2049 :}
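/* FSRRA sketch: single-precision reciprocal square root,
 *
 *     FR(FRn) = 1.0f / sqrtf( FR(FRn) );
 *
 * computed exactly with FLD1/FSQRT/FDIVP, whereas the hardware instruction
 * only guarantees an approximation. */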
  2050 FSQRT FRn {:  
  2051     check_fpuen();
  2052     load_spreg( R_ECX, R_FPSCR );
  2053     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2054     JNE_rel8(doubleprec);
  2055     push_fr(FRn);
  2056     FSQRT_st0();
  2057     pop_fr(FRn);
  2058     JMP_rel8(end);
  2059     JMP_TARGET(doubleprec);
  2060     push_dr(FRn);
  2061     FSQRT_st0();
  2062     pop_dr(FRn);
  2063     JMP_TARGET(end);
  2064     sh4_x86.tstate = TSTATE_NONE;
  2065 :}
  2066 FSUB FRm, FRn {:  
  2067     check_fpuen();
  2068     load_spreg( R_ECX, R_FPSCR );
  2069     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2070     JNE_rel8(doubleprec);
  2071     push_fr(FRn);
  2072     push_fr(FRm);
  2073     FSUBP_st(1);
  2074     pop_fr(FRn);
  2075     JMP_rel8(end);
  2076     JMP_TARGET(doubleprec);
  2077     push_dr(FRn);
  2078     push_dr(FRm);
  2079     FSUBP_st(1);
  2080     pop_dr(FRn);
  2081     JMP_TARGET(end);
  2082     sh4_x86.tstate = TSTATE_NONE;
  2083 :}
  2085 FCMP/EQ FRm, FRn {:  
  2086     check_fpuen();
  2087     load_spreg( R_ECX, R_FPSCR );
  2088     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2089     JNE_rel8(doubleprec);
  2090     push_fr(FRm);
  2091     push_fr(FRn);
  2092     JMP_rel8(end);
  2093     JMP_TARGET(doubleprec);
  2094     push_dr(FRm);
  2095     push_dr(FRn);
  2096     JMP_TARGET(end);
  2097     FCOMIP_st(1);
  2098     SETE_t();
  2099     FPOP_st();
  2100     sh4_x86.tstate = TSTATE_NONE;
  2101 :}
  2102 FCMP/GT FRm, FRn {:  
  2103     check_fpuen();
  2104     load_spreg( R_ECX, R_FPSCR );
  2105     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2106     JNE_rel8(doubleprec);
  2107     push_fr(FRm);
  2108     push_fr(FRn);
  2109     JMP_rel8(end);
  2110     JMP_TARGET(doubleprec);
  2111     push_dr(FRm);
  2112     push_dr(FRn);
  2113     JMP_TARGET(end);
  2114     FCOMIP_st(1);
  2115     SETA_t();
  2116     FPOP_st();
  2117     sh4_x86.tstate = TSTATE_NONE;
  2118 :}
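/* Both FCMP forms funnel into FCOMIP + SETcc: with FRn in ST(0) and FRm in
 * ST(1), SETE yields T = (FRn == FRm) and SETA yields T = (FRn > FRm).
 * Caveat: FCOMIP reports unordered operands as ZF=PF=CF=1, so a NaN would
 * leave T set in the EQ case, whereas SH4 hardware gives T=0; SETA is
 * unaffected since it requires CF=0. */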
  2120 FSCA FPUL, FRn {:  
  2121     check_fpuen();
  2122     load_spreg( R_ECX, R_FPSCR );
  2123     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2124     JNE_rel8(doubleprec);
  2125     LEA_sh4r_r32( REG_OFFSET(fr[0][FRn&0x0E]), R_ECX );
  2126     load_spreg( R_EDX, R_FPUL );
  2127     call_func2( sh4_fsca, R_EDX, R_ECX );
  2128     JMP_TARGET(doubleprec);
  2129     sh4_x86.tstate = TSTATE_NONE;
  2130 :}
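/* FSCA sketch - FPUL is interpreted as a fixed-point angle where 0x10000
 * corresponds to one full turn; sh4_fsca writes the sine/cosine pair into
 * the even-aligned slot selected by the &0x0E mask above. Roughly (FR is
 * an illustrative accessor):
 *
 *     float a = (FPUL & 0xFFFF) * (2.0 * M_PI / 65536.0);
 *     FR(FRn)   = sinf(a);
 *     FR(FRn+1) = cosf(a);
 *
 * As with FIPR/FTRV below, PR=1 skips the operation entirely. */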
  2131 FIPR FVm, FVn {:  
  2132     check_fpuen();
  2133     load_spreg( R_ECX, R_FPSCR );
  2134     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2135     JNE_rel8( doubleprec );
  2137     push_fr( FVm<<2 );
  2138     push_fr( FVn<<2 );
  2139     FMULP_st(1);
  2140     push_fr( (FVm<<2)+1);
  2141     push_fr( (FVn<<2)+1);
  2142     FMULP_st(1);
  2143     FADDP_st(1);
  2144     push_fr( (FVm<<2)+2);
  2145     push_fr( (FVn<<2)+2);
  2146     FMULP_st(1);
  2147     FADDP_st(1);
  2148     push_fr( (FVm<<2)+3);
  2149     push_fr( (FVn<<2)+3);
  2150     FMULP_st(1);
  2151     FADDP_st(1);
  2152     pop_fr( (FVn<<2)+3);
  2153     JMP_TARGET(doubleprec);
  2154     sh4_x86.tstate = TSTATE_NONE;
  2155 :}
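/* FIPR sketch - 4-element dot product accumulated on the x87 stack, with
 * the result replacing the last element of FVn:
 *
 *     FR(FVn*4+3) = FR(FVm*4+0)*FR(FVn*4+0) + FR(FVm*4+1)*FR(FVn*4+1)
 *                 + FR(FVm*4+2)*FR(FVn*4+2) + FR(FVm*4+3)*FR(FVn*4+3);
 */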
  2156 FTRV XMTRX, FVn {:  
  2157     check_fpuen();
  2158     load_spreg( R_ECX, R_FPSCR );
  2159     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2160     JNE_rel8( doubleprec );
  2161     LEA_sh4r_r32( REG_OFFSET(fr[0][FVn<<2]), R_EDX );
  2162     call_func1( sh4_ftrv, R_EDX );  // 12
  2163     JMP_TARGET(doubleprec);
  2164     sh4_x86.tstate = TSTATE_NONE;
  2165 :}
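/* FTRV sketch: FVn = XMTRX * FVn, a 4x4 matrix by 4-vector multiply; the
 * arithmetic lives in the sh4_ftrv helper, which receives a pointer to the
 * vector's first element. */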
  2167 FRCHG {:  
  2168     check_fpuen();
  2169     load_spreg( R_ECX, R_FPSCR );
  2170     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2171     store_spreg( R_ECX, R_FPSCR );
  2172     call_func0( sh4_switch_fr_banks );
  2173     sh4_x86.tstate = TSTATE_NONE;
  2174 :}
  2175 FSCHG {:  
  2176     check_fpuen();
  2177     load_spreg( R_ECX, R_FPSCR );
  2178     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2179     store_spreg( R_ECX, R_FPSCR );
  2180     sh4_x86.tstate = TSTATE_NONE;
  2181 :}
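/* FRCHG toggles FPSCR.FR (the active register bank) and FSCHG toggles
 * FPSCR.SZ (32- vs 64-bit transfer size). FRCHG additionally calls
 * sh4_switch_fr_banks() to physically swap the bank contents, so the rest
 * of the translated FP code can keep addressing a fixed bank; FSCHG only
 * needs the flag bit flipped. */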
  2183 /* Processor control instructions */
  2184 LDC Rm, SR {:
  2185     if( sh4_x86.in_delay_slot ) {
  2186 	SLOTILLEGAL();
  2187     } else {
  2188 	check_priv();
  2189 	load_reg( R_EAX, Rm );
  2190 	call_func1( sh4_write_sr, R_EAX );
  2191 	sh4_x86.priv_checked = FALSE;
  2192 	sh4_x86.fpuen_checked = FALSE;
  2193 	sh4_x86.tstate = TSTATE_NONE;
  2194     }
  2195 :}
  2196 LDC Rm, GBR {: 
  2197     load_reg( R_EAX, Rm );
  2198     store_spreg( R_EAX, R_GBR );
  2199 :}
  2200 LDC Rm, VBR {:  
  2201     check_priv();
  2202     load_reg( R_EAX, Rm );
  2203     store_spreg( R_EAX, R_VBR );
  2204     sh4_x86.tstate = TSTATE_NONE;
  2205 :}
  2206 LDC Rm, SSR {:  
  2207     check_priv();
  2208     load_reg( R_EAX, Rm );
  2209     store_spreg( R_EAX, R_SSR );
  2210     sh4_x86.tstate = TSTATE_NONE;
  2211 :}
  2212 LDC Rm, SGR {:  
  2213     check_priv();
  2214     load_reg( R_EAX, Rm );
  2215     store_spreg( R_EAX, R_SGR );
  2216     sh4_x86.tstate = TSTATE_NONE;
  2217 :}
  2218 LDC Rm, SPC {:  
  2219     check_priv();
  2220     load_reg( R_EAX, Rm );
  2221     store_spreg( R_EAX, R_SPC );
  2222     sh4_x86.tstate = TSTATE_NONE;
  2223 :}
  2224 LDC Rm, DBR {:  
  2225     check_priv();
  2226     load_reg( R_EAX, Rm );
  2227     store_spreg( R_EAX, R_DBR );
  2228     sh4_x86.tstate = TSTATE_NONE;
  2229 :}
  2230 LDC Rm, Rn_BANK {:  
  2231     check_priv();
  2232     load_reg( R_EAX, Rm );
  2233     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2234     sh4_x86.tstate = TSTATE_NONE;
  2235 :}
  2236 LDC.L @Rm+, GBR {:  
  2237     load_reg( R_EAX, Rm );
  2238     check_ralign32( R_EAX );
  2239     MMU_TRANSLATE_READ( R_EAX );
  2240     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2241     MEM_READ_LONG( R_EAX, R_EAX );
  2242     store_spreg( R_EAX, R_GBR );
  2243     sh4_x86.tstate = TSTATE_NONE;
  2244 :}
  2245 LDC.L @Rm+, SR {:
  2246     if( sh4_x86.in_delay_slot ) {
  2247 	SLOTILLEGAL();
  2248     } else {
  2249 	check_priv();
  2250 	load_reg( R_EAX, Rm );
  2251 	check_ralign32( R_EAX );
  2252 	MMU_TRANSLATE_READ( R_EAX );
  2253 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2254 	MEM_READ_LONG( R_EAX, R_EAX );
  2255 	call_func1( sh4_write_sr, R_EAX );
  2256 	sh4_x86.priv_checked = FALSE;
  2257 	sh4_x86.fpuen_checked = FALSE;
  2258 	sh4_x86.tstate = TSTATE_NONE;
  2259     }
  2260 :}
  2261 LDC.L @Rm+, VBR {:  
  2262     check_priv();
  2263     load_reg( R_EAX, Rm );
  2264     check_ralign32( R_EAX );
  2265     MMU_TRANSLATE_READ( R_EAX );
  2266     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2267     MEM_READ_LONG( R_EAX, R_EAX );
  2268     store_spreg( R_EAX, R_VBR );
  2269     sh4_x86.tstate = TSTATE_NONE;
  2270 :}
  2271 LDC.L @Rm+, SSR {:
  2272     check_priv();
  2273     load_reg( R_EAX, Rm );
  2274     check_ralign32( R_EAX );
  2275     MMU_TRANSLATE_READ( R_EAX );
  2276     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2277     MEM_READ_LONG( R_EAX, R_EAX );
  2278     store_spreg( R_EAX, R_SSR );
  2279     sh4_x86.tstate = TSTATE_NONE;
  2280 :}
  2281 LDC.L @Rm+, SGR {:  
  2282     check_priv();
  2283     load_reg( R_EAX, Rm );
  2284     check_ralign32( R_EAX );
  2285     MMU_TRANSLATE_READ( R_EAX );
  2286     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2287     MEM_READ_LONG( R_EAX, R_EAX );
  2288     store_spreg( R_EAX, R_SGR );
  2289     sh4_x86.tstate = TSTATE_NONE;
  2290 :}
  2291 LDC.L @Rm+, SPC {:  
  2292     check_priv();
  2293     load_reg( R_EAX, Rm );
  2294     check_ralign32( R_EAX );
  2295     MMU_TRANSLATE_READ( R_EAX );
  2296     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2297     MEM_READ_LONG( R_EAX, R_EAX );
  2298     store_spreg( R_EAX, R_SPC );
  2299     sh4_x86.tstate = TSTATE_NONE;
  2300 :}
  2301 LDC.L @Rm+, DBR {:  
  2302     check_priv();
  2303     load_reg( R_EAX, Rm );
  2304     check_ralign32( R_EAX );
  2305     MMU_TRANSLATE_READ( R_EAX );
  2306     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2307     MEM_READ_LONG( R_EAX, R_EAX );
  2308     store_spreg( R_EAX, R_DBR );
  2309     sh4_x86.tstate = TSTATE_NONE;
  2310 :}
  2311 LDC.L @Rm+, Rn_BANK {:  
  2312     check_priv();
  2313     load_reg( R_EAX, Rm );
  2314     check_ralign32( R_EAX );
  2315     MMU_TRANSLATE_READ( R_EAX );
  2316     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2317     MEM_READ_LONG( R_EAX, R_EAX );
  2318     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2319     sh4_x86.tstate = TSTATE_NONE;
  2320 :}
  2321 LDS Rm, FPSCR {:
  2322     check_fpuen();
  2323     load_reg( R_EAX, Rm );
  2324     call_func1( sh4_write_fpscr, R_EAX );
  2325     sh4_x86.tstate = TSTATE_NONE;
  2326 :}
  2327 LDS.L @Rm+, FPSCR {:  
  2328     check_fpuen();
  2329     load_reg( R_EAX, Rm );
  2330     check_ralign32( R_EAX );
  2331     MMU_TRANSLATE_READ( R_EAX );
  2332     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2333     MEM_READ_LONG( R_EAX, R_EAX );
  2334     call_func1( sh4_write_fpscr, R_EAX );
  2335     sh4_x86.tstate = TSTATE_NONE;
  2336 :}
  2337 LDS Rm, FPUL {:  
  2338     check_fpuen();
  2339     load_reg( R_EAX, Rm );
  2340     store_spreg( R_EAX, R_FPUL );
  2341 :}
  2342 LDS.L @Rm+, FPUL {:  
  2343     check_fpuen();
  2344     load_reg( R_EAX, Rm );
  2345     check_ralign32( R_EAX );
  2346     MMU_TRANSLATE_READ( R_EAX );
  2347     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2348     MEM_READ_LONG( R_EAX, R_EAX );
  2349     store_spreg( R_EAX, R_FPUL );
  2350     sh4_x86.tstate = TSTATE_NONE;
  2351 :}
  2352 LDS Rm, MACH {: 
  2353     load_reg( R_EAX, Rm );
  2354     store_spreg( R_EAX, R_MACH );
  2355 :}
  2356 LDS.L @Rm+, MACH {:  
  2357     load_reg( R_EAX, Rm );
  2358     check_ralign32( R_EAX );
  2359     MMU_TRANSLATE_READ( R_EAX );
  2360     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2361     MEM_READ_LONG( R_EAX, R_EAX );
  2362     store_spreg( R_EAX, R_MACH );
  2363     sh4_x86.tstate = TSTATE_NONE;
  2364 :}
  2365 LDS Rm, MACL {:  
  2366     load_reg( R_EAX, Rm );
  2367     store_spreg( R_EAX, R_MACL );
  2368 :}
  2369 LDS.L @Rm+, MACL {:  
  2370     load_reg( R_EAX, Rm );
  2371     check_ralign32( R_EAX );
  2372     MMU_TRANSLATE_READ( R_EAX );
  2373     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2374     MEM_READ_LONG( R_EAX, R_EAX );
  2375     store_spreg( R_EAX, R_MACL );
  2376     sh4_x86.tstate = TSTATE_NONE;
  2377 :}
  2378 LDS Rm, PR {:  
  2379     load_reg( R_EAX, Rm );
  2380     store_spreg( R_EAX, R_PR );
  2381 :}
  2382 LDS.L @Rm+, PR {:  
  2383     load_reg( R_EAX, Rm );
  2384     check_ralign32( R_EAX );
  2385     MMU_TRANSLATE_READ( R_EAX );
  2386     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2387     MEM_READ_LONG( R_EAX, R_EAX );
  2388     store_spreg( R_EAX, R_PR );
  2389     sh4_x86.tstate = TSTATE_NONE;
  2390 :}
  2391 LDTLB {:  
  2392     call_func0( MMU_ldtlb );
  2393 :}
  2394 OCBI @Rn {:  :}
  2395 OCBP @Rn {:  :}
  2396 OCBWB @Rn {:  :}
  2397 PREF @Rn {:
  2398     load_reg( R_EAX, Rn );
  2399     MOV_r32_r32( R_EAX, R_ECX );
  2400     AND_imm32_r32( 0xFC000000, R_EAX );
  2401     CMP_imm32_r32( 0xE0000000, R_EAX );
  2402     JNE_rel8(end);
  2403     call_func1( sh4_flush_store_queue, R_ECX );
  2404     TEST_r32_r32( R_EAX, R_EAX );
  2405     JE_exc(-1);
  2406     JMP_TARGET(end);
  2407     sh4_x86.tstate = TSTATE_NONE;
  2408 :}
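/* PREF sketch: only store-queue addresses trigger any work; everything
 * else is treated as a no-op prefetch hint. The mask test matches the
 * 0xE0000000-0xE3FFFFFF region:
 *
 *     if( (Rn & 0xFC000000) == 0xE0000000 ) {
 *         // a zero return appears to mean the callee already raised an
 *         // exception, hence the JE_exc(-1) above
 *         if( sh4_flush_store_queue(Rn) == 0 )
 *             exit_via_exception();   // hypothetical name for the JE_exc path
 *     }
 */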
  2409 SLEEP {: 
  2410     check_priv();
  2411     call_func0( sh4_sleep );
  2412     sh4_x86.tstate = TSTATE_NONE;
  2413     sh4_x86.in_delay_slot = DELAY_NONE;
  2414     return 2;
  2415 :}
  2416 STC SR, Rn {:
  2417     check_priv();
  2418     call_func0(sh4_read_sr);
  2419     store_reg( R_EAX, Rn );
  2420     sh4_x86.tstate = TSTATE_NONE;
  2421 :}
  2422 STC GBR, Rn {:  
  2423     load_spreg( R_EAX, R_GBR );
  2424     store_reg( R_EAX, Rn );
  2425 :}
  2426 STC VBR, Rn {:  
  2427     check_priv();
  2428     load_spreg( R_EAX, R_VBR );
  2429     store_reg( R_EAX, Rn );
  2430     sh4_x86.tstate = TSTATE_NONE;
  2431 :}
  2432 STC SSR, Rn {:  
  2433     check_priv();
  2434     load_spreg( R_EAX, R_SSR );
  2435     store_reg( R_EAX, Rn );
  2436     sh4_x86.tstate = TSTATE_NONE;
  2437 :}
  2438 STC SPC, Rn {:  
  2439     check_priv();
  2440     load_spreg( R_EAX, R_SPC );
  2441     store_reg( R_EAX, Rn );
  2442     sh4_x86.tstate = TSTATE_NONE;
  2443 :}
  2444 STC SGR, Rn {:  
  2445     check_priv();
  2446     load_spreg( R_EAX, R_SGR );
  2447     store_reg( R_EAX, Rn );
  2448     sh4_x86.tstate = TSTATE_NONE;
  2449 :}
  2450 STC DBR, Rn {:  
  2451     check_priv();
  2452     load_spreg( R_EAX, R_DBR );
  2453     store_reg( R_EAX, Rn );
  2454     sh4_x86.tstate = TSTATE_NONE;
  2455 :}
  2456 STC Rm_BANK, Rn {:
  2457     check_priv();
  2458     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2459     store_reg( R_EAX, Rn );
  2460     sh4_x86.tstate = TSTATE_NONE;
  2461 :}
  2462 STC.L SR, @-Rn {:
  2463     check_priv();
  2464     load_reg( R_EAX, Rn );
  2465     check_walign32( R_EAX );
  2466     ADD_imm8s_r32( -4, R_EAX );
  2467     MMU_TRANSLATE_WRITE( R_EAX );
  2468     PUSH_realigned_r32( R_EAX );
  2469     call_func0( sh4_read_sr );
  2470     POP_realigned_r32( R_ECX );
  2471     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2472     MEM_WRITE_LONG( R_ECX, R_EAX );
  2473     sh4_x86.tstate = TSTATE_NONE;
  2474 :}
  2475 STC.L VBR, @-Rn {:  
  2476     check_priv();
  2477     load_reg( R_EAX, Rn );
  2478     check_walign32( R_EAX );
  2479     ADD_imm8s_r32( -4, R_EAX );
  2480     MMU_TRANSLATE_WRITE( R_EAX );
  2481     load_spreg( R_EDX, R_VBR );
  2482     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2483     MEM_WRITE_LONG( R_EAX, R_EDX );
  2484     sh4_x86.tstate = TSTATE_NONE;
  2485 :}
  2486 STC.L SSR, @-Rn {:  
  2487     check_priv();
  2488     load_reg( R_EAX, Rn );
  2489     check_walign32( R_EAX );
  2490     ADD_imm8s_r32( -4, R_EAX );
  2491     MMU_TRANSLATE_WRITE( R_EAX );
  2492     load_spreg( R_EDX, R_SSR );
  2493     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2494     MEM_WRITE_LONG( R_EAX, R_EDX );
  2495     sh4_x86.tstate = TSTATE_NONE;
  2496 :}
  2497 STC.L SPC, @-Rn {:
  2498     check_priv();
  2499     load_reg( R_EAX, Rn );
  2500     check_walign32( R_EAX );
  2501     ADD_imm8s_r32( -4, R_EAX );
  2502     MMU_TRANSLATE_WRITE( R_EAX );
  2503     load_spreg( R_EDX, R_SPC );
  2504     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2505     MEM_WRITE_LONG( R_EAX, R_EDX );
  2506     sh4_x86.tstate = TSTATE_NONE;
  2507 :}
  2508 STC.L SGR, @-Rn {:  
  2509     check_priv();
  2510     load_reg( R_EAX, Rn );
  2511     check_walign32( R_EAX );
  2512     ADD_imm8s_r32( -4, R_EAX );
  2513     MMU_TRANSLATE_WRITE( R_EAX );
  2514     load_spreg( R_EDX, R_SGR );
  2515     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2516     MEM_WRITE_LONG( R_EAX, R_EDX );
  2517     sh4_x86.tstate = TSTATE_NONE;
  2518 :}
  2519 STC.L DBR, @-Rn {:  
  2520     check_priv();
  2521     load_reg( R_EAX, Rn );
  2522     check_walign32( R_EAX );
  2523     ADD_imm8s_r32( -4, R_EAX );
  2524     MMU_TRANSLATE_WRITE( R_EAX );
  2525     load_spreg( R_EDX, R_DBR );
  2526     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2527     MEM_WRITE_LONG( R_EAX, R_EDX );
  2528     sh4_x86.tstate = TSTATE_NONE;
  2529 :}
  2530 STC.L Rm_BANK, @-Rn {:  
  2531     check_priv();
  2532     load_reg( R_EAX, Rn );
  2533     check_walign32( R_EAX );
  2534     ADD_imm8s_r32( -4, R_EAX );
  2535     MMU_TRANSLATE_WRITE( R_EAX );
  2536     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2537     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2538     MEM_WRITE_LONG( R_EAX, R_EDX );
  2539     sh4_x86.tstate = TSTATE_NONE;
  2540 :}
  2541 STC.L GBR, @-Rn {:  
  2542     load_reg( R_EAX, Rn );
  2543     check_walign32( R_EAX );
  2544     ADD_imm8s_r32( -4, R_EAX );
  2545     MMU_TRANSLATE_WRITE( R_EAX );
  2546     load_spreg( R_EDX, R_GBR );
  2547     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2548     MEM_WRITE_LONG( R_EAX, R_EDX );
  2549     sh4_x86.tstate = TSTATE_NONE;
  2550 :}
  2551 STS FPSCR, Rn {:  
  2552     check_fpuen();
  2553     load_spreg( R_EAX, R_FPSCR );
  2554     store_reg( R_EAX, Rn );
  2555 :}
  2556 STS.L FPSCR, @-Rn {:  
  2557     check_fpuen();
  2558     load_reg( R_EAX, Rn );
  2559     check_walign32( R_EAX );
  2560     ADD_imm8s_r32( -4, R_EAX );
  2561     MMU_TRANSLATE_WRITE( R_EAX );
  2562     load_spreg( R_EDX, R_FPSCR );
  2563     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2564     MEM_WRITE_LONG( R_EAX, R_EDX );
  2565     sh4_x86.tstate = TSTATE_NONE;
  2566 :}
  2567 STS FPUL, Rn {:  
  2568     check_fpuen();
  2569     load_spreg( R_EAX, R_FPUL );
  2570     store_reg( R_EAX, Rn );
  2571 :}
  2572 STS.L FPUL, @-Rn {:  
  2573     check_fpuen();
  2574     load_reg( R_EAX, Rn );
  2575     check_walign32( R_EAX );
  2576     ADD_imm8s_r32( -4, R_EAX );
  2577     MMU_TRANSLATE_WRITE( R_EAX );
  2578     load_spreg( R_EDX, R_FPUL );
  2579     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2580     MEM_WRITE_LONG( R_EAX, R_EDX );
  2581     sh4_x86.tstate = TSTATE_NONE;
  2582 :}
  2583 STS MACH, Rn {:  
  2584     load_spreg( R_EAX, R_MACH );
  2585     store_reg( R_EAX, Rn );
  2586 :}
  2587 STS.L MACH, @-Rn {:  
  2588     load_reg( R_EAX, Rn );
  2589     check_walign32( R_EAX );
  2590     ADD_imm8s_r32( -4, R_EAX );
  2591     MMU_TRANSLATE_WRITE( R_EAX );
  2592     load_spreg( R_EDX, R_MACH );
  2593     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2594     MEM_WRITE_LONG( R_EAX, R_EDX );
  2595     sh4_x86.tstate = TSTATE_NONE;
  2596 :}
  2597 STS MACL, Rn {:  
  2598     load_spreg( R_EAX, R_MACL );
  2599     store_reg( R_EAX, Rn );
  2600 :}
  2601 STS.L MACL, @-Rn {:  
  2602     load_reg( R_EAX, Rn );
  2603     check_walign32( R_EAX );
  2604     ADD_imm8s_r32( -4, R_EAX );
  2605     MMU_TRANSLATE_WRITE( R_EAX );
  2606     load_spreg( R_EDX, R_MACL );
  2607     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2608     MEM_WRITE_LONG( R_EAX, R_EDX );
  2609     sh4_x86.tstate = TSTATE_NONE;
  2610 :}
  2611 STS PR, Rn {:  
  2612     load_spreg( R_EAX, R_PR );
  2613     store_reg( R_EAX, Rn );
  2614 :}
  2615 STS.L PR, @-Rn {:  
  2616     load_reg( R_EAX, Rn );
  2617     check_walign32( R_EAX );
  2618     ADD_imm8s_r32( -4, R_EAX );
  2619     MMU_TRANSLATE_WRITE( R_EAX );
  2620     load_spreg( R_EDX, R_PR );
  2621     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2622     MEM_WRITE_LONG( R_EAX, R_EDX );
  2623     sh4_x86.tstate = TSTATE_NONE;
  2624 :}
  2626 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2627 %%
  2628     sh4_x86.in_delay_slot = DELAY_NONE;
  2629     return 0;