lxdream/src/sh4/sh4x86.in
changeset 591:7b9612fd2395
author      nkeynes
date        Thu Jan 17 10:17:32 2008 +0000
last change Change recovery table to offset rather than pointer (so as to survive block moves)
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096

struct backpatch_record {
    uint32_t *fixup_addr;
    uint32_t fixup_icount;
    uint32_t exc_code;
};

#define MAX_RECOVERY_SIZE 2048

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2
/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
    struct xlat_recovery_record recovery_list[MAX_RECOVERY_SIZE];
    uint32_t recovery_posn;
};

#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3
/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); OP(rel8); \
    MARK_JMP(rel8,label)
/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    MARK_JMP(rel8, label)
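/* Note (illustrative): the TSTATE_* values above are x86 condition-code
 * numbers, so 0x70+tstate is the matching Jcc rel8 opcode. For example,
 * after CMP/EQ leaves tstate == TSTATE_E (4), JT_rel8() emits 0x74 (JE),
 * while JF_rel8() flips the low bit and emits 0x75 (JNE). When tstate is
 * unknown (TSTATE_NONE) the macros first compare sh4r.t against 1 and
 * branch on (in)equality instead. */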
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

void sh4_x86_init()
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
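/* Sketch (assumption - the consumer lives in the block epilogue emitted
 * elsewhere, not in this file): each record names a 32-bit hole in the
 * emitted code (fixup_addr), the instruction count at which the fault
 * occurs (fixup_icount), and the SH4 exception to raise (exc_code). At
 * end-of-block the list would be walked and each hole patched with the
 * displacement to a stub that raises that exception. */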
void sh4_x86_add_recovery( uint32_t pc )
{
    xlat_recovery[xlat_recovery_posn].xlat_pc = (uintptr_t)xlat_output;
    xlat_recovery[xlat_recovery_posn].sh4_icount = (pc - sh4_x86.block_start_pc)>>1;
    xlat_recovery_posn++;
}
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
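/* Worked encoding example (illustrative): load_reg(R_ECX, 2), with ECX
 * being x86 register number 1, emits 8B 4D nn - opcode 8B (MOV r32,r/m32)
 * and ModRM 0x45+(1<<3) = 0x4D, i.e. mod=01 reg=ECX rm=EBP with 8-bit
 * displacement nn = REG_OFFSET(r[2]). EBP is assumed to hold the address
 * of the sh4r register block throughout the generated code. */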
static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}
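/* Example (illustrative): load_imm32(R_EAX, 0x12345678), with EAX being
 * register number 0, emits B8 78 56 34 12 - the one-byte MOV r32,imm32
 * opcode B8+reg followed by the little-endian immediate. */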
/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
static inline void load_fr( int bankreg, int x86reg, int frm )
{
    OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
}

/**
 * Store the value in an integer x86 register into an FR register
 * (single-precision floating point), eg for register-to-register moves
 */
static inline void store_fr( int bankreg, int x86reg, int frn )
{
    OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
}
/**
 * Load a pointer to the back FP bank into the specified x86 register. The
 * bankreg must have been previously loaded with FPSCR.
 * NB: 12 bytes
 */
static inline void load_xf_bank( int bankreg )
{
    NOT_r32( bankreg );
    SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
}
/**
 * Update the fr_bank pointer based on the current fpscr value.
 */
static inline void update_fr_bank( int fpscrreg )
{
    SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
}
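/* Worked equation (illustrative, assuming sh4r.fr is laid out as two
 * consecutive 64-byte banks): FPSCR.FR is bit 21, and shifting right by
 * 21-6 = 15 moves it to bit 6, i.e. a value of 0x40 = 64 = one bank.
 * So update_fr_bank() computes the front-bank offset
 *     (FPSCR >> 15) & 0x40  ==  FR * 64
 * while load_xf_bank(), by inverting first, selects the opposite bank:
 *     (~FPSCR >> 15) & 0x40  ==  (1 - FR) * 64
 * The LEA then adds that offset to &sh4r.fr to form the bank pointer. */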
/**
 * Push FPUL (as a 32-bit float) onto the FPU stack
 */
static inline void push_fpul( )
{
    OP(0xD9); OP(0x45); OP(R_FPUL);
}

/**
 * Pop FPUL (as a 32-bit float) from the FPU stack
 */
static inline void pop_fpul( )
{
    OP(0xD9); OP(0x5D); OP(R_FPUL);
}
/**
 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
}

/**
 * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
 * with bankreg previously loaded with the location of the current fp bank.
 */
static inline void pop_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
}

/**
 * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
}

static inline void pop_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
}
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
        sh4_x86.priv_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_MD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JE_exc( EXC_SLOT_ILLEGAL );\
        } else {\
            JE_exc( EXC_ILLEGAL );\
        }\
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_FD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
    }
#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
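/* Example (illustrative): for a longword read from 0x8C001002 the TEST of
 * the low two bits is non-zero, so the JNE is taken and the block raises
 * EXC_DATA_ADDR_READ, mirroring the SH4's address-error behaviour for
 * misaligned accesses. */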
#define UNDEF()
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MEM_READ_SIZE (CALL_FUNC1_SIZE)
#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
#define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )

#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = DELAY_NONE; return 1;
/****** Import appropriate calling conventions ******/
#if SH4_TRANSLATOR == TARGET_X86_64
#include "sh4/ia64abi.h"
#else /* SH4_TRANSLATOR == TARGET_X86 */
#ifdef APPLE_BUILD
#include "sh4/ia32mac.h"
#else
#include "sh4/ia32abi.h"
#endif
#endif
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, pc );
    call_func1( sh4_translate_breakpoint_hit, R_EAX );
}
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (mainly page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc.
 */
void sh4_emulator_exit( sh4vma_t endpc )
{
    load_imm32( R_ECX, endpc - sh4_x86.block_start_pc );   // 5
    ADD_r32_sh4r( R_ECX, R_PC );

    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    load_imm32( R_ECX, sh4_x86.in_delay_slot ? 1 : 0 );
    store_spreg( R_ECX, REG_OFFSET(in_delay_slot) );

    call_func0( sh4_execute_instruction );
    load_spreg( R_EAX, R_PC );
    if( sh4_x86.tlb_on ) {
        call_func1(xlat_get_code_by_vma,R_EAX);
    } else {
        call_func1(xlat_get_code,R_EAX);
    }
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
    POP_r32(R_EBP);
    RET();
}
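/* What the emitted code does (summary, not emitted itself): it advances
 * sh4r.pc by the translate-time offset, charges the elapsed cycles to
 * slice_cycle, records whether we were in a delay slot, runs one
 * instruction through the interpreter, then looks up the translated block
 * for the resulting PC (via the VMA-aware lookup when the TLB is on),
 * masks the low two bits off the code pointer and returns with it in EAX
 * (the driver loop is assumed to jump to it). */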
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * the delay-slot instruction must complete before the branch takes effect).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an instruction that sets PC)
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    /* PC is not in the current icache - this usually means we're running
     * with MMU on, and we've gone past the end of the page. And since
     * sh4_translate_block is pretty careful about this, it means we're
     * almost certainly in a delay slot.
     *
     * Since we can't assume the page is present (and we can't fault it in
     * at this point), inline a call to sh4_execute_instruction (with a few
     * small repairs to cope with the different environment).
     */

    if( !sh4_x86.in_delay_slot ) {
        sh4_x86_add_recovery(pc);
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    AND_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(13, target1);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JE_rel8(9, target2);
    SHR_imm8_r32( 16, R_EAX ); // 3
    TEST_r8_r8( R_AL, R_AL ); // 2
    JE_rel8(2, target3);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
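/* Reference semantics (illustrative, not part of the translator): CMP/STR
 * sets T when any byte of Rm equals the corresponding byte of Rn. The code
 * above XORs the operands and tests each byte of the result for zero:
 *
 *     uint32_t x = rm ^ rn;
 *     int t = ((x & 0x000000FF) == 0) || ((x & 0x0000FF00) == 0) ||
 *             ((x & 0x00FF0000) == 0) || ((x & 0xFF000000) == 0);
 */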
DIV0S Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(5, mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(3, end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
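/* Note (illustrative): DIV1 performs one step of the SH4's 1-bit
 * non-restoring division. The dividend in Rn is rotated left through T
 * (RCL), the divisor in Rm is added or subtracted depending on whether the
 * M and Q flags agree, and the new Q and T are derived from the
 * shifted-out bit, the add/sub carry and M, exactly as the chained XORs
 * above compute them. DIV0S/DIV0U set up M, Q and T beforehand, and 32
 * DIV1 steps produce a 32-bit quotient. */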
DMULS.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 4, R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );

    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( CALL_FUNC0_SIZE, nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
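/* Note (illustrative): with the S flag set, MAC.L accumulates into a
 * 48-bit saturating accumulator - signsat48() is assumed to clamp the
 * 64-bit MACH:MACL pair to the range of a signed 48-bit value. With S
 * clear, the full 64-bit accumulate (the ADD/ADC above) stands as-is. */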
MAC.W @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 2, R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( 47, nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( 51, end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( 13, positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 25, end2 );           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 12, end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
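/* Note (illustrative): with S set, MAC.W saturates to 32 bits - on signed
 * overflow of the MACL add, MACL is clamped to 0x80000000 or 0x7FFFFFFF
 * (depending on the overflow direction) and MACH is set to 1 to flag the
 * overflow. With S clear, the product is added to the full 64-bit
 * MACH:MACL pair, as in the nosat path above. */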
MOVT Rn {:
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    OR_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(16, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(10, end);         // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
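/* Reference semantics (illustrative): SHAD is a dynamic arithmetic shift.
 * A non-negative Rm shifts left by Rm&31; a negative Rm shifts right by
 * (-Rm)&31, where an effective count of 0 means a full 32-bit shift that
 * leaves only the sign bit (the SAR-by-31 "emptysar" path above):
 *
 *     int32_t shad(int32_t rn, int32_t rm) {
 *         if (rm >= 0) return rn << (rm & 31);
 *         int s = (-rm) & 31;
 *         return s ? rn >> s : rn >> 31;   // assumes arithmetic >>
 *     }
 */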
SHLD Rm, Rn {:
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(15, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(9, end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
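/* Note (illustrative): SHLD is the logical counterpart of SHAD - same
 * count handling, but a right shift uses SHR and a full 32-bit shift
 * yields 0 (the XOR "emptyshr" path) instead of replicating the sign. */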
SHAL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH );
    store_reg( R_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    SETE_t();
    OR_imm8_r8( 0x80, R_AL );
    POP_realigned_r32( R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE(R_EAX, R_EAX);
    POP_realigned_r32(R_ECX);
    XOR_imm32_r32( imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Data move instructions */
MOV Rm, Rn {:
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_LONG( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -2, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
            MOVSX_r16_r32( R_EAX, R_EAX );
        } else {
            load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_WORD( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_ECX );
        store_reg( R_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
MOVCA.L R0, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Control transfer instructions */
BF disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BF/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        sh4_x86.in_delay_slot = DELAY_PC;
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32 over the taken path if T is set
        sh4_translate_instruction(pc+2);
        exit_block_rel( target, pc+4 );
        // not taken
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_translate_instruction(pc+2);
        return 4;
    }
:}
BRA disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_translate_instruction( pc + 2 );
        exit_block_rel( disp + pc + 4, pc+4 );
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BRAF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction( pc + 2 );
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BSR disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_translate_instruction( pc + 2 );
        exit_block_rel( disp + pc + 4, pc+4 );
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BSRF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_EAX );
        store_spreg( R_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction( pc + 2 );
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BT disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BT/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32 over the taken path if T is clear
        sh4_translate_instruction(pc+2);
        exit_block_rel( disp + pc + 4, pc+4 );
        // not taken
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_translate_instruction(pc+2);
        return 4;
    }
:}
JMP @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
JSR @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_EAX, R_PC );
        ADD_imm32_r32( pc + 4 - sh4_x86.block_start_pc, R_EAX );
        store_spreg( R_EAX, R_PR );
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
RTE {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_spreg( R_ECX, R_SPC );
        store_spreg( R_ECX, R_NEW_PC );
        load_spreg( R_EAX, R_SSR );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
RTS {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_ECX, R_PR );
        store_spreg( R_ECX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_translate_instruction(pc+2);
        exit_block_newpcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
TRAPA #imm {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, pc+2 - sh4_x86.block_start_pc );   // 5
        ADD_r32_sh4r( R_ECX, R_PC );
        load_imm32( R_EAX, imm );
        call_func1( sh4_raise_trap, R_EAX );
        sh4_x86.tstate = TSTATE_NONE;
        exit_block_pcset(pc);
        sh4_x86.branch_taken = TRUE;
        return 2;
    }
:}
UNDEF {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        JMP_exc(EXC_ILLEGAL);
        return 2;
    }
:}
CLRMAC {:
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRS {:
    CLC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
CLRT {:
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SETS {:
    STC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
SETT {:
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* Floating point moves */
FMOV FRm, FRn {:
    /* As horrible as this looks, it's actually covering 5 separate cases:
     * 1. 32-bit fr-to-fr (PR=0)
     * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
     * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
     * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
     * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
     */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_SZ, R_ECX );
    JNE_rel8(8, doublesize);
    load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
    store_fr( R_EDX, R_EAX, FRn );
    if( FRm&1 ) {
        JMP_rel8(24, end);
        JMP_TARGET(doublesize);
        load_xf_bank( R_ECX );
        load_fr( R_ECX, R_EAX, FRm-1 );
        if( FRn&1 ) {
            load_fr( R_ECX, R_EDX, FRm );
            store_fr( R_ECX, R_EAX, FRn-1 );
            store_fr( R_ECX, R_EDX, FRn );
        } else /* FRn&1 == 0 */ {
            load_fr( R_ECX, R_ECX, FRm );
            store_fr( R_EDX, R_EAX, FRn );
            store_fr( R_EDX, R_ECX, FRn+1 );
        }
        JMP_TARGET(end);
    } else /* FRm&1 == 0 */ {
        if( FRn&1 ) {
            JMP_rel8(24, end);
            load_xf_bank( R_ECX );
            load_fr( R_EDX, R_EAX, FRm );
            load_fr( R_EDX, R_EDX, FRm+1 );
            store_fr( R_ECX, R_EAX, FRn-1 );
            store_fr( R_ECX, R_EDX, FRn );
            JMP_TARGET(end);
        } else /* FRn&1 == 0 */ {
            JMP_rel8(12, end);
            load_fr( R_EDX, R_EAX, FRm );
            load_fr( R_EDX, R_ECX, FRm+1 );
            store_fr( R_EDX, R_EAX, FRn );
            store_fr( R_EDX, R_ECX, FRn+1 );
            JMP_TARGET(end);
        }
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @Rn {:
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
    load_fr_bank( R_EDX );
    load_fr( R_EDX, R_ECX, FRm );
    MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
    if( FRm&1 ) {
        JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
        JMP_TARGET(doublesize);
        load_xf_bank( R_EDX );
        load_fr( R_EDX, R_ECX, FRm&0x0E );
        load_fr( R_EDX, R_EDX, FRm|0x01 );
        MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
        JMP_TARGET(end);
    } else {
        JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
        JMP_TARGET(doublesize);
        load_fr_bank( R_EDX );
        load_fr( R_EDX, R_ECX, FRm&0x0E );
        load_fr( R_EDX, R_EDX, FRm|0x01 );
        MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
        JMP_TARGET(end);
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @Rm, FRn {:
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(8 + MEM_READ_SIZE, doublesize);
    MEM_READ_LONG( R_EAX, R_EAX );
    load_fr_bank( R_EDX );
    store_fr( R_EDX, R_EAX, FRn );
    if( FRn&1 ) {
	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
	JMP_TARGET(doublesize);
	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
	load_xf_bank( R_EDX );
	store_fr( R_EDX, R_ECX, FRn&0x0E );
	store_fr( R_EDX, R_EAX, FRn|0x01 );
	JMP_TARGET(end);
    } else {
	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
	JMP_TARGET(doublesize);
	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
	load_fr_bank( R_EDX );
	store_fr( R_EDX, R_ECX, FRn&0x0E );
	store_fr( R_EDX, R_EAX, FRn|0x01 );
	JMP_TARGET(end);
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @-Rn {:
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_fr_bank( R_EDX );
    load_fr( R_EDX, R_ECX, FRm );
    ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
    MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
    if( FRm&1 ) {
	JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
	JMP_TARGET(doublesize);
	ADD_imm8s_r32(-8,R_EAX);
	MMU_TRANSLATE_WRITE( R_EAX );
	load_xf_bank( R_EDX );
	load_fr( R_EDX, R_ECX, FRm&0x0E );
	load_fr( R_EDX, R_EDX, FRm|0x01 );
	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
	JMP_TARGET(end);
    } else {
	JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
	JMP_TARGET(doublesize);
	ADD_imm8s_r32(-8,R_EAX);
	MMU_TRANSLATE_WRITE( R_EAX );
	load_fr_bank( R_EDX );
	load_fr( R_EDX, R_ECX, FRm&0x0E );
	load_fr( R_EDX, R_EDX, FRm|0x01 );
	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
	JMP_TARGET(end);
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
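/* Illustrative note on ordering: the decremented address is formed in a
 * scratch register and passed through MMU_TRANSLATE_WRITE *before* the
 * architectural Rn is touched; only once translation succeeds is Rn actually
 * decremented (ADD_imm8s_sh4r). A TLB miss can therefore restart the
 * instruction with Rn still holding its original value.
 */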
FMOV @Rm+, FRn {:
    check_fpuen();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(12 + MEM_READ_SIZE, doublesize);
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    load_fr_bank( R_EDX );
    store_fr( R_EDX, R_EAX, FRn );
    if( FRn&1 ) {
	JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
	JMP_TARGET(doublesize);
	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
	load_xf_bank( R_EDX );
	store_fr( R_EDX, R_ECX, FRn&0x0E );
	store_fr( R_EDX, R_EAX, FRn|0x01 );
	JMP_TARGET(end);
    } else {
	JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
	JMP_TARGET(doublesize); // marker added to match the other FMOV variants
	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
	load_fr_bank( R_EDX );
	store_fr( R_EDX, R_ECX, FRn&0x0E );
	store_fr( R_EDX, R_EAX, FRn|0x01 );
	JMP_TARGET(end);
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @(R0, Rn) {:
    check_fpuen();
    load_reg( R_EAX, Rn );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
    load_fr_bank( R_EDX );
    load_fr( R_EDX, R_ECX, FRm );
    MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
    if( FRm&1 ) {
	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
	JMP_TARGET(doublesize);
	load_xf_bank( R_EDX );
	load_fr( R_EDX, R_ECX, FRm&0x0E );
	load_fr( R_EDX, R_EDX, FRm|0x01 );
	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
	JMP_TARGET(end);
    } else {
	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
	JMP_TARGET(doublesize);
	load_fr_bank( R_EDX );
	load_fr( R_EDX, R_ECX, FRm&0x0E );
	load_fr( R_EDX, R_EDX, FRm|0x01 );
	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
	JMP_TARGET(end);
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV @(R0, Rm), FRn {:
    check_fpuen();
    load_reg( R_EAX, Rm );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(8 + MEM_READ_SIZE, doublesize);
    MEM_READ_LONG( R_EAX, R_EAX );
    load_fr_bank( R_EDX );
    store_fr( R_EDX, R_EAX, FRn );
    if( FRn&1 ) {
	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
	JMP_TARGET(doublesize);
	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
	load_xf_bank( R_EDX );
	store_fr( R_EDX, R_ECX, FRn&0x0E );
	store_fr( R_EDX, R_EAX, FRn|0x01 );
	JMP_TARGET(end);
    } else {
	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
	JMP_TARGET(doublesize);
	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
	load_fr_bank( R_EDX );
	store_fr( R_EDX, R_ECX, FRn&0x0E );
	store_fr( R_EDX, R_EAX, FRn|0x01 );
	JMP_TARGET(end);
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI0 FRn {:  /* IFF PR=0 */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(8, end);
    XOR_r32_r32( R_EAX, R_EAX ); // +0.0f is the all-zero bit pattern
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI1 FRn {:  /* IFF PR=0 */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(11, end);
    load_imm32(R_EAX, 0x3F800000); // IEEE754 single-precision 1.0
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLOAT FPUL, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg(R_EDX, REG_OFFSET(fr_bank));
    FILD_sh4r(R_FPUL);
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(5, doubleprec);
    pop_fr( R_EDX, FRn );
    JMP_rel8(3, end);
    JMP_TARGET(doubleprec);
    pop_dr( R_EDX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FTRC FRm, FPUL {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(5, doubleprec);
    push_fr( R_EDX, FRm );
    JMP_rel8(3, doop);
    JMP_TARGET(doubleprec);
    push_dr( R_EDX, FRm );
    JMP_TARGET( doop );
    load_imm32( R_ECX, (uint32_t)&max_int );
    FILD_r32ind( R_ECX );
    FCOMIP_st(1);
    JNA_rel8( 32, sat );
    load_imm32( R_ECX, (uint32_t)&min_int );  // 5
    FILD_r32ind( R_ECX );           // 2
    FCOMIP_st(1);                   // 2
    JAE_rel8( 21, sat2 );           // 2
    load_imm32( R_EAX, (uint32_t)&save_fcw );
    FNSTCW_r32ind( R_EAX );
    load_imm32( R_EDX, (uint32_t)&trunc_fcw );
    FLDCW_r32ind( R_EDX );
    FISTP_sh4r(R_FPUL);             // 3
    FLDCW_r32ind( R_EAX );
    JMP_rel8( 9, end );             // 2

    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    MOV_r32ind_r32( R_ECX, R_ECX ); // 2
    store_spreg( R_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
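/* Illustrative note: FTRC truncates toward zero with saturation, while the x87
 * default rounding mode is round-to-nearest. The code above therefore clamps
 * against max_int/min_int first (the sat/sat2 paths store the bound directly
 * into FPUL), and otherwise swaps in trunc_fcw, presumably a control word with
 * the rounding-control field set to truncate, around the FISTP before
 * restoring the saved control word from save_fcw.
 */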
FLDS FRm, FPUL {:
    check_fpuen();
    load_fr_bank( R_ECX );
    load_fr( R_ECX, R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSTS FPUL, FRn {:
    check_fpuen();
    load_fr_bank( R_ECX );
    load_spreg( R_EAX, R_FPUL );
    store_fr( R_ECX, R_EAX, FRn );
    sh4_x86.tstate = TSTATE_NONE;
:}
FCNVDS FRm, FPUL {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(9, end); // only when PR=1
    load_fr_bank( R_ECX );
    push_dr( R_ECX, FRm );
    pop_fpul();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FCNVSD FPUL, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(9, end); // only when PR=1
    load_fr_bank( R_ECX );
    push_fpul();
    pop_dr( R_ECX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Floating point instructions */
FABS FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn); // 3
    FABS_st0(); // 2
    pop_fr( R_EDX, FRn); // 3
    JMP_rel8(8,end); // 2
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FABS_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FADD FRm, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13,doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    FADDP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11,end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    FADDP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FDIV FRm, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRn);
    push_fr(R_EDX, FRm);
    FDIVP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    push_dr(R_EDX, FRm);
    FDIVP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMAC FR0, FRm, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg( R_EDX, REG_OFFSET(fr_bank));
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(18, doubleprec);
    push_fr( R_EDX, 0 );
    push_fr( R_EDX, FRm );
    FMULP_st(1);
    push_fr( R_EDX, FRn );
    FADDP_st(1);
    pop_fr( R_EDX, FRn );
    JMP_rel8(16, end);
    JMP_TARGET(doubleprec);
    push_dr( R_EDX, 0 );
    push_dr( R_EDX, FRm );
    FMULP_st(1);
    push_dr( R_EDX, FRn );
    FADDP_st(1);
    pop_dr( R_EDX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMUL FRm, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    FMULP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    FMULP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FNEG FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn);
    FCHS_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FCHS_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSRRA FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(12, end); // PR=0 only
    FLD1_st0();
    push_fr(R_EDX, FRn);
    FSQRT_st0();
    FDIVP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
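/* Illustrative note: FSRRA (reciprocal square root approximation) has no
 * direct x87 equivalent, so it is expanded as FLD1; FLD FRn; FSQRT; FDIVP,
 * computing 1.0/sqrt(FRn) at full precision rather than reproducing the
 * hardware's approximate result.
 */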
FSQRT FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn);
    FSQRT_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FSQRT_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSUB FRm, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRn);
    push_fr(R_EDX, FRm);
    FSUBP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    push_dr(R_EDX, FRm);
    FSUBP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FCMP/EQ FRm, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(8, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    JMP_rel8(6, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETE_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
FCMP/GT FRm, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(8, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    JMP_rel8(6, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCA FPUL, FRn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
    load_fr_bank( R_ECX );
    ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
    load_spreg( R_EDX, R_FPUL );
    call_func2( sh4_fsca, R_EDX, R_ECX );
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
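/* Illustrative note: FSCA is delegated to the sh4_fsca() helper, called with
 * the FPUL value and a pointer to the even-aligned destination pair; it is
 * expected to write the sine and cosine of the angle into FRn and FRn+1. On
 * SH4 the angle is a fixed-point fraction of a rotation, with 0x10000
 * corresponding to 2*pi. The call is skipped entirely when PR=1.
 */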
FIPR FVm, FVn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(44, doubleprec);

    load_fr_bank( R_ECX );
    push_fr( R_ECX, FVm<<2 );
    push_fr( R_ECX, FVn<<2 );
    FMULP_st(1);
    push_fr( R_ECX, (FVm<<2)+1);
    push_fr( R_ECX, (FVn<<2)+1);
    FMULP_st(1);
    FADDP_st(1);
    push_fr( R_ECX, (FVm<<2)+2);
    push_fr( R_ECX, (FVn<<2)+2);
    FMULP_st(1);
    FADDP_st(1);
    push_fr( R_ECX, (FVm<<2)+3);
    push_fr( R_ECX, (FVn<<2)+3);
    FMULP_st(1);
    FADDP_st(1);
    pop_fr( R_ECX, (FVn<<2)+3);
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
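/* Illustrative note: FIPR is an inner product over two register quads,
 *     FVn[3] = FVm[0]*FVn[0] + FVm[1]*FVn[1] + FVm[2]*FVn[2] + FVm[3]*FVn[3]
 * accumulated on the x87 stack with FMULP/FADDP pairs and finally popped into
 * the last element of FVn (hence the FVm<<2 / FVn<<2 indexing into the bank).
 */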
FTRV XMTRX, FVn {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
    load_fr_bank( R_EDX );                 // 3
    ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
    load_xf_bank( R_ECX );                 // 12
    call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
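/* Illustrative note: FTRV transforms vector FVn by the 4x4 matrix XMTRX held
 * in the back bank. The generated code only computes the two operand pointers,
 * fr_bank + (FVn<<4) for the vector (four 4-byte floats) and the xf bank base
 * for the matrix, and leaves the arithmetic to the sh4_ftrv() helper.
 */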
FRCHG {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_FR, R_ECX ); // toggle the FR bank-select bit
    store_spreg( R_ECX, R_FPSCR );
    update_fr_bank( R_ECX );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_SZ, R_ECX ); // toggle the SZ transfer-size bit
    store_spreg( R_ECX, R_FPSCR );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Processor control instructions */
LDC Rm, SR {:
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	check_priv();
	load_reg( R_EAX, Rm );
	call_func1( sh4_write_sr, R_EAX );
	sh4_x86.priv_checked = FALSE;
	sh4_x86.fpuen_checked = FALSE;
	sh4_x86.tstate = TSTATE_NONE;
    }
:}
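/* Illustrative note: writing SR may change the processor mode and the FPU
 * disable bit, so the cached priv_checked/fpuen_checked flags are invalidated
 * here; the next instruction needing either check will re-emit it instead of
 * relying on a test made under the old SR value.
 */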
LDC Rm, GBR {:
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );
:}
LDC Rm, VBR {:
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, GBR {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SR {:
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	check_priv();
	load_reg( R_EAX, Rm );
	check_ralign32( R_EAX );
	MMU_TRANSLATE_READ( R_EAX );
	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
	MEM_READ_LONG( R_EAX, R_EAX );
	call_func1( sh4_write_sr, R_EAX );
	sh4_x86.priv_checked = FALSE;
	sh4_x86.fpuen_checked = FALSE;
	sh4_x86.tstate = TSTATE_NONE;
    }
:}
LDC.L @Rm+, VBR {:
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:
    check_priv();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPSCR {:
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPSCR );
    update_fr_bank( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS.L @Rm+, FPSCR {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_FPSCR );
    update_fr_bank( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPUL {:
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {:
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );
:}
LDS.L @Rm+, PR {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:
    call_func0( MMU_ldtlb );
:}
/* The cache-control instructions are no-ops here: the operand cache itself is not modelled */
OCBI @Rn {:  :}
OCBP @Rn {:  :}
OCBWB @Rn {:  :}
PREF @Rn {:
    load_reg( R_EAX, Rn );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_EAX );
    CMP_imm32_r32( 0xE0000000, R_EAX );
    JNE_rel8(8+CALL_FUNC1_SIZE, end);
    call_func1( sh4_flush_store_queue, R_ECX );
    TEST_r32_r32( R_EAX, R_EAX );
    JE_exc(-1);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
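/* Illustrative note: PREF only has an emulated side effect for addresses in
 * the store-queue region. Masking with 0xFC000000 and comparing against
 * 0xE0000000 selects 0xE0000000-0xE3FFFFFF; for those addresses the block
 * calls sh4_flush_store_queue() and takes the exception exit (JE_exc) if the
 * helper reports failure, while all other addresses fall through as a no-op.
 */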
SLEEP {:
    check_priv();
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
STC SR, Rn {:
    check_priv();
    call_func0(sh4_read_sr);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );
:}
STC VBR, Rn {:
    check_priv();
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:
    check_priv();
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:
    check_priv();
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:
    check_priv();
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:
    check_priv();
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    check_priv();
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SR, @-Rn {:
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    call_func0( sh4_read_sr );
    POP_realigned_r32( R_ECX );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_VBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SSR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SPC );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_SGR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_DBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:
    check_priv();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_GBR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPSCR, Rn {:
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:
    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );
:}
STS.L FPUL, @-Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPUL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:
    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );
:}
STS.L MACH, @-Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACH );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:
    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );
:}
STS.L MACL, @-Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_MACL );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:
    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );
:}
STS.L PR, @-Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    ADD_imm8s_r32( -4, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_PR );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}

NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
%%
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 0;