lxdream.org :: lxdream/src/sh4/sh4x86.in
filename:    src/sh4/sh4x86.in
changeset:   577:a181aeacd6e8
prev:        571:9bc09948d0f2
next:        584:5c29dd7297df
author:      nkeynes
date:        Mon Jan 14 10:23:49 2008 +0000
branch:      lxdream-mmu
permissions: -rw-r--r--
last change: Remove asm file and convert to inline (easier to cope with platform conventions)
             Add breakpoint support
             Add MMU store-queue support
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096
struct backpatch_record {
    uint32_t *fixup_addr;
    uint32_t fixup_icount;
    uint32_t exc_code;
};

#define MAX_RECOVERY_SIZE 2048

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    gboolean in_delay_slot;
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* mode flags */
    gboolean tlb_on; /* True if tlb translation is active */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
    struct xlat_recovery_record recovery_list[MAX_RECOVERY_SIZE];
    uint32_t recovery_posn;
};
#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3
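
/* The TSTATE_* values are x86 condition-code numbers: the short-form Jcc
 * opcode is simply 0x70+cc, and cc^1 is the inverse condition (x86 pairs
 * each condition with its negation in the low bit), which is what JT_rel8
 * and JF_rel8 below rely on. A sketch of the rule (this helper is purely
 * illustrative; the translator emits the bytes directly):
 *   TSTATE_E = 4   -> 0x74 = JE    4^1 = 5     -> 0x75 = JNE
 *   TSTATE_G = 0xF -> 0x7F = JG    0xF^1 = 0xE -> 0x7E = JLE
 *   TSTATE_A = 7   -> 0x77 = JA    7^1 = 6     -> 0x76 = JBE
 */
static inline uint8_t jcc_short_opcode( int tstate, int negate )
{
    return 0x70 + (tstate ^ (negate ? 1 : 0));
}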
/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); OP(rel8); \
    MARK_JMP(rel8,label)
/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    MARK_JMP(rel8, label)
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

void sh4_x86_init()
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record) );
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
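
/* How these records are consumed is up to sh4_translate_end_block() (not in
 * this file). A minimal sketch, assuming - as the fields suggest - that each
 * fixup_addr points at the rel32 field of a forward jump that should land on
 * an exception stub for exc_code (hypothetical helper, for illustration):
 */
static void backpatch_fixup_sketch( struct backpatch_record *rec, uint8_t *exc_stub )
{
    /* a rel32 displacement is relative to the end of its own 4-byte field */
    *rec->fixup_addr = (uint32_t)(exc_stub - ((uint8_t *)rec->fixup_addr + 4));
}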
void sh4_x86_add_recovery( uint32_t pc )
{
    xlat_recovery[xlat_recovery_posn].xlat_pc = (uintptr_t)xlat_output;
    xlat_recovery[xlat_recovery_posn].sh4_icount = (pc - sh4_x86.block_start_pc)>>1;
    xlat_recovery_posn++;
}
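
/* The inverse mapping, for illustration only: given the recovery record that
 * covers a faulting translated-code address, the SH4 PC is block start plus
 * icount 16-bit instructions (matching the >>1 above):
 */
static uint32_t recovery_sh4_pc_sketch( struct xlat_recovery_record *rec, uint32_t block_start_pc )
{
    return block_start_pc + (rec->sh4_icount << 1);
}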
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg )
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
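
/* Worked example of the bytes this emits, assuming R_EAX == 0 and (purely
 * for illustration) REG_OFFSET(r[3]) == 12 - the real offset depends on
 * struct sh4_registers:
 *   load_reg( R_EAX, 3 )  ->  8B 45 0C  =  mov eax, [ebp+12]
 * i.e. the SH4 register file sits at small disp8 offsets from EBP. */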
static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}
/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}
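
/* e.g. load_imm64( 0, v ) emits 48 B8 <8-byte v>, i.e. REX.W mov rax, imm64 */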
/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}
#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
static inline void load_fr( int bankreg, int x86reg, int frm )
{
    OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
}

/**
 * Store an integer x86 register into an FR register (single-precision
 * floating point) (eg for register-to-register moves)
 */
static inline void store_fr( int bankreg, int x86reg, int frn )
{
    OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
}
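
/* A note on the (frm^1) swizzle above: the FR bank appears to be laid out in
 * host memory with each even/odd pair swapped, so that a double-precision
 * DRn can be read as a single little-endian 64-bit value - compare
 * push_dr/pop_dr below, which use frm<<2 with no swizzle. The ^1 converts an
 * SH4 single-precision register number into its swapped slot. */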
/**
 * Load a pointer to the back floating-point bank into the specified x86
 * register. The bankreg must have been previously loaded with FPSCR.
 * NB: 12 bytes
 */
static inline void load_xf_bank( int bankreg )
{
    NOT_r32( bankreg );
    SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
}
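
/* The same computation in plain C, for reference: FPSCR bit 21 selects the
 * register bank, each bank is 16 regs * 4 bytes = 64 (0x40) bytes, and the
 * initial NOT selects the *other* (back) bank:
 */
static inline int xf_bank_offset_sketch( uint32_t fpscr )
{
    return ((~fpscr) >> (21 - 6)) & 0x40;   /* 0 or 64 */
}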
/**
 * Update the fr_bank pointer based on the current fpscr value.
 */
static inline void update_fr_bank( int fpscrreg )
{
    SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
}

/**
 * Push FPUL (as a 32-bit float) onto the FPU stack
 */
static inline void push_fpul( )
{
    OP(0xD9); OP(0x45); OP(R_FPUL);
}

/**
 * Pop FPUL (as a 32-bit float) from the FPU stack
 */
static inline void pop_fpul( )
{
    OP(0xD9); OP(0x5D); OP(R_FPUL);
}

/**
 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
}

/**
 * Pop a 32-bit float from the FPU stack and store it back into the fp bank,
 * with bankreg previously loaded with the location of the current fp bank.
 */
static inline void pop_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4]
}

/**
 * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
}

static inline void pop_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4]
}
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
	sh4_x86.priv_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_MD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JE_exc( EXC_SLOT_ILLEGAL );\
	} else {\
	    JE_exc( EXC_ILLEGAL );\
	}\
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	load_spreg( R_EAX, R_SR );\
	AND_imm32_r32( SR_FD, R_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exc(EXC_FPU_DISABLED);\
	}\
    }

#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
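
/* Equivalent C for the checks above: check_ralign32(addr) is
 *   if( addr & 0x00000003 ) raise EXC_DATA_ADDR_READ;
 * TEST only sets ZF when the tested bits are all clear, so JNE takes the
 * exception path exactly on misaligned addresses. */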
#define UNDEF()
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)

/**
 * Perform MMU translation on the address in addr_reg for a read operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_READ( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

/**
 * Perform MMU translation on the address in addr_reg for a write operation, iff the TLB is turned
 * on, otherwise do nothing. Clobbers EAX, ECX and EDX. May raise a TLB exception or address error.
 */
#define MMU_TRANSLATE_WRITE( addr_reg ) if( sh4_x86.tlb_on ) { call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); MEM_RESULT(addr_reg); }

#define MEM_READ_SIZE (CALL_FUNC1_SIZE)
#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE)
#define MMU_TRANSLATE_SIZE (sh4_x86.tlb_on ? (CALL_FUNC1_SIZE + 12) : 0 )
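
/* What MMU_TRANSLATE_READ boils down to at runtime when the TLB is on - a
 * sketch in plain C; JE_exc(-1) presumably means "the mmu call has already
 * raised the exception itself", given the -1 pseudo exception code:
 */
static uint32_t mmu_translate_read_sketch( uint32_t vma )
{
    uint32_t addr = mmu_vma_to_phys_read( vma );
    if( addr == MMU_VMA_ERROR ) {
        /* exception already pending; control diverts to the block's
         * exception exit via the backpatched jump */
    }
    return addr;
}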
#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;

/****** Import appropriate calling conventions ******/
#if SH4_TRANSLATOR == TARGET_X86_64
#include "sh4/ia64abi.h"
#else /* SH4_TRANSLATOR == TARGET_X86 */
#ifdef APPLE_BUILD
#include "sh4/ia32mac.h"
#else
#include "sh4/ia32abi.h"
#endif
#endif

void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    load_imm32( R_EAX, XLAT_EXIT_BREAKPOINT );
    call_func1( sh4_translate_exit, R_EAX );
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * the delay-slot instruction logically belongs to the branch).
 *
 * The instruction is normally read straight from the icache; the fallback
 * below covers the delay-slot case where the PC has run off the page.
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an instruction which modifies the PC).
 */
uint32_t sh4_translate_instruction( sh4addr_t pc )
{
    uint32_t ir;
    if( IS_IN_ICACHE(pc) ) {
        /* Read instruction from icache */
        ir = *(uint16_t *)GET_ICACHE_PTR(pc);
    } else {
        /* PC is not in the current icache - this usually means we're running
         * with MMU on, and we've gone past the end of the page. And since
         * sh4_translate_block is pretty careful about this, it means we're
         * almost certainly in a delay slot.
         *
         * Since we can't assume the page is present (and we can't fault it in
         * at this point), inline a call to sh4_execute_instruction (with a few
         * small repairs to cope with the different environment).
         */
        ir = sh4_read_word(pc);
    }
    if( !sh4_x86.in_delay_slot ) {
        sh4_x86_add_recovery(pc);
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    AND_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(13, target1);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JE_rel8(9, target2);
    SHR_imm8_r32( 16, R_EAX ); // 3
    TEST_r8_r8( R_AL, R_AL ); // 2
    JE_rel8(2, target3);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
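/* Reference semantics for CMP/STR - T is set iff any of the four bytes of
 * Rm and Rn are equal, which the byte-by-byte TESTs above check on the XOR
 * of the two registers (a zero byte in the XOR means a match). In plain C:
 */
static int cmpstr_sketch( uint32_t rm, uint32_t rn )
{
    uint32_t x = rm ^ rn;
    return (x & 0x000000FF) == 0 || (x & 0x0000FF00) == 0 ||
           (x & 0x00FF0000) == 0 || (x & 0xFF000000) == 0;
}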
DIV0S Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(5, mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(3, end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULS.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 4, R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        check_ralign32( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_LONG( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );

    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( CALL_FUNC0_SIZE, nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    if( Rm == Rn ) {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        ADD_imm8s_r32( 2, R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( R_EAX, Rm );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        PUSH_realigned_r32( R_EAX );
        load_reg( R_EAX, Rn );
        check_ralign16( R_EAX );
        MMU_TRANSLATE_READ( R_EAX );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
        ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    }
    MEM_READ_WORD( R_EAX, R_EAX );
    POP_r32( R_ECX );
    PUSH_r32( R_EAX );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( 47, nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( 51, end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( 13, positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 25, end2 );           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 12, end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
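/* The saturating (S=1) path of MAC.W above, restated in C: the product of
 * the sign-extended words is added to MACL alone; on signed overflow MACL
 * is clamped and MACH is set to 1 (SF after the overflowing ADD indicates
 * which direction it overflowed). Illustrative only:
 */
static void macw_saturate_sketch( int32_t *macl, int32_t *mach, int64_t product )
{
    int64_t sum = (int64_t)*macl + product;
    if( sum > (int64_t)0x7FFFFFFF ) {
        *macl = 0x7FFFFFFF; *mach = 1;
    } else if( sum < -(int64_t)0x80000000 ) {
        *macl = (int32_t)0x80000000; *mach = 1;
    } else {
        *macl = (int32_t)sum;
    }
}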
MOVT Rn {:
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE( R_EAX, R_EAX );
    POP_realigned_r32(R_ECX);
    OR_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(16, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(10, end);         // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(15, doshl);

    NEG_r32( R_ECX );          // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(9, end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
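/* Reference semantics for the two dynamic shifts above (x86 has no single
 * equivalent, hence the branchy code): a non-negative Rm shifts left by
 * Rm&0x1F; a negative Rm shifts right by (-Rm)&0x1F, where a computed shift
 * of 0 means "shift by 32". SHAD shifts arithmetically, SHLD logically.
 * Sketch, assuming an arithmetic >> on signed ints as on the x86 hosts
 * targeted here:
 */
static int32_t shad_sketch( int32_t rn, int32_t rm )
{
    if( rm >= 0 )
        return rn << (rm & 0x1F);
    int sh = (-rm) & 0x1F;
    return sh == 0 ? rn >> 31 : rn >> sh;   /* sign-filling */
}

static uint32_t shld_sketch( uint32_t rn, int32_t rm )
{
    if( rm >= 0 )
        return rn << (rm & 0x1F);
    int sh = (-rm) & 0x1F;
    return sh == 0 ? 0 : rn >> sh;          /* zero-filling */
}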
SHAL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH );
    store_reg( R_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    SETE_t();
    OR_imm8_r8( 0x80, R_AL );
    POP_realigned_r32( R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    PUSH_realigned_r32(R_EAX);
    MEM_READ_BYTE(R_EAX, R_EAX);
    POP_realigned_r32(R_ECX);
    XOR_imm32_r32( imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Data move instructions */
MOV Rm, Rn {:
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    load_reg( R_EAX, Rn );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -1, REG_OFFSET(r[Rn]) );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_BYTE( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 1, REG_OFFSET(r[Rm]) );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_BYTE( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32(R_EAX);
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -4, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            load_imm32( R_EAX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_LONG( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
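/* Arithmetic note on the fallback path above: the loaded constant is
 *   (pc - block_start_pc) + disp + 4 - (pc & 3)
 * so adding the runtime sh4r.pc (the block's entry address) yields
 * (pc & ~3) + 4 + disp - the statically-known block start cancels out,
 * which is what makes the sequence correct even when the block executes at
 * a different virtual address than it was translated at. */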
MOV.L @(disp, Rm), Rn {:
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( disp, R_EAX );
    check_ralign32( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_LONG( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -2, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    ADD_imm8s_sh4r( -2, REG_OFFSET(r[Rn]) );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_ECX, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, Rm );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    load_reg( R_EAX, Rn );
    ADD_imm32_r32( disp, R_EAX );
    check_walign16( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_WORD( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_ECX, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    load_spreg( R_EAX, R_GBR );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
            MOVSX_r16_r32( R_EAX, R_EAX );
        } else {
            load_imm32( R_EAX, (pc - sh4_x86.block_start_pc) + disp + 4 );
            ADD_sh4r_r32( R_PC, R_EAX );
            MMU_TRANSLATE_READ( R_EAX );
            MEM_READ_WORD( R_EAX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_EAX );
    check_ralign16( R_EAX );
    MMU_TRANSLATE_READ( R_EAX );
    MEM_READ_WORD( R_EAX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_ECX );
        store_reg( R_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
MOVCA.L R0, @Rn {:
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_reg( R_EDX, 0 );
    MEM_WRITE_LONG( R_EAX, R_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Control transfer instructions */
BF disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JT_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BF/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        sh4_x86.in_delay_slot = TRUE;
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
        sh4_translate_instruction(pc+2);
        exit_block_rel( target, pc+4 );
        // not taken
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_translate_instruction(pc+2);
        return 4;
    }
:}
BRA disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction( pc + 2 );
        exit_block_rel( disp + pc + 4, pc+4 );
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BRAF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_EAX, Rn );
        ADD_imm32_r32( pc + 4, R_EAX );
        store_spreg( R_EAX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction( pc + 2 );
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BSR disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_EAX, pc + 4 );
        store_spreg( R_EAX, R_PR );
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction( pc + 2 );
        exit_block_rel( disp + pc + 4, pc+4 );
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BSRF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, pc + 4 );
        store_spreg( R_ECX, R_PR );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction( pc + 2 );
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BT disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        JF_rel8( EXIT_BLOCK_REL_SIZE(target), nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BT/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = TRUE;
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
        sh4_translate_instruction(pc+2);
        exit_block_rel( disp + pc + 4, pc+4 );
        // not taken
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_translate_instruction(pc+2);
        return 4;
    }
:}
JMP @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction(pc+2);
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
JSR @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_EAX, pc + 4 );
        store_spreg( R_EAX, R_PR );
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction(pc+2);
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
RTE {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_spreg( R_ECX, R_SPC );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        load_spreg( R_EAX, R_SSR );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.in_delay_slot = TRUE;
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction(pc+2);
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
RTS {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_ECX, R_PR );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction(pc+2);
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
TRAPA #imm {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, pc+2 );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        load_imm32( R_EAX, imm );
        call_func1( sh4_raise_trap, R_EAX );
        sh4_x86.tstate = TSTATE_NONE;
        exit_block_pcset(pc);
        sh4_x86.branch_taken = TRUE;
        return 2;
    }
:}
UNDEF {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        JMP_exc(EXC_ILLEGAL);
        return 2;
    }
:}
CLRMAC {:
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRS {:
    CLC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
CLRT {:
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SETS {:
    STC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
SETT {:
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* Floating point moves */
FMOV FRm, FRn {:
    /* As horrible as this looks, it's actually covering 5 separate cases:
     * 1. 32-bit fr-to-fr (PR=0)
     * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
     * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
     * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
     * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
     */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_SZ, R_ECX );
    JNE_rel8(8, doublesize);
    load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
    store_fr( R_EDX, R_EAX, FRn );
    if( FRm&1 ) {
        JMP_rel8(24, end);
        JMP_TARGET(doublesize);
        load_xf_bank( R_ECX );
        load_fr( R_ECX, R_EAX, FRm-1 );
        if( FRn&1 ) {
            load_fr( R_ECX, R_EDX, FRm );
            store_fr( R_ECX, R_EAX, FRn-1 );
            store_fr( R_ECX, R_EDX, FRn );
        } else /* FRn&1 == 0 */ {
            load_fr( R_ECX, R_ECX, FRm );
            store_fr( R_EDX, R_EAX, FRn );
            store_fr( R_EDX, R_ECX, FRn+1 );
        }
        JMP_TARGET(end);
    } else /* FRm&1 == 0 */ {
        if( FRn&1 ) {
            JMP_rel8(24, end);
            JMP_TARGET(doublesize);
            load_xf_bank( R_ECX );
            load_fr( R_EDX, R_EAX, FRm );
            load_fr( R_EDX, R_EDX, FRm+1 );
            store_fr( R_ECX, R_EAX, FRn-1 );
            store_fr( R_ECX, R_EDX, FRn );
            JMP_TARGET(end);
        } else /* FRn&1 == 0 */ {
            JMP_rel8(12, end);
            JMP_TARGET(doublesize);
            load_fr( R_EDX, R_EAX, FRm );
            load_fr( R_EDX, R_ECX, FRm+1 );
            store_fr( R_EDX, R_EAX, FRn );
            store_fr( R_EDX, R_ECX, FRn+1 );
            JMP_TARGET(end);
        }
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
FMOV FRm, @Rn {:
    check_fpuen();
    load_reg( R_EAX, Rn );
    check_walign32( R_EAX );
    MMU_TRANSLATE_WRITE( R_EAX );
    load_spreg( R_EDX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_EDX );
    JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
    load_fr_bank( R_EDX );
    load_fr( R_EDX, R_ECX, FRm );
    MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
    if( FRm&1 ) {
        JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
        JMP_TARGET(doublesize);
        load_xf_bank( R_EDX );
        load_fr( R_EDX, R_ECX, FRm&0x0E );
        load_fr( R_EDX, R_EDX, FRm|0x01 );
        MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
        JMP_TARGET(end);
    } else {
        JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
        JMP_TARGET(doublesize);
        load_fr_bank( R_EDX );
        load_fr( R_EDX, R_ECX, FRm&0x0E );
        load_fr( R_EDX, R_EDX, FRm|0x01 );
        MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
        JMP_TARGET(end);
    }
    sh4_x86.tstate = TSTATE_NONE;
:}
  1682 FMOV @Rm, FRn {:  
  1683     check_fpuen();
  1684     load_reg( R_EAX, Rm );
  1685     check_ralign32( R_EAX );
  1686     MMU_TRANSLATE_READ( R_EAX );
  1687     load_spreg( R_EDX, R_FPSCR );
  1688     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1689     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1690     MEM_READ_LONG( R_EAX, R_EAX );
  1691     load_fr_bank( R_EDX );
  1692     store_fr( R_EDX, R_EAX, FRn );
  1693     if( FRn&1 ) {
  1694 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1695 	JMP_TARGET(doublesize);
  1696 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1697 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1698 	load_xf_bank( R_EDX );
  1699 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1700 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1701 	JMP_TARGET(end);
  1702     } else {
  1703 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1704 	JMP_TARGET(doublesize);
  1705 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1706 	load_fr_bank( R_EDX );
  1707 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1708 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1709 	JMP_TARGET(end);
  1710     }
  1711     sh4_x86.tstate = TSTATE_NONE;
  1712 :}
  1713 FMOV FRm, @-Rn {:  
  1714     check_fpuen();
  1715     load_reg( R_EAX, Rn );
  1716     check_walign32( R_EAX );
  1717     load_spreg( R_EDX, R_FPSCR );
  1718     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1719     JNE_rel8(15 + MEM_WRITE_SIZE + MMU_TRANSLATE_SIZE, doublesize);
  1720     ADD_imm8s_r32( -4, R_EAX );
  1721     MMU_TRANSLATE_WRITE( R_EAX );
  1722     load_fr_bank( R_EDX );
  1723     load_fr( R_EDX, R_ECX, FRm );
  1724     ADD_imm8s_sh4r(-4,REG_OFFSET(r[Rn]));
  1725     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1726     if( FRm&1 ) {
  1727 	JMP_rel8( 25 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1728 	JMP_TARGET(doublesize);
  1729 	ADD_imm8s_r32(-8,R_EAX);
  1730 	MMU_TRANSLATE_WRITE( R_EAX );
  1731 	load_xf_bank( R_EDX );
  1732 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1733 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1734 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1735 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1736 	JMP_TARGET(end);
  1737     } else {
  1738 	JMP_rel8( 16 + MEM_WRITE_DOUBLE_SIZE + MMU_TRANSLATE_SIZE, end );
  1739 	JMP_TARGET(doublesize);
  1740 	ADD_imm8s_r32(-8,R_EAX);
  1741 	MMU_TRANSLATE_WRITE( R_EAX );
  1742 	load_fr_bank( R_EDX );
  1743 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1744 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1745 	ADD_imm8s_sh4r(-8,REG_OFFSET(r[Rn]));
  1746 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1747 	JMP_TARGET(end);
  1748     }
  1749     sh4_x86.tstate = TSTATE_NONE;
  1750 :}
  1751 FMOV @Rm+, FRn {:
  1752     check_fpuen();
  1753     load_reg( R_EAX, Rm );
  1754     check_ralign32( R_EAX );
  1755     MMU_TRANSLATE_READ( R_EAX );
  1756     load_spreg( R_EDX, R_FPSCR );
  1757     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1758     JNE_rel8(12 + MEM_READ_SIZE, doublesize);
  1759     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  1760     MEM_READ_LONG( R_EAX, R_EAX );
  1761     load_fr_bank( R_EDX );
  1762     store_fr( R_EDX, R_EAX, FRn );
  1763     if( FRn&1 ) {
  1764 	JMP_rel8(25 + MEM_READ_DOUBLE_SIZE, end);
  1765 	JMP_TARGET(doublesize);
  1766 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1767 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1768 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1769 	load_xf_bank( R_EDX );
  1770 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1771 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1772 	JMP_TARGET(end);
  1773     } else {
  1774 	JMP_rel8(13 + MEM_READ_DOUBLE_SIZE, end);
       	JMP_TARGET(doublesize);
  1775 	ADD_imm8s_sh4r( 8, REG_OFFSET(r[Rm]) );
  1776 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1777 	load_fr_bank( R_EDX );
  1778 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1779 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1780 	JMP_TARGET(end);
  1781     }
  1782     sh4_x86.tstate = TSTATE_NONE;
  1783 :}
  1784 FMOV FRm, @(R0, Rn) {:  
  1785     check_fpuen();
  1786     load_reg( R_EAX, Rn );
  1787     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1788     check_walign32( R_EAX );
  1789     MMU_TRANSLATE_WRITE( R_EAX );
  1790     load_spreg( R_EDX, R_FPSCR );
  1791     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1792     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1793     load_fr_bank( R_EDX );
  1794     load_fr( R_EDX, R_ECX, FRm );
  1795     MEM_WRITE_LONG( R_EAX, R_ECX ); // 12
  1796     if( FRm&1 ) {
  1797 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1798 	JMP_TARGET(doublesize);
  1799 	load_xf_bank( R_EDX );
  1800 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1801 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1802 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1803 	JMP_TARGET(end);
  1804     } else {
  1805 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1806 	JMP_TARGET(doublesize);
  1807 	load_fr_bank( R_EDX );
  1808 	load_fr( R_EDX, R_ECX, FRm&0x0E );
  1809 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1810 	MEM_WRITE_DOUBLE( R_EAX, R_ECX, R_EDX );
  1811 	JMP_TARGET(end);
  1812     }
  1813     sh4_x86.tstate = TSTATE_NONE;
  1814 :}
  1815 FMOV @(R0, Rm), FRn {:  
  1816     check_fpuen();
  1817     load_reg( R_EAX, Rm );
  1818     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EAX );
  1819     check_ralign32( R_EAX );
  1820     MMU_TRANSLATE_READ( R_EAX );
  1821     load_spreg( R_EDX, R_FPSCR );
  1822     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1823     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1824     MEM_READ_LONG( R_EAX, R_EAX );
  1825     load_fr_bank( R_EDX );
  1826     store_fr( R_EDX, R_EAX, FRn );
  1827     if( FRn&1 ) {
  1828 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1829 	JMP_TARGET(doublesize);
  1830 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1831 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1832 	load_xf_bank( R_EDX );
  1833 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1834 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1835 	JMP_TARGET(end);
  1836     } else {
  1837 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1838 	JMP_TARGET(doublesize);
  1839 	MEM_READ_DOUBLE( R_EAX, R_ECX, R_EAX );
  1840 	load_fr_bank( R_EDX );
  1841 	store_fr( R_EDX, R_ECX, FRn&0x0E );
  1842 	store_fr( R_EDX, R_EAX, FRn|0x01 );
  1843 	JMP_TARGET(end);
  1844     }
  1845     sh4_x86.tstate = TSTATE_NONE;
  1846 :}
  1847 FLDI0 FRn {:  /* IFF PR=0 */
  1848     check_fpuen();
  1849     load_spreg( R_ECX, R_FPSCR );
  1850     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1851     JNE_rel8(8, end);
  1852     XOR_r32_r32( R_EAX, R_EAX );
  1853     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1854     store_fr( R_ECX, R_EAX, FRn );
  1855     JMP_TARGET(end);
  1856     sh4_x86.tstate = TSTATE_NONE;
  1857 :}
  1858 FLDI1 FRn {:  /* IFF PR=0 */
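           /* 1.0f == 0x3F800000: the constant is loaded as a raw integer, no FPU required. */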
  1859     check_fpuen();
  1860     load_spreg( R_ECX, R_FPSCR );
  1861     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1862     JNE_rel8(11, end);
  1863     load_imm32(R_EAX, 0x3F800000);
  1864     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1865     store_fr( R_ECX, R_EAX, FRn );
  1866     JMP_TARGET(end);
  1867     sh4_x86.tstate = TSTATE_NONE;
  1868 :}
  1870 FLOAT FPUL, FRn {:  
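           /* FILD pushes FPUL as a 32-bit integer onto the x87 stack; it is popped
            * back out as a float (PR=0) or a double (PR=1). */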
  1871     check_fpuen();
  1872     load_spreg( R_ECX, R_FPSCR );
  1873     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1874     FILD_sh4r(R_FPUL);
  1875     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1876     JNE_rel8(5, doubleprec);
  1877     pop_fr( R_EDX, FRn );
  1878     JMP_rel8(3, end);
  1879     JMP_TARGET(doubleprec);
  1880     pop_dr( R_EDX, FRn );
  1881     JMP_TARGET(end);
  1882     sh4_x86.tstate = TSTATE_NONE;
  1883 :}
  1884 FTRC FRm, FPUL {:  
  1885     check_fpuen();
  1886     load_spreg( R_ECX, R_FPSCR );
  1887     load_fr_bank( R_EDX );
  1888     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1889     JNE_rel8(5, doubleprec);
  1890     push_fr( R_EDX, FRm );
  1891     JMP_rel8(3, doop);
  1892     JMP_TARGET(doubleprec);
  1893     push_dr( R_EDX, FRm );
  1894     JMP_TARGET( doop );
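           /* Saturate against max_int/min_int, then convert with the FPU control
            * word temporarily set to truncation (trunc_fcw), restoring save_fcw after. */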
  1895     load_imm32( R_ECX, (uint32_t)&max_int );
  1896     FILD_r32ind( R_ECX );
  1897     FCOMIP_st(1);
  1898     JNA_rel8( 32, sat );
  1899     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1900     FILD_r32ind( R_ECX );           // 2
  1901     FCOMIP_st(1);                   // 2
  1902     JAE_rel8( 21, sat2 );            // 2
  1903     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1904     FNSTCW_r32ind( R_EAX );
  1905     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1906     FLDCW_r32ind( R_EDX );
  1907     FISTP_sh4r(R_FPUL);             // 3
  1908     FLDCW_r32ind( R_EAX );
  1909     JMP_rel8( 9, end );             // 2
  1911     JMP_TARGET(sat);
  1912     JMP_TARGET(sat2);
  1913     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1914     store_spreg( R_ECX, R_FPUL );
  1915     FPOP_st();
  1916     JMP_TARGET(end);
  1917     sh4_x86.tstate = TSTATE_NONE;
  1918 :}
  1919 FLDS FRm, FPUL {:  
  1920     check_fpuen();
  1921     load_fr_bank( R_ECX );
  1922     load_fr( R_ECX, R_EAX, FRm );
  1923     store_spreg( R_EAX, R_FPUL );
  1924     sh4_x86.tstate = TSTATE_NONE;
  1925 :}
  1926 FSTS FPUL, FRn {:  
  1927     check_fpuen();
  1928     load_fr_bank( R_ECX );
  1929     load_spreg( R_EAX, R_FPUL );
  1930     store_fr( R_ECX, R_EAX, FRn );
  1931     sh4_x86.tstate = TSTATE_NONE;
  1932 :}
  1933 FCNVDS FRm, FPUL {:  
  1934     check_fpuen();
  1935     load_spreg( R_ECX, R_FPSCR );
  1936     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1937     JE_rel8(9, end); // only when PR=1
  1938     load_fr_bank( R_ECX );
  1939     push_dr( R_ECX, FRm );
  1940     pop_fpul();
  1941     JMP_TARGET(end);
  1942     sh4_x86.tstate = TSTATE_NONE;
  1943 :}
  1944 FCNVSD FPUL, FRn {:  
  1945     check_fpuen();
  1946     load_spreg( R_ECX, R_FPSCR );
  1947     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1948     JE_rel8(9, end); // only when PR=1
  1949     load_fr_bank( R_ECX );
  1950     push_fpul();
  1951     pop_dr( R_ECX, FRn );
  1952     JMP_TARGET(end);
  1953     sh4_x86.tstate = TSTATE_NONE;
  1954 :}
  1956 /* Floating point instructions */
  1957 FABS FRn {:  
  1958     check_fpuen();
  1959     load_spreg( R_ECX, R_FPSCR );
  1960     load_fr_bank( R_EDX );
  1961     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1962     JNE_rel8(10, doubleprec);
  1963     push_fr(R_EDX, FRn); // 3
  1964     FABS_st0(); // 2
  1965     pop_fr( R_EDX, FRn); //3
  1966     JMP_rel8(8,end); // 2
  1967     JMP_TARGET(doubleprec);
  1968     push_dr(R_EDX, FRn);
  1969     FABS_st0();
  1970     pop_dr(R_EDX, FRn);
  1971     JMP_TARGET(end);
  1972     sh4_x86.tstate = TSTATE_NONE;
  1973 :}
  1974 FADD FRm, FRn {:  
  1975     check_fpuen();
  1976     load_spreg( R_ECX, R_FPSCR );
  1977     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1978     load_fr_bank( R_EDX );
  1979     JNE_rel8(13,doubleprec);
  1980     push_fr(R_EDX, FRm);
  1981     push_fr(R_EDX, FRn);
  1982     FADDP_st(1);
  1983     pop_fr(R_EDX, FRn);
  1984     JMP_rel8(11,end);
  1985     JMP_TARGET(doubleprec);
  1986     push_dr(R_EDX, FRm);
  1987     push_dr(R_EDX, FRn);
  1988     FADDP_st(1);
  1989     pop_dr(R_EDX, FRn);
  1990     JMP_TARGET(end);
  1991     sh4_x86.tstate = TSTATE_NONE;
  1992 :}
  1993 FDIV FRm, FRn {:  
  1994     check_fpuen();
  1995     load_spreg( R_ECX, R_FPSCR );
  1996     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1997     load_fr_bank( R_EDX );
  1998     JNE_rel8(13, doubleprec);
  1999     push_fr(R_EDX, FRn);
  2000     push_fr(R_EDX, FRm);
  2001     FDIVP_st(1);
  2002     pop_fr(R_EDX, FRn);
  2003     JMP_rel8(11, end);
  2004     JMP_TARGET(doubleprec);
  2005     push_dr(R_EDX, FRn);
  2006     push_dr(R_EDX, FRm);
  2007     FDIVP_st(1);
  2008     pop_dr(R_EDX, FRn);
  2009     JMP_TARGET(end);
  2010     sh4_x86.tstate = TSTATE_NONE;
  2011 :}
  2012 FMAC FR0, FRm, FRn {:  
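           /* FRn := FR0 * FRm + FRn, evaluated entirely on the x87 stack. */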
  2013     check_fpuen();
  2014     load_spreg( R_ECX, R_FPSCR );
  2015     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  2016     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2017     JNE_rel8(18, doubleprec);
  2018     push_fr( R_EDX, 0 );
  2019     push_fr( R_EDX, FRm );
  2020     FMULP_st(1);
  2021     push_fr( R_EDX, FRn );
  2022     FADDP_st(1);
  2023     pop_fr( R_EDX, FRn );
  2024     JMP_rel8(16, end);
  2025     JMP_TARGET(doubleprec);
  2026     push_dr( R_EDX, 0 );
  2027     push_dr( R_EDX, FRm );
  2028     FMULP_st(1);
  2029     push_dr( R_EDX, FRn );
  2030     FADDP_st(1);
  2031     pop_dr( R_EDX, FRn );
  2032     JMP_TARGET(end);
  2033     sh4_x86.tstate = TSTATE_NONE;
  2034 :}
  2036 FMUL FRm, FRn {:  
  2037     check_fpuen();
  2038     load_spreg( R_ECX, R_FPSCR );
  2039     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2040     load_fr_bank( R_EDX );
  2041     JNE_rel8(13, doubleprec);
  2042     push_fr(R_EDX, FRm);
  2043     push_fr(R_EDX, FRn);
  2044     FMULP_st(1);
  2045     pop_fr(R_EDX, FRn);
  2046     JMP_rel8(11, end);
  2047     JMP_TARGET(doubleprec);
  2048     push_dr(R_EDX, FRm);
  2049     push_dr(R_EDX, FRn);
  2050     FMULP_st(1);
  2051     pop_dr(R_EDX, FRn);
  2052     JMP_TARGET(end);
  2053     sh4_x86.tstate = TSTATE_NONE;
  2054 :}
  2055 FNEG FRn {:  
  2056     check_fpuen();
  2057     load_spreg( R_ECX, R_FPSCR );
  2058     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2059     load_fr_bank( R_EDX );
  2060     JNE_rel8(10, doubleprec);
  2061     push_fr(R_EDX, FRn);
  2062     FCHS_st0();
  2063     pop_fr(R_EDX, FRn);
  2064     JMP_rel8(8, end);
  2065     JMP_TARGET(doubleprec);
  2066     push_dr(R_EDX, FRn);
  2067     FCHS_st0();
  2068     pop_dr(R_EDX, FRn);
  2069     JMP_TARGET(end);
  2070     sh4_x86.tstate = TSTATE_NONE;
  2071 :}
  2072 FSRRA FRn {:  
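           /* Approximate 1/sqrt(FRn), computed here exactly as 1.0 / FSQRT(FRn);
            * defined for PR=0 only, so PR=1 falls straight through to end. */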
  2073     check_fpuen();
  2074     load_spreg( R_ECX, R_FPSCR );
  2075     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2076     load_fr_bank( R_EDX );
  2077     JNE_rel8(12, end); // PR=0 only
  2078     FLD1_st0();
  2079     push_fr(R_EDX, FRn);
  2080     FSQRT_st0();
  2081     FDIVP_st(1);
  2082     pop_fr(R_EDX, FRn);
  2083     JMP_TARGET(end);
  2084     sh4_x86.tstate = TSTATE_NONE;
  2085 :}
  2086 FSQRT FRn {:  
  2087     check_fpuen();
  2088     load_spreg( R_ECX, R_FPSCR );
  2089     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2090     load_fr_bank( R_EDX );
  2091     JNE_rel8(10, doubleprec);
  2092     push_fr(R_EDX, FRn);
  2093     FSQRT_st0();
  2094     pop_fr(R_EDX, FRn);
  2095     JMP_rel8(8, end);
  2096     JMP_TARGET(doubleprec);
  2097     push_dr(R_EDX, FRn);
  2098     FSQRT_st0();
  2099     pop_dr(R_EDX, FRn);
  2100     JMP_TARGET(end);
  2101     sh4_x86.tstate = TSTATE_NONE;
  2102 :}
  2103 FSUB FRm, FRn {:  
  2104     check_fpuen();
  2105     load_spreg( R_ECX, R_FPSCR );
  2106     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2107     load_fr_bank( R_EDX );
  2108     JNE_rel8(13, doubleprec);
  2109     push_fr(R_EDX, FRn);
  2110     push_fr(R_EDX, FRm);
  2111     FSUBP_st(1);
  2112     pop_fr(R_EDX, FRn);
  2113     JMP_rel8(11, end);
  2114     JMP_TARGET(doubleprec);
  2115     push_dr(R_EDX, FRn);
  2116     push_dr(R_EDX, FRm);
  2117     FSUBP_st(1);
  2118     pop_dr(R_EDX, FRn);
  2119     JMP_TARGET(end);
  2120     sh4_x86.tstate = TSTATE_NONE;
  2121 :}
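       /* FP compares: both operands are pushed onto the x87 stack, FCOMIP sets the
        * x86 flags, and SETE/SETA copy the result into sh4r.t. */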
  2123 FCMP/EQ FRm, FRn {:  
  2124     check_fpuen();
  2125     load_spreg( R_ECX, R_FPSCR );
  2126     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2127     load_fr_bank( R_EDX );
  2128     JNE_rel8(8, doubleprec);
  2129     push_fr(R_EDX, FRm);
  2130     push_fr(R_EDX, FRn);
  2131     JMP_rel8(6, end);
  2132     JMP_TARGET(doubleprec);
  2133     push_dr(R_EDX, FRm);
  2134     push_dr(R_EDX, FRn);
  2135     JMP_TARGET(end);
  2136     FCOMIP_st(1);
  2137     SETE_t();
  2138     FPOP_st();
  2139     sh4_x86.tstate = TSTATE_NONE;
  2140 :}
  2141 FCMP/GT FRm, FRn {:  
  2142     check_fpuen();
  2143     load_spreg( R_ECX, R_FPSCR );
  2144     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2145     load_fr_bank( R_EDX );
  2146     JNE_rel8(8, doubleprec);
  2147     push_fr(R_EDX, FRm);
  2148     push_fr(R_EDX, FRn);
  2149     JMP_rel8(6, end);
  2150     JMP_TARGET(doubleprec);
  2151     push_dr(R_EDX, FRm);
  2152     push_dr(R_EDX, FRn);
  2153     JMP_TARGET(end);
  2154     FCOMIP_st(1);
  2155     SETA_t();
  2156     FPOP_st();
  2157     sh4_x86.tstate = TSTATE_NONE;
  2158 :}
  2160 FSCA FPUL, FRn {:  
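           /* sh4_fsca writes the sine and cosine of the FPUL angle into the
            * even-aligned FR pair selected by FRn; PR=0 only (PR=1 skips the call). */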
  2161     check_fpuen();
  2162     load_spreg( R_ECX, R_FPSCR );
  2163     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2164     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
  2165     load_fr_bank( R_ECX );
  2166     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2167     load_spreg( R_EDX, R_FPUL );
  2168     call_func2( sh4_fsca, R_EDX, R_ECX );
  2169     JMP_TARGET(doubleprec);
  2170     sh4_x86.tstate = TSTATE_NONE;
  2171 :}
  2172 FIPR FVm, FVn {:  
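           /* Four-element inner product of FVm and FVn, accumulated on the x87
            * stack; the result lands in the last element of FVn. PR=0 only. */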
  2173     check_fpuen();
  2174     load_spreg( R_ECX, R_FPSCR );
  2175     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2176     JNE_rel8(44, doubleprec);
  2178     load_fr_bank( R_ECX );
  2179     push_fr( R_ECX, FVm<<2 );
  2180     push_fr( R_ECX, FVn<<2 );
  2181     FMULP_st(1);
  2182     push_fr( R_ECX, (FVm<<2)+1);
  2183     push_fr( R_ECX, (FVn<<2)+1);
  2184     FMULP_st(1);
  2185     FADDP_st(1);
  2186     push_fr( R_ECX, (FVm<<2)+2);
  2187     push_fr( R_ECX, (FVn<<2)+2);
  2188     FMULP_st(1);
  2189     FADDP_st(1);
  2190     push_fr( R_ECX, (FVm<<2)+3);
  2191     push_fr( R_ECX, (FVn<<2)+3);
  2192     FMULP_st(1);
  2193     FADDP_st(1);
  2194     pop_fr( R_ECX, (FVn<<2)+3);
  2195     JMP_TARGET(doubleprec);
  2196     sh4_x86.tstate = TSTATE_NONE;
  2197 :}
  2198 FTRV XMTRX, FVn {:  
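           /* FVn := XMTRX * FVn, performed out of line by sh4_ftrv( vector, xmatrix );
            * PR=0 only. */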
  2199     check_fpuen();
  2200     load_spreg( R_ECX, R_FPSCR );
  2201     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2202     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
  2203     load_fr_bank( R_EDX );                 // 3
  2204     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2205     load_xf_bank( R_ECX );                 // 12
  2206     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2207     JMP_TARGET(doubleprec);
  2208     sh4_x86.tstate = TSTATE_NONE;
  2209 :}
  2211 FRCHG {:  
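           /* Toggle FPSCR.FR to swap the FR and XF register banks; update_fr_bank
            * refreshes the cached bank pointer. */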
  2212     check_fpuen();
  2213     load_spreg( R_ECX, R_FPSCR );
  2214     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2215     store_spreg( R_ECX, R_FPSCR );
  2216     update_fr_bank( R_ECX );
  2217     sh4_x86.tstate = TSTATE_NONE;
  2218 :}
  2219 FSCHG {:  
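           /* Toggle FPSCR.SZ, switching FMOV between 32-bit and 64-bit transfers. */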
  2220     check_fpuen();
  2221     load_spreg( R_ECX, R_FPSCR );
  2222     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2223     store_spreg( R_ECX, R_FPSCR );
  2224     sh4_x86.tstate = TSTATE_NONE;
  2225 :}
  2227 /* Processor control instructions */
  2228 LDC Rm, SR {:
  2229     if( sh4_x86.in_delay_slot ) {
  2230 	SLOTILLEGAL();
  2231     } else {
  2232 	check_priv();
  2233 	load_reg( R_EAX, Rm );
  2234 	call_func1( sh4_write_sr, R_EAX );
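       	/* Writing SR may change the privilege level and the FPU-disable bit,
       	 * so force both checks to be re-emitted. */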
  2235 	sh4_x86.priv_checked = FALSE;
  2236 	sh4_x86.fpuen_checked = FALSE;
  2237 	sh4_x86.tstate = TSTATE_NONE;
  2238     }
  2239 :}
  2240 LDC Rm, GBR {: 
  2241     load_reg( R_EAX, Rm );
  2242     store_spreg( R_EAX, R_GBR );
  2243 :}
  2244 LDC Rm, VBR {:  
  2245     check_priv();
  2246     load_reg( R_EAX, Rm );
  2247     store_spreg( R_EAX, R_VBR );
  2248     sh4_x86.tstate = TSTATE_NONE;
  2249 :}
  2250 LDC Rm, SSR {:  
  2251     check_priv();
  2252     load_reg( R_EAX, Rm );
  2253     store_spreg( R_EAX, R_SSR );
  2254     sh4_x86.tstate = TSTATE_NONE;
  2255 :}
  2256 LDC Rm, SGR {:  
  2257     check_priv();
  2258     load_reg( R_EAX, Rm );
  2259     store_spreg( R_EAX, R_SGR );
  2260     sh4_x86.tstate = TSTATE_NONE;
  2261 :}
  2262 LDC Rm, SPC {:  
  2263     check_priv();
  2264     load_reg( R_EAX, Rm );
  2265     store_spreg( R_EAX, R_SPC );
  2266     sh4_x86.tstate = TSTATE_NONE;
  2267 :}
  2268 LDC Rm, DBR {:  
  2269     check_priv();
  2270     load_reg( R_EAX, Rm );
  2271     store_spreg( R_EAX, R_DBR );
  2272     sh4_x86.tstate = TSTATE_NONE;
  2273 :}
  2274 LDC Rm, Rn_BANK {:  
  2275     check_priv();
  2276     load_reg( R_EAX, Rm );
  2277     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2278     sh4_x86.tstate = TSTATE_NONE;
  2279 :}
  2280 LDC.L @Rm+, GBR {:  
  2281     load_reg( R_EAX, Rm );
  2282     check_ralign32( R_EAX );
  2283     MMU_TRANSLATE_READ( R_EAX );
  2284     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2285     MEM_READ_LONG( R_EAX, R_EAX );
  2286     store_spreg( R_EAX, R_GBR );
  2287     sh4_x86.tstate = TSTATE_NONE;
  2288 :}
  2289 LDC.L @Rm+, SR {:
  2290     if( sh4_x86.in_delay_slot ) {
  2291 	SLOTILLEGAL();
  2292     } else {
  2293 	check_priv();
  2294 	load_reg( R_EAX, Rm );
  2295 	check_ralign32( R_EAX );
  2296 	MMU_TRANSLATE_READ( R_EAX );
  2297 	ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2298 	MEM_READ_LONG( R_EAX, R_EAX );
  2299 	call_func1( sh4_write_sr, R_EAX );
  2300 	sh4_x86.priv_checked = FALSE;
  2301 	sh4_x86.fpuen_checked = FALSE;
  2302 	sh4_x86.tstate = TSTATE_NONE;
  2303     }
  2304 :}
  2305 LDC.L @Rm+, VBR {:  
  2306     check_priv();
  2307     load_reg( R_EAX, Rm );
  2308     check_ralign32( R_EAX );
  2309     MMU_TRANSLATE_READ( R_EAX );
  2310     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2311     MEM_READ_LONG( R_EAX, R_EAX );
  2312     store_spreg( R_EAX, R_VBR );
  2313     sh4_x86.tstate = TSTATE_NONE;
  2314 :}
  2315 LDC.L @Rm+, SSR {:
  2316     check_priv();
  2317     load_reg( R_EAX, Rm );
  2318     check_ralign32( R_EAX );
  2319     MMU_TRANSLATE_READ( R_EAX );
  2320     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2321     MEM_READ_LONG( R_EAX, R_EAX );
  2322     store_spreg( R_EAX, R_SSR );
  2323     sh4_x86.tstate = TSTATE_NONE;
  2324 :}
  2325 LDC.L @Rm+, SGR {:  
  2326     check_priv();
  2327     load_reg( R_EAX, Rm );
  2328     check_ralign32( R_EAX );
  2329     MMU_TRANSLATE_READ( R_EAX );
  2330     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2331     MEM_READ_LONG( R_EAX, R_EAX );
  2332     store_spreg( R_EAX, R_SGR );
  2333     sh4_x86.tstate = TSTATE_NONE;
  2334 :}
  2335 LDC.L @Rm+, SPC {:  
  2336     check_priv();
  2337     load_reg( R_EAX, Rm );
  2338     check_ralign32( R_EAX );
  2339     MMU_TRANSLATE_READ( R_EAX );
  2340     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2341     MEM_READ_LONG( R_EAX, R_EAX );
  2342     store_spreg( R_EAX, R_SPC );
  2343     sh4_x86.tstate = TSTATE_NONE;
  2344 :}
  2345 LDC.L @Rm+, DBR {:  
  2346     check_priv();
  2347     load_reg( R_EAX, Rm );
  2348     check_ralign32( R_EAX );
  2349     MMU_TRANSLATE_READ( R_EAX );
  2350     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2351     MEM_READ_LONG( R_EAX, R_EAX );
  2352     store_spreg( R_EAX, R_DBR );
  2353     sh4_x86.tstate = TSTATE_NONE;
  2354 :}
  2355 LDC.L @Rm+, Rn_BANK {:  
  2356     check_priv();
  2357     load_reg( R_EAX, Rm );
  2358     check_ralign32( R_EAX );
  2359     MMU_TRANSLATE_READ( R_EAX );
  2360     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2361     MEM_READ_LONG( R_EAX, R_EAX );
  2362     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2363     sh4_x86.tstate = TSTATE_NONE;
  2364 :}
  2365 LDS Rm, FPSCR {:  
  2366     load_reg( R_EAX, Rm );
  2367     store_spreg( R_EAX, R_FPSCR );
  2368     update_fr_bank( R_EAX );
  2369     sh4_x86.tstate = TSTATE_NONE;
  2370 :}
  2371 LDS.L @Rm+, FPSCR {:  
  2372     load_reg( R_EAX, Rm );
  2373     check_ralign32( R_EAX );
  2374     MMU_TRANSLATE_READ( R_EAX );
  2375     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2376     MEM_READ_LONG( R_EAX, R_EAX );
  2377     store_spreg( R_EAX, R_FPSCR );
  2378     update_fr_bank( R_EAX );
  2379     sh4_x86.tstate = TSTATE_NONE;
  2380 :}
  2381 LDS Rm, FPUL {:  
  2382     load_reg( R_EAX, Rm );
  2383     store_spreg( R_EAX, R_FPUL );
  2384 :}
  2385 LDS.L @Rm+, FPUL {:  
  2386     load_reg( R_EAX, Rm );
  2387     check_ralign32( R_EAX );
  2388     MMU_TRANSLATE_READ( R_EAX );
  2389     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2390     MEM_READ_LONG( R_EAX, R_EAX );
  2391     store_spreg( R_EAX, R_FPUL );
  2392     sh4_x86.tstate = TSTATE_NONE;
  2393 :}
  2394 LDS Rm, MACH {: 
  2395     load_reg( R_EAX, Rm );
  2396     store_spreg( R_EAX, R_MACH );
  2397 :}
  2398 LDS.L @Rm+, MACH {:  
  2399     load_reg( R_EAX, Rm );
  2400     check_ralign32( R_EAX );
  2401     MMU_TRANSLATE_READ( R_EAX );
  2402     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2403     MEM_READ_LONG( R_EAX, R_EAX );
  2404     store_spreg( R_EAX, R_MACH );
  2405     sh4_x86.tstate = TSTATE_NONE;
  2406 :}
  2407 LDS Rm, MACL {:  
  2408     load_reg( R_EAX, Rm );
  2409     store_spreg( R_EAX, R_MACL );
  2410 :}
  2411 LDS.L @Rm+, MACL {:  
  2412     load_reg( R_EAX, Rm );
  2413     check_ralign32( R_EAX );
  2414     MMU_TRANSLATE_READ( R_EAX );
  2415     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2416     MEM_READ_LONG( R_EAX, R_EAX );
  2417     store_spreg( R_EAX, R_MACL );
  2418     sh4_x86.tstate = TSTATE_NONE;
  2419 :}
  2420 LDS Rm, PR {:  
  2421     load_reg( R_EAX, Rm );
  2422     store_spreg( R_EAX, R_PR );
  2423 :}
  2424 LDS.L @Rm+, PR {:  
  2425     load_reg( R_EAX, Rm );
  2426     check_ralign32( R_EAX );
  2427     MMU_TRANSLATE_READ( R_EAX );
  2428     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
  2429     MEM_READ_LONG( R_EAX, R_EAX );
  2430     store_spreg( R_EAX, R_PR );
  2431     sh4_x86.tstate = TSTATE_NONE;
  2432 :}
  2433 LDTLB {:  
  2434     call_func0( MMU_ldtlb );
  2435 :}
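       /* Cache-control instructions: the operand cache is not modelled here, so
        * these are no-ops. */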
  2436 OCBI @Rn {:  :}
  2437 OCBP @Rn {:  :}
  2438 OCBWB @Rn {:  :}
  2439 PREF @Rn {:
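           /* Only store-queue prefetches (0xE0000000..0xE3FFFFFF) have a visible
            * effect: flush the corresponding store queue, raising the pending
            * exception if sh4_flush_store_queue reports failure. */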
  2440     load_reg( R_EAX, Rn );
  2441     MOV_r32_r32( R_EAX, R_ECX );
  2442     AND_imm32_r32( 0xFC000000, R_EAX );
  2443     CMP_imm32_r32( 0xE0000000, R_EAX );
  2444     JNE_rel8(8+CALL_FUNC1_SIZE, end);
  2445     call_func1( sh4_flush_store_queue, R_ECX );
  2446     TEST_r32_r32( R_EAX, R_EAX );
  2447     JE_exc(-1);
  2448     JMP_TARGET(end);
  2449     sh4_x86.tstate = TSTATE_NONE;
  2450 :}
  2451 SLEEP {: 
  2452     check_priv();
  2453     call_func0( sh4_sleep );
  2454     sh4_x86.tstate = TSTATE_NONE;
  2455     sh4_x86.in_delay_slot = FALSE;
  2456     return 2;
  2457 :}
  2458 STC SR, Rn {:
  2459     check_priv();
  2460     call_func0(sh4_read_sr);
  2461     store_reg( R_EAX, Rn );
  2462     sh4_x86.tstate = TSTATE_NONE;
  2463 :}
  2464 STC GBR, Rn {:  
  2465     load_spreg( R_EAX, R_GBR );
  2466     store_reg( R_EAX, Rn );
  2467 :}
  2468 STC VBR, Rn {:  
  2469     check_priv();
  2470     load_spreg( R_EAX, R_VBR );
  2471     store_reg( R_EAX, Rn );
  2472     sh4_x86.tstate = TSTATE_NONE;
  2473 :}
  2474 STC SSR, Rn {:  
  2475     check_priv();
  2476     load_spreg( R_EAX, R_SSR );
  2477     store_reg( R_EAX, Rn );
  2478     sh4_x86.tstate = TSTATE_NONE;
  2479 :}
  2480 STC SPC, Rn {:  
  2481     check_priv();
  2482     load_spreg( R_EAX, R_SPC );
  2483     store_reg( R_EAX, Rn );
  2484     sh4_x86.tstate = TSTATE_NONE;
  2485 :}
  2486 STC SGR, Rn {:  
  2487     check_priv();
  2488     load_spreg( R_EAX, R_SGR );
  2489     store_reg( R_EAX, Rn );
  2490     sh4_x86.tstate = TSTATE_NONE;
  2491 :}
  2492 STC DBR, Rn {:  
  2493     check_priv();
  2494     load_spreg( R_EAX, R_DBR );
  2495     store_reg( R_EAX, Rn );
  2496     sh4_x86.tstate = TSTATE_NONE;
  2497 :}
  2498 STC Rm_BANK, Rn {:
  2499     check_priv();
  2500     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2501     store_reg( R_EAX, Rn );
  2502     sh4_x86.tstate = TSTATE_NONE;
  2503 :}
  2504 STC.L SR, @-Rn {:
  2505     check_priv();
  2506     load_reg( R_EAX, Rn );
  2507     check_walign32( R_EAX );
  2508     ADD_imm8s_r32( -4, R_EAX );
  2509     MMU_TRANSLATE_WRITE( R_EAX );
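           /* The call to sh4_read_sr may clobber the scratch registers, so the
            * translated address is preserved (stack-aligned) across it. */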
  2510     PUSH_realigned_r32( R_EAX );
  2511     call_func0( sh4_read_sr );
  2512     POP_realigned_r32( R_ECX );
  2513     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2514     MEM_WRITE_LONG( R_ECX, R_EAX );
  2515     sh4_x86.tstate = TSTATE_NONE;
  2516 :}
  2517 STC.L VBR, @-Rn {:  
  2518     check_priv();
  2519     load_reg( R_EAX, Rn );
  2520     check_walign32( R_EAX );
  2521     ADD_imm8s_r32( -4, R_EAX );
  2522     MMU_TRANSLATE_WRITE( R_EAX );
  2523     load_spreg( R_EDX, R_VBR );
  2524     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2525     MEM_WRITE_LONG( R_EAX, R_EDX );
  2526     sh4_x86.tstate = TSTATE_NONE;
  2527 :}
  2528 STC.L SSR, @-Rn {:  
  2529     check_priv();
  2530     load_reg( R_EAX, Rn );
  2531     check_walign32( R_EAX );
  2532     ADD_imm8s_r32( -4, R_EAX );
  2533     MMU_TRANSLATE_WRITE( R_EAX );
  2534     load_spreg( R_EDX, R_SSR );
  2535     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2536     MEM_WRITE_LONG( R_EAX, R_EDX );
  2537     sh4_x86.tstate = TSTATE_NONE;
  2538 :}
  2539 STC.L SPC, @-Rn {:
  2540     check_priv();
  2541     load_reg( R_EAX, Rn );
  2542     check_walign32( R_EAX );
  2543     ADD_imm8s_r32( -4, R_EAX );
  2544     MMU_TRANSLATE_WRITE( R_EAX );
  2545     load_spreg( R_EDX, R_SPC );
  2546     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2547     MEM_WRITE_LONG( R_EAX, R_EDX );
  2548     sh4_x86.tstate = TSTATE_NONE;
  2549 :}
  2550 STC.L SGR, @-Rn {:  
  2551     check_priv();
  2552     load_reg( R_EAX, Rn );
  2553     check_walign32( R_EAX );
  2554     ADD_imm8s_r32( -4, R_EAX );
  2555     MMU_TRANSLATE_WRITE( R_EAX );
  2556     load_spreg( R_EDX, R_SGR );
  2557     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2558     MEM_WRITE_LONG( R_EAX, R_EDX );
  2559     sh4_x86.tstate = TSTATE_NONE;
  2560 :}
  2561 STC.L DBR, @-Rn {:  
  2562     check_priv();
  2563     load_reg( R_EAX, Rn );
  2564     check_walign32( R_EAX );
  2565     ADD_imm8s_r32( -4, R_EAX );
  2566     MMU_TRANSLATE_WRITE( R_EAX );
  2567     load_spreg( R_EDX, R_DBR );
  2568     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2569     MEM_WRITE_LONG( R_EAX, R_EDX );
  2570     sh4_x86.tstate = TSTATE_NONE;
  2571 :}
  2572 STC.L Rm_BANK, @-Rn {:  
  2573     check_priv();
  2574     load_reg( R_EAX, Rn );
  2575     check_walign32( R_EAX );
  2576     ADD_imm8s_r32( -4, R_EAX );
  2577     MMU_TRANSLATE_WRITE( R_EAX );
  2578     load_spreg( R_EDX, REG_OFFSET(r_bank[Rm_BANK]) );
  2579     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2580     MEM_WRITE_LONG( R_EAX, R_EDX );
  2581     sh4_x86.tstate = TSTATE_NONE;
  2582 :}
  2583 STC.L GBR, @-Rn {:  
  2584     load_reg( R_EAX, Rn );
  2585     check_walign32( R_EAX );
  2586     ADD_imm8s_r32( -4, R_EAX );
  2587     MMU_TRANSLATE_WRITE( R_EAX );
  2588     load_spreg( R_EDX, R_GBR );
  2589     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2590     MEM_WRITE_LONG( R_EAX, R_EDX );
  2591     sh4_x86.tstate = TSTATE_NONE;
  2592 :}
  2593 STS FPSCR, Rn {:  
  2594     load_spreg( R_EAX, R_FPSCR );
  2595     store_reg( R_EAX, Rn );
  2596 :}
  2597 STS.L FPSCR, @-Rn {:  
  2598     load_reg( R_EAX, Rn );
  2599     check_walign32( R_EAX );
  2600     ADD_imm8s_r32( -4, R_EAX );
  2601     MMU_TRANSLATE_WRITE( R_EAX );
  2602     load_spreg( R_EDX, R_FPSCR );
  2603     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2604     MEM_WRITE_LONG( R_EAX, R_EDX );
  2605     sh4_x86.tstate = TSTATE_NONE;
  2606 :}
  2607 STS FPUL, Rn {:  
  2608     load_spreg( R_EAX, R_FPUL );
  2609     store_reg( R_EAX, Rn );
  2610 :}
  2611 STS.L FPUL, @-Rn {:  
  2612     load_reg( R_EAX, Rn );
  2613     check_walign32( R_EAX );
  2614     ADD_imm8s_r32( -4, R_EAX );
  2615     MMU_TRANSLATE_WRITE( R_EAX );
  2616     load_spreg( R_EDX, R_FPUL );
  2617     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2618     MEM_WRITE_LONG( R_EAX, R_EDX );
  2619     sh4_x86.tstate = TSTATE_NONE;
  2620 :}
  2621 STS MACH, Rn {:  
  2622     load_spreg( R_EAX, R_MACH );
  2623     store_reg( R_EAX, Rn );
  2624 :}
  2625 STS.L MACH, @-Rn {:  
  2626     load_reg( R_EAX, Rn );
  2627     check_walign32( R_EAX );
  2628     ADD_imm8s_r32( -4, R_EAX );
  2629     MMU_TRANSLATE_WRITE( R_EAX );
  2630     load_spreg( R_EDX, R_MACH );
  2631     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2632     MEM_WRITE_LONG( R_EAX, R_EDX );
  2633     sh4_x86.tstate = TSTATE_NONE;
  2634 :}
  2635 STS MACL, Rn {:  
  2636     load_spreg( R_EAX, R_MACL );
  2637     store_reg( R_EAX, Rn );
  2638 :}
  2639 STS.L MACL, @-Rn {:  
  2640     load_reg( R_EAX, Rn );
  2641     check_walign32( R_EAX );
  2642     ADD_imm8s_r32( -4, R_EAX );
  2643     MMU_TRANSLATE_WRITE( R_EAX );
  2644     load_spreg( R_EDX, R_MACL );
  2645     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2646     MEM_WRITE_LONG( R_EAX, R_EDX );
  2647     sh4_x86.tstate = TSTATE_NONE;
  2648 :}
  2649 STS PR, Rn {:  
  2650     load_spreg( R_EAX, R_PR );
  2651     store_reg( R_EAX, Rn );
  2652 :}
  2653 STS.L PR, @-Rn {:  
  2654     load_reg( R_EAX, Rn );
  2655     check_walign32( R_EAX );
  2656     ADD_imm8s_r32( -4, R_EAX );
  2657     MMU_TRANSLATE_WRITE( R_EAX );
  2658     load_spreg( R_EDX, R_PR );
  2659     ADD_imm8s_sh4r( -4, REG_OFFSET(r[Rn]) );
  2660     MEM_WRITE_LONG( R_EAX, R_EDX );
  2661     sh4_x86.tstate = TSTATE_NONE;
  2662 :}
  2664 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2665 %%
  2666     sh4_x86.in_delay_slot = FALSE;
  2667     return 0;