Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 570:d2893980fbf5
prev569:a1c49e1e8776
next571:9bc09948d0f2
author nkeynes
date Sun Jan 06 12:24:18 2008 +0000 (16 years ago)
branchlxdream-mmu
permissions -rw-r--r--
last change Change to generate different code for mmu on/off cases
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
    37 struct backpatch_record { /* one entry of sh4_x86.backpatch_list */
    38     uint32_t *fixup_addr;   /* address passed to sh4_x86_add_backpatch() as fixup_addr */
    39     uint32_t fixup_icount;  /* (fixup_pc - block_start_pc) >> 1, i.e. offset from the block start in 2-byte units */
    40     uint32_t exc_code;      /* exception code passed to sh4_x86_add_backpatch() */
    41 };
    43 /** 
    44  * Struct to manage internal translation state. This state is not saved -
    45  * it is only valid between calls to sh4_translate_begin_block() and
    46  * sh4_translate_end_block()
    47  */
    48 struct sh4_x86_state {
    49     gboolean in_delay_slot;
    50     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    51     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    52     gboolean branch_taken; /* true if we branched unconditionally */
    53     uint32_t block_start_pc;
    54     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    55     int tstate;
    57     /* mode flags */
    58     gboolean tlb_on; /* True if tlb translation is active */
    60     /* Allocated memory for the (block-wide) back-patch list */
    61     struct backpatch_record *backpatch_list;
    62     uint32_t backpatch_posn;
    63     uint32_t backpatch_size;
    64 };
    66 #define TSTATE_NONE -1
    67 #define TSTATE_O    0
    68 #define TSTATE_C    2
    69 #define TSTATE_E    4
    70 #define TSTATE_NE   5
    71 #define TSTATE_G    0xF
    72 #define TSTATE_GE   0xD
    73 #define TSTATE_A    7
    74 #define TSTATE_AE   3
    76 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    77 #define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    78 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    79     OP(0x70+sh4_x86.tstate); OP(rel8); \
    80     MARK_JMP(rel8,label)
    81 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    82 #define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    83 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    84     OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    85     MARK_JMP(rel8, label)
    87 static struct sh4_x86_state sh4_x86;
    89 static uint32_t max_int = 0x7FFFFFFF;
    90 static uint32_t min_int = 0x80000000;
    91 static uint32_t save_fcw; /* save value for fpu control word */
    92 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    94 void sh4_x86_init() /* Allocate the initial back-patch list of DEFAULT_BACKPATCH_SIZE bytes */
    95 {
    96     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE); /* NOTE(review): result is not checked for NULL here */
    97     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record); /* capacity in records, not bytes */
    98 }
   101 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code ) /* Append one record to the back-patch list, growing the list when it is full */
   102 {
   103     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) { /* list is full */
   104 	sh4_x86.backpatch_size <<= 1; /* double the capacity (counted in records) */
   105 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   106 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   107 	assert( sh4_x86.backpatch_list != NULL ); /* allocation failure is only detected when asserts are compiled in */
   108     }
   109     if( sh4_x86.in_delay_slot ) {
   110 	fixup_pc -= 2; /* while in a delay slot, record the pc 2 bytes earlier */
   111     }
   112     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
   113     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1; /* byte offset from block start, halved */
   114     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   115     sh4_x86.backpatch_posn++;
   116 }
   118 /**
   119  * Emit an instruction to load an SH4 reg into a real register
   120  */
   121 static inline void load_reg( int x86reg, int sh4reg ) 
   122 {
   123     /* mov [bp+n], reg */
   124     OP(0x8B);
   125     OP(0x45 + (x86reg<<3));
   126     OP(REG_OFFSET(r[sh4reg]));
   127 }
   129 static inline void load_reg16s( int x86reg, int sh4reg ) /* Emit a load of the low 16 bits of an SH4 reg, sign-extended to 32 bits */
   130 {
   131     OP(0x0F);
   132     OP(0xBF); /* 0F BF = MOVSX r32, r/m16 */
   133     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   134 }
   136 static inline void load_reg16u( int x86reg, int sh4reg ) /* Emit a load of the low 16 bits of an SH4 reg, zero-extended to 32 bits */
   137 {
   138     OP(0x0F);
   139     OP(0xB7); /* 0F B7 = MOVZX r32, r/m16 */
   140     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   142 }
   144 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   145 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   146 /**
   147  * Emit an instruction to load an immediate value into a register
   148  */
   149 static inline void load_imm32( int x86reg, uint32_t value ) {
   150     /* mov #value, reg */
   151     OP(0xB8 + x86reg);
   152     OP32(value);
   153 }
   155 /**
   156  * Load an immediate 64-bit quantity (note: x86-64 only). x86reg is the target register number; value is the full 64-bit immediate.
   157  */
   158 static inline void load_imm64( int x86reg, uint64_t value ) { /* value must be 64 bits wide so OP64() does not emit a truncated immediate */
   159     /* mov #value, reg */
   160     REXW();              /* REX.W prefix: selects the 64-bit operand form of the mov */
   161     OP(0xB8 + x86reg);
   162     OP64(value);         /* 8-byte immediate */
   163 }
   166 /**
   167  * Emit an instruction to store an SH4 reg (RN)
   168  */
   169 void static inline store_reg( int x86reg, int sh4reg ) {
   170     /* mov reg, [bp+n] */
   171     OP(0x89);
   172     OP(0x45 + (x86reg<<3));
   173     OP(REG_OFFSET(r[sh4reg]));
   174 }
   176 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   178 /**
   179  * Load an FR register (single-precision floating point) into an integer x86
   180  * register (eg for register-to-register moves)
   181  */
   182 void static inline load_fr( int bankreg, int x86reg, int frm )
   183 {
   184     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   185 }
   187 /**
   188  * Store an integer x86 register into an FR register (single-precision
   189  * floating point) of the bank addressed by bankreg (eg for register-to-register moves)
   190  */
   191 void static inline store_fr( int bankreg, int x86reg, int frn )
   192 {
   193     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2); // MOV x86reg, [bankreg + (frn^1)*4]
   194 }
   197 /**
   198  * Load a pointer to the back fp bank into the specified x86 register. The
   199  * bankreg must have been previously loaded with FPSCR.
   200  * NB: 12 bytes
   201  */
   202 static inline void load_xf_bank( int bankreg )
   203 {
   204     NOT_r32( bankreg ); // Invert FPSCR so the bank opposite to the one update_fr_bank() selects is chosen
   205     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   206     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   207     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   208 }
   210 /**
   211  * Update the fr_bank pointer based on the current fpscr value.
   212  */
   213 static inline void update_fr_bank( int fpscrreg )
   214 {
   215     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
   216     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
   217     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
   218     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
   219 }
   220 /**
   221  * Push FPUL (as a 32-bit float) onto the FPU stack
   222  */
   223 static inline void push_fpul( )
   224 {
   225     OP(0xD9); OP(0x45); OP(R_FPUL);
   226 }
   228 /**
   229  * Pop FPUL (as a 32-bit float) from the FPU stack
   230  */
   231 static inline void pop_fpul( )
   232 {
   233     OP(0xD9); OP(0x5D); OP(R_FPUL);
   234 }
   236 /**
   237  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   238  * with the location of the current fp bank.
   239  */
   240 static inline void push_fr( int bankreg, int frm ) 
   241 {
   242     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   243 }
   245 /**
   246  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   247  * with bankreg previously loaded with the location of the current fp bank.
   248  */
   249 static inline void pop_fr( int bankreg, int frm )
   250 {
   251     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4] (D9 /3: store and pop)
   252 }
   254 /**
   255  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   256  * with the location of the current fp bank.
   257  */
   258 static inline void push_dr( int bankreg, int frm )
   259 {
   260     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   261 }
   263 static inline void pop_dr( int bankreg, int frm ) /* Pop a 64-bit double from the FPU stack into the fp bank addressed by bankreg */
   264 {
   265     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4] (DD /3: store and pop)
   266 }
   268 /* Exception checks - Note that all exception checks will clobber EAX */
   270 #define check_priv( ) /* Emit the SR_MD test unless priv_checked is already set; clobbers EAX */ \
   271     if( !sh4_x86.priv_checked ) { \
   272 	sh4_x86.priv_checked = TRUE;\
   273 	load_spreg( R_EAX, R_SR );\
   274 	AND_imm32_r32( SR_MD, R_EAX ); /* result is zero when the SR_MD bit is clear */ \
   275 	if( sh4_x86.in_delay_slot ) {\
   276 	    JE_exc( EXC_SLOT_ILLEGAL );\
   277 	} else {\
   278 	    JE_exc( EXC_ILLEGAL );\
   279 	}\
   280     }
   282 #define check_fpuen( ) \
   283     if( !sh4_x86.fpuen_checked ) {\
   284 	sh4_x86.fpuen_checked = TRUE;\
   285 	load_spreg( R_EAX, R_SR );\
   286 	AND_imm32_r32( SR_FD, R_EAX );\
   287 	if( sh4_x86.in_delay_slot ) {\
   288 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   289 	} else {\
   290 	    JNE_exc(EXC_FPU_DISABLED);\
   291 	}\
   292     }
   294 #define check_ralign16( x86reg ) /* emit a test of bit 0 of x86reg, then JNE_exc(EXC_DATA_ADDR_READ) if it is set */ \
   295     TEST_imm32_r32( 0x00000001, x86reg ); \
   296     JNE_exc(EXC_DATA_ADDR_READ)
   298 #define check_walign16( x86reg ) /* as check_ralign16, but with EXC_DATA_ADDR_WRITE */ \
   299     TEST_imm32_r32( 0x00000001, x86reg ); \
   300     JNE_exc(EXC_DATA_ADDR_WRITE); /* NOTE(review): trailing ';' here, unlike the read variants */
   302 #define check_ralign32( x86reg ) /* emit a test of bits 0-1 of x86reg, then JNE_exc(EXC_DATA_ADDR_READ) if either is set */ \
   303     TEST_imm32_r32( 0x00000003, x86reg ); \
   304     JNE_exc(EXC_DATA_ADDR_READ)
   306 #define check_walign32( x86reg ) /* as check_ralign32, but with EXC_DATA_ADDR_WRITE */ \
   307     TEST_imm32_r32( 0x00000003, x86reg ); \
   308     JNE_exc(EXC_DATA_ADDR_WRITE); /* NOTE(review): trailing ';' here, unlike the read variants */
   310 #define UNDEF()
   311 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   312 #define MEM_READ_BYTE_PHYS( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   313 #define MEM_READ_WORD_PHYS( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   314 #define MEM_READ_LONG_PHYS( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   315 #define MEM_WRITE_BYTE_PHYS( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   316 #define MEM_WRITE_WORD_PHYS( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   317 #define MEM_WRITE_LONG_PHYS( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   319 #define MEM_READ_BYTE_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func1(sh4_read_byte, R_EAX); MEM_RESULT(value_reg)
   320 #define MEM_READ_WORD_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func1(sh4_read_word, R_EAX); MEM_RESULT(value_reg)
   321 #define MEM_READ_LONG_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_read, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func1(sh4_read_long, R_EAX); MEM_RESULT(value_reg)
   322 #define MEM_WRITE_BYTE_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func2(sh4_write_byte, R_EAX, value_reg)
   323 #define MEM_WRITE_WORD_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func2(sh4_write_word, R_EAX, value_reg)
   324 #define MEM_WRITE_LONG_VMA( addr_reg, value_reg ) call_func1(mmu_vma_to_phys_write, addr_reg); CMP_imm32_r32(MMU_VMA_ERROR, R_EAX); JE_exc(-1); call_func2(sh4_write_long, R_EAX, value_reg)
   326 #define MEM_READ_BYTE( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_READ_BYTE_VMA(addr_reg,value_reg);}else{MEM_READ_BYTE_PHYS(addr_reg, value_reg);}
   327 #define MEM_READ_WORD( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_READ_WORD_VMA(addr_reg,value_reg);}else{MEM_READ_WORD_PHYS(addr_reg, value_reg);}
   328 #define MEM_READ_LONG( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_READ_LONG_VMA(addr_reg,value_reg);}else{MEM_READ_LONG_PHYS(addr_reg, value_reg);}
   329 #define MEM_WRITE_BYTE( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_WRITE_BYTE_VMA(addr_reg,value_reg);}else{MEM_WRITE_BYTE_PHYS(addr_reg, value_reg);}
   330 #define MEM_WRITE_WORD( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_WRITE_WORD_VMA(addr_reg,value_reg);}else{MEM_WRITE_WORD_PHYS(addr_reg, value_reg);}
   331 #define MEM_WRITE_LONG( addr_reg, value_reg ) if(sh4_x86.tlb_on){MEM_WRITE_LONG_VMA(addr_reg,value_reg);}else{MEM_WRITE_LONG_PHYS(addr_reg, value_reg);}
   333 #define MEM_READ_SIZE_PHYS (CALL_FUNC1_SIZE)
   334 #define MEM_WRITE_SIZE_PHYS (CALL_FUNC2_SIZE)
   335 #define MEM_READ_SIZE_VMA (CALL_FUNC1_SIZE + CALL_FUNC1_SIZE + 12)
   336 #define MEM_WRITE_SIZE_VMA (CALL_FUNC1_SIZE + CALL_FUNC2_SIZE + 12)
   338 #define MEM_READ_SIZE (sh4_x86.tlb_on?MEM_READ_SIZE_VMA:MEM_READ_SIZE_PHYS)
   339 #define MEM_WRITE_SIZE (sh4_x86.tlb_on?MEM_WRITE_SIZE_VMA:MEM_WRITE_SIZE_PHYS)
   341 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
   343 /****** Import appropriate calling conventions ******/
   344 #if SH4_TRANSLATOR == TARGET_X86_64
   345 #include "sh4/ia64abi.h"
   346 #else /* SH4_TRANSLATOR == TARGET_X86 */
   347 #ifdef APPLE_BUILD
   348 #include "sh4/ia32mac.h"
   349 #else
   350 #include "sh4/ia32abi.h"
   351 #endif
   352 #endif
   355 /**
   356  * Translate a single instruction. Delayed branches are handled specially
   357  * by translating both branch and delayed instruction as a single unit.
   358  * 
   359  *
   360  * @return true if the instruction marks the end of a basic block
   361  * (eg a branch)
   362  */
   363 uint32_t sh4_translate_instruction( sh4addr_t pc )
   364 {
   365     uint32_t ir;
   366     /* Read instruction */
   367     if( IS_IN_ICACHE(pc) ) {
   368 	ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   369     } else {
   370 	ir = sh4_read_word(pc);
   371     }
   372 %%
   373 /* ALU operations */
   374 ADD Rm, Rn {:
   375     load_reg( R_EAX, Rm );
   376     load_reg( R_ECX, Rn );
   377     ADD_r32_r32( R_EAX, R_ECX );
   378     store_reg( R_ECX, Rn );
   379     sh4_x86.tstate = TSTATE_NONE;
   380 :}
   381 ADD #imm, Rn {:  
   382     load_reg( R_EAX, Rn );
   383     ADD_imm8s_r32( imm, R_EAX );
   384     store_reg( R_EAX, Rn );
   385     sh4_x86.tstate = TSTATE_NONE;
   386 :}
   387 ADDC Rm, Rn {:
   388     if( sh4_x86.tstate != TSTATE_C ) {
   389 	LDC_t();
   390     }
   391     load_reg( R_EAX, Rm );
   392     load_reg( R_ECX, Rn );
   393     ADC_r32_r32( R_EAX, R_ECX );
   394     store_reg( R_ECX, Rn );
   395     SETC_t();
   396     sh4_x86.tstate = TSTATE_C;
   397 :}
   398 ADDV Rm, Rn {:
   399     load_reg( R_EAX, Rm );
   400     load_reg( R_ECX, Rn );
   401     ADD_r32_r32( R_EAX, R_ECX );
   402     store_reg( R_ECX, Rn );
   403     SETO_t();
   404     sh4_x86.tstate = TSTATE_O;
   405 :}
   406 AND Rm, Rn {:
   407     load_reg( R_EAX, Rm );
   408     load_reg( R_ECX, Rn );
   409     AND_r32_r32( R_EAX, R_ECX );
   410     store_reg( R_ECX, Rn );
   411     sh4_x86.tstate = TSTATE_NONE;
   412 :}
   413 AND #imm, R0 {:  
   414     load_reg( R_EAX, 0 );
   415     AND_imm32_r32(imm, R_EAX); 
   416     store_reg( R_EAX, 0 );
   417     sh4_x86.tstate = TSTATE_NONE;
   418 :}
   419 AND.B #imm, @(R0, GBR) {: 
   420     load_reg( R_EAX, 0 );
   421     load_spreg( R_ECX, R_GBR );
   422     ADD_r32_r32( R_EAX, R_ECX );
   423     PUSH_realigned_r32(R_ECX);
   424     MEM_READ_BYTE( R_ECX, R_EAX );
   425     POP_realigned_r32(R_ECX);
   426     AND_imm32_r32(imm, R_EAX );
   427     MEM_WRITE_BYTE( R_ECX, R_EAX );
   428     sh4_x86.tstate = TSTATE_NONE;
   429 :}
   430 CMP/EQ Rm, Rn {:  
   431     load_reg( R_EAX, Rm );
   432     load_reg( R_ECX, Rn );
   433     CMP_r32_r32( R_EAX, R_ECX );
   434     SETE_t();
   435     sh4_x86.tstate = TSTATE_E;
   436 :}
   437 CMP/EQ #imm, R0 {:  
   438     load_reg( R_EAX, 0 );
   439     CMP_imm8s_r32(imm, R_EAX);
   440     SETE_t();
   441     sh4_x86.tstate = TSTATE_E;
   442 :}
   443 CMP/GE Rm, Rn {:  
   444     load_reg( R_EAX, Rm );
   445     load_reg( R_ECX, Rn );
   446     CMP_r32_r32( R_EAX, R_ECX );
   447     SETGE_t();
   448     sh4_x86.tstate = TSTATE_GE;
   449 :}
   450 CMP/GT Rm, Rn {: 
   451     load_reg( R_EAX, Rm );
   452     load_reg( R_ECX, Rn );
   453     CMP_r32_r32( R_EAX, R_ECX );
   454     SETG_t();
   455     sh4_x86.tstate = TSTATE_G;
   456 :}
   457 CMP/HI Rm, Rn {:  
   458     load_reg( R_EAX, Rm );
   459     load_reg( R_ECX, Rn );
   460     CMP_r32_r32( R_EAX, R_ECX );
   461     SETA_t();
   462     sh4_x86.tstate = TSTATE_A;
   463 :}
   464 CMP/HS Rm, Rn {: 
   465     load_reg( R_EAX, Rm );
   466     load_reg( R_ECX, Rn );
   467     CMP_r32_r32( R_EAX, R_ECX );
   468     SETAE_t();
   469     sh4_x86.tstate = TSTATE_AE;
   470  :}
   471 CMP/PL Rn {: 
   472     load_reg( R_EAX, Rn );
   473     CMP_imm8s_r32( 0, R_EAX );
   474     SETG_t();
   475     sh4_x86.tstate = TSTATE_G;
   476 :}
   477 CMP/PZ Rn {:  
   478     load_reg( R_EAX, Rn );
   479     CMP_imm8s_r32( 0, R_EAX );
   480     SETGE_t();
   481     sh4_x86.tstate = TSTATE_GE;
   482 :}
   483 CMP/STR Rm, Rn {:  
   484     load_reg( R_EAX, Rm ); // CMP/STR: T = 1 if any byte of Rm equals the byte of Rn at the same position
   485     load_reg( R_ECX, Rn );
   486     XOR_r32_r32( R_ECX, R_EAX ); // bytes that were equal become zero bytes in EAX
   487     TEST_r8_r8( R_AL, R_AL ); // byte 0
   488     JE_rel8(13, target1); // skip the remaining 13 bytes of tests with ZF set
   489     TEST_r8_r8( R_AH, R_AH ); // 2
   490     JE_rel8(9, target2);
   491     SHR_imm8_r32( 16, R_EAX ); // 3
   492     TEST_r8_r8( R_AL, R_AL ); // 2
   493     JE_rel8(2, target3);
   494     TEST_r8_r8( R_AH, R_AH ); // 2
   495     JMP_TARGET(target1);
   496     JMP_TARGET(target2);
   497     JMP_TARGET(target3);
   498     SETE_t(); // ZF here reflects the last byte test that was executed
   499     sh4_x86.tstate = TSTATE_E;
   500 :}
   501 DIV0S Rm, Rn {:
   502     load_reg( R_EAX, Rm );
   503     load_reg( R_ECX, Rn );
   504     SHR_imm8_r32( 31, R_EAX );
   505     SHR_imm8_r32( 31, R_ECX );
   506     store_spreg( R_EAX, R_M );
   507     store_spreg( R_ECX, R_Q );
   508     CMP_r32_r32( R_EAX, R_ECX );
   509     SETNE_t();
   510     sh4_x86.tstate = TSTATE_NE;
   511 :}
   512 DIV0U {:  
   513     XOR_r32_r32( R_EAX, R_EAX );
   514     store_spreg( R_EAX, R_Q );
   515     store_spreg( R_EAX, R_M );
   516     store_spreg( R_EAX, R_T );
   517     sh4_x86.tstate = TSTATE_C; // works for DIV1
   518 :}
   519 DIV1 Rm, Rn {:
   520     load_spreg( R_ECX, R_M ); // ECX = M
   521     load_reg( R_EAX, Rn );    // EAX = Rn
   522     if( sh4_x86.tstate != TSTATE_C ) {
   523 	LDC_t(); // presumably loads sh4r.t into the x86 carry flag (defined elsewhere)
   524     }
   525     RCL1_r32( R_EAX ); // rotate left through carry: carry enters bit 0, old bit 31 becomes carry
   526     SETC_r8( R_DL ); // Q'
   527     CMP_sh4r_r32( R_Q, R_ECX ); // compare Q with M
   528     JE_rel8(5, mqequal);
   529     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX ); // Q != M: add Rm
   530     JMP_rel8(3, end);
   531     JMP_TARGET(mqequal);
   532     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX ); // Q == M: subtract Rm
   533     JMP_TARGET(end);
   534     store_reg( R_EAX, Rn ); // Done with Rn now
   535     SETC_r8(R_AL); // tmp1
   536     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   537     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   538     store_spreg( R_ECX, R_Q );
   539     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   540     MOVZX_r8_r32( R_AL, R_EAX ); // widen the flag byte before storing it as T
   541     store_spreg( R_EAX, R_T );
   542     sh4_x86.tstate = TSTATE_NONE;
   543 :}
   544 DMULS.L Rm, Rn {:  
   545     load_reg( R_EAX, Rm );
   546     load_reg( R_ECX, Rn );
   547     IMUL_r32(R_ECX);
   548     store_spreg( R_EDX, R_MACH );
   549     store_spreg( R_EAX, R_MACL );
   550     sh4_x86.tstate = TSTATE_NONE;
   551 :}
   552 DMULU.L Rm, Rn {:  
   553     load_reg( R_EAX, Rm );
   554     load_reg( R_ECX, Rn );
   555     MUL_r32(R_ECX);
   556     store_spreg( R_EDX, R_MACH );
   557     store_spreg( R_EAX, R_MACL );    
   558     sh4_x86.tstate = TSTATE_NONE;
   559 :}
   560 DT Rn {:  
   561     load_reg( R_EAX, Rn );
   562     ADD_imm8s_r32( -1, R_EAX );
   563     store_reg( R_EAX, Rn );
   564     SETE_t();
   565     sh4_x86.tstate = TSTATE_E;
   566 :}
   567 EXTS.B Rm, Rn {:  
   568     load_reg( R_EAX, Rm );
   569     MOVSX_r8_r32( R_EAX, R_EAX );
   570     store_reg( R_EAX, Rn );
   571 :}
   572 EXTS.W Rm, Rn {:  
   573     load_reg( R_EAX, Rm );
   574     MOVSX_r16_r32( R_EAX, R_EAX );
   575     store_reg( R_EAX, Rn );
   576 :}
   577 EXTU.B Rm, Rn {:  
   578     load_reg( R_EAX, Rm );
   579     MOVZX_r8_r32( R_EAX, R_EAX );
   580     store_reg( R_EAX, Rn );
   581 :}
   582 EXTU.W Rm, Rn {:  
   583     load_reg( R_EAX, Rm );
   584     MOVZX_r16_r32( R_EAX, R_EAX );
   585     store_reg( R_EAX, Rn );
   586 :}
   587 MAC.L @Rm+, @Rn+ {:  
   588     load_reg( R_ECX, Rm );
   589     check_ralign32( R_ECX );
   590     load_reg( R_ECX, Rn );
   591     check_ralign32( R_ECX );
   592     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   593     MEM_READ_LONG( R_ECX, R_EAX );
   594     PUSH_realigned_r32( R_EAX );
   595     load_reg( R_ECX, Rm );
   596     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   597     MEM_READ_LONG( R_ECX, R_EAX );
   598     POP_realigned_r32( R_ECX );
   599     IMUL_r32( R_ECX );
   600     ADD_r32_sh4r( R_EAX, R_MACL );
   601     ADC_r32_sh4r( R_EDX, R_MACH );
   603     load_spreg( R_ECX, R_S );
   604     TEST_r32_r32(R_ECX, R_ECX);
   605     JE_rel8( CALL_FUNC0_SIZE, nosat );
   606     call_func0( signsat48 );
   607     JMP_TARGET( nosat );
   608     sh4_x86.tstate = TSTATE_NONE;
   609 :}
   610 MAC.W @Rm+, @Rn+ {:  
   611     load_reg( R_ECX, Rm );
   612     check_ralign16( R_ECX );
   613     load_reg( R_ECX, Rn );
   614     check_ralign16( R_ECX );
   615     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   616     MEM_READ_WORD( R_ECX, R_EAX );
   617     PUSH_realigned_r32( R_EAX );
   618     load_reg( R_ECX, Rm );
   619     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   620     MEM_READ_WORD( R_ECX, R_EAX );
   621     POP_realigned_r32( R_ECX );
   622     IMUL_r32( R_ECX );
   624     load_spreg( R_ECX, R_S );
   625     TEST_r32_r32( R_ECX, R_ECX );
   626     JE_rel8( 47, nosat );
   628     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   629     JNO_rel8( 51, end );            // 2
   630     load_imm32( R_EDX, 1 );         // 5
   631     store_spreg( R_EDX, R_MACH );   // 6
   632     JS_rel8( 13, positive );        // 2
   633     load_imm32( R_EAX, 0x80000000 );// 5
   634     store_spreg( R_EAX, R_MACL );   // 6
   635     JMP_rel8( 25, end2 );           // 2
   637     JMP_TARGET(positive);
   638     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   639     store_spreg( R_EAX, R_MACL );   // 6
   640     JMP_rel8( 12, end3);            // 2
   642     JMP_TARGET(nosat);
   643     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   644     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   645     JMP_TARGET(end);
   646     JMP_TARGET(end2);
   647     JMP_TARGET(end3);
   648     sh4_x86.tstate = TSTATE_NONE;
   649 :}
   650 MOVT Rn {:  
   651     load_spreg( R_EAX, R_T );
   652     store_reg( R_EAX, Rn );
   653 :}
   654 MUL.L Rm, Rn {:  
   655     load_reg( R_EAX, Rm );
   656     load_reg( R_ECX, Rn );
   657     MUL_r32( R_ECX );
   658     store_spreg( R_EAX, R_MACL );
   659     sh4_x86.tstate = TSTATE_NONE;
   660 :}
   661 MULS.W Rm, Rn {:
   662     load_reg16s( R_EAX, Rm ); // low 16 bits of Rm, sign-extended
   663     load_reg16s( R_ECX, Rn ); // low 16 bits of Rn, sign-extended
   664     MUL_r32( R_ECX ); // NOTE(review): looks like the unsigned multiply; only EAX (low 32 bits) is used below, which matches a signed multiply of the sign-extended operands - confirm
   665     store_spreg( R_EAX, R_MACL );
   666     sh4_x86.tstate = TSTATE_NONE;
   667 :}
   668 MULU.W Rm, Rn {:  
   669     load_reg16u( R_EAX, Rm );
   670     load_reg16u( R_ECX, Rn );
   671     MUL_r32( R_ECX );
   672     store_spreg( R_EAX, R_MACL );
   673     sh4_x86.tstate = TSTATE_NONE;
   674 :}
   675 NEG Rm, Rn {:
   676     load_reg( R_EAX, Rm );
   677     NEG_r32( R_EAX );
   678     store_reg( R_EAX, Rn );
   679     sh4_x86.tstate = TSTATE_NONE;
   680 :}
   681 NEGC Rm, Rn {:  
   682     load_reg( R_EAX, Rm );
   683     XOR_r32_r32( R_ECX, R_ECX );
   684     LDC_t();
   685     SBB_r32_r32( R_EAX, R_ECX );
   686     store_reg( R_ECX, Rn );
   687     SETC_t();
   688     sh4_x86.tstate = TSTATE_C;
   689 :}
   690 NOT Rm, Rn {:  
   691     load_reg( R_EAX, Rm );
   692     NOT_r32( R_EAX );
   693     store_reg( R_EAX, Rn );
   694     sh4_x86.tstate = TSTATE_NONE;
   695 :}
   696 OR Rm, Rn {:  
   697     load_reg( R_EAX, Rm );
   698     load_reg( R_ECX, Rn );
   699     OR_r32_r32( R_EAX, R_ECX );
   700     store_reg( R_ECX, Rn );
   701     sh4_x86.tstate = TSTATE_NONE;
   702 :}
   703 OR #imm, R0 {:
   704     load_reg( R_EAX, 0 );
   705     OR_imm32_r32(imm, R_EAX);
   706     store_reg( R_EAX, 0 );
   707     sh4_x86.tstate = TSTATE_NONE;
   708 :}
   709 OR.B #imm, @(R0, GBR) {:  
   710     load_reg( R_EAX, 0 );
   711     load_spreg( R_ECX, R_GBR );
   712     ADD_r32_r32( R_EAX, R_ECX );
   713     PUSH_realigned_r32(R_ECX);
   714     MEM_READ_BYTE( R_ECX, R_EAX );
   715     POP_realigned_r32(R_ECX);
   716     OR_imm32_r32(imm, R_EAX );
   717     MEM_WRITE_BYTE( R_ECX, R_EAX );
   718     sh4_x86.tstate = TSTATE_NONE;
   719 :}
   720 ROTCL Rn {:
   721     load_reg( R_EAX, Rn );
   722     if( sh4_x86.tstate != TSTATE_C ) {
   723 	LDC_t();
   724     }
   725     RCL1_r32( R_EAX );
   726     store_reg( R_EAX, Rn );
   727     SETC_t();
   728     sh4_x86.tstate = TSTATE_C;
   729 :}
   730 ROTCR Rn {:  
   731     load_reg( R_EAX, Rn );
   732     if( sh4_x86.tstate != TSTATE_C ) {
   733 	LDC_t();
   734     }
   735     RCR1_r32( R_EAX );
   736     store_reg( R_EAX, Rn );
   737     SETC_t();
   738     sh4_x86.tstate = TSTATE_C;
   739 :}
   740 ROTL Rn {:  
   741     load_reg( R_EAX, Rn );
   742     ROL1_r32( R_EAX );
   743     store_reg( R_EAX, Rn );
   744     SETC_t();
   745     sh4_x86.tstate = TSTATE_C;
   746 :}
   747 ROTR Rn {:  
   748     load_reg( R_EAX, Rn );
   749     ROR1_r32( R_EAX );
   750     store_reg( R_EAX, Rn );
   751     SETC_t();
   752     sh4_x86.tstate = TSTATE_C;
   753 :}
   754 SHAD Rm, Rn {:
   755     /* Arithmetic shift of Rn by Rm, not directly convertible: left when Rm >= 0, right when Rm < 0. The trailing numbers appear to be emitted byte counts, which sum to the rel8 distances. */
   756     load_reg( R_EAX, Rn );
   757     load_reg( R_ECX, Rm );
   758     CMP_imm32_r32( 0, R_ECX );
   759     JGE_rel8(16, doshl); // Rm >= 0: shift left
   761     NEG_r32( R_ECX );      // 2
   762     AND_imm8_r8( 0x1F, R_CL ); // 3
   763     JE_rel8( 4, emptysar);     // 2 - masked count is zero
   764     SAR_r32_CL( R_EAX );       // 2
   765     JMP_rel8(10, end);          // 2
   767     JMP_TARGET(emptysar);
   768     SAR_imm8_r32(31, R_EAX );  // 3 - fill the whole register with the sign bit
   769     JMP_rel8(5, end2);
   771     JMP_TARGET(doshl);
   772     AND_imm8_r8( 0x1F, R_CL ); // 3
   773     SHL_r32_CL( R_EAX );       // 2
   774     JMP_TARGET(end);
   775     JMP_TARGET(end2);
   776     store_reg( R_EAX, Rn );
   777     sh4_x86.tstate = TSTATE_NONE;
   778 :}
   779 SHLD Rm, Rn {:  
   780     load_reg( R_EAX, Rn ); // SHLD: logical shift of Rn by Rm, left when Rm >= 0, right when Rm < 0
   781     load_reg( R_ECX, Rm );
   782     CMP_imm32_r32( 0, R_ECX );
   783     JGE_rel8(15, doshl); // Rm >= 0: shift left
   785     NEG_r32( R_ECX );      // 2
   786     AND_imm8_r8( 0x1F, R_CL ); // 3
   787     JE_rel8( 4, emptyshr ); // masked count is zero
   788     SHR_r32_CL( R_EAX );       // 2
   789     JMP_rel8(9, end);          // 2
   791     JMP_TARGET(emptyshr);
   792     XOR_r32_r32( R_EAX, R_EAX ); // result is zero in this case
   793     JMP_rel8(5, end2);
   795     JMP_TARGET(doshl);
   796     AND_imm8_r8( 0x1F, R_CL ); // 3
   797     SHL_r32_CL( R_EAX );       // 2
   798     JMP_TARGET(end);
   799     JMP_TARGET(end2);
   800     store_reg( R_EAX, Rn );
   801     sh4_x86.tstate = TSTATE_NONE;
   802 :}
   803 SHAL Rn {: 
   804     load_reg( R_EAX, Rn );
   805     SHL1_r32( R_EAX );
   806     SETC_t();
   807     store_reg( R_EAX, Rn );
   808     sh4_x86.tstate = TSTATE_C;
   809 :}
   810 SHAR Rn {:  
   811     load_reg( R_EAX, Rn );
   812     SAR1_r32( R_EAX );
   813     SETC_t();
   814     store_reg( R_EAX, Rn );
   815     sh4_x86.tstate = TSTATE_C;
   816 :}
   817 SHLL Rn {:  
   818     load_reg( R_EAX, Rn );
   819     SHL1_r32( R_EAX );
   820     SETC_t();
   821     store_reg( R_EAX, Rn );
   822     sh4_x86.tstate = TSTATE_C;
   823 :}
   824 SHLL2 Rn {:
   825     load_reg( R_EAX, Rn );
   826     SHL_imm8_r32( 2, R_EAX );
   827     store_reg( R_EAX, Rn );
   828     sh4_x86.tstate = TSTATE_NONE;
   829 :}
   830 SHLL8 Rn {:  
   831     load_reg( R_EAX, Rn );
   832     SHL_imm8_r32( 8, R_EAX );
   833     store_reg( R_EAX, Rn );
   834     sh4_x86.tstate = TSTATE_NONE;
   835 :}
   836 SHLL16 Rn {:  
   837     load_reg( R_EAX, Rn );
   838     SHL_imm8_r32( 16, R_EAX );
   839     store_reg( R_EAX, Rn );
   840     sh4_x86.tstate = TSTATE_NONE;
   841 :}
   842 SHLR Rn {:  
   843     load_reg( R_EAX, Rn );
   844     SHR1_r32( R_EAX );
   845     SETC_t();
   846     store_reg( R_EAX, Rn );
   847     sh4_x86.tstate = TSTATE_C;
   848 :}
   849 SHLR2 Rn {:  
   850     load_reg( R_EAX, Rn );
   851     SHR_imm8_r32( 2, R_EAX );
   852     store_reg( R_EAX, Rn );
   853     sh4_x86.tstate = TSTATE_NONE;
   854 :}
   855 SHLR8 Rn {:  
   856     load_reg( R_EAX, Rn );
   857     SHR_imm8_r32( 8, R_EAX );
   858     store_reg( R_EAX, Rn );
   859     sh4_x86.tstate = TSTATE_NONE;
   860 :}
   861 SHLR16 Rn {:  
   862     load_reg( R_EAX, Rn );
   863     SHR_imm8_r32( 16, R_EAX );
   864     store_reg( R_EAX, Rn );
   865     sh4_x86.tstate = TSTATE_NONE;
   866 :}
   867 SUB Rm, Rn {:  
   868     load_reg( R_EAX, Rm );
   869     load_reg( R_ECX, Rn );
   870     SUB_r32_r32( R_EAX, R_ECX );
   871     store_reg( R_ECX, Rn );
   872     sh4_x86.tstate = TSTATE_NONE;
   873 :}
   874 SUBC Rm, Rn {:  
   875     load_reg( R_EAX, Rm );
   876     load_reg( R_ECX, Rn );
   877     if( sh4_x86.tstate != TSTATE_C ) {
   878 	LDC_t();
   879     }
   880     SBB_r32_r32( R_EAX, R_ECX );
   881     store_reg( R_ECX, Rn );
   882     SETC_t();
   883     sh4_x86.tstate = TSTATE_C;
   884 :}
   885 SUBV Rm, Rn {:  
   886     load_reg( R_EAX, Rm );
   887     load_reg( R_ECX, Rn );
   888     SUB_r32_r32( R_EAX, R_ECX );
   889     store_reg( R_ECX, Rn );
   890     SETO_t();
   891     sh4_x86.tstate = TSTATE_O;
   892 :}
   893 SWAP.B Rm, Rn {:  
   894     load_reg( R_EAX, Rm );
   895     XCHG_r8_r8( R_AL, R_AH );
   896     store_reg( R_EAX, Rn );
   897 :}
   898 SWAP.W Rm, Rn {:  /* Rn = Rm with its upper and lower 16-bit halves exchanged */
   899     load_reg( R_EAX, Rm );
   900     MOV_r32_r32( R_EAX, R_ECX );
   901     SHL_imm8_r32( 16, R_ECX ); // ECX = low half moved to the top
   902     SHR_imm8_r32( 16, R_EAX ); // EAX = high half moved to the bottom
   903     OR_r32_r32( R_EAX, R_ECX );
   904     store_reg( R_ECX, Rn );
   905     sh4_x86.tstate = TSTATE_NONE;
   906 :}
   907 TAS.B @Rn {:  /* Test-and-set: T = (byte at @Rn == 0), then the byte is written back with bit 7 set */
   908     load_reg( R_ECX, Rn );
   909     MEM_READ_BYTE( R_ECX, R_EAX );
   910     TEST_r8_r8( R_AL, R_AL );
   911     SETE_t(); // T = 1 when the byte read was zero
   912     OR_imm8_r8( 0x80, R_AL );
   913     load_reg( R_ECX, Rn ); // address is reloaded rather than reused -- presumably the read does not preserve ECX (TODO confirm)
   914     MEM_WRITE_BYTE( R_ECX, R_EAX );
   915     sh4_x86.tstate = TSTATE_NONE;
   916 :}
   917 TST Rm, Rn {:  /* T = ((Rm & Rn) == 0); neither register is modified */
   918     load_reg( R_EAX, Rm );
   919     load_reg( R_ECX, Rn );
   920     TEST_r32_r32( R_EAX, R_ECX );
   921     SETE_t();
   922     sh4_x86.tstate = TSTATE_E; // the x86 "equal" condition currently mirrors T
   923 :}
   924 TST #imm, R0 {:  /* T = ((R0 & imm) == 0); R0 is not modified */
   925     load_reg( R_EAX, 0 );
   926     TEST_imm32_r32( imm, R_EAX );
   927     SETE_t();
   928     sh4_x86.tstate = TSTATE_E; // the x86 "equal" condition currently mirrors T
   929 :}
   930 TST.B #imm, @(R0, GBR) {:  /* T = (((byte at R0 + GBR) & imm) == 0); memory is only read */
   931     load_reg( R_EAX, 0);
   932     load_spreg( R_ECX, R_GBR); // GBR is a special register and must be fetched with load_spreg (load_reg indexes the general registers)
   933     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + GBR, the address to test
   934     MEM_READ_BYTE( R_ECX, R_EAX );
   935     TEST_imm8_r8( imm, R_AL );
   936     SETE_t();
   937     sh4_x86.tstate = TSTATE_E; // the x86 "equal" condition currently mirrors T
   938 :}
   939 XOR Rm, Rn {:  /* Rn = Rn ^ Rm (T is not written) */
   940     load_reg( R_EAX, Rm );
   941     load_reg( R_ECX, Rn );
   942     XOR_r32_r32( R_EAX, R_ECX );
   943     store_reg( R_ECX, Rn );
   944     sh4_x86.tstate = TSTATE_NONE;
   945 :}
   946 XOR #imm, R0 {:  /* R0 = R0 ^ imm (T is not written) */
   947     load_reg( R_EAX, 0 );
   948     XOR_imm32_r32( imm, R_EAX );
   949     store_reg( R_EAX, 0 );
   950     sh4_x86.tstate = TSTATE_NONE;
   951 :}
   952 XOR.B #imm, @(R0, GBR) {:  /* Read-modify-write: byte at (R0 + GBR) ^= imm */
   953     load_reg( R_EAX, 0 );
   954     load_spreg( R_ECX, R_GBR );
   955     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + GBR, the target address
   956     PUSH_realigned_r32(R_ECX); // keep the address across the read so it can be reused for the write
   957     MEM_READ_BYTE(R_ECX, R_EAX);
   958     POP_realigned_r32(R_ECX);
   959     XOR_imm32_r32( imm, R_EAX );
   960     MEM_WRITE_BYTE( R_ECX, R_EAX );
   961     sh4_x86.tstate = TSTATE_NONE;
   962 :}
   963 XTRCT Rm, Rn {:  /* Rn = (Rm << 16) | (Rn >> 16): the middle 32 bits of the Rm:Rn pair */
   964     load_reg( R_EAX, Rm );
   965     load_reg( R_ECX, Rn );
   966     SHL_imm8_r32( 16, R_EAX );
   967     SHR_imm8_r32( 16, R_ECX );
   968     OR_r32_r32( R_EAX, R_ECX );
   969     store_reg( R_ECX, Rn );
   970     sh4_x86.tstate = TSTATE_NONE;
   971 :}
   973 /* Data move instructions */
   974 MOV Rm, Rn {:  /* Register copy: Rn = Rm */
   975     load_reg( R_EAX, Rm );
   976     store_reg( R_EAX, Rn ); // tstate is left untouched by this action
   977 :}
   978 MOV #imm, Rn {:  /* Load immediate: Rn = imm */
   979     load_imm32( R_EAX, imm );
   980     store_reg( R_EAX, Rn ); // tstate is left untouched by this action
   981 :}
   982 MOV.B Rm, @Rn {:  /* Store the byte in Rm to the address in Rn */
   983     load_reg( R_EAX, Rm );
   984     load_reg( R_ECX, Rn );
   985     MEM_WRITE_BYTE( R_ECX, R_EAX );
   986     sh4_x86.tstate = TSTATE_NONE;
   987 :}
   988 MOV.B Rm, @-Rn {:  /* Pre-decrement store: Rn -= 1, then store the byte in Rm at the new Rn */
   989     load_reg( R_EAX, Rm );
   990     load_reg( R_ECX, Rn );
   991     ADD_imm8s_r32( -1, R_ECX );
   992     store_reg( R_ECX, Rn ); // Rn is written back before the store is emitted
   993     MEM_WRITE_BYTE( R_ECX, R_EAX );
   994     sh4_x86.tstate = TSTATE_NONE;
   995 :}
   996 MOV.B Rm, @(R0, Rn) {:  /* Store the byte in Rm to address R0 + Rn */
   997     load_reg( R_EAX, 0 );
   998     load_reg( R_ECX, Rn );
   999     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + Rn
  1000     load_reg( R_EAX, Rm ); // EAX is reused for the value once the address is formed
  1001     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1002     sh4_x86.tstate = TSTATE_NONE;
  1003 :}
  1004 MOV.B R0, @(disp, GBR) {:  /* Store the byte in R0 to address GBR + disp */
  1005     load_reg( R_EAX, 0 );
  1006     load_spreg( R_ECX, R_GBR );
  1007     ADD_imm32_r32( disp, R_ECX );
  1008     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1009     sh4_x86.tstate = TSTATE_NONE;
  1010 :}
  1011 MOV.B R0, @(disp, Rn) {:  /* Store the byte in R0 to address Rn + disp */
  1012     load_reg( R_EAX, 0 );
  1013     load_reg( R_ECX, Rn );
  1014     ADD_imm32_r32( disp, R_ECX );
  1015     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1016     sh4_x86.tstate = TSTATE_NONE;
  1017 :}
  1018 MOV.B @Rm, Rn {:  /* Load the byte at the address in Rm into Rn */
  1019     load_reg( R_ECX, Rm );
  1020     MEM_READ_BYTE( R_ECX, R_EAX );
  1021     store_reg( R_EAX, Rn );
  1022     sh4_x86.tstate = TSTATE_NONE;
  1023 :}
  1024 MOV.B @Rm+, Rn {:  /* Post-increment load: Rn = byte at Rm, Rm += 1 */
  1025     load_reg( R_ECX, Rm );
  1026     MOV_r32_r32( R_ECX, R_EAX );
  1027     ADD_imm8s_r32( 1, R_EAX );
  1028     store_reg( R_EAX, Rm ); // the increment is written first, so if Rm and Rn are the same register the loaded value is what remains
  1029     MEM_READ_BYTE( R_ECX, R_EAX ); // ECX still holds the un-incremented address
  1030     store_reg( R_EAX, Rn );
  1031     sh4_x86.tstate = TSTATE_NONE;
  1032 :}
  1033 MOV.B @(R0, Rm), Rn {:  /* Load the byte at address R0 + Rm into Rn */
  1034     load_reg( R_EAX, 0 );
  1035     load_reg( R_ECX, Rm );
  1036     ADD_r32_r32( R_EAX, R_ECX );
  1037     MEM_READ_BYTE( R_ECX, R_EAX );
  1038     store_reg( R_EAX, Rn );
  1039     sh4_x86.tstate = TSTATE_NONE;
  1040 :}
  1041 MOV.B @(disp, GBR), R0 {:  /* Load the byte at address GBR + disp into R0 */
  1042     load_spreg( R_ECX, R_GBR );
  1043     ADD_imm32_r32( disp, R_ECX );
  1044     MEM_READ_BYTE( R_ECX, R_EAX );
  1045     store_reg( R_EAX, 0 );
  1046     sh4_x86.tstate = TSTATE_NONE;
  1047 :}
  1048 MOV.B @(disp, Rm), R0 {:  /* Load the byte at address Rm + disp into R0 */
  1049     load_reg( R_ECX, Rm );
  1050     ADD_imm32_r32( disp, R_ECX );
  1051     MEM_READ_BYTE( R_ECX, R_EAX );
  1052     store_reg( R_EAX, 0 );
  1053     sh4_x86.tstate = TSTATE_NONE;
  1054 :}
  1055 MOV.L Rm, @Rn {:  /* Store the 32-bit value in Rm to the address in Rn */
  1056     load_reg( R_EAX, Rm );
  1057     load_reg( R_ECX, Rn );
  1058     check_walign32(R_ECX); // write-alignment check on the address before the store is emitted
  1059     MEM_WRITE_LONG( R_ECX, R_EAX );
  1060     sh4_x86.tstate = TSTATE_NONE;
  1061 :}
  1062 MOV.L Rm, @-Rn {:  /* Pre-decrement store: Rn -= 4, then store the 32-bit value in Rm at the new Rn */
  1063     load_reg( R_EAX, Rm );
  1064     load_reg( R_ECX, Rn );
  1065     check_walign32( R_ECX ); // checked before the decrement; subtracting 4 does not change 4-byte alignment
  1066     ADD_imm8s_r32( -4, R_ECX );
  1067     store_reg( R_ECX, Rn );
  1068     MEM_WRITE_LONG( R_ECX, R_EAX );
  1069     sh4_x86.tstate = TSTATE_NONE;
  1070 :}
  1071 MOV.L Rm, @(R0, Rn) {:  /* Store the 32-bit value in Rm to address R0 + Rn */
  1072     load_reg( R_EAX, 0 );
  1073     load_reg( R_ECX, Rn );
  1074     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + Rn
  1075     check_walign32( R_ECX );
  1076     load_reg( R_EAX, Rm ); // EAX is reused for the value once the address is formed
  1077     MEM_WRITE_LONG( R_ECX, R_EAX );
  1078     sh4_x86.tstate = TSTATE_NONE;
  1079 :}
  1080 MOV.L R0, @(disp, GBR) {:  /* Store the 32-bit value in R0 to address GBR + disp */
  1081     load_spreg( R_ECX, R_GBR );
  1082     load_reg( R_EAX, 0 );
  1083     ADD_imm32_r32( disp, R_ECX );
  1084     check_walign32( R_ECX );
  1085     MEM_WRITE_LONG( R_ECX, R_EAX );
  1086     sh4_x86.tstate = TSTATE_NONE;
  1087 :}
  1088 MOV.L Rm, @(disp, Rn) {:  /* Store the 32-bit value in Rm to address Rn + disp */
  1089     load_reg( R_ECX, Rn );
  1090     load_reg( R_EAX, Rm );
  1091     ADD_imm32_r32( disp, R_ECX );
  1092     check_walign32( R_ECX );
  1093     MEM_WRITE_LONG( R_ECX, R_EAX );
  1094     sh4_x86.tstate = TSTATE_NONE;
  1095 :}
  1096 MOV.L @Rm, Rn {:  /* Load the 32-bit value at the address in Rm into Rn */
  1097     load_reg( R_ECX, Rm );
  1098     check_ralign32( R_ECX ); // read-alignment check on the address before the load is emitted
  1099     MEM_READ_LONG( R_ECX, R_EAX );
  1100     store_reg( R_EAX, Rn );
  1101     sh4_x86.tstate = TSTATE_NONE;
  1102 :}
  1103 MOV.L @Rm+, Rn {:  /* Post-increment load: Rn = 32-bit value at Rm, Rm += 4 */
  1104     load_reg( R_EAX, Rm );
  1105     check_ralign32( R_EAX );
  1106     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the un-incremented address for the read
  1107     ADD_imm8s_r32( 4, R_EAX );
  1108     store_reg( R_EAX, Rm ); // the increment is written first, so if Rm and Rn are the same register the loaded value is what remains
  1109     MEM_READ_LONG( R_ECX, R_EAX );
  1110     store_reg( R_EAX, Rn );
  1111     sh4_x86.tstate = TSTATE_NONE;
  1112 :}
  1113 MOV.L @(R0, Rm), Rn {:  /* Load the 32-bit value at address R0 + Rm into Rn */
  1114     load_reg( R_EAX, 0 );
  1115     load_reg( R_ECX, Rm );
  1116     ADD_r32_r32( R_EAX, R_ECX );
  1117     check_ralign32( R_ECX );
  1118     MEM_READ_LONG( R_ECX, R_EAX );
  1119     store_reg( R_EAX, Rn );
  1120     sh4_x86.tstate = TSTATE_NONE;
  1121 :}
  1122 MOV.L @(disp, GBR), R0 {:  /* Load the 32-bit value at address GBR + disp into R0 */
  1123     load_spreg( R_ECX, R_GBR );
  1124     ADD_imm32_r32( disp, R_ECX );
  1125     check_ralign32( R_ECX );
  1126     MEM_READ_LONG( R_ECX, R_EAX );
  1127     store_reg( R_EAX, 0 );
  1128     sh4_x86.tstate = TSTATE_NONE;
  1129 :}
  1130 MOV.L @(disp, PC), Rn {:  /* PC-relative 32-bit load into Rn; rejected when translated in a delay slot */
  1131     if( sh4_x86.in_delay_slot ) {
  1132 	SLOTILLEGAL();
  1133     } else {
  1134 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4; // PC is rounded down to a longword boundary first
  1135 	if( IS_IN_ICACHE(target) ) {
  1136 	    // If the target address is in the same page as the code, it's
  1137 	    // pretty safe to just ref it directly and circumvent the whole
  1138 	    // memory subsystem. (this is a big performance win)
  1140 	    // FIXME: There's a corner-case that's not handled here when
  1141 	    // the current code-page is in the ITLB but not in the UTLB.
  1142 	    // (should generate a TLB miss although need to test SH4 
  1143 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1144 	    // behaviour though.
  1145 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1146 	    MOV_moff32_EAX( ptr ); // direct host-memory load; tstate is not changed on this path
  1147 	} else {
  1148 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1149 	    // different virtual address than the translation was done with,
  1150 	    // but we can safely assume that the low bits are the same.
  1151 	    load_imm32( R_ECX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
  1152 	    ADD_sh4r_r32( R_PC, R_ECX );
  1153 	    MEM_READ_LONG( R_ECX, R_EAX );
  1154 	    sh4_x86.tstate = TSTATE_NONE;
  1155 	}
  1156 	store_reg( R_EAX, Rn );
  1157     }
  1158 :}
  1159 MOV.L @(disp, Rm), Rn {:  /* Load the 32-bit value at address Rm + disp into Rn */
  1160     load_reg( R_ECX, Rm );
  1161     ADD_imm8s_r32( disp, R_ECX ); // uses the signed 8-bit immediate form -- assumes disp always fits (TODO confirm against the decoder)
  1162     check_ralign32( R_ECX );
  1163     MEM_READ_LONG( R_ECX, R_EAX );
  1164     store_reg( R_EAX, Rn );
  1165     sh4_x86.tstate = TSTATE_NONE;
  1166 :}
  1167 MOV.W Rm, @Rn {:  /* Store the 16-bit value in Rm to the address in Rn */
  1168     load_reg( R_ECX, Rn );
  1169     check_walign16( R_ECX ); // write-alignment check on the address before the store is emitted
  1170     load_reg( R_EAX, Rm );
  1171     MEM_WRITE_WORD( R_ECX, R_EAX );
  1172     sh4_x86.tstate = TSTATE_NONE;
  1173 :}
  1174 MOV.W Rm, @-Rn {:  /* Pre-decrement store: Rn -= 2, then store the 16-bit value in Rm at the new Rn */
  1175     load_reg( R_ECX, Rn );
  1176     check_walign16( R_ECX ); // checked before the decrement; subtracting 2 does not change 2-byte alignment
  1177     load_reg( R_EAX, Rm );
  1178     ADD_imm8s_r32( -2, R_ECX );
  1179     store_reg( R_ECX, Rn );
  1180     MEM_WRITE_WORD( R_ECX, R_EAX );
  1181     sh4_x86.tstate = TSTATE_NONE;
  1182 :}
  1183 MOV.W Rm, @(R0, Rn) {:  /* Store the 16-bit value in Rm to address R0 + Rn */
  1184     load_reg( R_EAX, 0 );
  1185     load_reg( R_ECX, Rn );
  1186     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + Rn
  1187     check_walign16( R_ECX );
  1188     load_reg( R_EAX, Rm ); // EAX is reused for the value once the address is formed
  1189     MEM_WRITE_WORD( R_ECX, R_EAX );
  1190     sh4_x86.tstate = TSTATE_NONE;
  1191 :}
  1192 MOV.W R0, @(disp, GBR) {:  /* Store the 16-bit value in R0 to address GBR + disp */
  1193     load_spreg( R_ECX, R_GBR );
  1194     load_reg( R_EAX, 0 );
  1195     ADD_imm32_r32( disp, R_ECX );
  1196     check_walign16( R_ECX );
  1197     MEM_WRITE_WORD( R_ECX, R_EAX );
  1198     sh4_x86.tstate = TSTATE_NONE;
  1199 :}
  1200 MOV.W R0, @(disp, Rn) {:  /* Store the 16-bit value in R0 to address Rn + disp */
  1201     load_reg( R_ECX, Rn );
  1202     load_reg( R_EAX, 0 );
  1203     ADD_imm32_r32( disp, R_ECX );
  1204     check_walign16( R_ECX );
  1205     MEM_WRITE_WORD( R_ECX, R_EAX );
  1206     sh4_x86.tstate = TSTATE_NONE;
  1207 :}
  1208 MOV.W @Rm, Rn {:  /* Load the 16-bit value at the address in Rm into Rn */
  1209     load_reg( R_ECX, Rm );
  1210     check_ralign16( R_ECX ); // read-alignment check on the address before the load is emitted
  1211     MEM_READ_WORD( R_ECX, R_EAX );
  1212     store_reg( R_EAX, Rn );
  1213     sh4_x86.tstate = TSTATE_NONE;
  1214 :}
  1215 MOV.W @Rm+, Rn {:  /* Post-increment load: Rn = 16-bit value at Rm, Rm += 2 */
  1216     load_reg( R_EAX, Rm );
  1217     check_ralign16( R_EAX );
  1218     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the un-incremented address for the read
  1219     ADD_imm8s_r32( 2, R_EAX );
  1220     store_reg( R_EAX, Rm ); // the increment is written first, so if Rm and Rn are the same register the loaded value is what remains
  1221     MEM_READ_WORD( R_ECX, R_EAX );
  1222     store_reg( R_EAX, Rn );
  1223     sh4_x86.tstate = TSTATE_NONE;
  1224 :}
  1225 MOV.W @(R0, Rm), Rn {:  /* Load the 16-bit value at address R0 + Rm into Rn */
  1226     load_reg( R_EAX, 0 );
  1227     load_reg( R_ECX, Rm );
  1228     ADD_r32_r32( R_EAX, R_ECX );
  1229     check_ralign16( R_ECX );
  1230     MEM_READ_WORD( R_ECX, R_EAX );
  1231     store_reg( R_EAX, Rn );
  1232     sh4_x86.tstate = TSTATE_NONE;
  1233 :}
  1234 MOV.W @(disp, GBR), R0 {:  /* Load the 16-bit value at address GBR + disp into R0 */
  1235     load_spreg( R_ECX, R_GBR );
  1236     ADD_imm32_r32( disp, R_ECX );
  1237     check_ralign16( R_ECX );
  1238     MEM_READ_WORD( R_ECX, R_EAX );
  1239     store_reg( R_EAX, 0 );
  1240     sh4_x86.tstate = TSTATE_NONE;
  1241 :}
  1242 MOV.W @(disp, PC), Rn {:  /* PC-relative 16-bit load into Rn; rejected when translated in a delay slot */
  1243     if( sh4_x86.in_delay_slot ) {
  1244 	SLOTILLEGAL();
  1245     } else {
  1246 	// See comments for MOV.L @(disp, PC), Rn
  1247 	uint32_t target = pc + disp + 4;
  1248 	if( IS_IN_ICACHE(target) ) {
  1249 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1250 	    MOV_moff32_EAX( ptr ); // direct host-memory load of 32 bits...
  1251 	    MOVSX_r16_r32( R_EAX, R_EAX ); // ...of which the low 16 are sign-extended into EAX
  1252 	} else {
  1253 	    load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 );
  1254 	    ADD_sh4r_r32( R_PC, R_ECX );
  1255 	    MEM_READ_WORD( R_ECX, R_EAX );
  1256 	    sh4_x86.tstate = TSTATE_NONE;
  1257 	}
  1258 	store_reg( R_EAX, Rn );
  1259     }
  1260 :}
  1261 MOV.W @(disp, Rm), R0 {:  /* Load the 16-bit value at address Rm + disp into R0 */
  1262     load_reg( R_ECX, Rm );
  1263     ADD_imm32_r32( disp, R_ECX );
  1264     check_ralign16( R_ECX );
  1265     MEM_READ_WORD( R_ECX, R_EAX );
  1266     store_reg( R_EAX, 0 );
  1267     sh4_x86.tstate = TSTATE_NONE;
  1268 :}
  1269 MOVA @(disp, PC), R0 {:  /* R0 = longword-aligned PC + 4 + disp (address only, no memory access); rejected in a delay slot */
  1270     if( sh4_x86.in_delay_slot ) {
  1271 	SLOTILLEGAL();
  1272     } else {
  1273 	load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) ); // offset relative to the block start; the run-time sh4r.pc is added below
  1274 	ADD_sh4r_r32( R_PC, R_ECX );
  1275 	store_reg( R_ECX, 0 ); sh4_x86.tstate = TSTATE_NONE; // the ADD above overwrote the x86 flags, so they can no longer be trusted to mirror T
  1276     }
  1277 :}
  1278 MOVCA.L R0, @Rn {:  /* Emitted as a plain 32-bit store of R0 to the address in Rn; no cache-specific handling is generated here */
  1279     load_reg( R_EAX, 0 );
  1280     load_reg( R_ECX, Rn );
  1281     check_walign32( R_ECX );
  1282     MEM_WRITE_LONG( R_ECX, R_EAX );
  1283     sh4_x86.tstate = TSTATE_NONE;
  1284 :}
  1286 /* Control transfer instructions */
  1287 BF disp {:  /* Branch (no delay slot) to pc + 4 + disp when T is clear; rejected in a delay slot */
  1288     if( sh4_x86.in_delay_slot ) {
  1289 	SLOTILLEGAL();
  1290     } else {
  1291 	JT_rel8( EXIT_BLOCK_SIZE, nottaken ); // when T is set, hop over the block exit emitted next
  1292 	exit_block( disp + pc + 4, pc+2 );
  1293 	JMP_TARGET(nottaken);
  1294 	return 2;
  1295     }
  1296 :}
  1297 BF/S disp {:  /* Delayed branch to pc + 4 + disp when T is clear; the delay-slot instruction is emitted on both paths */
  1298     if( sh4_x86.in_delay_slot ) {
  1299 	SLOTILLEGAL();
  1300     } else {
  1301 	sh4_x86.in_delay_slot = TRUE;
  1302 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1303 	    CMP_imm8s_sh4r( 1, R_T ); // no flag currently mirrors T, so compare T against 1 to get one
  1304 	    sh4_x86.tstate = TSTATE_E;
  1305 	}
  1306 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32 taken when T is set (cc = tstate); the rel32 is patched below
  1307 	sh4_translate_instruction(pc+2); // delay slot, taken path
  1308 	exit_block( disp + pc + 4, pc+4 );
  1309 	// not taken
  1310 	*patch = (xlat_output - ((uint8_t *)patch)) - 4; // displacement is relative to the end of the 4-byte rel32 field
  1311 	sh4_translate_instruction(pc+2); // delay slot, fall-through path
  1312 	return 4;
  1313     }
  1314 :}
  1315 BRA disp {:  /* Unconditional delayed branch to pc + 4 + disp; rejected in a delay slot */
  1316     if( sh4_x86.in_delay_slot ) {
  1317 	SLOTILLEGAL();
  1318     } else {
  1319 	sh4_x86.in_delay_slot = TRUE;
  1320 	sh4_translate_instruction( pc + 2 ); // delay-slot instruction is emitted before the block exit
  1321 	exit_block( disp + pc + 4, pc+4 );
  1322 	sh4_x86.branch_taken = TRUE;
  1323 	return 4;
  1324     }
  1325 :}
  1326 BRAF Rn {:  /* Delayed branch to Rn + pc + 4; rejected in a delay slot */
  1327     if( sh4_x86.in_delay_slot ) {
  1328 	SLOTILLEGAL();
  1329     } else {
  1330 	load_reg( R_EAX, Rn );
  1331 	ADD_imm32_r32( pc + 4, R_EAX );
  1332 	store_spreg( R_EAX, REG_OFFSET(pc) ); // target is written to sh4r.pc before the delay slot is emitted, so it uses the pre-slot Rn
  1333 	sh4_x86.in_delay_slot = TRUE;
  1334 	sh4_x86.tstate = TSTATE_NONE; // the ADD above overwrote the x86 flags
  1335 	sh4_translate_instruction( pc + 2 );
  1336 	exit_block_pcset(pc+2);
  1337 	sh4_x86.branch_taken = TRUE;
  1338 	return 4;
  1339     }
  1340 :}
  1341 BSR disp {:  /* Delayed subroutine call: PR = pc + 4, then branch to pc + 4 + disp; rejected in a delay slot */
  1342     if( sh4_x86.in_delay_slot ) {
  1343 	SLOTILLEGAL();
  1344     } else {
  1345 	load_imm32( R_EAX, pc + 4 );
  1346 	store_spreg( R_EAX, R_PR ); // return address is set before the delay slot is emitted
  1347 	sh4_x86.in_delay_slot = TRUE;
  1348 	sh4_translate_instruction( pc + 2 );
  1349 	exit_block( disp + pc + 4, pc+4 );
  1350 	sh4_x86.branch_taken = TRUE;
  1351 	return 4;
  1352     }
  1353 :}
  1354 BSRF Rn {:  /* Delayed subroutine call: PR = pc + 4, then branch to Rn + pc + 4; rejected in a delay slot */
  1355     if( sh4_x86.in_delay_slot ) {
  1356 	SLOTILLEGAL();
  1357     } else {
  1358 	load_imm32( R_ECX, pc + 4 );
  1359 	store_spreg( R_ECX, R_PR );
  1360 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX ); // ECX = (pc + 4) + Rn, reusing the value just stored to PR
  1361 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1362 	sh4_x86.in_delay_slot = TRUE;
  1363 	sh4_x86.tstate = TSTATE_NONE; // the ADD above overwrote the x86 flags
  1364 	sh4_translate_instruction( pc + 2 );
  1365 	exit_block_pcset(pc+2);
  1366 	sh4_x86.branch_taken = TRUE;
  1367 	return 4;
  1368     }
  1369 :}
  1370 BT disp {:  /* Branch (no delay slot) to pc + 4 + disp when T is set; rejected in a delay slot */
  1371     if( sh4_x86.in_delay_slot ) {
  1372 	SLOTILLEGAL();
  1373     } else {
  1374 	JF_rel8( EXIT_BLOCK_SIZE, nottaken ); // when T is clear, hop over the block exit emitted next
  1375 	exit_block( disp + pc + 4, pc+2 );
  1376 	JMP_TARGET(nottaken);
  1377 	return 2;
  1378     }
  1379 :}
  1380 BT/S disp {:  /* Delayed branch to pc + 4 + disp when T is set; the delay-slot instruction is emitted on both paths */
  1381     if( sh4_x86.in_delay_slot ) {
  1382 	SLOTILLEGAL();
  1383     } else {
  1384 	sh4_x86.in_delay_slot = TRUE;
  1385 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1386 	    CMP_imm8s_sh4r( 1, R_T ); // no flag currently mirrors T, so compare T against 1 to get one
  1387 	    sh4_x86.tstate = TSTATE_E;
  1388 	}
  1389 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32 taken when T is clear (cc = inverse of tstate); the rel32 is patched below
  1390 	sh4_translate_instruction(pc+2); // delay slot, taken path
  1391 	exit_block( disp + pc + 4, pc+4 );
  1392 	// not taken
  1393 	*patch = (xlat_output - ((uint8_t *)patch)) - 4; // displacement is relative to the end of the 4-byte rel32 field
  1394 	sh4_translate_instruction(pc+2); // delay slot, fall-through path
  1395 	return 4;
  1396     }
  1397 :}
  1398 JMP @Rn {:  /* Delayed jump to the address in Rn; rejected in a delay slot */
  1399     if( sh4_x86.in_delay_slot ) {
  1400 	SLOTILLEGAL();
  1401     } else {
  1402 	load_reg( R_ECX, Rn );
  1403 	store_spreg( R_ECX, REG_OFFSET(pc) ); // target is captured into sh4r.pc before the delay slot is emitted
  1404 	sh4_x86.in_delay_slot = TRUE;
  1405 	sh4_translate_instruction(pc+2);
  1406 	exit_block_pcset(pc+2);
  1407 	sh4_x86.branch_taken = TRUE;
  1408 	return 4;
  1409     }
  1410 :}
  1411 JSR @Rn {:  /* Delayed subroutine call: PR = pc + 4, then jump to the address in Rn; rejected in a delay slot */
  1412     if( sh4_x86.in_delay_slot ) {
  1413 	SLOTILLEGAL();
  1414     } else {
  1415 	load_imm32( R_EAX, pc + 4 );
  1416 	store_spreg( R_EAX, R_PR );
  1417 	load_reg( R_ECX, Rn );
  1418 	store_spreg( R_ECX, REG_OFFSET(pc) ); // target is captured into sh4r.pc before the delay slot is emitted
  1419 	sh4_x86.in_delay_slot = TRUE;
  1420 	sh4_translate_instruction(pc+2);
  1421 	exit_block_pcset(pc+2);
  1422 	sh4_x86.branch_taken = TRUE;
  1423 	return 4;
  1424     }
  1425 :}
  1426 RTE {:  /* Return from exception: pc = SPC and SR is rewritten from SSR, then the delay slot runs; privileged */
  1427     if( sh4_x86.in_delay_slot ) {
  1428 	SLOTILLEGAL();
  1429     } else {
  1430 	check_priv();
  1431 	load_spreg( R_ECX, R_SPC );
  1432 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1433 	load_spreg( R_EAX, R_SSR );
  1434 	call_func1( sh4_write_sr, R_EAX );
  1435 	sh4_x86.in_delay_slot = TRUE;
  1436 	sh4_x86.priv_checked = FALSE; // SR was just rewritten -- presumably earlier privilege/FPU-enable checks no longer hold (TODO confirm)
  1437 	sh4_x86.fpuen_checked = FALSE;
  1438 	sh4_x86.tstate = TSTATE_NONE;
  1439 	sh4_translate_instruction(pc+2);
  1440 	exit_block_pcset(pc+2);
  1441 	sh4_x86.branch_taken = TRUE;
  1442 	return 4;
  1443     }
  1444 :}
  1445 RTS {:  /* Delayed return from subroutine: pc = PR; rejected in a delay slot */
  1446     if( sh4_x86.in_delay_slot ) {
  1447 	SLOTILLEGAL();
  1448     } else {
  1449 	load_spreg( R_ECX, R_PR );
  1450 	store_spreg( R_ECX, REG_OFFSET(pc) ); // target is captured into sh4r.pc before the delay slot is emitted
  1451 	sh4_x86.in_delay_slot = TRUE;
  1452 	sh4_translate_instruction(pc+2);
  1453 	exit_block_pcset(pc+2);
  1454 	sh4_x86.branch_taken = TRUE;
  1455 	return 4;
  1456     }
  1457 :}
  1458 TRAPA #imm {:  /* Software trap: stores pc + 2 to sh4r.pc, calls sh4_raise_trap(imm), then leaves the block */
  1459     if( sh4_x86.in_delay_slot ) {
  1460 	SLOTILLEGAL();
  1461     } else {
  1462 	load_imm32( R_ECX, pc+2 );
  1463 	store_spreg( R_ECX, REG_OFFSET(pc) ); // sh4r.pc points past the TRAPA when the trap is raised
  1464 	load_imm32( R_EAX, imm );
  1465 	call_func1( sh4_raise_trap, R_EAX );
  1466 	sh4_x86.tstate = TSTATE_NONE;
  1467 	exit_block_pcset(pc);
  1468 	sh4_x86.branch_taken = TRUE;
  1469 	return 2;
  1470     }
  1471 :}
  1472 UNDEF {:  
  1473     if( sh4_x86.in_delay_slot ) {
  1474 	SLOTILLEGAL();
  1475     } else {
  1476 	JMP_exc(EXC_ILLEGAL);
  1477 	return 2;
  1479 :}
  1481 CLRMAC {:  /* MACL = MACH = 0 */
  1482     XOR_r32_r32(R_EAX, R_EAX); // EAX = 0
  1483     store_spreg( R_EAX, R_MACL );
  1484     store_spreg( R_EAX, R_MACH );
  1485     sh4_x86.tstate = TSTATE_NONE; // the XOR overwrote the x86 flags
  1486 :}
  1487 CLRS {:  /* Clear the S flag: S = 0 (T is not written) */
  1488     CLC();
  1489     SETC_sh4r(R_S); // S = carry = 0
  1490     sh4_x86.tstate = TSTATE_NONE; // carry now reflects S, not T, so no x86 flag may be recorded as mirroring T
  1491 :}
  1492 CLRT {:  /* Clear the T flag: T = 0 */
  1493     CLC();
  1494     SETC_t(); // T = carry = 0
  1495     sh4_x86.tstate = TSTATE_C; // the x86 carry flag currently mirrors T
  1496 :}
  1497 SETS {:  /* Set the S flag: S = 1 (T is not written) */
  1498     STC();
  1499     SETC_sh4r(R_S); // S = carry = 1
  1500     sh4_x86.tstate = TSTATE_NONE; // carry now reflects S, not T, so no x86 flag may be recorded as mirroring T
  1501 :}
  1502 SETT {:  /* Set the T flag: T = 1 */
  1503     STC();
  1504     SETC_t(); // T = carry = 1
  1505     sh4_x86.tstate = TSTATE_C; // the x86 carry flag currently mirrors T
  1506 :}
  1508 /* Floating point moves */
  1509 FMOV FRm, FRn {:  
  1510     /* As horrible as this looks, it's actually covering 5 separate cases,
  1511      * selected at run time by FPSCR.SZ and at translation time by the
  1512      * low bits of FRm/FRn:
  1513      * 1. 32-bit fr-to-fr (SZ=0)
  1514      * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
  1515      * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
  1516      * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
  1517     * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 ) */
  1518     check_fpuen(); load_spreg( R_ECX, R_FPSCR );
  1519     load_fr_bank( R_EDX );
  1520     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1521     JNE_rel8(8, doublesize);
  1522     load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
  1523     store_fr( R_EDX, R_EAX, FRn );
  1524     if( FRm&1 ) {
  1525 	JMP_rel8(24, end);
  1526 	JMP_TARGET(doublesize);
  1527 	load_xf_bank( R_ECX ); 
  1528 	load_fr( R_ECX, R_EAX, FRm-1 );
  1529 	if( FRn&1 ) {
  1530 	    load_fr( R_ECX, R_EDX, FRm );
  1531 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1532 	    store_fr( R_ECX, R_EDX, FRn );
  1533 	} else /* FRn&1 == 0 */ {
  1534 	    load_fr( R_ECX, R_ECX, FRm ); // ECX (source bank pointer) is overwritten by the value; it is not needed again
  1535 	    store_fr( R_EDX, R_EAX, FRn );
  1536 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1537 	}
  1538 	JMP_TARGET(end);
  1539     } else /* FRm&1 == 0 */ {
  1540 	if( FRn&1 ) {
  1541 	    JMP_rel8(24, end); // NOTE(review): no JMP_TARGET(doublesize) on this path, unlike the FRm&1 case -- the fixed JNE offset presumably lands right after this jump; confirm
  1542 	    load_xf_bank( R_ECX );
  1543 	    load_fr( R_EDX, R_EAX, FRm );
  1544 	    load_fr( R_EDX, R_EDX, FRm+1 ); // EDX (source bank pointer) is overwritten by the value; it is not needed again
  1545 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1546 	    store_fr( R_ECX, R_EDX, FRn );
  1547 	    JMP_TARGET(end);
  1548 	} else /* FRn&1 == 0 */ {
  1549 	    JMP_rel8(12, end); // NOTE(review): no JMP_TARGET(doublesize) on this path either; confirm as above
  1550 	    load_fr( R_EDX, R_EAX, FRm );
  1551 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1552 	    store_fr( R_EDX, R_EAX, FRn );
  1553 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1554 	    JMP_TARGET(end);
  1555 	}
  1556     }
  1557     sh4_x86.tstate = TSTATE_NONE;
  1558 :}
  1559 FMOV FRm, @Rn {: /* Store FRm to @Rn: 32 bits when FPSCR.SZ is clear, otherwise the 64-bit pair (xf bank when FRm is odd, fr bank when even) */
  1560     check_fpuen();
  1561     load_reg( R_ECX, Rn );
  1562     check_walign32( R_ECX );
  1563     load_spreg( R_EDX, R_FPSCR );
  1564     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1565     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1566     load_fr_bank( R_EDX );
  1567     load_fr( R_EDX, R_EAX, FRm );
  1568     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1569     if( FRm&1 ) {
  1570 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1571 	JMP_TARGET(doublesize);
  1572 	load_xf_bank( R_EDX ); // EDX still holds FPSCR on this path
  1573 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1574 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1575 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1576 	JMP_TARGET(end);
  1577     } else {
  1578 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1579 	JMP_TARGET(doublesize);
  1580 	load_fr_bank( R_EDX );
  1581 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1582 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1583 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1584 	JMP_TARGET(end);
  1585     }
  1586     sh4_x86.tstate = TSTATE_NONE;
  1587 :}
  1588 FMOV @Rm, FRn {:  /* Load FRn from @Rm: 32 bits when FPSCR.SZ is clear, otherwise the 64-bit pair (xf bank when FRn is odd, fr bank when even) */
  1589     check_fpuen();
  1590     load_reg( R_ECX, Rm );
  1591     check_ralign32( R_ECX );
  1592     load_spreg( R_EDX, R_FPSCR );
  1593     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1594     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1595     MEM_READ_LONG( R_ECX, R_EAX );
  1596     load_fr_bank( R_EDX );
  1597     store_fr( R_EDX, R_EAX, FRn );
  1598     if( FRn&1 ) {
  1599 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1600 	JMP_TARGET(doublesize);
  1601 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1602 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1603 	load_xf_bank( R_EDX );
  1604 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1605 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1606 	JMP_TARGET(end);
  1607     } else {
  1608 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1609 	JMP_TARGET(doublesize);
  1610 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1611 	load_fr_bank( R_EDX );
  1612 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1613 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1614 	JMP_TARGET(end);
  1615     }
  1616     sh4_x86.tstate = TSTATE_NONE;
  1617 :}
  1618 FMOV FRm, @-Rn {:  /* Pre-decrement store of FRm: Rn -= 4 and 32-bit store when FPSCR.SZ is clear, otherwise Rn -= 8 and 64-bit store */
  1619     check_fpuen();
  1620     load_reg( R_ECX, Rn );
  1621     check_walign32( R_ECX ); // checked before the decrement; subtracting 4 or 8 does not change 4-byte alignment
  1622     load_spreg( R_EDX, R_FPSCR );
  1623     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1624     JNE_rel8(14 + MEM_WRITE_SIZE, doublesize);
  1625     load_fr_bank( R_EDX );
  1626     load_fr( R_EDX, R_EAX, FRm );
  1627     ADD_imm8s_r32(-4,R_ECX);
  1628     store_reg( R_ECX, Rn );
  1629     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1630     if( FRm&1 ) {
  1631 	JMP_rel8( 24 + MEM_WRITE_DOUBLE_SIZE, end );
  1632 	JMP_TARGET(doublesize);
  1633 	load_xf_bank( R_EDX ); // EDX still holds FPSCR on this path
  1634 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1635 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1636 	ADD_imm8s_r32(-8,R_ECX);
  1637 	store_reg( R_ECX, Rn );
  1638 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1639 	JMP_TARGET(end);
  1640     } else {
  1641 	JMP_rel8( 15 + MEM_WRITE_DOUBLE_SIZE, end );
  1642 	JMP_TARGET(doublesize);
  1643 	load_fr_bank( R_EDX );
  1644 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1645 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1646 	ADD_imm8s_r32(-8,R_ECX);
  1647 	store_reg( R_ECX, Rn );
  1648 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1649 	JMP_TARGET(end);
  1650     }
  1651     sh4_x86.tstate = TSTATE_NONE;
  1652 :}
  1653 FMOV @Rm+, FRn {:  /* Post-increment load of FRn from @Rm: 32 bits and Rm += 4 when FPSCR.SZ is clear, otherwise 64 bits and Rm += 8 */
  1654     check_fpuen();
  1655     load_reg( R_ECX, Rm );
  1656     check_ralign32( R_ECX );
  1657     MOV_r32_r32( R_ECX, R_EAX ); // EAX = copy of the address, used to compute the incremented Rm on either path
  1658     load_spreg( R_EDX, R_FPSCR );
  1659     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1660     JNE_rel8(14 + MEM_READ_SIZE, doublesize);
  1661     ADD_imm8s_r32( 4, R_EAX );
  1662     store_reg( R_EAX, Rm );
  1663     MEM_READ_LONG( R_ECX, R_EAX );
  1664     load_fr_bank( R_EDX );
  1665     store_fr( R_EDX, R_EAX, FRn );
  1666     if( FRn&1 ) {
  1667 	JMP_rel8(27 + MEM_READ_DOUBLE_SIZE, end);
  1668 	JMP_TARGET(doublesize);
  1669 	ADD_imm8s_r32( 8, R_EAX );
  1670 	store_reg(R_EAX, Rm);
  1671 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1672 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1673 	load_xf_bank( R_EDX );
  1674 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1675 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1676 	JMP_TARGET(end);
  1677     } else {
  1678 	JMP_rel8(15 + MEM_READ_DOUBLE_SIZE, end); // NOTE(review): no JMP_TARGET(doublesize) on this path, unlike the FRn&1 case -- the fixed JNE offset presumably lands right after this jump; confirm
  1679 	ADD_imm8s_r32( 8, R_EAX );
  1680 	store_reg(R_EAX, Rm);
  1681 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1682 	load_fr_bank( R_EDX );
  1683 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1684 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1685 	JMP_TARGET(end);
  1686     }
  1687     sh4_x86.tstate = TSTATE_NONE;
  1688 :}
  1689 FMOV FRm, @(R0, Rn) {:  /* Store FRm to address R0 + Rn: 32 bits when FPSCR.SZ is clear, otherwise the 64-bit pair (xf bank when FRm is odd, fr bank when even) */
  1690     check_fpuen();
  1691     load_reg( R_ECX, Rn );
  1692     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX ); // ECX = Rn + R0
  1693     check_walign32( R_ECX );
  1694     load_spreg( R_EDX, R_FPSCR );
  1695     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1696     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1697     load_fr_bank( R_EDX );
  1698     load_fr( R_EDX, R_EAX, FRm );
  1699     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1700     if( FRm&1 ) {
  1701 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1702 	JMP_TARGET(doublesize);
  1703 	load_xf_bank( R_EDX ); // EDX still holds FPSCR on this path
  1704 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1705 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1706 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1707 	JMP_TARGET(end);
  1708     } else {
  1709 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1710 	JMP_TARGET(doublesize);
  1711 	load_fr_bank( R_EDX );
  1712 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1713 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1714 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1715 	JMP_TARGET(end);
  1716     }
  1717     sh4_x86.tstate = TSTATE_NONE;
  1718 :}
  1719 FMOV @(R0, Rm), FRn {:  /* Load FRn from address R0 + Rm: 32 bits when FPSCR.SZ is clear, otherwise the 64-bit pair (xf bank when FRn is odd, fr bank when even) */
  1720     check_fpuen();
  1721     load_reg( R_ECX, Rm );
  1722     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX ); // ECX = Rm + R0
  1723     check_ralign32( R_ECX );
  1724     load_spreg( R_EDX, R_FPSCR );
  1725     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1726     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1727     MEM_READ_LONG( R_ECX, R_EAX );
  1728     load_fr_bank( R_EDX );
  1729     store_fr( R_EDX, R_EAX, FRn );
  1730     if( FRn&1 ) {
  1731 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1732 	JMP_TARGET(doublesize);
  1733 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1734 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1735 	load_xf_bank( R_EDX );
  1736 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1737 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1738 	JMP_TARGET(end);
  1739     } else {
  1740 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1741 	JMP_TARGET(doublesize);
  1742 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1743 	load_fr_bank( R_EDX );
  1744 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1745 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1746 	JMP_TARGET(end);
  1747     }
  1748     sh4_x86.tstate = TSTATE_NONE;
  1749 :}
  1750 FLDI0 FRn {:  /* IFF PR=0: stores an all-zero 32-bit word into FRn; the store is skipped when FPSCR.PR is set */
  1751     check_fpuen();
  1752     load_spreg( R_ECX, R_FPSCR );
  1753     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1754     JNE_rel8(8, end);
  1755     XOR_r32_r32( R_EAX, R_EAX ); // EAX = 0
  1756     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1757     store_fr( R_ECX, R_EAX, FRn );
  1758     JMP_TARGET(end);
  1759     sh4_x86.tstate = TSTATE_NONE;
  1760 :}
  1761 FLDI1 FRn {:  /* IFF PR=0: stores 0x3F800000 (single-precision 1.0) into FRn; the store is skipped when FPSCR.PR is set */
  1762     check_fpuen();
  1763     load_spreg( R_ECX, R_FPSCR );
  1764     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1765     JNE_rel8(11, end);
  1766     load_imm32(R_EAX, 0x3F800000);
  1767     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1768     store_fr( R_ECX, R_EAX, FRn );
  1769     JMP_TARGET(end);
  1770     sh4_x86.tstate = TSTATE_NONE;
  1771 :}
  1773 FLOAT FPUL, FRn {:  /* Convert the integer in FPUL to floating point, stored as single (PR clear) or double (PR set) */
  1774     check_fpuen();
  1775     load_spreg( R_ECX, R_FPSCR );
  1776     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1777     FILD_sh4r(R_FPUL); // push FPUL onto the x87 stack as an integer
  1778     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1779     JNE_rel8(5, doubleprec);
  1780     pop_fr( R_EDX, FRn );
  1781     JMP_rel8(3, end);
  1782     JMP_TARGET(doubleprec);
  1783     pop_dr( R_EDX, FRn );
  1784     JMP_TARGET(end);
  1785     sh4_x86.tstate = TSTATE_NONE;
  1786 :}
  1787 FTRC FRm, FPUL {:  /* Convert FRm (single or double per FPSCR.PR) to a 32-bit integer in FPUL, saturating at max_int / min_int */
  1788     check_fpuen();
  1789     load_spreg( R_ECX, R_FPSCR );
  1790     load_fr_bank( R_EDX );
  1791     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1792     JNE_rel8(5, doubleprec);
  1793     push_fr( R_EDX, FRm );
  1794     JMP_rel8(3, doop);
  1795     JMP_TARGET(doubleprec);
  1796     push_dr( R_EDX, FRm );
  1797     JMP_TARGET( doop );
  1798     load_imm32( R_ECX, (uint32_t)&max_int ); // ECX keeps the address of the limit so the saturation path can load it
  1799     FILD_r32ind( R_ECX );
  1800     FCOMIP_st(1);
  1801     JNA_rel8( 32, sat ); // max_int <= value: saturate to max_int
  1802     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1803     FILD_r32ind( R_ECX );           // 2
  1804     FCOMIP_st(1);                   // 2
  1805     JAE_rel8( 21, sat2 );            // 2
  1806     load_imm32( R_EAX, (uint32_t)&save_fcw ); // in-range path: save the x87 control word, switch to trunc_fcw for the store, then restore
  1807     FNSTCW_r32ind( R_EAX );
  1808     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1809     FLDCW_r32ind( R_EDX );
  1810     FISTP_sh4r(R_FPUL);             // 3
  1811     FLDCW_r32ind( R_EAX );
  1812     JMP_rel8( 9, end );             // 2
  1814     JMP_TARGET(sat);
  1815     JMP_TARGET(sat2);
  1816     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1817     store_spreg( R_ECX, R_FPUL ); // FPUL = whichever limit ECX was pointing at
  1818     FPOP_st(); // discard the unconverted value still on the x87 stack
  1819     JMP_TARGET(end);
  1820     sh4_x86.tstate = TSTATE_NONE;
  1821 :}
  1822 FLDS FRm, FPUL {:  /* Raw 32-bit copy: FPUL = FRm (no conversion) */
  1823     check_fpuen();
  1824     load_fr_bank( R_ECX );
  1825     load_fr( R_ECX, R_EAX, FRm );
  1826     store_spreg( R_EAX, R_FPUL );
  1827     sh4_x86.tstate = TSTATE_NONE;
  1828 :}
  1829 FSTS FPUL, FRn {:  /* Raw 32-bit copy: FRn = FPUL (no conversion) */
  1830     check_fpuen();
  1831     load_fr_bank( R_ECX );
  1832     load_spreg( R_EAX, R_FPUL );
  1833     store_fr( R_ECX, R_EAX, FRn );
  1834     sh4_x86.tstate = TSTATE_NONE;
  1835 :}
  1836 FCNVDS FRm, FPUL {:  /* Double-to-single conversion: the double at FRm is pushed on the x87 stack and popped into FPUL */
  1837     check_fpuen();
  1838     load_spreg( R_ECX, R_FPSCR );
  1839     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1840     JE_rel8(9, end); // only when PR=1
  1841     load_fr_bank( R_ECX );
  1842     push_dr( R_ECX, FRm );
  1843     pop_fpul();
  1844     JMP_TARGET(end);
  1845     sh4_x86.tstate = TSTATE_NONE;
  1846 :}
  1847 FCNVSD FPUL, FRn {:  /* Single-to-double conversion: FPUL is pushed on the x87 stack and popped as a double into FRn */
  1848     check_fpuen();
  1849     load_spreg( R_ECX, R_FPSCR );
  1850     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1851     JE_rel8(9, end); // only when PR=1
  1852     load_fr_bank( R_ECX );
  1853     push_fpul();
  1854     pop_dr( R_ECX, FRn );
  1855     JMP_TARGET(end);
  1856     sh4_x86.tstate = TSTATE_NONE;
  1857 :}
  1859 /* Floating point instructions */
  1860 FABS FRn {:  /* FRn = |FRn|, as single (PR clear) or double (PR set), via the x87 stack */
  1861     check_fpuen();
  1862     load_spreg( R_ECX, R_FPSCR );
  1863     load_fr_bank( R_EDX );
  1864     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1865     JNE_rel8(10, doubleprec);
  1866     push_fr(R_EDX, FRn); // 3
  1867     FABS_st0(); // 2
  1868     pop_fr( R_EDX, FRn); //3
  1869     JMP_rel8(8,end); // 2
  1870     JMP_TARGET(doubleprec);
  1871     push_dr(R_EDX, FRn);
  1872     FABS_st0();
  1873     pop_dr(R_EDX, FRn);
  1874     JMP_TARGET(end);
  1875     sh4_x86.tstate = TSTATE_NONE;
  1876 :}
  1877 FADD FRm, FRn {:  /* FADD: push FRm and FRn, add them with FADDP and pop the result into FRn; single or double path chosen at run time by FPSCR.PR. */
  1878     check_fpuen();
  1879     load_spreg( R_ECX, R_FPSCR );
  1880     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1881     load_fr_bank( R_EDX );
  1882     JNE_rel8(13,doubleprec); // hard-coded skip over the single-precision path
  1883     push_fr(R_EDX, FRm);
  1884     push_fr(R_EDX, FRn);
  1885     FADDP_st(1);
  1886     pop_fr(R_EDX, FRn);
  1887     JMP_rel8(11,end); // hard-coded skip over the double-precision path
  1888     JMP_TARGET(doubleprec);
  1889     push_dr(R_EDX, FRm);
  1890     push_dr(R_EDX, FRn);
  1891     FADDP_st(1);
  1892     pop_dr(R_EDX, FRn);
  1893     JMP_TARGET(end);
  1894     sh4_x86.tstate = TSTATE_NONE;
  1895 :}
  1896 FDIV FRm, FRn {:  /* FDIV: push FRn then FRm, divide with FDIVP and pop the result into FRn; single or double path chosen at run time by FPSCR.PR. */
  1897     check_fpuen();
  1898     load_spreg( R_ECX, R_FPSCR );
  1899     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1900     load_fr_bank( R_EDX );
  1901     JNE_rel8(13, doubleprec); // hard-coded skip over the single-precision path
  1902     push_fr(R_EDX, FRn); // note the push order: FRn first, unlike FADD/FMUL
  1903     push_fr(R_EDX, FRm);
  1904     FDIVP_st(1);
  1905     pop_fr(R_EDX, FRn);
  1906     JMP_rel8(11, end); // hard-coded skip over the double-precision path
  1907     JMP_TARGET(doubleprec);
  1908     push_dr(R_EDX, FRn);
  1909     push_dr(R_EDX, FRm);
  1910     FDIVP_st(1);
  1911     pop_dr(R_EDX, FRn);
  1912     JMP_TARGET(end);
  1913     sh4_x86.tstate = TSTATE_NONE;
  1914 :}
  1915 FMAC FR0, FRm, FRn {:  /* FMAC: multiply register 0 by FRm, add FRn, and pop the result into FRn; single or double path chosen at run time by FPSCR.PR. */
  1916     check_fpuen();
  1917     load_spreg( R_ECX, R_FPSCR );
  1918     load_spreg( R_EDX, REG_OFFSET(fr_bank)); // NOTE(review): sibling rules use load_fr_bank( R_EDX ) here - presumably equivalent, confirm
  1919     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1920     JNE_rel8(18, doubleprec); // hard-coded skip over the single-precision path
  1921     push_fr( R_EDX, 0 );
  1922     push_fr( R_EDX, FRm );
  1923     FMULP_st(1);
  1924     push_fr( R_EDX, FRn );
  1925     FADDP_st(1);
  1926     pop_fr( R_EDX, FRn );
  1927     JMP_rel8(16, end); // hard-coded skip over the double-precision path
  1928     JMP_TARGET(doubleprec);
  1929     push_dr( R_EDX, 0 );
  1930     push_dr( R_EDX, FRm );
  1931     FMULP_st(1);
  1932     push_dr( R_EDX, FRn );
  1933     FADDP_st(1);
  1934     pop_dr( R_EDX, FRn );
  1935     JMP_TARGET(end);
  1936     sh4_x86.tstate = TSTATE_NONE;
  1937 :}
  1939 FMUL FRm, FRn {:  /* FMUL: push FRm and FRn, multiply with FMULP and pop the result into FRn; single or double path chosen at run time by FPSCR.PR. */
  1940     check_fpuen();
  1941     load_spreg( R_ECX, R_FPSCR );
  1942     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1943     load_fr_bank( R_EDX );
  1944     JNE_rel8(13, doubleprec); // hard-coded skip over the single-precision path
  1945     push_fr(R_EDX, FRm);
  1946     push_fr(R_EDX, FRn);
  1947     FMULP_st(1);
  1948     pop_fr(R_EDX, FRn);
  1949     JMP_rel8(11, end); // hard-coded skip over the double-precision path
  1950     JMP_TARGET(doubleprec);
  1951     push_dr(R_EDX, FRm);
  1952     push_dr(R_EDX, FRn);
  1953     FMULP_st(1);
  1954     pop_dr(R_EDX, FRn);
  1955     JMP_TARGET(end);
  1956     sh4_x86.tstate = TSTATE_NONE;
  1957 :}
  1958 FNEG FRn {:  /* FNEG: push FRn, change its sign with FCHS and pop it back to FRn; single or double path chosen at run time by FPSCR.PR. */
  1959     check_fpuen();
  1960     load_spreg( R_ECX, R_FPSCR );
  1961     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1962     load_fr_bank( R_EDX );
  1963     JNE_rel8(10, doubleprec); // hard-coded skip over the single-precision path
  1964     push_fr(R_EDX, FRn);
  1965     FCHS_st0();
  1966     pop_fr(R_EDX, FRn);
  1967     JMP_rel8(8, end); // hard-coded skip over the double-precision path
  1968     JMP_TARGET(doubleprec);
  1969     push_dr(R_EDX, FRn);
  1970     FCHS_st0();
  1971     pop_dr(R_EDX, FRn);
  1972     JMP_TARGET(end);
  1973     sh4_x86.tstate = TSTATE_NONE;
  1974 :}
  1975 FSRRA FRn {:  /* FSRRA: compute 1/sqrt(FRn) on the x87 stack (FLD1, push FRn, FSQRT, FDIVP) and pop it into FRn; skipped entirely when FPSCR.PR is set. */
  1976     check_fpuen();
  1977     load_spreg( R_ECX, R_FPSCR );
  1978     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1979     load_fr_bank( R_EDX );
  1980     JNE_rel8(12, end); // PR=0 only
  1981     FLD1_st0();
  1982     push_fr(R_EDX, FRn);
  1983     FSQRT_st0();
  1984     FDIVP_st(1);
  1985     pop_fr(R_EDX, FRn);
  1986     JMP_TARGET(end);
  1987     sh4_x86.tstate = TSTATE_NONE;
  1988 :}
  1989 FSQRT FRn {:  /* FSQRT: push FRn, take its square root with FSQRT and pop it back to FRn; single or double path chosen at run time by FPSCR.PR. */
  1990     check_fpuen();
  1991     load_spreg( R_ECX, R_FPSCR );
  1992     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1993     load_fr_bank( R_EDX );
  1994     JNE_rel8(10, doubleprec); // hard-coded skip over the single-precision path
  1995     push_fr(R_EDX, FRn);
  1996     FSQRT_st0();
  1997     pop_fr(R_EDX, FRn);
  1998     JMP_rel8(8, end); // hard-coded skip over the double-precision path
  1999     JMP_TARGET(doubleprec);
  2000     push_dr(R_EDX, FRn);
  2001     FSQRT_st0();
  2002     pop_dr(R_EDX, FRn);
  2003     JMP_TARGET(end);
  2004     sh4_x86.tstate = TSTATE_NONE;
  2005 :}
  2006 FSUB FRm, FRn {:  /* FSUB: push FRn then FRm, subtract with FSUBP and pop the result into FRn; single or double path chosen at run time by FPSCR.PR. */
  2007     check_fpuen();
  2008     load_spreg( R_ECX, R_FPSCR );
  2009     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2010     load_fr_bank( R_EDX );
  2011     JNE_rel8(13, doubleprec); // hard-coded skip over the single-precision path
  2012     push_fr(R_EDX, FRn); // note the push order: FRn first, unlike FADD/FMUL
  2013     push_fr(R_EDX, FRm);
  2014     FSUBP_st(1);
  2015     pop_fr(R_EDX, FRn);
  2016     JMP_rel8(11, end); // hard-coded skip over the double-precision path
  2017     JMP_TARGET(doubleprec);
  2018     push_dr(R_EDX, FRn);
  2019     push_dr(R_EDX, FRm);
  2020     FSUBP_st(1);
  2021     pop_dr(R_EDX, FRn);
  2022     JMP_TARGET(end);
  2023     sh4_x86.tstate = TSTATE_NONE;
  2024 :}
  2026 FCMP/EQ FRm, FRn {:  /* FCMP/EQ: push FRm and FRn (single or double, by FPSCR.PR), then a shared tail compares them with FCOMIP and sets T via SETE_t. */
  2027     check_fpuen();
  2028     load_spreg( R_ECX, R_FPSCR );
  2029     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2030     load_fr_bank( R_EDX );
  2031     JNE_rel8(8, doubleprec); // hard-coded skip over the single-precision pushes
  2032     push_fr(R_EDX, FRm);
  2033     push_fr(R_EDX, FRn);
  2034     JMP_rel8(6, end); // hard-coded skip over the double-precision pushes
  2035     JMP_TARGET(doubleprec);
  2036     push_dr(R_EDX, FRm);
  2037     push_dr(R_EDX, FRn);
  2038     JMP_TARGET(end);
  2039     FCOMIP_st(1); // both paths join here with two values on the x87 stack
  2040     SETE_t();
  2041     FPOP_st(); // drop the remaining operand
  2042     sh4_x86.tstate = TSTATE_NONE;
  2043 :}
  2044 FCMP/GT FRm, FRn {:  /* FCMP/GT: push FRm and FRn (single or double, by FPSCR.PR), then a shared tail compares them with FCOMIP and sets T via SETA_t. */
  2045     check_fpuen();
  2046     load_spreg( R_ECX, R_FPSCR );
  2047     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2048     load_fr_bank( R_EDX );
  2049     JNE_rel8(8, doubleprec); // hard-coded skip over the single-precision pushes
  2050     push_fr(R_EDX, FRm);
  2051     push_fr(R_EDX, FRn);
  2052     JMP_rel8(6, end); // hard-coded skip over the double-precision pushes
  2053     JMP_TARGET(doubleprec);
  2054     push_dr(R_EDX, FRm);
  2055     push_dr(R_EDX, FRn);
  2056     JMP_TARGET(end);
  2057     FCOMIP_st(1); // both paths join here; FRn was pushed last, so it is the stack top being compared
  2058     SETA_t();
  2059     FPOP_st(); // drop the remaining operand
  2060     sh4_x86.tstate = TSTATE_NONE;
  2061 :}
  2063 FSCA FPUL, FRn {:  /* FSCA: when FPSCR.PR is clear, call sh4_fsca with FPUL (EDX) and the bank pointer advanced to the even-aligned FRn pair (ECX); skipped when PR is set. */
  2064     check_fpuen();
  2065     load_spreg( R_ECX, R_FPSCR );
  2066     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2067     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec ); // skip the three emits below plus the call
  2068     load_fr_bank( R_ECX );
  2069     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX ); // low bit of FRn masked off, scaled by 4
  2070     load_spreg( R_EDX, R_FPUL );
  2071     call_func2( sh4_fsca, R_EDX, R_ECX );
  2072     JMP_TARGET(doubleprec);
  2073     sh4_x86.tstate = TSTATE_NONE;
  2074 :}
  2075 FIPR FVm, FVn {:  /* FIPR: when FPSCR.PR is clear, accumulate the four element-wise products of vectors FVm and FVn on the x87 stack and pop the sum into element 3 of FVn; skipped when PR is set. */
  2076     check_fpuen();
  2077     load_spreg( R_ECX, R_FPSCR );
  2078     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2079     JNE_rel8(44, doubleprec); // hard-coded skip over the whole sequence below
  2081     load_fr_bank( R_ECX ); // ECX is reused: FPSCR is no longer needed after the test
  2082     push_fr( R_ECX, FVm<<2 ); // vector n occupies registers n*4 .. n*4+3
  2083     push_fr( R_ECX, FVn<<2 );
  2084     FMULP_st(1);
  2085     push_fr( R_ECX, (FVm<<2)+1);
  2086     push_fr( R_ECX, (FVn<<2)+1);
  2087     FMULP_st(1);
  2088     FADDP_st(1);
  2089     push_fr( R_ECX, (FVm<<2)+2);
  2090     push_fr( R_ECX, (FVn<<2)+2);
  2091     FMULP_st(1);
  2092     FADDP_st(1);
  2093     push_fr( R_ECX, (FVm<<2)+3);
  2094     push_fr( R_ECX, (FVn<<2)+3);
  2095     FMULP_st(1);
  2096     FADDP_st(1);
  2097     pop_fr( R_ECX, (FVn<<2)+3);
  2098     JMP_TARGET(doubleprec);
  2099     sh4_x86.tstate = TSTATE_NONE;
  2100 :}
  2101 FTRV XMTRX, FVn {:  /* FTRV: when FPSCR.PR is clear, call sh4_ftrv with the bank pointer advanced by FVn<<4 (EDX) and the xf bank pointer (ECX); skipped when PR is set. */
  2102     check_fpuen();
  2103     load_spreg( R_ECX, R_FPSCR );
  2104     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2105     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec ); // 18 = 3+3+12 per the size annotations below
  2106     load_fr_bank( R_EDX );                 // 3
  2107     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2108     load_xf_bank( R_ECX );                 // 12
  2109     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2110     JMP_TARGET(doubleprec);
  2111     sh4_x86.tstate = TSTATE_NONE;
  2112 :}
  2114 FRCHG {:  /* FRCHG: toggle the FPSCR_FR bit in FPSCR, store it back, then refresh the fr bank from the new value. */
  2115     check_fpuen();
  2116     load_spreg( R_ECX, R_FPSCR );
  2117     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2118     store_spreg( R_ECX, R_FPSCR );
  2119     update_fr_bank( R_ECX ); // ECX still holds the updated FPSCR
  2120     sh4_x86.tstate = TSTATE_NONE;
  2121 :}
  2122 FSCHG {:  /* FSCHG: toggle the FPSCR_SZ bit in FPSCR and store it back. */
  2123     check_fpuen();
  2124     load_spreg( R_ECX, R_FPSCR );
  2125     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2126     store_spreg( R_ECX, R_FPSCR );
  2127     sh4_x86.tstate = TSTATE_NONE;
  2128 :}
  2130 /* Processor control instructions */
  2131 LDC Rm, SR {: /* LDC Rm,SR: illegal in a delay slot; otherwise, after the privilege check, pass Rm to sh4_write_sr. */
  2132     if( sh4_x86.in_delay_slot ) {
  2133 	SLOTILLEGAL();
  2134     } else {
  2135 	check_priv();
  2136 	load_reg( R_EAX, Rm );
  2137 	call_func1( sh4_write_sr, R_EAX );
  2138 	sh4_x86.priv_checked = FALSE; // SR was rewritten: drop the cached "already checked" flags
  2139 	sh4_x86.fpuen_checked = FALSE;
  2140 	sh4_x86.tstate = TSTATE_NONE;
  2141     }
  2142 :}
  2143 LDC Rm, GBR {: /* LDC Rm,GBR: copy Rm to GBR; unlike the other LDC forms there is no check_priv() here. */
  2144     load_reg( R_EAX, Rm );
  2145     store_spreg( R_EAX, R_GBR );
  2146 :}
  2147 LDC Rm, VBR {:  /* LDC Rm,VBR: after the privilege check, copy Rm to VBR. */
  2148     check_priv();
  2149     load_reg( R_EAX, Rm );
  2150     store_spreg( R_EAX, R_VBR );
  2151     sh4_x86.tstate = TSTATE_NONE;
  2152 :}
  2153 LDC Rm, SSR {:  /* LDC Rm,SSR: after the privilege check, copy Rm to SSR. */
  2154     check_priv();
  2155     load_reg( R_EAX, Rm );
  2156     store_spreg( R_EAX, R_SSR );
  2157     sh4_x86.tstate = TSTATE_NONE;
  2158 :}
  2159 LDC Rm, SGR {:  /* LDC Rm,SGR: after the privilege check, copy Rm to SGR. */
  2160     check_priv();
  2161     load_reg( R_EAX, Rm );
  2162     store_spreg( R_EAX, R_SGR );
  2163     sh4_x86.tstate = TSTATE_NONE;
  2164 :}
  2165 LDC Rm, SPC {:  /* LDC Rm,SPC: after the privilege check, copy Rm to SPC. */
  2166     check_priv();
  2167     load_reg( R_EAX, Rm );
  2168     store_spreg( R_EAX, R_SPC );
  2169     sh4_x86.tstate = TSTATE_NONE;
  2170 :}
  2171 LDC Rm, DBR {:  /* LDC Rm,DBR: after the privilege check, copy Rm to DBR. */
  2172     check_priv();
  2173     load_reg( R_EAX, Rm );
  2174     store_spreg( R_EAX, R_DBR );
  2175     sh4_x86.tstate = TSTATE_NONE;
  2176 :}
  2177 LDC Rm, Rn_BANK {:  /* LDC Rm,Rn_BANK: after the privilege check, copy Rm to r_bank[Rn_BANK]. */
  2178     check_priv();
  2179     load_reg( R_EAX, Rm );
  2180     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2181     sh4_x86.tstate = TSTATE_NONE;
  2182 :}
  2183 LDC.L @Rm+, GBR {:  /* LDC.L @Rm+,GBR: alignment-check Rm, write Rm+4 back, read the long at the original address and store it to GBR. No check_priv() for GBR. */
  2184     load_reg( R_EAX, Rm );
  2185     check_ralign32( R_EAX );
  2186     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2187     ADD_imm8s_r32( 4, R_EAX );
  2188     store_reg( R_EAX, Rm ); // NOTE(review): Rm is updated before the read below - confirm that is intended if the read can raise
  2189     MEM_READ_LONG( R_ECX, R_EAX );
  2190     store_spreg( R_EAX, R_GBR );
  2191     sh4_x86.tstate = TSTATE_NONE;
  2192 :}
  2193 LDC.L @Rm+, SR {: /* LDC.L @Rm+,SR: illegal in a delay slot; otherwise, after the privilege check, read the long at Rm (post-incrementing Rm by 4) and pass it to sh4_write_sr. */
  2194     if( sh4_x86.in_delay_slot ) {
  2195 	SLOTILLEGAL();
  2196     } else {
  2197 	check_priv();
  2198 	load_reg( R_EAX, Rm );
  2199 	check_ralign32( R_EAX );
  2200 	MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2201 	ADD_imm8s_r32( 4, R_EAX );
  2202 	store_reg( R_EAX, Rm );
  2203 	MEM_READ_LONG( R_ECX, R_EAX );
  2204 	call_func1( sh4_write_sr, R_EAX );
  2205 	sh4_x86.priv_checked = FALSE; // SR was rewritten: drop the cached "already checked" flags
  2206 	sh4_x86.fpuen_checked = FALSE;
  2207 	sh4_x86.tstate = TSTATE_NONE;
  2208     }
  2209 :}
  2210 LDC.L @Rm+, VBR {:  /* LDC.L @Rm+,VBR: after the privilege check, alignment-check Rm, write Rm+4 back, read the long at the original address and store it to VBR. */
  2211     check_priv();
  2212     load_reg( R_EAX, Rm );
  2213     check_ralign32( R_EAX );
  2214     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2215     ADD_imm8s_r32( 4, R_EAX );
  2216     store_reg( R_EAX, Rm );
  2217     MEM_READ_LONG( R_ECX, R_EAX );
  2218     store_spreg( R_EAX, R_VBR );
  2219     sh4_x86.tstate = TSTATE_NONE;
  2220 :}
  2221 LDC.L @Rm+, SSR {: /* LDC.L @Rm+,SSR: after the privilege check, alignment-check Rm, write Rm+4 back, read the long at the original address and store it to SSR. */
  2222     check_priv();
  2223     load_reg( R_EAX, Rm );
  2224     check_ralign32( R_EAX );
  2225     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2226     ADD_imm8s_r32( 4, R_EAX );
  2227     store_reg( R_EAX, Rm );
  2228     MEM_READ_LONG( R_ECX, R_EAX );
  2229     store_spreg( R_EAX, R_SSR );
  2230     sh4_x86.tstate = TSTATE_NONE;
  2231 :}
  2232 LDC.L @Rm+, SGR {:  /* LDC.L @Rm+,SGR: after the privilege check, alignment-check Rm, write Rm+4 back, read the long at the original address and store it to SGR. */
  2233     check_priv();
  2234     load_reg( R_EAX, Rm );
  2235     check_ralign32( R_EAX );
  2236     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2237     ADD_imm8s_r32( 4, R_EAX );
  2238     store_reg( R_EAX, Rm );
  2239     MEM_READ_LONG( R_ECX, R_EAX );
  2240     store_spreg( R_EAX, R_SGR );
  2241     sh4_x86.tstate = TSTATE_NONE;
  2242 :}
  2243 LDC.L @Rm+, SPC {:  /* LDC.L @Rm+,SPC: after the privilege check, alignment-check Rm, write Rm+4 back, read the long at the original address and store it to SPC. */
  2244     check_priv();
  2245     load_reg( R_EAX, Rm );
  2246     check_ralign32( R_EAX );
  2247     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2248     ADD_imm8s_r32( 4, R_EAX );
  2249     store_reg( R_EAX, Rm );
  2250     MEM_READ_LONG( R_ECX, R_EAX );
  2251     store_spreg( R_EAX, R_SPC );
  2252     sh4_x86.tstate = TSTATE_NONE;
  2253 :}
  2254 LDC.L @Rm+, DBR {:  /* LDC.L @Rm+,DBR: after the privilege check, alignment-check Rm, write Rm+4 back, read the long at the original address and store it to DBR. */
  2255     check_priv();
  2256     load_reg( R_EAX, Rm );
  2257     check_ralign32( R_EAX );
  2258     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2259     ADD_imm8s_r32( 4, R_EAX );
  2260     store_reg( R_EAX, Rm );
  2261     MEM_READ_LONG( R_ECX, R_EAX );
  2262     store_spreg( R_EAX, R_DBR );
  2263     sh4_x86.tstate = TSTATE_NONE;
  2264 :}
  2265 LDC.L @Rm+, Rn_BANK {:  /* LDC.L @Rm+,Rn_BANK: after the privilege check, alignment-check Rm, write Rm+4 back, read the long at the original address and store it to r_bank[Rn_BANK]. */
  2266     check_priv();
  2267     load_reg( R_EAX, Rm );
  2268     check_ralign32( R_EAX );
  2269     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2270     ADD_imm8s_r32( 4, R_EAX );
  2271     store_reg( R_EAX, Rm );
  2272     MEM_READ_LONG( R_ECX, R_EAX );
  2273     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2274     sh4_x86.tstate = TSTATE_NONE;
  2275 :}
  2276 LDS Rm, FPSCR {:  /* LDS Rm,FPSCR: copy Rm to FPSCR, then refresh the fr bank from the new value. */
  2277     load_reg( R_EAX, Rm );
  2278     store_spreg( R_EAX, R_FPSCR );
  2279     update_fr_bank( R_EAX ); // EAX still holds the new FPSCR
  2280     sh4_x86.tstate = TSTATE_NONE;
  2281 :}
  2282 LDS.L @Rm+, FPSCR {:  /* LDS.L @Rm+,FPSCR: alignment-check Rm, write Rm+4 back, read the long at the original address into FPSCR, then refresh the fr bank. */
  2283     load_reg( R_EAX, Rm );
  2284     check_ralign32( R_EAX );
  2285     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2286     ADD_imm8s_r32( 4, R_EAX );
  2287     store_reg( R_EAX, Rm );
  2288     MEM_READ_LONG( R_ECX, R_EAX );
  2289     store_spreg( R_EAX, R_FPSCR );
  2290     update_fr_bank( R_EAX ); // EAX still holds the new FPSCR
  2291     sh4_x86.tstate = TSTATE_NONE;
  2292 :}
  2293 LDS Rm, FPUL {:  /* LDS Rm,FPUL: copy Rm to FPUL. NOTE(review): no check_fpuen() here, unlike FLDS/FSTS - confirm against the SH4 manual. */
  2294     load_reg( R_EAX, Rm );
  2295     store_spreg( R_EAX, R_FPUL );
  2296 :}
  2297 LDS.L @Rm+, FPUL {:  /* LDS.L @Rm+,FPUL: alignment-check Rm, write Rm+4 back, read the long at the original address and store it to FPUL. */
  2298     load_reg( R_EAX, Rm );
  2299     check_ralign32( R_EAX );
  2300     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2301     ADD_imm8s_r32( 4, R_EAX );
  2302     store_reg( R_EAX, Rm );
  2303     MEM_READ_LONG( R_ECX, R_EAX );
  2304     store_spreg( R_EAX, R_FPUL );
  2305     sh4_x86.tstate = TSTATE_NONE;
  2306 :}
  2307 LDS Rm, MACH {: /* LDS Rm,MACH: copy Rm to MACH. */
  2308     load_reg( R_EAX, Rm );
  2309     store_spreg( R_EAX, R_MACH );
  2310 :}
  2311 LDS.L @Rm+, MACH {:  /* LDS.L @Rm+,MACH: alignment-check Rm, write Rm+4 back, read the long at the original address and store it to MACH. */
  2312     load_reg( R_EAX, Rm );
  2313     check_ralign32( R_EAX );
  2314     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2315     ADD_imm8s_r32( 4, R_EAX );
  2316     store_reg( R_EAX, Rm );
  2317     MEM_READ_LONG( R_ECX, R_EAX );
  2318     store_spreg( R_EAX, R_MACH );
  2319     sh4_x86.tstate = TSTATE_NONE;
  2320 :}
  2321 LDS Rm, MACL {:  /* LDS Rm,MACL: copy Rm to MACL. */
  2322     load_reg( R_EAX, Rm );
  2323     store_spreg( R_EAX, R_MACL );
  2324 :}
  2325 LDS.L @Rm+, MACL {:  /* LDS.L @Rm+,MACL: alignment-check Rm, write Rm+4 back, read the long at the original address and store it to MACL. */
  2326     load_reg( R_EAX, Rm );
  2327     check_ralign32( R_EAX );
  2328     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2329     ADD_imm8s_r32( 4, R_EAX );
  2330     store_reg( R_EAX, Rm );
  2331     MEM_READ_LONG( R_ECX, R_EAX );
  2332     store_spreg( R_EAX, R_MACL );
  2333     sh4_x86.tstate = TSTATE_NONE;
  2334 :}
  2335 LDS Rm, PR {:  /* LDS Rm,PR: copy Rm to PR. */
  2336     load_reg( R_EAX, Rm );
  2337     store_spreg( R_EAX, R_PR );
  2338 :}
  2339 LDS.L @Rm+, PR {:  /* LDS.L @Rm+,PR: alignment-check Rm, write Rm+4 back, read the long at the original address and store it to PR. */
  2340     load_reg( R_EAX, Rm );
  2341     check_ralign32( R_EAX );
  2342     MOV_r32_r32( R_EAX, R_ECX ); // keep the pre-increment address in ECX
  2343     ADD_imm8s_r32( 4, R_EAX );
  2344     store_reg( R_EAX, Rm );
  2345     MEM_READ_LONG( R_ECX, R_EAX );
  2346     store_spreg( R_EAX, R_PR );
  2347     sh4_x86.tstate = TSTATE_NONE;
  2348 :}
  2349 LDTLB {:  /* LDTLB: emit a call to MMU_ldtlb. NOTE(review): no check_priv() here - confirm whether LDTLB should be privileged. */
  2350     call_func0( MMU_ldtlb );
           sh4_x86.tstate = TSTATE_NONE; // reset tstate after the call, as every other call_func* rule here does
  2351 :}
  2352 OCBI @Rn {: /* OCBI: no code is emitted for this instruction. */ :}
  2353 OCBP @Rn {: /* OCBP: no code is emitted for this instruction. */ :}
  2354 OCBWB @Rn {: /* OCBWB: no code is emitted for this instruction. */ :}
  2355 PREF @Rn {: /* PREF: if the top six bits of Rn equal those of 0xE0000000, call sh4_flush_store_queue with Rn; otherwise do nothing. */
  2356     load_reg( R_EAX, Rn );
  2357     MOV_r32_r32( R_EAX, R_ECX ); // keep the unmasked address for the call
  2358     AND_imm32_r32( 0xFC000000, R_EAX );
  2359     CMP_imm32_r32( 0xE0000000, R_EAX );
  2360     JNE_rel8(CALL_FUNC1_SIZE, end); // skip exactly the call below
  2361     call_func1( sh4_flush_store_queue, R_ECX );
  2362     JMP_TARGET(end);
  2363     sh4_x86.tstate = TSTATE_NONE;
  2364 :}
  2365 SLEEP {: /* SLEEP: after the privilege check, emit a call to sh4_sleep, then return 2 from the translator instead of falling through to the common footer. */
  2366     check_priv();
  2367     call_func0( sh4_sleep );
  2368     sh4_x86.tstate = TSTATE_NONE;
  2369     sh4_x86.in_delay_slot = FALSE;
  2370     return 2; // meaning of the value is defined by the caller (not visible here)
  2371 :}
  2372 STC SR, Rn {: /* STC SR,Rn: after the privilege check, call sh4_read_sr and store its result (taken from EAX) to Rn. */
  2373     check_priv();
  2374     call_func0(sh4_read_sr);
  2375     store_reg( R_EAX, Rn );
  2376     sh4_x86.tstate = TSTATE_NONE;
  2377 :}
  2378 STC GBR, Rn {:  /* STC GBR,Rn: copy GBR to Rn; unlike the other STC forms there is no check_priv() here. */
  2379     load_spreg( R_EAX, R_GBR );
  2380     store_reg( R_EAX, Rn );
  2381 :}
  2382 STC VBR, Rn {:  /* STC VBR,Rn: after the privilege check, copy VBR to Rn. */
  2383     check_priv();
  2384     load_spreg( R_EAX, R_VBR );
  2385     store_reg( R_EAX, Rn );
  2386     sh4_x86.tstate = TSTATE_NONE;
  2387 :}
  2388 STC SSR, Rn {:  /* STC SSR,Rn: after the privilege check, copy SSR to Rn. */
  2389     check_priv();
  2390     load_spreg( R_EAX, R_SSR );
  2391     store_reg( R_EAX, Rn );
  2392     sh4_x86.tstate = TSTATE_NONE;
  2393 :}
  2394 STC SPC, Rn {:  /* STC SPC,Rn: after the privilege check, copy SPC to Rn. */
  2395     check_priv();
  2396     load_spreg( R_EAX, R_SPC );
  2397     store_reg( R_EAX, Rn );
  2398     sh4_x86.tstate = TSTATE_NONE;
  2399 :}
  2400 STC SGR, Rn {:  /* STC SGR,Rn: after the privilege check, copy SGR to Rn. */
  2401     check_priv();
  2402     load_spreg( R_EAX, R_SGR );
  2403     store_reg( R_EAX, Rn );
  2404     sh4_x86.tstate = TSTATE_NONE;
  2405 :}
  2406 STC DBR, Rn {:  /* STC DBR,Rn: after the privilege check, copy DBR to Rn. */
  2407     check_priv();
  2408     load_spreg( R_EAX, R_DBR );
  2409     store_reg( R_EAX, Rn );
  2410     sh4_x86.tstate = TSTATE_NONE;
  2411 :}
  2412 STC Rm_BANK, Rn {: /* STC Rm_BANK,Rn: after the privilege check, copy r_bank[Rm_BANK] to Rn. */
  2413     check_priv();
  2414     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2415     store_reg( R_EAX, Rn );
  2416     sh4_x86.tstate = TSTATE_NONE;
  2417 :}
  2418 STC.L SR, @-Rn {: /* STC.L SR,@-Rn: after the privilege check, call sh4_read_sr, then alignment-check Rn, write Rn-4 back and write the SR value (EAX) at the new address. */
  2419     check_priv();
  2420     call_func0( sh4_read_sr ); // result is used from EAX below, so EAX must survive until the write
  2421     load_reg( R_ECX, Rn );
  2422     check_walign32( R_ECX );
  2423     ADD_imm8s_r32( -4, R_ECX );
  2424     store_reg( R_ECX, Rn );
  2425     MEM_WRITE_LONG( R_ECX, R_EAX );
  2426     sh4_x86.tstate = TSTATE_NONE;
  2427 :}
  2428 STC.L VBR, @-Rn {:  /* STC.L VBR,@-Rn: after the privilege check, alignment-check Rn, write Rn-4 back and write VBR at the new address. */
  2429     check_priv();
  2430     load_reg( R_ECX, Rn );
  2431     check_walign32( R_ECX );
  2432     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2433     store_reg( R_ECX, Rn );
  2434     load_spreg( R_EAX, R_VBR );
  2435     MEM_WRITE_LONG( R_ECX, R_EAX );
  2436     sh4_x86.tstate = TSTATE_NONE;
  2437 :}
  2438 STC.L SSR, @-Rn {:  /* STC.L SSR,@-Rn: after the privilege check, alignment-check Rn, write Rn-4 back and write SSR at the new address. */
  2439     check_priv();
  2440     load_reg( R_ECX, Rn );
  2441     check_walign32( R_ECX );
  2442     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2443     store_reg( R_ECX, Rn );
  2444     load_spreg( R_EAX, R_SSR );
  2445     MEM_WRITE_LONG( R_ECX, R_EAX );
  2446     sh4_x86.tstate = TSTATE_NONE;
  2447 :}
  2448 STC.L SPC, @-Rn {: /* STC.L SPC,@-Rn: after the privilege check, alignment-check Rn, write Rn-4 back and write SPC at the new address. */
  2449     check_priv();
  2450     load_reg( R_ECX, Rn );
  2451     check_walign32( R_ECX );
  2452     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2453     store_reg( R_ECX, Rn );
  2454     load_spreg( R_EAX, R_SPC );
  2455     MEM_WRITE_LONG( R_ECX, R_EAX );
  2456     sh4_x86.tstate = TSTATE_NONE;
  2457 :}
  2458 STC.L SGR, @-Rn {:  /* STC.L SGR,@-Rn: after the privilege check, alignment-check Rn, write Rn-4 back and write SGR at the new address. */
  2459     check_priv();
  2460     load_reg( R_ECX, Rn );
  2461     check_walign32( R_ECX );
  2462     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2463     store_reg( R_ECX, Rn );
  2464     load_spreg( R_EAX, R_SGR );
  2465     MEM_WRITE_LONG( R_ECX, R_EAX );
  2466     sh4_x86.tstate = TSTATE_NONE;
  2467 :}
  2468 STC.L DBR, @-Rn {:  /* STC.L DBR,@-Rn: after the privilege check, alignment-check Rn, write Rn-4 back and write DBR at the new address. */
  2469     check_priv();
  2470     load_reg( R_ECX, Rn );
  2471     check_walign32( R_ECX );
  2472     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2473     store_reg( R_ECX, Rn );
  2474     load_spreg( R_EAX, R_DBR );
  2475     MEM_WRITE_LONG( R_ECX, R_EAX );
  2476     sh4_x86.tstate = TSTATE_NONE;
  2477 :}
  2478 STC.L Rm_BANK, @-Rn {:  /* STC.L Rm_BANK,@-Rn: after the privilege check, alignment-check Rn, write Rn-4 back and write r_bank[Rm_BANK] at the new address. */
  2479     check_priv();
  2480     load_reg( R_ECX, Rn );
  2481     check_walign32( R_ECX );
  2482     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2483     store_reg( R_ECX, Rn );
  2484     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2485     MEM_WRITE_LONG( R_ECX, R_EAX );
  2486     sh4_x86.tstate = TSTATE_NONE;
  2487 :}
  2488 STC.L GBR, @-Rn {:  /* STC.L GBR,@-Rn: alignment-check Rn, write Rn-4 back and write GBR at the new address. No check_priv() for GBR. */
  2489     load_reg( R_ECX, Rn );
  2490     check_walign32( R_ECX );
  2491     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2492     store_reg( R_ECX, Rn );
  2493     load_spreg( R_EAX, R_GBR );
  2494     MEM_WRITE_LONG( R_ECX, R_EAX );
  2495     sh4_x86.tstate = TSTATE_NONE;
  2496 :}
  2497 STS FPSCR, Rn {:  /* STS FPSCR,Rn: copy FPSCR to Rn. */
  2498     load_spreg( R_EAX, R_FPSCR );
  2499     store_reg( R_EAX, Rn );
  2500 :}
  2501 STS.L FPSCR, @-Rn {:  /* STS.L FPSCR,@-Rn: alignment-check Rn, write Rn-4 back and write FPSCR at the new address. */
  2502     load_reg( R_ECX, Rn );
  2503     check_walign32( R_ECX );
  2504     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2505     store_reg( R_ECX, Rn );
  2506     load_spreg( R_EAX, R_FPSCR );
  2507     MEM_WRITE_LONG( R_ECX, R_EAX );
  2508     sh4_x86.tstate = TSTATE_NONE;
  2509 :}
  2510 STS FPUL, Rn {:  /* STS FPUL,Rn: copy FPUL to Rn. */
  2511     load_spreg( R_EAX, R_FPUL );
  2512     store_reg( R_EAX, Rn );
  2513 :}
  2514 STS.L FPUL, @-Rn {:  /* STS.L FPUL,@-Rn: alignment-check Rn, write Rn-4 back and write FPUL at the new address. */
  2515     load_reg( R_ECX, Rn );
  2516     check_walign32( R_ECX );
  2517     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2518     store_reg( R_ECX, Rn );
  2519     load_spreg( R_EAX, R_FPUL );
  2520     MEM_WRITE_LONG( R_ECX, R_EAX );
  2521     sh4_x86.tstate = TSTATE_NONE;
  2522 :}
  2523 STS MACH, Rn {:  /* STS MACH,Rn: copy MACH to Rn. */
  2524     load_spreg( R_EAX, R_MACH );
  2525     store_reg( R_EAX, Rn );
  2526 :}
  2527 STS.L MACH, @-Rn {:  /* STS.L MACH,@-Rn: alignment-check Rn, write Rn-4 back and write MACH at the new address. */
  2528     load_reg( R_ECX, Rn );
  2529     check_walign32( R_ECX );
  2530     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2531     store_reg( R_ECX, Rn );
  2532     load_spreg( R_EAX, R_MACH );
  2533     MEM_WRITE_LONG( R_ECX, R_EAX );
  2534     sh4_x86.tstate = TSTATE_NONE;
  2535 :}
  2536 STS MACL, Rn {:  /* STS MACL,Rn: copy MACL to Rn. */
  2537     load_spreg( R_EAX, R_MACL );
  2538     store_reg( R_EAX, Rn );
  2539 :}
  2540 STS.L MACL, @-Rn {:  /* STS.L MACL,@-Rn: alignment-check Rn, write Rn-4 back and write MACL at the new address. */
  2541     load_reg( R_ECX, Rn );
  2542     check_walign32( R_ECX );
  2543     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2544     store_reg( R_ECX, Rn );
  2545     load_spreg( R_EAX, R_MACL );
  2546     MEM_WRITE_LONG( R_ECX, R_EAX );
  2547     sh4_x86.tstate = TSTATE_NONE;
  2548 :}
  2549 STS PR, Rn {:  /* STS PR,Rn: copy PR to Rn. */
  2550     load_spreg( R_EAX, R_PR );
  2551     store_reg( R_EAX, Rn );
  2552 :}
  2553 STS.L PR, @-Rn {:  /* STS.L PR,@-Rn: alignment-check Rn, write Rn-4 back and write PR at the new address. */
  2554     load_reg( R_ECX, Rn );
  2555     check_walign32( R_ECX );
  2556     ADD_imm8s_r32( -4, R_ECX ); // pre-decrement
  2557     store_reg( R_ECX, Rn );
  2558     load_spreg( R_EAX, R_PR );
  2559     MEM_WRITE_LONG( R_ECX, R_EAX );
  2560     sh4_x86.tstate = TSTATE_NONE;
  2561 :}
  2563 NOP {: /* NOP: nothing is emitted. We could emit an x86 NOP (0x90), but it would serve no purpose. */ :}
  2564 %%
  2565     sh4_x86.in_delay_slot = FALSE;
  2566     return 0;
.