Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 561:533f6b478071
prev559:06714bc64271
next569:a1c49e1e8776
author nkeynes
date Tue Jan 01 05:08:38 2008 +0000 (16 years ago)
branchlxdream-mmu
permissions -rw-r--r--
last change Enable Id keyword on all source files
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
    37 struct backpatch_record {
    38     uint32_t *fixup_addr;
    39     uint32_t fixup_icount;
    40     uint32_t exc_code;
    41 };
    43 /** 
    44  * Struct to manage internal translation state. This state is not saved -
    45  * it is only valid between calls to sh4_translate_begin_block() and
    46  * sh4_translate_end_block()
    47  */
    48 struct sh4_x86_state {
    49     gboolean in_delay_slot;
    50     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    51     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    52     gboolean branch_taken; /* true if we branched unconditionally */
    53     uint32_t block_start_pc;
    54     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    55     int tstate;
    57     /* Allocated memory for the (block-wide) back-patch list */
    58     struct backpatch_record *backpatch_list;
    59     uint32_t backpatch_posn;
    60     uint32_t backpatch_size;
    61 };
    63 #define TSTATE_NONE -1
    64 #define TSTATE_O    0
    65 #define TSTATE_C    2
    66 #define TSTATE_E    4
    67 #define TSTATE_NE   5
    68 #define TSTATE_G    0xF
    69 #define TSTATE_GE   0xD
    70 #define TSTATE_A    7
    71 #define TSTATE_AE   3
    73 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    74 #define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    75 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    76     OP(0x70+sh4_x86.tstate); OP(rel8); \
    77     MARK_JMP(rel8,label)
    78 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    79 #define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    80 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    81     OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    82     MARK_JMP(rel8, label)
    84 static struct sh4_x86_state sh4_x86;
    86 static uint32_t max_int = 0x7FFFFFFF;
    87 static uint32_t min_int = 0x80000000;
    88 static uint32_t save_fcw; /* save value for fpu control word */
    89 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    91 void sh4_x86_init()
    92 {
    93     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    94     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    95 }
    98 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
    99 {
   100     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   101 	sh4_x86.backpatch_size <<= 1;
   102 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   103 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   104 	assert( sh4_x86.backpatch_list != NULL );
   105     }
   106     if( sh4_x86.in_delay_slot ) {
   107 	fixup_pc -= 2;
   108     }
   109     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
   110     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   111     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   112     sh4_x86.backpatch_posn++;
   113 }
   115 /**
   116  * Emit an instruction to load an SH4 reg into a real register
   117  */
   118 static inline void load_reg( int x86reg, int sh4reg ) 
   119 {
   120     /* mov [bp+n], reg */
   121     OP(0x8B);
   122     OP(0x45 + (x86reg<<3));
   123     OP(REG_OFFSET(r[sh4reg]));
   124 }
   126 static inline void load_reg16s( int x86reg, int sh4reg )
   127 {
   128     OP(0x0F);
   129     OP(0xBF);
   130     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   131 }
   133 static inline void load_reg16u( int x86reg, int sh4reg )
   134 {
   135     OP(0x0F);
   136     OP(0xB7);
   137     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   139 }
   141 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   142 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   143 /**
   144  * Emit an instruction to load an immediate value into a register
   145  */
   146 static inline void load_imm32( int x86reg, uint32_t value ) {
   147     /* mov #value, reg */
   148     OP(0xB8 + x86reg);
   149     OP32(value);
   150 }
   152 /**
   153  * Load an immediate 64-bit quantity (note: x86-64 only)
   154  */
   155 static inline void load_imm64( int x86reg, uint32_t value ) {
   156     /* mov #value, reg */
   157     REXW();
   158     OP(0xB8 + x86reg);
   159     OP64(value);
   160 }
   163 /**
   164  * Emit an instruction to store an SH4 reg (RN)
   165  */
   166 void static inline store_reg( int x86reg, int sh4reg ) {
   167     /* mov reg, [bp+n] */
   168     OP(0x89);
   169     OP(0x45 + (x86reg<<3));
   170     OP(REG_OFFSET(r[sh4reg]));
   171 }
   173 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   175 /**
   176  * Load an FR register (single-precision floating point) into an integer x86
   177  * register (eg for register-to-register moves)
   178  */
   179 void static inline load_fr( int bankreg, int x86reg, int frm )
   180 {
   181     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   182 }
   184 /**
   185  * Store an FR register (single-precision floating point) into an integer x86
   186  * register (eg for register-to-register moves)
   187  */
   188 void static inline store_fr( int bankreg, int x86reg, int frn )
   189 {
   190     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   191 }
   194 /**
   195  * Load a pointer to the back fp back into the specified x86 register. The
   196  * bankreg must have been previously loaded with FPSCR.
   197  * NB: 12 bytes
   198  */
   199 static inline void load_xf_bank( int bankreg )
   200 {
   201     NOT_r32( bankreg );
   202     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   203     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   204     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   205 }
   207 /**
   208  * Update the fr_bank pointer based on the current fpscr value.
   209  */
   210 static inline void update_fr_bank( int fpscrreg )
   211 {
   212     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
   213     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
   214     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
   215     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
   216 }
   217 /**
   218  * Push FPUL (as a 32-bit float) onto the FPU stack
   219  */
   220 static inline void push_fpul( )
   221 {
   222     OP(0xD9); OP(0x45); OP(R_FPUL);
   223 }
   225 /**
   226  * Pop FPUL (as a 32-bit float) from the FPU stack
   227  */
   228 static inline void pop_fpul( )
   229 {
   230     OP(0xD9); OP(0x5D); OP(R_FPUL);
   231 }
   233 /**
   234  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   235  * with the location of the current fp bank.
   236  */
   237 static inline void push_fr( int bankreg, int frm ) 
   238 {
   239     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   240 }
   242 /**
   243  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   244  * with bankreg previously loaded with the location of the current fp bank.
   245  */
   246 static inline void pop_fr( int bankreg, int frm )
   247 {
   248     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
   249 }
   251 /**
   252  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   253  * with the location of the current fp bank.
   254  */
   255 static inline void push_dr( int bankreg, int frm )
   256 {
   257     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   258 }
   260 static inline void pop_dr( int bankreg, int frm )
   261 {
   262     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
   263 }
   265 /* Exception checks - Note that all exception checks will clobber EAX */
   267 #define check_priv( ) \
   268     if( !sh4_x86.priv_checked ) { \
   269 	sh4_x86.priv_checked = TRUE;\
   270 	load_spreg( R_EAX, R_SR );\
   271 	AND_imm32_r32( SR_MD, R_EAX );\
   272 	if( sh4_x86.in_delay_slot ) {\
   273 	    JE_exc( EXC_SLOT_ILLEGAL );\
   274 	} else {\
   275 	    JE_exc( EXC_ILLEGAL );\
   276 	}\
   277     }\
   279 #define check_fpuen( ) \
   280     if( !sh4_x86.fpuen_checked ) {\
   281 	sh4_x86.fpuen_checked = TRUE;\
   282 	load_spreg( R_EAX, R_SR );\
   283 	AND_imm32_r32( SR_FD, R_EAX );\
   284 	if( sh4_x86.in_delay_slot ) {\
   285 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   286 	} else {\
   287 	    JNE_exc(EXC_FPU_DISABLED);\
   288 	}\
   289     }
   291 #define check_ralign16( x86reg ) \
   292     TEST_imm32_r32( 0x00000001, x86reg ); \
   293     JNE_exc(EXC_DATA_ADDR_READ)
   295 #define check_walign16( x86reg ) \
   296     TEST_imm32_r32( 0x00000001, x86reg ); \
   297     JNE_exc(EXC_DATA_ADDR_WRITE);
   299 #define check_ralign32( x86reg ) \
   300     TEST_imm32_r32( 0x00000003, x86reg ); \
   301     JNE_exc(EXC_DATA_ADDR_READ)
   303 #define check_walign32( x86reg ) \
   304     TEST_imm32_r32( 0x00000003, x86reg ); \
   305     JNE_exc(EXC_DATA_ADDR_WRITE);
   307 #define UNDEF()
   308 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   309 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); TEST_r32_r32( R_EDX, R_EDX ); JNE_exc(-1); MEM_RESULT(value_reg) 
   310 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); TEST_r32_r32( R_EDX, R_EDX ); JNE_exc(-1); MEM_RESULT(value_reg) 
   311 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); TEST_r32_r32( R_EDX, R_EDX ); JNE_exc(-1); MEM_RESULT(value_reg) 
   312 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg); TEST_r32_r32( R_EAX, R_EAX ); JNE_exc(-1);
   313 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg); TEST_r32_r32( R_EAX, R_EAX ); JNE_exc(-1);
   314 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg); TEST_r32_r32( R_EAX, R_EAX ); JNE_exc(-1);
   316 #define MEM_READ_SIZE  (CALL_FUNC1_SIZE+8)
   317 #define MEM_WRITE_SIZE (CALL_FUNC2_SIZE+8)
   319 #define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
   321 extern uint16_t *sh4_icache;
   322 extern uint32_t sh4_icache_addr;
   324 /****** Import appropriate calling conventions ******/
   325 #if SH4_TRANSLATOR == TARGET_X86_64
   326 #include "sh4/ia64abi.h"
   327 #else /* SH4_TRANSLATOR == TARGET_X86 */
   328 #ifdef APPLE_BUILD
   329 #include "sh4/ia32mac.h"
   330 #else
   331 #include "sh4/ia32abi.h"
   332 #endif
   333 #endif
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and the delayed instruction as a single
 * unit.
 *
 * @return true if the instruction marks the end of a basic block
 * (e.g. a branch).
 */
   344 uint32_t sh4_translate_instruction( sh4addr_t pc )
   345 {
   346     uint32_t ir;
   347     /* Read instruction */
   348     uint32_t pageaddr = pc >> 12;
   349     if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
   350 	ir = sh4_icache[(pc&0xFFF)>>1];
   351     } else {
   352 	uint64_t phys = mmu_vma_to_phys_exec(pc);
   353 	sh4_icache = (uint16_t *)mem_get_page((uint32_t)phys);
   354 	if( ((uintptr_t)sh4_icache) < MAX_IO_REGIONS ) {
   355 	    /* If someone's actually been so daft as to try to execute out of an IO
   356 	     * region, fallback on the full-blown memory read
   357 	     */
   358 	    sh4_icache = NULL;
   359 	    ir = sh4_read_word(pc);
   360 	} else {
   361 	    sh4_icache_addr = pageaddr;
   362 	    ir = sh4_icache[(pc&0xFFF)>>1];
   363 	}
   364     }
   366 %%
   367 /* ALU operations */
   368 ADD Rm, Rn {:
   369     load_reg( R_EAX, Rm );
   370     load_reg( R_ECX, Rn );
   371     ADD_r32_r32( R_EAX, R_ECX );
   372     store_reg( R_ECX, Rn );
   373     sh4_x86.tstate = TSTATE_NONE;
   374 :}
   375 ADD #imm, Rn {:  
   376     load_reg( R_EAX, Rn );
   377     ADD_imm8s_r32( imm, R_EAX );
   378     store_reg( R_EAX, Rn );
   379     sh4_x86.tstate = TSTATE_NONE;
   380 :}
   381 ADDC Rm, Rn {:
   382     if( sh4_x86.tstate != TSTATE_C ) {
   383 	LDC_t();
   384     }
   385     load_reg( R_EAX, Rm );
   386     load_reg( R_ECX, Rn );
   387     ADC_r32_r32( R_EAX, R_ECX );
   388     store_reg( R_ECX, Rn );
   389     SETC_t();
   390     sh4_x86.tstate = TSTATE_C;
   391 :}
   392 ADDV Rm, Rn {:
   393     load_reg( R_EAX, Rm );
   394     load_reg( R_ECX, Rn );
   395     ADD_r32_r32( R_EAX, R_ECX );
   396     store_reg( R_ECX, Rn );
   397     SETO_t();
   398     sh4_x86.tstate = TSTATE_O;
   399 :}
   400 AND Rm, Rn {:
   401     load_reg( R_EAX, Rm );
   402     load_reg( R_ECX, Rn );
   403     AND_r32_r32( R_EAX, R_ECX );
   404     store_reg( R_ECX, Rn );
   405     sh4_x86.tstate = TSTATE_NONE;
   406 :}
   407 AND #imm, R0 {:  
   408     load_reg( R_EAX, 0 );
   409     AND_imm32_r32(imm, R_EAX); 
   410     store_reg( R_EAX, 0 );
   411     sh4_x86.tstate = TSTATE_NONE;
   412 :}
   413 AND.B #imm, @(R0, GBR) {: 
   414     load_reg( R_EAX, 0 );
   415     load_spreg( R_ECX, R_GBR );
   416     ADD_r32_r32( R_EAX, R_ECX );
   417     PUSH_realigned_r32(R_ECX);
   418     MEM_READ_BYTE( R_ECX, R_EAX );
   419     POP_realigned_r32(R_ECX);
   420     AND_imm32_r32(imm, R_EAX );
   421     MEM_WRITE_BYTE( R_ECX, R_EAX );
   422     sh4_x86.tstate = TSTATE_NONE;
   423 :}
   424 CMP/EQ Rm, Rn {:  
   425     load_reg( R_EAX, Rm );
   426     load_reg( R_ECX, Rn );
   427     CMP_r32_r32( R_EAX, R_ECX );
   428     SETE_t();
   429     sh4_x86.tstate = TSTATE_E;
   430 :}
   431 CMP/EQ #imm, R0 {:  
   432     load_reg( R_EAX, 0 );
   433     CMP_imm8s_r32(imm, R_EAX);
   434     SETE_t();
   435     sh4_x86.tstate = TSTATE_E;
   436 :}
   437 CMP/GE Rm, Rn {:  
   438     load_reg( R_EAX, Rm );
   439     load_reg( R_ECX, Rn );
   440     CMP_r32_r32( R_EAX, R_ECX );
   441     SETGE_t();
   442     sh4_x86.tstate = TSTATE_GE;
   443 :}
   444 CMP/GT Rm, Rn {: 
   445     load_reg( R_EAX, Rm );
   446     load_reg( R_ECX, Rn );
   447     CMP_r32_r32( R_EAX, R_ECX );
   448     SETG_t();
   449     sh4_x86.tstate = TSTATE_G;
   450 :}
   451 CMP/HI Rm, Rn {:  
   452     load_reg( R_EAX, Rm );
   453     load_reg( R_ECX, Rn );
   454     CMP_r32_r32( R_EAX, R_ECX );
   455     SETA_t();
   456     sh4_x86.tstate = TSTATE_A;
   457 :}
   458 CMP/HS Rm, Rn {: 
   459     load_reg( R_EAX, Rm );
   460     load_reg( R_ECX, Rn );
   461     CMP_r32_r32( R_EAX, R_ECX );
   462     SETAE_t();
   463     sh4_x86.tstate = TSTATE_AE;
   464  :}
   465 CMP/PL Rn {: 
   466     load_reg( R_EAX, Rn );
   467     CMP_imm8s_r32( 0, R_EAX );
   468     SETG_t();
   469     sh4_x86.tstate = TSTATE_G;
   470 :}
   471 CMP/PZ Rn {:  
   472     load_reg( R_EAX, Rn );
   473     CMP_imm8s_r32( 0, R_EAX );
   474     SETGE_t();
   475     sh4_x86.tstate = TSTATE_GE;
   476 :}
   477 CMP/STR Rm, Rn {:  
   478     load_reg( R_EAX, Rm );
   479     load_reg( R_ECX, Rn );
   480     XOR_r32_r32( R_ECX, R_EAX );
   481     TEST_r8_r8( R_AL, R_AL );
   482     JE_rel8(13, target1);
   483     TEST_r8_r8( R_AH, R_AH ); // 2
   484     JE_rel8(9, target2);
   485     SHR_imm8_r32( 16, R_EAX ); // 3
   486     TEST_r8_r8( R_AL, R_AL ); // 2
   487     JE_rel8(2, target3);
   488     TEST_r8_r8( R_AH, R_AH ); // 2
   489     JMP_TARGET(target1);
   490     JMP_TARGET(target2);
   491     JMP_TARGET(target3);
   492     SETE_t();
   493     sh4_x86.tstate = TSTATE_E;
   494 :}
   495 DIV0S Rm, Rn {:
   496     load_reg( R_EAX, Rm );
   497     load_reg( R_ECX, Rn );
   498     SHR_imm8_r32( 31, R_EAX );
   499     SHR_imm8_r32( 31, R_ECX );
   500     store_spreg( R_EAX, R_M );
   501     store_spreg( R_ECX, R_Q );
   502     CMP_r32_r32( R_EAX, R_ECX );
   503     SETNE_t();
   504     sh4_x86.tstate = TSTATE_NE;
   505 :}
   506 DIV0U {:  
   507     XOR_r32_r32( R_EAX, R_EAX );
   508     store_spreg( R_EAX, R_Q );
   509     store_spreg( R_EAX, R_M );
   510     store_spreg( R_EAX, R_T );
   511     sh4_x86.tstate = TSTATE_C; // works for DIV1
   512 :}
   513 DIV1 Rm, Rn {:
   514     load_spreg( R_ECX, R_M );
   515     load_reg( R_EAX, Rn );
   516     if( sh4_x86.tstate != TSTATE_C ) {
   517 	LDC_t();
   518     }
   519     RCL1_r32( R_EAX );
   520     SETC_r8( R_DL ); // Q'
   521     CMP_sh4r_r32( R_Q, R_ECX );
   522     JE_rel8(5, mqequal);
   523     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   524     JMP_rel8(3, end);
   525     JMP_TARGET(mqequal);
   526     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   527     JMP_TARGET(end);
   528     store_reg( R_EAX, Rn ); // Done with Rn now
   529     SETC_r8(R_AL); // tmp1
   530     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   531     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   532     store_spreg( R_ECX, R_Q );
   533     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   534     MOVZX_r8_r32( R_AL, R_EAX );
   535     store_spreg( R_EAX, R_T );
   536     sh4_x86.tstate = TSTATE_NONE;
   537 :}
   538 DMULS.L Rm, Rn {:  
   539     load_reg( R_EAX, Rm );
   540     load_reg( R_ECX, Rn );
   541     IMUL_r32(R_ECX);
   542     store_spreg( R_EDX, R_MACH );
   543     store_spreg( R_EAX, R_MACL );
   544     sh4_x86.tstate = TSTATE_NONE;
   545 :}
   546 DMULU.L Rm, Rn {:  
   547     load_reg( R_EAX, Rm );
   548     load_reg( R_ECX, Rn );
   549     MUL_r32(R_ECX);
   550     store_spreg( R_EDX, R_MACH );
   551     store_spreg( R_EAX, R_MACL );    
   552     sh4_x86.tstate = TSTATE_NONE;
   553 :}
   554 DT Rn {:  
   555     load_reg( R_EAX, Rn );
   556     ADD_imm8s_r32( -1, R_EAX );
   557     store_reg( R_EAX, Rn );
   558     SETE_t();
   559     sh4_x86.tstate = TSTATE_E;
   560 :}
   561 EXTS.B Rm, Rn {:  
   562     load_reg( R_EAX, Rm );
   563     MOVSX_r8_r32( R_EAX, R_EAX );
   564     store_reg( R_EAX, Rn );
   565 :}
   566 EXTS.W Rm, Rn {:  
   567     load_reg( R_EAX, Rm );
   568     MOVSX_r16_r32( R_EAX, R_EAX );
   569     store_reg( R_EAX, Rn );
   570 :}
   571 EXTU.B Rm, Rn {:  
   572     load_reg( R_EAX, Rm );
   573     MOVZX_r8_r32( R_EAX, R_EAX );
   574     store_reg( R_EAX, Rn );
   575 :}
   576 EXTU.W Rm, Rn {:  
   577     load_reg( R_EAX, Rm );
   578     MOVZX_r16_r32( R_EAX, R_EAX );
   579     store_reg( R_EAX, Rn );
   580 :}
   581 MAC.L @Rm+, @Rn+ {:  
   582     load_reg( R_ECX, Rm );
   583     check_ralign32( R_ECX );
   584     load_reg( R_ECX, Rn );
   585     check_ralign32( R_ECX );
   586     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   587     MEM_READ_LONG( R_ECX, R_EAX );
   588     PUSH_realigned_r32( R_EAX );
   589     load_reg( R_ECX, Rm );
   590     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   591     MEM_READ_LONG( R_ECX, R_EAX );
   592     POP_realigned_r32( R_ECX );
   593     IMUL_r32( R_ECX );
   594     ADD_r32_sh4r( R_EAX, R_MACL );
   595     ADC_r32_sh4r( R_EDX, R_MACH );
   597     load_spreg( R_ECX, R_S );
   598     TEST_r32_r32(R_ECX, R_ECX);
   599     JE_rel8( CALL_FUNC0_SIZE, nosat );
   600     call_func0( signsat48 );
   601     JMP_TARGET( nosat );
   602     sh4_x86.tstate = TSTATE_NONE;
   603 :}
   604 MAC.W @Rm+, @Rn+ {:  
   605     load_reg( R_ECX, Rm );
   606     check_ralign16( R_ECX );
   607     load_reg( R_ECX, Rn );
   608     check_ralign16( R_ECX );
   609     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   610     MEM_READ_WORD( R_ECX, R_EAX );
   611     PUSH_realigned_r32( R_EAX );
   612     load_reg( R_ECX, Rm );
   613     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   614     MEM_READ_WORD( R_ECX, R_EAX );
   615     POP_realigned_r32( R_ECX );
   616     IMUL_r32( R_ECX );
   618     load_spreg( R_ECX, R_S );
   619     TEST_r32_r32( R_ECX, R_ECX );
   620     JE_rel8( 47, nosat );
   622     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   623     JNO_rel8( 51, end );            // 2
   624     load_imm32( R_EDX, 1 );         // 5
   625     store_spreg( R_EDX, R_MACH );   // 6
   626     JS_rel8( 13, positive );        // 2
   627     load_imm32( R_EAX, 0x80000000 );// 5
   628     store_spreg( R_EAX, R_MACL );   // 6
   629     JMP_rel8( 25, end2 );           // 2
   631     JMP_TARGET(positive);
   632     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   633     store_spreg( R_EAX, R_MACL );   // 6
   634     JMP_rel8( 12, end3);            // 2
   636     JMP_TARGET(nosat);
   637     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   638     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   639     JMP_TARGET(end);
   640     JMP_TARGET(end2);
   641     JMP_TARGET(end3);
   642     sh4_x86.tstate = TSTATE_NONE;
   643 :}
   644 MOVT Rn {:  
   645     load_spreg( R_EAX, R_T );
   646     store_reg( R_EAX, Rn );
   647 :}
   648 MUL.L Rm, Rn {:  
   649     load_reg( R_EAX, Rm );
   650     load_reg( R_ECX, Rn );
   651     MUL_r32( R_ECX );
   652     store_spreg( R_EAX, R_MACL );
   653     sh4_x86.tstate = TSTATE_NONE;
   654 :}
   655 MULS.W Rm, Rn {:
   656     load_reg16s( R_EAX, Rm );
   657     load_reg16s( R_ECX, Rn );
   658     MUL_r32( R_ECX );
   659     store_spreg( R_EAX, R_MACL );
   660     sh4_x86.tstate = TSTATE_NONE;
   661 :}
   662 MULU.W Rm, Rn {:  
   663     load_reg16u( R_EAX, Rm );
   664     load_reg16u( R_ECX, Rn );
   665     MUL_r32( R_ECX );
   666     store_spreg( R_EAX, R_MACL );
   667     sh4_x86.tstate = TSTATE_NONE;
   668 :}
   669 NEG Rm, Rn {:
   670     load_reg( R_EAX, Rm );
   671     NEG_r32( R_EAX );
   672     store_reg( R_EAX, Rn );
   673     sh4_x86.tstate = TSTATE_NONE;
   674 :}
   675 NEGC Rm, Rn {:  
   676     load_reg( R_EAX, Rm );
   677     XOR_r32_r32( R_ECX, R_ECX );
   678     LDC_t();
   679     SBB_r32_r32( R_EAX, R_ECX );
   680     store_reg( R_ECX, Rn );
   681     SETC_t();
   682     sh4_x86.tstate = TSTATE_C;
   683 :}
   684 NOT Rm, Rn {:  
   685     load_reg( R_EAX, Rm );
   686     NOT_r32( R_EAX );
   687     store_reg( R_EAX, Rn );
   688     sh4_x86.tstate = TSTATE_NONE;
   689 :}
   690 OR Rm, Rn {:  
   691     load_reg( R_EAX, Rm );
   692     load_reg( R_ECX, Rn );
   693     OR_r32_r32( R_EAX, R_ECX );
   694     store_reg( R_ECX, Rn );
   695     sh4_x86.tstate = TSTATE_NONE;
   696 :}
   697 OR #imm, R0 {:
   698     load_reg( R_EAX, 0 );
   699     OR_imm32_r32(imm, R_EAX);
   700     store_reg( R_EAX, 0 );
   701     sh4_x86.tstate = TSTATE_NONE;
   702 :}
   703 OR.B #imm, @(R0, GBR) {:  
   704     load_reg( R_EAX, 0 );
   705     load_spreg( R_ECX, R_GBR );
   706     ADD_r32_r32( R_EAX, R_ECX );
   707     PUSH_realigned_r32(R_ECX);
   708     MEM_READ_BYTE( R_ECX, R_EAX );
   709     POP_realigned_r32(R_ECX);
   710     OR_imm32_r32(imm, R_EAX );
   711     MEM_WRITE_BYTE( R_ECX, R_EAX );
   712     sh4_x86.tstate = TSTATE_NONE;
   713 :}
   714 ROTCL Rn {:
   715     load_reg( R_EAX, Rn );
   716     if( sh4_x86.tstate != TSTATE_C ) {
   717 	LDC_t();
   718     }
   719     RCL1_r32( R_EAX );
   720     store_reg( R_EAX, Rn );
   721     SETC_t();
   722     sh4_x86.tstate = TSTATE_C;
   723 :}
   724 ROTCR Rn {:  
   725     load_reg( R_EAX, Rn );
   726     if( sh4_x86.tstate != TSTATE_C ) {
   727 	LDC_t();
   728     }
   729     RCR1_r32( R_EAX );
   730     store_reg( R_EAX, Rn );
   731     SETC_t();
   732     sh4_x86.tstate = TSTATE_C;
   733 :}
   734 ROTL Rn {:  
   735     load_reg( R_EAX, Rn );
   736     ROL1_r32( R_EAX );
   737     store_reg( R_EAX, Rn );
   738     SETC_t();
   739     sh4_x86.tstate = TSTATE_C;
   740 :}
   741 ROTR Rn {:  
   742     load_reg( R_EAX, Rn );
   743     ROR1_r32( R_EAX );
   744     store_reg( R_EAX, Rn );
   745     SETC_t();
   746     sh4_x86.tstate = TSTATE_C;
   747 :}
   748 SHAD Rm, Rn {:
   749     /* Annoyingly enough, not directly convertible */
   750     load_reg( R_EAX, Rn );
   751     load_reg( R_ECX, Rm );
   752     CMP_imm32_r32( 0, R_ECX );
   753     JGE_rel8(16, doshl);
   755     NEG_r32( R_ECX );      // 2
   756     AND_imm8_r8( 0x1F, R_CL ); // 3
   757     JE_rel8( 4, emptysar);     // 2
   758     SAR_r32_CL( R_EAX );       // 2
   759     JMP_rel8(10, end);          // 2
   761     JMP_TARGET(emptysar);
   762     SAR_imm8_r32(31, R_EAX );  // 3
   763     JMP_rel8(5, end2);
   765     JMP_TARGET(doshl);
   766     AND_imm8_r8( 0x1F, R_CL ); // 3
   767     SHL_r32_CL( R_EAX );       // 2
   768     JMP_TARGET(end);
   769     JMP_TARGET(end2);
   770     store_reg( R_EAX, Rn );
   771     sh4_x86.tstate = TSTATE_NONE;
   772 :}
   773 SHLD Rm, Rn {:  
   774     load_reg( R_EAX, Rn );
   775     load_reg( R_ECX, Rm );
   776     CMP_imm32_r32( 0, R_ECX );
   777     JGE_rel8(15, doshl);
   779     NEG_r32( R_ECX );      // 2
   780     AND_imm8_r8( 0x1F, R_CL ); // 3
   781     JE_rel8( 4, emptyshr );
   782     SHR_r32_CL( R_EAX );       // 2
   783     JMP_rel8(9, end);          // 2
   785     JMP_TARGET(emptyshr);
   786     XOR_r32_r32( R_EAX, R_EAX );
   787     JMP_rel8(5, end2);
   789     JMP_TARGET(doshl);
   790     AND_imm8_r8( 0x1F, R_CL ); // 3
   791     SHL_r32_CL( R_EAX );       // 2
   792     JMP_TARGET(end);
   793     JMP_TARGET(end2);
   794     store_reg( R_EAX, Rn );
   795     sh4_x86.tstate = TSTATE_NONE;
   796 :}
   797 SHAL Rn {: 
   798     load_reg( R_EAX, Rn );
   799     SHL1_r32( R_EAX );
   800     SETC_t();
   801     store_reg( R_EAX, Rn );
   802     sh4_x86.tstate = TSTATE_C;
   803 :}
   804 SHAR Rn {:  
   805     load_reg( R_EAX, Rn );
   806     SAR1_r32( R_EAX );
   807     SETC_t();
   808     store_reg( R_EAX, Rn );
   809     sh4_x86.tstate = TSTATE_C;
   810 :}
   811 SHLL Rn {:  
   812     load_reg( R_EAX, Rn );
   813     SHL1_r32( R_EAX );
   814     SETC_t();
   815     store_reg( R_EAX, Rn );
   816     sh4_x86.tstate = TSTATE_C;
   817 :}
   818 SHLL2 Rn {:
   819     load_reg( R_EAX, Rn );
   820     SHL_imm8_r32( 2, R_EAX );
   821     store_reg( R_EAX, Rn );
   822     sh4_x86.tstate = TSTATE_NONE;
   823 :}
   824 SHLL8 Rn {:  
   825     load_reg( R_EAX, Rn );
   826     SHL_imm8_r32( 8, R_EAX );
   827     store_reg( R_EAX, Rn );
   828     sh4_x86.tstate = TSTATE_NONE;
   829 :}
   830 SHLL16 Rn {:  
   831     load_reg( R_EAX, Rn );
   832     SHL_imm8_r32( 16, R_EAX );
   833     store_reg( R_EAX, Rn );
   834     sh4_x86.tstate = TSTATE_NONE;
   835 :}
   836 SHLR Rn {:  
   837     load_reg( R_EAX, Rn );
   838     SHR1_r32( R_EAX );
   839     SETC_t();
   840     store_reg( R_EAX, Rn );
   841     sh4_x86.tstate = TSTATE_C;
   842 :}
   843 SHLR2 Rn {:  
   844     load_reg( R_EAX, Rn );
   845     SHR_imm8_r32( 2, R_EAX );
   846     store_reg( R_EAX, Rn );
   847     sh4_x86.tstate = TSTATE_NONE;
   848 :}
   849 SHLR8 Rn {:  
   850     load_reg( R_EAX, Rn );
   851     SHR_imm8_r32( 8, R_EAX );
   852     store_reg( R_EAX, Rn );
   853     sh4_x86.tstate = TSTATE_NONE;
   854 :}
   855 SHLR16 Rn {:  
   856     load_reg( R_EAX, Rn );
   857     SHR_imm8_r32( 16, R_EAX );
   858     store_reg( R_EAX, Rn );
   859     sh4_x86.tstate = TSTATE_NONE;
   860 :}
   861 SUB Rm, Rn {:  
   862     load_reg( R_EAX, Rm );
   863     load_reg( R_ECX, Rn );
   864     SUB_r32_r32( R_EAX, R_ECX );
   865     store_reg( R_ECX, Rn );
   866     sh4_x86.tstate = TSTATE_NONE;
   867 :}
   868 SUBC Rm, Rn {:  
   869     load_reg( R_EAX, Rm );
   870     load_reg( R_ECX, Rn );
   871     if( sh4_x86.tstate != TSTATE_C ) {
   872 	LDC_t();
   873     }
   874     SBB_r32_r32( R_EAX, R_ECX );
   875     store_reg( R_ECX, Rn );
   876     SETC_t();
   877     sh4_x86.tstate = TSTATE_C;
   878 :}
   879 SUBV Rm, Rn {:  
   880     load_reg( R_EAX, Rm );
   881     load_reg( R_ECX, Rn );
   882     SUB_r32_r32( R_EAX, R_ECX );
   883     store_reg( R_ECX, Rn );
   884     SETO_t();
   885     sh4_x86.tstate = TSTATE_O;
   886 :}
   887 SWAP.B Rm, Rn {:  
   888     load_reg( R_EAX, Rm );
   889     XCHG_r8_r8( R_AL, R_AH );
   890     store_reg( R_EAX, Rn );
   891 :}
   892 SWAP.W Rm, Rn {:  
   893     load_reg( R_EAX, Rm );
   894     MOV_r32_r32( R_EAX, R_ECX );
   895     SHL_imm8_r32( 16, R_ECX );
   896     SHR_imm8_r32( 16, R_EAX );
   897     OR_r32_r32( R_EAX, R_ECX );
   898     store_reg( R_ECX, Rn );
   899     sh4_x86.tstate = TSTATE_NONE;
   900 :}
   901 TAS.B @Rn {:  
   902     load_reg( R_ECX, Rn );
   903     MEM_READ_BYTE( R_ECX, R_EAX );
   904     TEST_r8_r8( R_AL, R_AL );
   905     SETE_t();
   906     OR_imm8_r8( 0x80, R_AL );
   907     load_reg( R_ECX, Rn );
   908     MEM_WRITE_BYTE( R_ECX, R_EAX );
   909     sh4_x86.tstate = TSTATE_NONE;
   910 :}
   911 TST Rm, Rn {:  
   912     load_reg( R_EAX, Rm );
   913     load_reg( R_ECX, Rn );
   914     TEST_r32_r32( R_EAX, R_ECX );
   915     SETE_t();
   916     sh4_x86.tstate = TSTATE_E;
   917 :}
   918 TST #imm, R0 {:  
   919     load_reg( R_EAX, 0 );
   920     TEST_imm32_r32( imm, R_EAX );
   921     SETE_t();
   922     sh4_x86.tstate = TSTATE_E;
   923 :}
   924 TST.B #imm, @(R0, GBR) {:  
   925     load_reg( R_EAX, 0);
   926     load_reg( R_ECX, R_GBR);
   927     ADD_r32_r32( R_EAX, R_ECX );
   928     MEM_READ_BYTE( R_ECX, R_EAX );
   929     TEST_imm8_r8( imm, R_AL );
   930     SETE_t();
   931     sh4_x86.tstate = TSTATE_E;
   932 :}
   933 XOR Rm, Rn {:  
   934     load_reg( R_EAX, Rm );
   935     load_reg( R_ECX, Rn );
   936     XOR_r32_r32( R_EAX, R_ECX );
   937     store_reg( R_ECX, Rn );
   938     sh4_x86.tstate = TSTATE_NONE;
   939 :}
   940 XOR #imm, R0 {:  
   941     load_reg( R_EAX, 0 );
   942     XOR_imm32_r32( imm, R_EAX );
   943     store_reg( R_EAX, 0 );
   944     sh4_x86.tstate = TSTATE_NONE;
   945 :}
   946 XOR.B #imm, @(R0, GBR) {:  /* Read-modify-write: byte at (R0+GBR) ^= imm */
   947     load_reg( R_EAX, 0 );
   948     load_spreg( R_ECX, R_GBR );
   949     ADD_r32_r32( R_EAX, R_ECX );
   950     PUSH_realigned_r32(R_ECX); // keep the address across the read for the write-back below
   951     MEM_READ_BYTE(R_ECX, R_EAX);
   952     POP_realigned_r32(R_ECX);
   953     XOR_imm32_r32( imm, R_EAX );
   954     MEM_WRITE_BYTE( R_ECX, R_EAX );
   955     sh4_x86.tstate = TSTATE_NONE;
   956 :}
   957 XTRCT Rm, Rn {:  /* Rn = (Rm << 16) | (Rn >> 16): the middle 32 bits of the 64-bit pair Rm:Rn */
   958     load_reg( R_EAX, Rm );
   959     load_reg( R_ECX, Rn );
   960     SHL_imm8_r32( 16, R_EAX );
   961     SHR_imm8_r32( 16, R_ECX );
   962     OR_r32_r32( R_EAX, R_ECX );
   963     store_reg( R_ECX, Rn );
   964     sh4_x86.tstate = TSTATE_NONE;
   965 :}
   967 /* Data move instructions */
   968 MOV Rm, Rn {:  /* Rn = Rm; tstate is left untouched by this rule. */
   969     load_reg( R_EAX, Rm );
   970     store_reg( R_EAX, Rn );
   971 :}
   972 MOV #imm, Rn {:  /* Rn = imm; tstate is left untouched by this rule. */
   973     load_imm32( R_EAX, imm );
   974     store_reg( R_EAX, Rn );
   975 :}
   976 MOV.B Rm, @Rn {:  /* Store the low byte of Rm at address Rn. */
   977     load_reg( R_EAX, Rm );
   978     load_reg( R_ECX, Rn );
   979     MEM_WRITE_BYTE( R_ECX, R_EAX );
   980     sh4_x86.tstate = TSTATE_NONE;
   981 :}
   982 MOV.B Rm, @-Rn {:  /* Pre-decrement store: Rn -= 1, then store the low byte of Rm at Rn. */
   983     load_reg( R_EAX, Rm );
   984     load_reg( R_ECX, Rn );
   985     ADD_imm8s_r32( -1, R_ECX );
   986     store_reg( R_ECX, Rn ); // Rn is updated before the write is issued
   987     MEM_WRITE_BYTE( R_ECX, R_EAX );
   988     sh4_x86.tstate = TSTATE_NONE;
   989 :}
   990 MOV.B Rm, @(R0, Rn) {:  /* Store the low byte of Rm at address R0+Rn. */
   991     load_reg( R_EAX, 0 );
   992     load_reg( R_ECX, Rn );
   993     ADD_r32_r32( R_EAX, R_ECX );
   994     load_reg( R_EAX, Rm ); // EAX is reused for the data once the address is in ECX
   995     MEM_WRITE_BYTE( R_ECX, R_EAX );
   996     sh4_x86.tstate = TSTATE_NONE;
   997 :}
   998 MOV.B R0, @(disp, GBR) {:  /* Store the low byte of R0 at address GBR+disp. */
   999     load_reg( R_EAX, 0 );
  1000     load_spreg( R_ECX, R_GBR );
  1001     ADD_imm32_r32( disp, R_ECX );
  1002     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1003     sh4_x86.tstate = TSTATE_NONE;
  1004 :}
  1005 MOV.B R0, @(disp, Rn) {:  /* Store the low byte of R0 at address Rn+disp. */
  1006     load_reg( R_EAX, 0 );
  1007     load_reg( R_ECX, Rn );
  1008     ADD_imm32_r32( disp, R_ECX );
  1009     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1010     sh4_x86.tstate = TSTATE_NONE;
  1011 :}
  1012 MOV.B @Rm, Rn {:  /* Rn = byte read from address Rm (extension is up to MEM_READ_BYTE). */
  1013     load_reg( R_ECX, Rm );
  1014     MEM_READ_BYTE( R_ECX, R_EAX );
  1015     store_reg( R_EAX, Rn );
  1016     sh4_x86.tstate = TSTATE_NONE;
  1017 :}
  1018 MOV.B @Rm+, Rn {:  /* Post-increment load: Rn = byte at Rm, Rm += 1. */
  1019     load_reg( R_ECX, Rm );
  1020     MOV_r32_r32( R_ECX, R_EAX );
  1021     ADD_imm8s_r32( 1, R_EAX );
  1022     store_reg( R_EAX, Rm ); // Rm is updated first, so when Rm == Rn the loaded value wins
  1023     MEM_READ_BYTE( R_ECX, R_EAX );
  1024     store_reg( R_EAX, Rn );
  1025     sh4_x86.tstate = TSTATE_NONE;
  1026 :}
  1027 MOV.B @(R0, Rm), Rn {:  /* Rn = byte read from address R0+Rm. */
  1028     load_reg( R_EAX, 0 );
  1029     load_reg( R_ECX, Rm );
  1030     ADD_r32_r32( R_EAX, R_ECX );
  1031     MEM_READ_BYTE( R_ECX, R_EAX );
  1032     store_reg( R_EAX, Rn );
  1033     sh4_x86.tstate = TSTATE_NONE;
  1034 :}
  1035 MOV.B @(disp, GBR), R0 {:  /* R0 = byte read from address GBR+disp. */
  1036     load_spreg( R_ECX, R_GBR );
  1037     ADD_imm32_r32( disp, R_ECX );
  1038     MEM_READ_BYTE( R_ECX, R_EAX );
  1039     store_reg( R_EAX, 0 );
  1040     sh4_x86.tstate = TSTATE_NONE;
  1041 :}
  1042 MOV.B @(disp, Rm), R0 {:  /* R0 = byte read from address Rm+disp. */
  1043     load_reg( R_ECX, Rm );
  1044     ADD_imm32_r32( disp, R_ECX );
  1045     MEM_READ_BYTE( R_ECX, R_EAX );
  1046     store_reg( R_EAX, 0 );
  1047     sh4_x86.tstate = TSTATE_NONE;
  1048 :}
  1049 MOV.L Rm, @Rn {:  /* Store Rm as a 32-bit long at address Rn, after a write-alignment check. */
  1050     load_reg( R_EAX, Rm );
  1051     load_reg( R_ECX, Rn );
  1052     check_walign32(R_ECX);
  1053     MEM_WRITE_LONG( R_ECX, R_EAX );
  1054     sh4_x86.tstate = TSTATE_NONE;
  1055 :}
  1056 MOV.L Rm, @-Rn {:  /* Pre-decrement store: Rn -= 4, then store Rm as a long at Rn. */
  1057     load_reg( R_EAX, Rm );
  1058     load_reg( R_ECX, Rn );
  1059     check_walign32( R_ECX ); // checked before the decrement; subtracting 4 keeps the low two bits
  1060     ADD_imm8s_r32( -4, R_ECX );
  1061     store_reg( R_ECX, Rn );
  1062     MEM_WRITE_LONG( R_ECX, R_EAX );
  1063     sh4_x86.tstate = TSTATE_NONE;
  1064 :}
  1065 MOV.L Rm, @(R0, Rn) {:  /* Store Rm as a long at address R0+Rn. */
  1066     load_reg( R_EAX, 0 );
  1067     load_reg( R_ECX, Rn );
  1068     ADD_r32_r32( R_EAX, R_ECX );
  1069     check_walign32( R_ECX );
  1070     load_reg( R_EAX, Rm ); // EAX is reused for the data once the address is in ECX
  1071     MEM_WRITE_LONG( R_ECX, R_EAX );
  1072     sh4_x86.tstate = TSTATE_NONE;
  1073 :}
  1074 MOV.L R0, @(disp, GBR) {:  /* Store R0 as a long at address GBR+disp. */
  1075     load_spreg( R_ECX, R_GBR );
  1076     load_reg( R_EAX, 0 );
  1077     ADD_imm32_r32( disp, R_ECX );
  1078     check_walign32( R_ECX );
  1079     MEM_WRITE_LONG( R_ECX, R_EAX );
  1080     sh4_x86.tstate = TSTATE_NONE;
  1081 :}
  1082 MOV.L Rm, @(disp, Rn) {:  /* Store Rm as a long at address Rn+disp. */
  1083     load_reg( R_ECX, Rn );
  1084     load_reg( R_EAX, Rm );
  1085     ADD_imm32_r32( disp, R_ECX );
  1086     check_walign32( R_ECX );
  1087     MEM_WRITE_LONG( R_ECX, R_EAX );
  1088     sh4_x86.tstate = TSTATE_NONE;
  1089 :}
  1090 MOV.L @Rm, Rn {:  /* Rn = long read from address Rm, after a read-alignment check. */
  1091     load_reg( R_ECX, Rm );
  1092     check_ralign32( R_ECX );
  1093     MEM_READ_LONG( R_ECX, R_EAX );
  1094     store_reg( R_EAX, Rn );
  1095     sh4_x86.tstate = TSTATE_NONE;
  1096 :}
  1097 MOV.L @Rm+, Rn {:  /* Post-increment load: Rn = long at Rm, Rm += 4. */
  1098     load_reg( R_EAX, Rm );
  1099     check_ralign32( R_EAX );
  1100     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  1101     ADD_imm8s_r32( 4, R_EAX );
  1102     store_reg( R_EAX, Rm ); // Rm is updated first, so when Rm == Rn the loaded value wins
  1103     MEM_READ_LONG( R_ECX, R_EAX );
  1104     store_reg( R_EAX, Rn );
  1105     sh4_x86.tstate = TSTATE_NONE;
  1106 :}
  1107 MOV.L @(R0, Rm), Rn {:  /* Rn = long read from address R0+Rm. */
  1108     load_reg( R_EAX, 0 );
  1109     load_reg( R_ECX, Rm );
  1110     ADD_r32_r32( R_EAX, R_ECX );
  1111     check_ralign32( R_ECX );
  1112     MEM_READ_LONG( R_ECX, R_EAX );
  1113     store_reg( R_EAX, Rn );
  1114     sh4_x86.tstate = TSTATE_NONE;
  1115 :}
  1116 MOV.L @(disp, GBR), R0 {:  /* R0 = long read from address GBR+disp. */
  1117     load_spreg( R_ECX, R_GBR );
  1118     ADD_imm32_r32( disp, R_ECX );
  1119     check_ralign32( R_ECX );
  1120     MEM_READ_LONG( R_ECX, R_EAX );
  1121     store_reg( R_EAX, 0 );
  1122     sh4_x86.tstate = TSTATE_NONE;
  1123 :}
  1124 MOV.L @(disp, PC), Rn {:  /* PC-relative literal load: Rn = long at ((pc & ~3) + disp + 4). Illegal in a delay slot. */
  1125     if( sh4_x86.in_delay_slot ) {
  1126 	SLOTILLEGAL();
  1127     } else {
  1128 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4; // address is known at translation time
  1129 	sh4ptr_t ptr = sh4_get_region_by_vma(target);
  1130 	if( ptr != NULL ) {
  1131 	    MOV_moff32_EAX( ptr ); // region is directly addressable: read it straight from the host pointer
  1132 	} else {
  1133 	    load_imm32( R_ECX, target ); // otherwise go through the normal memory read path
  1134 	    MEM_READ_LONG( R_ECX, R_EAX );
  1135 	}
  1136 	store_reg( R_EAX, Rn );
  1137 	sh4_x86.tstate = TSTATE_NONE;
  1138     }
  1139 :}
  1140 MOV.L @(disp, Rm), Rn {:  /* Rn = long read from address Rm+disp. */
  1141     load_reg( R_ECX, Rm );
  1142     ADD_imm8s_r32( disp, R_ECX ); // NOTE(review): 8-bit signed immediate form - assumes disp fits in imm8; confirm against the decoder
  1143     check_ralign32( R_ECX );
  1144     MEM_READ_LONG( R_ECX, R_EAX );
  1145     store_reg( R_EAX, Rn );
  1146     sh4_x86.tstate = TSTATE_NONE;
  1147 :}
  1148 MOV.W Rm, @Rn {:  /* Store the low 16 bits of Rm at address Rn, after a write-alignment check. */
  1149     load_reg( R_ECX, Rn );
  1150     check_walign16( R_ECX );
  1151     load_reg( R_EAX, Rm );
  1152     MEM_WRITE_WORD( R_ECX, R_EAX );
  1153     sh4_x86.tstate = TSTATE_NONE;
  1154 :}
  1155 MOV.W Rm, @-Rn {:  /* Pre-decrement store: Rn -= 2, then store the low 16 bits of Rm at Rn. */
  1156     load_reg( R_ECX, Rn );
  1157     check_walign16( R_ECX ); // checked before the decrement; subtracting 2 keeps the low bit
  1158     load_reg( R_EAX, Rm );
  1159     ADD_imm8s_r32( -2, R_ECX );
  1160     store_reg( R_ECX, Rn );
  1161     MEM_WRITE_WORD( R_ECX, R_EAX );
  1162     sh4_x86.tstate = TSTATE_NONE;
  1163 :}
  1164 MOV.W Rm, @(R0, Rn) {:  /* Store the low 16 bits of Rm at address R0+Rn. */
  1165     load_reg( R_EAX, 0 );
  1166     load_reg( R_ECX, Rn );
  1167     ADD_r32_r32( R_EAX, R_ECX );
  1168     check_walign16( R_ECX );
  1169     load_reg( R_EAX, Rm ); // EAX is reused for the data once the address is in ECX
  1170     MEM_WRITE_WORD( R_ECX, R_EAX );
  1171     sh4_x86.tstate = TSTATE_NONE;
  1172 :}
  1173 MOV.W R0, @(disp, GBR) {:  /* Store the low 16 bits of R0 at address GBR+disp. */
  1174     load_spreg( R_ECX, R_GBR );
  1175     load_reg( R_EAX, 0 );
  1176     ADD_imm32_r32( disp, R_ECX );
  1177     check_walign16( R_ECX );
  1178     MEM_WRITE_WORD( R_ECX, R_EAX );
  1179     sh4_x86.tstate = TSTATE_NONE;
  1180 :}
  1181 MOV.W R0, @(disp, Rn) {:  /* Store the low 16 bits of R0 at address Rn+disp. */
  1182     load_reg( R_ECX, Rn );
  1183     load_reg( R_EAX, 0 );
  1184     ADD_imm32_r32( disp, R_ECX );
  1185     check_walign16( R_ECX );
  1186     MEM_WRITE_WORD( R_ECX, R_EAX );
  1187     sh4_x86.tstate = TSTATE_NONE;
  1188 :}
  1189 MOV.W @Rm, Rn {:  /* Rn = 16-bit word read from address Rm (extension is up to MEM_READ_WORD). */
  1190     load_reg( R_ECX, Rm );
  1191     check_ralign16( R_ECX );
  1192     MEM_READ_WORD( R_ECX, R_EAX );
  1193     store_reg( R_EAX, Rn );
  1194     sh4_x86.tstate = TSTATE_NONE;
  1195 :}
  1196 MOV.W @Rm+, Rn {:  /* Post-increment load: Rn = word at Rm, Rm += 2. */
  1197     load_reg( R_EAX, Rm );
  1198     check_ralign16( R_EAX );
  1199     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  1200     ADD_imm8s_r32( 2, R_EAX );
  1201     store_reg( R_EAX, Rm ); // Rm is updated first, so when Rm == Rn the loaded value wins
  1202     MEM_READ_WORD( R_ECX, R_EAX );
  1203     store_reg( R_EAX, Rn );
  1204     sh4_x86.tstate = TSTATE_NONE;
  1205 :}
  1206 MOV.W @(R0, Rm), Rn {:  /* Rn = word read from address R0+Rm. */
  1207     load_reg( R_EAX, 0 );
  1208     load_reg( R_ECX, Rm );
  1209     ADD_r32_r32( R_EAX, R_ECX );
  1210     check_ralign16( R_ECX );
  1211     MEM_READ_WORD( R_ECX, R_EAX );
  1212     store_reg( R_EAX, Rn );
  1213     sh4_x86.tstate = TSTATE_NONE;
  1214 :}
  1215 MOV.W @(disp, GBR), R0 {:  /* R0 = word read from address GBR+disp. */
  1216     load_spreg( R_ECX, R_GBR );
  1217     ADD_imm32_r32( disp, R_ECX );
  1218     check_ralign16( R_ECX );
  1219     MEM_READ_WORD( R_ECX, R_EAX );
  1220     store_reg( R_EAX, 0 );
  1221     sh4_x86.tstate = TSTATE_NONE;
  1222 :}
  1223 MOV.W @(disp, PC), Rn {:  /* PC-relative literal load: Rn = word at (pc + disp + 4). Illegal in a delay slot. */
  1224     if( sh4_x86.in_delay_slot ) {
  1225 	SLOTILLEGAL();
  1226     } else {
  1227 	load_imm32( R_ECX, pc + disp + 4 ); // address is known at translation time (no PC masking, unlike MOV.L)
  1228 	MEM_READ_WORD( R_ECX, R_EAX );
  1229 	store_reg( R_EAX, Rn );
  1230 	sh4_x86.tstate = TSTATE_NONE;
  1231     }
  1232 :}
  1233 MOV.W @(disp, Rm), R0 {:  /* R0 = word read from address Rm+disp. */
  1234     load_reg( R_ECX, Rm );
  1235     ADD_imm32_r32( disp, R_ECX );
  1236     check_ralign16( R_ECX );
  1237     MEM_READ_WORD( R_ECX, R_EAX );
  1238     store_reg( R_EAX, 0 );
  1239     sh4_x86.tstate = TSTATE_NONE;
  1240 :}
  1241 MOVA @(disp, PC), R0 {:  /* R0 = (pc & ~3) + disp + 4, computed at translation time. Illegal in a delay slot. */
  1242     if( sh4_x86.in_delay_slot ) {
  1243 	SLOTILLEGAL();
  1244     } else {
  1245 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
  1246 	store_reg( R_ECX, 0 );
  1247     }
  1248 :}
  1249 MOVCA.L R0, @Rn {:  /* Translated exactly like a plain long store of R0 to address Rn. */
  1250     load_reg( R_EAX, 0 );
  1251     load_reg( R_ECX, Rn );
  1252     check_walign32( R_ECX );
  1253     MEM_WRITE_LONG( R_ECX, R_EAX );
  1254     sh4_x86.tstate = TSTATE_NONE;
  1255 :}
  1257 /* Control transfer instructions */
  1258 BF disp {:  /* Branch to pc+4+disp when T is clear (no delay slot). Illegal in a delay slot. */
  1259     if( sh4_x86.in_delay_slot ) {
  1260 	SLOTILLEGAL();
  1261     } else {
  1262 	JT_rel8( EXIT_BLOCK_SIZE, nottaken ); // T set: hop over the block exit emitted next
  1263 	exit_block( disp + pc + 4, pc+2 );
  1264 	JMP_TARGET(nottaken);
  1265 	return 2;
  1266     }
  1267 :}
  1268 BF/S disp {:  /* Delayed branch to pc+4+disp when T is clear. The delay-slot instruction is translated on both paths. */
  1269     if( sh4_x86.in_delay_slot ) {
  1270 	SLOTILLEGAL();
  1271     } else {
  1272 	sh4_x86.in_delay_slot = TRUE;
  1273 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1274 	    CMP_imm8s_sh4r( 1, R_T ); // no live flags mirror T: compare it against 1 so that "equal" means T set
  1275 	    sh4_x86.tstate = TSTATE_E;
  1276 	}
  1277 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32 on the condition mirroring T: T set => skip to "not taken"
  1278 	sh4_translate_instruction(pc+2);
  1279 	exit_block( disp + pc + 4, pc+4 );
  1280 	// not taken
  1281 	*patch = (xlat_output - ((uint8_t *)patch)) - 4; // back-patch the rel32, measured from the end of the displacement field
  1282 	sh4_translate_instruction(pc+2);
  1283 	return 4;
  1284     }
  1285 :}
  1286 BRA disp {:  /* Unconditional delayed branch to pc+4+disp. Illegal in a delay slot. */
  1287     if( sh4_x86.in_delay_slot ) {
  1288 	SLOTILLEGAL();
  1289     } else {
  1290 	sh4_x86.in_delay_slot = TRUE;
  1291 	sh4_translate_instruction( pc + 2 ); // delay-slot instruction runs before the exit
  1292 	exit_block( disp + pc + 4, pc+4 );
  1293 	sh4_x86.branch_taken = TRUE;
  1294 	return 4;
  1295     }
  1296 :}
  1297 BRAF Rn {:  /* Delayed branch to pc+4+Rn. Illegal in a delay slot. */
  1298     if( sh4_x86.in_delay_slot ) {
  1299 	SLOTILLEGAL();
  1300     } else {
  1301 	load_reg( R_EAX, Rn );
  1302 	ADD_imm32_r32( pc + 4, R_EAX );
  1303 	store_spreg( R_EAX, REG_OFFSET(pc) ); // target is stored before the delay slot, so the slot cannot change it
  1304 	sh4_x86.in_delay_slot = TRUE;
  1305 	sh4_x86.tstate = TSTATE_NONE; // the ADD above replaced the flags
  1306 	sh4_translate_instruction( pc + 2 );
  1307 	exit_block_pcset(pc+2);
  1308 	sh4_x86.branch_taken = TRUE;
  1309 	return 4;
  1310     }
  1311 :}
  1312 BSR disp {:  /* Delayed subroutine call: PR = pc+4, then branch to pc+4+disp. Illegal in a delay slot. */
  1313     if( sh4_x86.in_delay_slot ) {
  1314 	SLOTILLEGAL();
  1315     } else {
  1316 	load_imm32( R_EAX, pc + 4 );
  1317 	store_spreg( R_EAX, R_PR );
  1318 	sh4_x86.in_delay_slot = TRUE;
  1319 	sh4_translate_instruction( pc + 2 );
  1320 	exit_block( disp + pc + 4, pc+4 );
  1321 	sh4_x86.branch_taken = TRUE;
  1322 	return 4;
  1323     }
  1324 :}
  1325 BSRF Rn {:  /* Delayed subroutine call: PR = pc+4, then branch to pc+4+Rn. Illegal in a delay slot. */
  1326     if( sh4_x86.in_delay_slot ) {
  1327 	SLOTILLEGAL();
  1328     } else {
  1329 	load_imm32( R_ECX, pc + 4 );
  1330 	store_spreg( R_ECX, R_PR );
  1331 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX ); // ECX = pc+4 + Rn
  1332 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1333 	sh4_x86.in_delay_slot = TRUE;
  1334 	sh4_x86.tstate = TSTATE_NONE; // the ADD above replaced the flags
  1335 	sh4_translate_instruction( pc + 2 );
  1336 	exit_block_pcset(pc+2);
  1337 	sh4_x86.branch_taken = TRUE;
  1338 	return 4;
  1339     }
  1340 :}
  1341 BT disp {:  /* Branch to pc+4+disp when T is set (no delay slot). Illegal in a delay slot. */
  1342     if( sh4_x86.in_delay_slot ) {
  1343 	SLOTILLEGAL();
  1344     } else {
  1345 	JF_rel8( EXIT_BLOCK_SIZE, nottaken ); // T clear: hop over the block exit emitted next
  1346 	exit_block( disp + pc + 4, pc+2 );
  1347 	JMP_TARGET(nottaken);
  1348 	return 2;
  1349     }
  1350 :}
  1351 BT/S disp {:  /* Delayed branch to pc+4+disp when T is set. The delay-slot instruction is translated on both paths. */
  1352     if( sh4_x86.in_delay_slot ) {
  1353 	SLOTILLEGAL();
  1354     } else {
  1355 	sh4_x86.in_delay_slot = TRUE;
  1356 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1357 	    CMP_imm8s_sh4r( 1, R_T ); // no live flags mirror T: compare it against 1 so that "equal" means T set
  1358 	    sh4_x86.tstate = TSTATE_E;
  1359 	}
  1360 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32 on the inverted condition (^1): T clear => skip to "not taken"
  1361 	sh4_translate_instruction(pc+2);
  1362 	exit_block( disp + pc + 4, pc+4 );
  1363 	// not taken
  1364 	*patch = (xlat_output - ((uint8_t *)patch)) - 4; // back-patch the rel32, measured from the end of the displacement field
  1365 	sh4_translate_instruction(pc+2);
  1366 	return 4;
  1367     }
  1368 :}
  1369 JMP @Rn {:  /* Delayed jump to the address held in Rn. Illegal in a delay slot. */
  1370     if( sh4_x86.in_delay_slot ) {
  1371 	SLOTILLEGAL();
  1372     } else {
  1373 	load_reg( R_ECX, Rn );
  1374 	store_spreg( R_ECX, REG_OFFSET(pc) ); // target is stored before the delay slot, so the slot cannot change it
  1375 	sh4_x86.in_delay_slot = TRUE;
  1376 	sh4_translate_instruction(pc+2);
  1377 	exit_block_pcset(pc+2);
  1378 	sh4_x86.branch_taken = TRUE;
  1379 	return 4;
  1380     }
  1381 :}
  1382 JSR @Rn {:  /* Delayed subroutine call: PR = pc+4, then jump to the address held in Rn. Illegal in a delay slot. */
  1383     if( sh4_x86.in_delay_slot ) {
  1384 	SLOTILLEGAL();
  1385     } else {
  1386 	load_imm32( R_EAX, pc + 4 );
  1387 	store_spreg( R_EAX, R_PR );
  1388 	load_reg( R_ECX, Rn );
  1389 	store_spreg( R_ECX, REG_OFFSET(pc) ); // target is stored before the delay slot, so the slot cannot change it
  1390 	sh4_x86.in_delay_slot = TRUE;
  1391 	sh4_translate_instruction(pc+2);
  1392 	exit_block_pcset(pc+2);
  1393 	sh4_x86.branch_taken = TRUE;
  1394 	return 4;
  1395     }
  1396 :}
  1397 RTE {:  /* Return from exception (privileged): pc = SPC, SR = SSR via sh4_write_sr, then the delay slot. */
  1398     if( sh4_x86.in_delay_slot ) {
  1399 	SLOTILLEGAL();
  1400     } else {
  1401 	check_priv();
  1402 	load_spreg( R_ECX, R_SPC );
  1403 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1404 	load_spreg( R_EAX, R_SSR );
  1405 	call_func1( sh4_write_sr, R_EAX );
  1406 	sh4_x86.in_delay_slot = TRUE;
  1407 	sh4_x86.priv_checked = FALSE; // SR was just rewritten, so cached privilege/FPU-enable checks are stale
  1408 	sh4_x86.fpuen_checked = FALSE;
  1409 	sh4_x86.tstate = TSTATE_NONE;
  1410 	sh4_translate_instruction(pc+2);
  1411 	exit_block_pcset(pc+2);
  1412 	sh4_x86.branch_taken = TRUE;
  1413 	return 4;
  1414     }
  1415 :}
  1416 RTS {:  /* Delayed return from subroutine: pc = PR. Illegal in a delay slot. */
  1417     if( sh4_x86.in_delay_slot ) {
  1418 	SLOTILLEGAL();
  1419     } else {
  1420 	load_spreg( R_ECX, R_PR );
  1421 	store_spreg( R_ECX, REG_OFFSET(pc) ); // target is stored before the delay slot, so the slot cannot change it
  1422 	sh4_x86.in_delay_slot = TRUE;
  1423 	sh4_translate_instruction(pc+2);
  1424 	exit_block_pcset(pc+2);
  1425 	sh4_x86.branch_taken = TRUE;
  1426 	return 4;
  1427     }
  1428 :}
  1429 TRAPA #imm {:  /* Software trap: store pc+2 as the current pc, call sh4_raise_trap(imm), then leave the block. */
  1430     if( sh4_x86.in_delay_slot ) {
  1431 	SLOTILLEGAL();
  1432     } else {
  1433 	load_imm32( R_ECX, pc+2 ); // address of the instruction following the TRAPA
  1434 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1435 	load_imm32( R_EAX, imm );
  1436 	call_func1( sh4_raise_trap, R_EAX );
  1437 	sh4_x86.tstate = TSTATE_NONE;
  1438 	exit_block_pcset(pc);
  1439 	sh4_x86.branch_taken = TRUE;
  1440 	return 2;
  1441     }
  1442 :}
  1443 UNDEF {:  
  1444     if( sh4_x86.in_delay_slot ) {
  1445 	SLOTILLEGAL();
  1446     } else {
  1447 	JMP_exc(EXC_ILLEGAL);
  1448 	return 2;
  1450 :}
  1452 CLRMAC {:  /* MACH = MACL = 0 */
  1453     XOR_r32_r32(R_EAX, R_EAX);
  1454     store_spreg( R_EAX, R_MACL );
  1455     store_spreg( R_EAX, R_MACH );
  1456     sh4_x86.tstate = TSTATE_NONE; // the XOR replaced the flags
  1457 :}
  1458 CLRS {:  /* S = 0 (written through the carry flag). */
  1459     CLC();
  1460     SETC_sh4r(R_S);
  1461     sh4_x86.tstate = TSTATE_NONE; // carry now mirrors S, not T, so it must not be reused by BT/BF
  1462 :}
  1463 CLRT {:  /* T = 0 (written through the carry flag). */
  1464     CLC();
  1465     SETC_t();
  1466     sh4_x86.tstate = TSTATE_C; // the x86 carry flag now mirrors T
  1467 :}
  1468 SETS {:  /* S = 1 (written through the carry flag). */
  1469     STC();
  1470     SETC_sh4r(R_S);
  1471     sh4_x86.tstate = TSTATE_NONE; // carry now mirrors S, not T, so it must not be reused by BT/BF
  1472 :}
  1473 SETT {:  /* T = 1 (written through the carry flag). */
  1474     STC();
  1475     SETC_t();
  1476     sh4_x86.tstate = TSTATE_C; // the x86 carry flag now mirrors T
  1477 :}
  1479 /* Floating point moves */
  1480 FMOV FRm, FRn {:  
  1481     /* As horrible as this looks, it's actually covering 5 separate cases,
  1482      * selected by the FPSCR.SZ bit tested below: 1. 32-bit fr-to-fr (SZ=0)
  1483      * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
  1484      * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
  1485      * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
  1486      * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
  1487      */
  1488     check_fpuen();
  1489     load_spreg( R_ECX, R_FPSCR );
  1490     load_fr_bank( R_EDX );
  1491     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1492     JNE_rel8(8, doublesize);
  1493     load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
  1494     store_fr( R_EDX, R_EAX, FRn );
  1495     if( FRm&1 ) {
  1496 	JMP_rel8(24, end); // hard-coded byte count of the SZ=1 sequence emitted below
  1497 	JMP_TARGET(doublesize);
  1498 	load_xf_bank( R_ECX ); // ECX still holds FPSCR here; afterwards it is the XF bank pointer
  1499 	load_fr( R_ECX, R_EAX, FRm-1 );
  1500 	if( FRn&1 ) {
  1501 	    load_fr( R_ECX, R_EDX, FRm );
  1502 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1503 	    store_fr( R_ECX, R_EDX, FRn );
  1504 	} else /* FRn&1 == 0 */ {
  1505 	    load_fr( R_ECX, R_ECX, FRm );
  1506 	    store_fr( R_EDX, R_EAX, FRn );
  1507 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1508 	}
  1509 	JMP_TARGET(end);
  1510     } else /* FRm&1 == 0 */ {
  1511 	if( FRn&1 ) {
  1512 	    JMP_rel8(24, end); // NOTE(review): no JMP_TARGET(doublesize) marker follows in this branch
  1513 	    load_xf_bank( R_ECX );
  1514 	    load_fr( R_EDX, R_EAX, FRm );
  1515 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1516 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1517 	    store_fr( R_ECX, R_EDX, FRn );
  1518 	    JMP_TARGET(end);
  1519 	} else /* FRn&1 == 0 */ {
  1520 	    JMP_rel8(12, end); // NOTE(review): no JMP_TARGET(doublesize) marker follows in this branch
  1521 	    load_fr( R_EDX, R_EAX, FRm );
  1522 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1523 	    store_fr( R_EDX, R_EAX, FRn );
  1524 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1525 	    JMP_TARGET(end);
  1526 	}
  1527     }
  1528     sh4_x86.tstate = TSTATE_NONE;
  1529 :}
  1530 FMOV FRm, @Rn {:  /* FP store to @Rn: 32-bit when FPSCR.SZ=0, 64-bit pair when SZ=1 (odd FRm selects the XF bank). */
  1531     check_fpuen();
  1532     load_reg( R_ECX, Rn );
  1533     check_walign32( R_ECX );
  1534     load_spreg( R_EDX, R_FPSCR );
  1535     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1536     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1537     load_fr_bank( R_EDX );
  1538     load_fr( R_EDX, R_EAX, FRm );
  1539     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1540     if( FRm&1 ) {
  1541 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1542 	JMP_TARGET(doublesize);
  1543 	load_xf_bank( R_EDX ); // EDX still holds FPSCR on this path
  1544 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1545 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1546 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1547 	JMP_TARGET(end);
  1548     } else {
  1549 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1550 	JMP_TARGET(doublesize);
  1551 	load_fr_bank( R_EDX );
  1552 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1553 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1554 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1555 	JMP_TARGET(end);
  1556     }
  1557     sh4_x86.tstate = TSTATE_NONE;
  1558 :}
  1559 FMOV @Rm, FRn {:  /* FP load from @Rm: 32-bit when FPSCR.SZ=0, 64-bit pair when SZ=1 (odd FRn selects the XF bank). */
  1560     check_fpuen();
  1561     load_reg( R_ECX, Rm );
  1562     check_ralign32( R_ECX );
  1563     load_spreg( R_EDX, R_FPSCR );
  1564     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1565     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1566     MEM_READ_LONG( R_ECX, R_EAX );
  1567     load_fr_bank( R_EDX );
  1568     store_fr( R_EDX, R_EAX, FRn );
  1569     if( FRn&1 ) {
  1570 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1571 	JMP_TARGET(doublesize);
  1572 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1573 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1574 	load_xf_bank( R_EDX );
  1575 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1576 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1577 	JMP_TARGET(end);
  1578     } else {
  1579 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1580 	JMP_TARGET(doublesize);
  1581 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1582 	load_fr_bank( R_EDX );
  1583 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1584 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1585 	JMP_TARGET(end);
  1586     }
  1587     sh4_x86.tstate = TSTATE_NONE;
  1588 :}
  1589 FMOV FRm, @-Rn {:  /* Pre-decrement FP store: Rn -= 4 (SZ=0) or 8 (SZ=1), then store FRm / the register pair at Rn. */
  1590     check_fpuen();
  1591     load_reg( R_ECX, Rn );
  1592     check_walign32( R_ECX );
  1593     load_spreg( R_EDX, R_FPSCR );
  1594     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1595     JNE_rel8(14 + MEM_WRITE_SIZE, doublesize);
  1596     load_fr_bank( R_EDX );
  1597     load_fr( R_EDX, R_EAX, FRm );
  1598     ADD_imm8s_r32(-4,R_ECX);
  1599     store_reg( R_ECX, Rn );
  1600     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1601     if( FRm&1 ) {
  1602 	JMP_rel8( 24 + MEM_WRITE_DOUBLE_SIZE, end );
  1603 	JMP_TARGET(doublesize);
  1604 	load_xf_bank( R_EDX ); // EDX still holds FPSCR on this path
  1605 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1606 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1607 	ADD_imm8s_r32(-8,R_ECX);
  1608 	store_reg( R_ECX, Rn );
  1609 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1610 	JMP_TARGET(end);
  1611     } else {
  1612 	JMP_rel8( 15 + MEM_WRITE_DOUBLE_SIZE, end );
  1613 	JMP_TARGET(doublesize);
  1614 	load_fr_bank( R_EDX );
  1615 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1616 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1617 	ADD_imm8s_r32(-8,R_ECX);
  1618 	store_reg( R_ECX, Rn );
  1619 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1620 	JMP_TARGET(end);
  1621     }
  1622     sh4_x86.tstate = TSTATE_NONE;
  1623 :}
  1624 FMOV @Rm+, FRn {:  /* Post-increment FP load: read FRn / the register pair from @Rm, Rm += 4 (SZ=0) or 8 (SZ=1). */
  1625     check_fpuen();
  1626     load_reg( R_ECX, Rm );
  1627     check_ralign32( R_ECX );
  1628     MOV_r32_r32( R_ECX, R_EAX ); // EAX becomes the incremented Rm, ECX stays the read address
  1629     load_spreg( R_EDX, R_FPSCR );
  1630     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1631     JNE_rel8(14 + MEM_READ_SIZE, doublesize);
  1632     ADD_imm8s_r32( 4, R_EAX );
  1633     store_reg( R_EAX, Rm );
  1634     MEM_READ_LONG( R_ECX, R_EAX );
  1635     load_fr_bank( R_EDX );
  1636     store_fr( R_EDX, R_EAX, FRn );
  1637     if( FRn&1 ) {
  1638 	JMP_rel8(27 + MEM_READ_DOUBLE_SIZE, end);
  1639 	JMP_TARGET(doublesize);
  1640 	ADD_imm8s_r32( 8, R_EAX );
  1641 	store_reg(R_EAX, Rm);
  1642 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1643 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1644 	load_xf_bank( R_EDX );
  1645 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1646 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1647 	JMP_TARGET(end);
  1648     } else {
  1649 	JMP_rel8(15 + MEM_READ_DOUBLE_SIZE, end); // NOTE(review): no JMP_TARGET(doublesize) marker follows here, unlike the odd-FRn branch
  1650 	ADD_imm8s_r32( 8, R_EAX );
  1651 	store_reg(R_EAX, Rm);
  1652 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1653 	load_fr_bank( R_EDX );
  1654 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1655 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1656 	JMP_TARGET(end);
  1657     }
  1658     sh4_x86.tstate = TSTATE_NONE;
  1659 :}
  1660 FMOV FRm, @(R0, Rn) {:  /* FP store to address R0+Rn: 32-bit when FPSCR.SZ=0, 64-bit pair when SZ=1. */
  1661     check_fpuen();
  1662     load_reg( R_ECX, Rn );
  1663     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  1664     check_walign32( R_ECX );
  1665     load_spreg( R_EDX, R_FPSCR );
  1666     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1667     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1668     load_fr_bank( R_EDX );
  1669     load_fr( R_EDX, R_EAX, FRm );
  1670     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1671     if( FRm&1 ) {
  1672 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1673 	JMP_TARGET(doublesize);
  1674 	load_xf_bank( R_EDX ); // EDX still holds FPSCR on this path
  1675 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1676 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1677 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1678 	JMP_TARGET(end);
  1679     } else {
  1680 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1681 	JMP_TARGET(doublesize);
  1682 	load_fr_bank( R_EDX );
  1683 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1684 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1685 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1686 	JMP_TARGET(end);
  1687     }
  1688     sh4_x86.tstate = TSTATE_NONE;
  1689 :}
  1690 FMOV @(R0, Rm), FRn {:  /* FP load from address R0+Rm: 32-bit when FPSCR.SZ=0, 64-bit pair when SZ=1. */
  1691     check_fpuen();
  1692     load_reg( R_ECX, Rm );
  1693     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  1694     check_ralign32( R_ECX );
  1695     load_spreg( R_EDX, R_FPSCR );
  1696     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1697     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1698     MEM_READ_LONG( R_ECX, R_EAX );
  1699     load_fr_bank( R_EDX );
  1700     store_fr( R_EDX, R_EAX, FRn );
  1701     if( FRn&1 ) {
  1702 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1703 	JMP_TARGET(doublesize);
  1704 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1705 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1706 	load_xf_bank( R_EDX );
  1707 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1708 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1709 	JMP_TARGET(end);
  1710     } else {
  1711 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1712 	JMP_TARGET(doublesize);
  1713 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1714 	load_fr_bank( R_EDX );
  1715 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1716 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1717 	JMP_TARGET(end);
  1718     }
  1719     sh4_x86.tstate = TSTATE_NONE;
  1720 :}
  1721 FLDI0 FRn {:  /* IFF PR=0: FRn = 0 (all bits clear); skipped when FPSCR.PR is set */
  1722     check_fpuen();
  1723     load_spreg( R_ECX, R_FPSCR );
  1724     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1725     JNE_rel8(8, end); // 8 = hard-coded byte count of the three emitted instructions skipped
  1726     XOR_r32_r32( R_EAX, R_EAX );
  1727     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1728     store_fr( R_ECX, R_EAX, FRn );
  1729     JMP_TARGET(end);
  1730     sh4_x86.tstate = TSTATE_NONE;
  1731 :}
  1732 FLDI1 FRn {:  /* IFF PR=0: FRn = 0x3F800000 (IEEE-754 single 1.0); skipped when FPSCR.PR is set */
  1733     check_fpuen();
  1734     load_spreg( R_ECX, R_FPSCR );
  1735     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1736     JNE_rel8(11, end); // 11 = hard-coded byte count of the three emitted instructions skipped
  1737     load_imm32(R_EAX, 0x3F800000);
  1738     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1739     store_fr( R_ECX, R_EAX, FRn );
  1740     JMP_TARGET(end);
  1741     sh4_x86.tstate = TSTATE_NONE;
  1742 :}
  1744 FLOAT FPUL, FRn {:  /* Convert the integer in FPUL to floating point: single into FRn (PR=0) or double into the FRn pair (PR=1). */
  1745     check_fpuen();
  1746     load_spreg( R_ECX, R_FPSCR );
  1747     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1748     FILD_sh4r(R_FPUL); // pushed onto the x87 stack before the precision test; popped on either path
  1749     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1750     JNE_rel8(5, doubleprec);
  1751     pop_fr( R_EDX, FRn );
  1752     JMP_rel8(3, end);
  1753     JMP_TARGET(doubleprec);
  1754     pop_dr( R_EDX, FRn );
  1755     JMP_TARGET(end);
  1756     sh4_x86.tstate = TSTATE_NONE;
  1757 :}
  1758 FTRC FRm, FPUL {:  /* FPUL = FRm (single or double per FPSCR.PR) truncated to an integer, saturating at max_int / min_int. */
  1759     check_fpuen();
  1760     load_spreg( R_ECX, R_FPSCR );
  1761     load_fr_bank( R_EDX );
  1762     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1763     JNE_rel8(5, doubleprec);
  1764     push_fr( R_EDX, FRm );
  1765     JMP_rel8(3, doop);
  1766     JMP_TARGET(doubleprec);
  1767     push_dr( R_EDX, FRm );
  1768     JMP_TARGET( doop );
  1769     load_imm32( R_ECX, (uint32_t)&max_int ); // NOTE(review): pointer truncated to 32 bits - only valid on a 32-bit host
  1770     FILD_r32ind( R_ECX );
  1771     FCOMIP_st(1); // compare the limit against the value, popping only the limit
  1772     JNA_rel8( 32, sat ); // limit <= value: saturate, with ECX still pointing at max_int
  1773     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1774     FILD_r32ind( R_ECX );           // 2
  1775     FCOMIP_st(1);                   // 2
  1776     JAE_rel8( 21, sat2 );            // 2
  1777     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1778     FNSTCW_r32ind( R_EAX ); // save the current x87 control word
  1779     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1780     FLDCW_r32ind( R_EDX ); // switch to the control word stored in trunc_fcw for the conversion
  1781     FISTP_sh4r(R_FPUL);             // 3
  1782     FLDCW_r32ind( R_EAX ); // restore the saved control word
  1783     JMP_rel8( 9, end );             // 2
  1785     JMP_TARGET(sat);
  1786     JMP_TARGET(sat2);
  1787     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1788     store_spreg( R_ECX, R_FPUL ); // FPUL = whichever limit ECX was pointing at
  1789     FPOP_st(); // discard the unconverted value from the x87 stack
  1790     JMP_TARGET(end);
  1791     sh4_x86.tstate = TSTATE_NONE;
  1792 :}
  1793 FLDS FRm, FPUL {:  /* FPUL = raw 32-bit contents of FRm (no conversion). */
  1794     check_fpuen();
  1795     load_fr_bank( R_ECX );
  1796     load_fr( R_ECX, R_EAX, FRm );
  1797     store_spreg( R_EAX, R_FPUL );
  1798     sh4_x86.tstate = TSTATE_NONE;
  1799 :}
  1800 FSTS FPUL, FRn {:  /* FRn = raw 32-bit contents of FPUL (no conversion). */
  1801     check_fpuen();
  1802     load_fr_bank( R_ECX );
  1803     load_spreg( R_EAX, R_FPUL );
  1804     store_fr( R_ECX, R_EAX, FRn );
  1805     sh4_x86.tstate = TSTATE_NONE;
  1806 :}
  1807 FCNVDS FRm, FPUL {:  /* Double-to-single conversion: FPUL = (float)DRm; does nothing when FPSCR.PR=0. */
  1808     check_fpuen();
  1809     load_spreg( R_ECX, R_FPSCR );
  1810     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1811     JE_rel8(9, end); // only when PR=1
  1812     load_fr_bank( R_ECX );
  1813     push_dr( R_ECX, FRm );
  1814     pop_fpul();
  1815     JMP_TARGET(end);
  1816     sh4_x86.tstate = TSTATE_NONE;
  1817 :}
  1818 FCNVSD FPUL, FRn {:  /* Single-to-double conversion: DRn = (double)FPUL; does nothing when FPSCR.PR=0. */
  1819     check_fpuen();
  1820     load_spreg( R_ECX, R_FPSCR );
  1821     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1822     JE_rel8(9, end); // only when PR=1
  1823     load_fr_bank( R_ECX );
  1824     push_fpul();
  1825     pop_dr( R_ECX, FRn );
  1826     JMP_TARGET(end);
  1827     sh4_x86.tstate = TSTATE_NONE;
  1828 :}
  1830 /* Floating point instructions */
  1831 FABS FRn {:  /* FRn = |FRn|, single (PR=0) or double (PR=1), computed on the x87 stack. */
  1832     check_fpuen();
  1833     load_spreg( R_ECX, R_FPSCR );
  1834     load_fr_bank( R_EDX );
  1835     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1836     JNE_rel8(10, doubleprec); // 10 = 3+2+3+2 bytes of the single-precision sequence below
  1837     push_fr(R_EDX, FRn); // 3
  1838     FABS_st0(); // 2
  1839     pop_fr( R_EDX, FRn); //3
  1840     JMP_rel8(8,end); // 2
  1841     JMP_TARGET(doubleprec);
  1842     push_dr(R_EDX, FRn);
  1843     FABS_st0();
  1844     pop_dr(R_EDX, FRn);
  1845     JMP_TARGET(end);
  1846     sh4_x86.tstate = TSTATE_NONE;
  1847 :}
  1848 FADD FRm, FRn {:  /* FRn = FRn + FRm, single (PR=0) or double (PR=1). */
  1849     check_fpuen();
  1850     load_spreg( R_ECX, R_FPSCR );
  1851     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1852     load_fr_bank( R_EDX ); // sits between TEST and JNE, so it is relied on not to disturb the flags
  1853     JNE_rel8(13,doubleprec);
  1854     push_fr(R_EDX, FRm);
  1855     push_fr(R_EDX, FRn);
  1856     FADDP_st(1);
  1857     pop_fr(R_EDX, FRn);
  1858     JMP_rel8(11,end);
  1859     JMP_TARGET(doubleprec);
  1860     push_dr(R_EDX, FRm);
  1861     push_dr(R_EDX, FRn);
  1862     FADDP_st(1);
  1863     pop_dr(R_EDX, FRn);
  1864     JMP_TARGET(end);
  1865     sh4_x86.tstate = TSTATE_NONE;
  1866 :}
  1867 FDIV FRm, FRn {:  /* FRn = FRn / FRm, single (PR=0) or double (PR=1). */
  1868     check_fpuen();
  1869     load_spreg( R_ECX, R_FPSCR );
  1870     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1871     load_fr_bank( R_EDX ); // sits between TEST and JNE, so it is relied on not to disturb the flags
  1872     JNE_rel8(13, doubleprec);
  1873     push_fr(R_EDX, FRn); // dividend pushed first, so it ends up in st(1)
  1874     push_fr(R_EDX, FRm);
  1875     FDIVP_st(1);
  1876     pop_fr(R_EDX, FRn);
  1877     JMP_rel8(11, end);
  1878     JMP_TARGET(doubleprec);
  1879     push_dr(R_EDX, FRn);
  1880     push_dr(R_EDX, FRm);
  1881     FDIVP_st(1);
  1882     pop_dr(R_EDX, FRn);
  1883     JMP_TARGET(end);
  1884     sh4_x86.tstate = TSTATE_NONE;
  1885 :}
  1886 FMAC FR0, FRm, FRn {:  /* FRn = FR0 * FRm + FRn, done as a separate multiply and add on the x87 stack. */
  1887     check_fpuen();
  1888     load_spreg( R_ECX, R_FPSCR );
  1889     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  1890     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1891     JNE_rel8(18, doubleprec);
  1892     push_fr( R_EDX, 0 );
  1893     push_fr( R_EDX, FRm );
  1894     FMULP_st(1);
  1895     push_fr( R_EDX, FRn );
  1896     FADDP_st(1);
  1897     pop_fr( R_EDX, FRn );
  1898     JMP_rel8(16, end);
  1899     JMP_TARGET(doubleprec);
  1900     push_dr( R_EDX, 0 );
  1901     push_dr( R_EDX, FRm );
  1902     FMULP_st(1);
  1903     push_dr( R_EDX, FRn );
  1904     FADDP_st(1);
  1905     pop_dr( R_EDX, FRn );
  1906     JMP_TARGET(end);
  1907     sh4_x86.tstate = TSTATE_NONE;
  1908 :}
  1910 FMUL FRm, FRn {:  /* FRn = FRn * FRm, single (PR=0) or double (PR=1). */
  1911     check_fpuen();
  1912     load_spreg( R_ECX, R_FPSCR );
  1913     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1914     load_fr_bank( R_EDX ); // sits between TEST and JNE, so it is relied on not to disturb the flags
  1915     JNE_rel8(13, doubleprec);
  1916     push_fr(R_EDX, FRm);
  1917     push_fr(R_EDX, FRn);
  1918     FMULP_st(1);
  1919     pop_fr(R_EDX, FRn);
  1920     JMP_rel8(11, end);
  1921     JMP_TARGET(doubleprec);
  1922     push_dr(R_EDX, FRm);
  1923     push_dr(R_EDX, FRn);
  1924     FMULP_st(1);
  1925     pop_dr(R_EDX, FRn);
  1926     JMP_TARGET(end);
  1927     sh4_x86.tstate = TSTATE_NONE;
  1928 :}
  1929 FNEG FRn {:  /* FRn = -FRn, single (PR=0) or double (PR=1). */
  1930     check_fpuen();
  1931     load_spreg( R_ECX, R_FPSCR );
  1932     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1933     load_fr_bank( R_EDX ); // sits between TEST and JNE, so it is relied on not to disturb the flags
  1934     JNE_rel8(10, doubleprec);
  1935     push_fr(R_EDX, FRn);
  1936     FCHS_st0();
  1937     pop_fr(R_EDX, FRn);
  1938     JMP_rel8(8, end);
  1939     JMP_TARGET(doubleprec);
  1940     push_dr(R_EDX, FRn);
  1941     FCHS_st0();
  1942     pop_dr(R_EDX, FRn);
  1943     JMP_TARGET(end);
  1944     sh4_x86.tstate = TSTATE_NONE;
  1945 :}
  1946 FSRRA FRn {:  
           /* Emits code that replaces FRn with 1.0 divided by its square root
            * (FLD1, push FRn, FSQRT, FDIVP). The whole sequence is skipped at
            * run time when FPSCR_PR is set; there is no double-precision path. */
  1947     check_fpuen();
  1948     load_spreg( R_ECX, R_FPSCR );
  1949     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1950     load_fr_bank( R_EDX );
  1951     JNE_rel8(12, end); // PR=0 only
  1952     FLD1_st0();
  1953     push_fr(R_EDX, FRn);
  1954     FSQRT_st0();
  1955     FDIVP_st(1);
  1956     pop_fr(R_EDX, FRn);
  1957     JMP_TARGET(end);
  1958     sh4_x86.tstate = TSTATE_NONE;
  1959 :}
  1960 FSQRT FRn {:  
           /* Emits code that replaces FRn with its square root via the x87
            * FSQRT, as single or double depending on FPSCR_PR at run time. */
  1961     check_fpuen();
  1962     load_spreg( R_ECX, R_FPSCR );
  1963     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1964     load_fr_bank( R_EDX );
  1965     JNE_rel8(10, doubleprec);
  1966     push_fr(R_EDX, FRn);
  1967     FSQRT_st0();
  1968     pop_fr(R_EDX, FRn);
  1969     JMP_rel8(8, end);
  1970     JMP_TARGET(doubleprec);
  1971     push_dr(R_EDX, FRn);
  1972     FSQRT_st0();
  1973     pop_dr(R_EDX, FRn);
  1974     JMP_TARGET(end);
  1975     sh4_x86.tstate = TSTATE_NONE;
  1976 :}
  1977 FSUB FRm, FRn {:  
           /* Emits x87 code that pushes FRn then FRm, applies FSUBP_st(1) and
            * stores the result in FRn (presumably FRn - FRm -- confirm the
            * operand order of FSUBP_st). Single/double chosen by FPSCR_PR. */
  1978     check_fpuen();
  1979     load_spreg( R_ECX, R_FPSCR );
  1980     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1981     load_fr_bank( R_EDX );
  1982     JNE_rel8(13, doubleprec);
  1983     push_fr(R_EDX, FRn);
  1984     push_fr(R_EDX, FRm);
  1985     FSUBP_st(1);
  1986     pop_fr(R_EDX, FRn);
  1987     JMP_rel8(11, end);
  1988     JMP_TARGET(doubleprec);
  1989     push_dr(R_EDX, FRn);
  1990     push_dr(R_EDX, FRm);
  1991     FSUBP_st(1);
  1992     pop_dr(R_EDX, FRn);
  1993     JMP_TARGET(end);
  1994     sh4_x86.tstate = TSTATE_NONE;
  1995 :}
  1997 FCMP/EQ FRm, FRn {:  
           /* Emits code that pushes FRm and FRn (single or double, chosen by
            * FPSCR_PR), compares them with FCOMIP, records the "equal" result
            * through SETE_t and pops the remaining x87 entry. */
  1998     check_fpuen();
  1999     load_spreg( R_ECX, R_FPSCR );
  2000     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2001     load_fr_bank( R_EDX );
  2002     JNE_rel8(8, doubleprec);
  2003     push_fr(R_EDX, FRm);
  2004     push_fr(R_EDX, FRn);
  2005     JMP_rel8(6, end);
  2006     JMP_TARGET(doubleprec);
  2007     push_dr(R_EDX, FRm);
  2008     push_dr(R_EDX, FRn);
  2009     JMP_TARGET(end);
           /* Both paths join here with two values on the x87 stack. */
  2010     FCOMIP_st(1);
  2011     SETE_t();
  2012     FPOP_st();
  2013     sh4_x86.tstate = TSTATE_NONE;
  2014 :}
  2015 FCMP/GT FRm, FRn {:  
           /* Emits code that pushes FRm and FRn (single or double, chosen by
            * FPSCR_PR), compares them with FCOMIP, records the "above" result
            * through SETA_t and pops the remaining x87 entry. */
  2016     check_fpuen();
  2017     load_spreg( R_ECX, R_FPSCR );
  2018     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2019     load_fr_bank( R_EDX );
  2020     JNE_rel8(8, doubleprec);
  2021     push_fr(R_EDX, FRm);
  2022     push_fr(R_EDX, FRn);
  2023     JMP_rel8(6, end);
  2024     JMP_TARGET(doubleprec);
  2025     push_dr(R_EDX, FRm);
  2026     push_dr(R_EDX, FRn);
  2027     JMP_TARGET(end);
           /* Both paths join here with two values on the x87 stack. */
  2028     FCOMIP_st(1);
  2029     SETA_t();
  2030     FPOP_st();
  2031     sh4_x86.tstate = TSTATE_NONE;
  2032 :}
  2034 FSCA FPUL, FRn {:  
           /* Emits a run-time call to sh4_fsca with the FPUL value and the
            * address of the register at index (FRn & 0x0E) in the current FR
            * bank. The call is skipped entirely when FPSCR_PR is set. */
  2035     check_fpuen();
  2036     load_spreg( R_ECX, R_FPSCR );
  2037     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2038     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
  2039     load_fr_bank( R_ECX );
           /* Byte offset of the even-numbered register: index with the low
            * bit cleared, times 4. */
  2040     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2041     load_spreg( R_EDX, R_FPUL );
  2042     call_func2( sh4_fsca, R_EDX, R_ECX );
  2043     JMP_TARGET(doubleprec);
  2044     sh4_x86.tstate = TSTATE_NONE;
  2045 :}
  2046 FIPR FVm, FVn {:  
           /* Emits x87 code for the four-element inner product of the vectors
            * starting at register indices FVm<<2 and FVn<<2, storing the sum in
            * register (FVn<<2)+3. Skipped at run time when FPSCR_PR is set. */
  2047     check_fpuen();
  2048     load_spreg( R_ECX, R_FPSCR );
  2049     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2050     JNE_rel8(44, doubleprec);
           /* R_ECX is reused here: the FPSCR value is no longer needed. */
  2052     load_fr_bank( R_ECX );
  2053     push_fr( R_ECX, FVm<<2 );
  2054     push_fr( R_ECX, FVn<<2 );
  2055     FMULP_st(1);
  2056     push_fr( R_ECX, (FVm<<2)+1);
  2057     push_fr( R_ECX, (FVn<<2)+1);
  2058     FMULP_st(1);
  2059     FADDP_st(1);
  2060     push_fr( R_ECX, (FVm<<2)+2);
  2061     push_fr( R_ECX, (FVn<<2)+2);
  2062     FMULP_st(1);
  2063     FADDP_st(1);
  2064     push_fr( R_ECX, (FVm<<2)+3);
  2065     push_fr( R_ECX, (FVn<<2)+3);
  2066     FMULP_st(1);
  2067     FADDP_st(1);
  2068     pop_fr( R_ECX, (FVn<<2)+3);
  2069     JMP_TARGET(doubleprec);
  2070     sh4_x86.tstate = TSTATE_NONE;
  2071 :}
  2072 FTRV XMTRX, FVn {:  
           /* Emits a run-time call to sh4_ftrv with the address of vector FVn
            * (FR bank + FVn<<4 bytes) and the XF bank. Skipped when FPSCR_PR is
            * set. The trailing numeric comments are presumably emitted byte
            * counts feeding the JNE_rel8 offset -- TODO confirm. */
  2073     check_fpuen();
  2074     load_spreg( R_ECX, R_FPSCR );
  2075     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2076     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
  2077     load_fr_bank( R_EDX );                 // 3
  2078     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2079     load_xf_bank( R_ECX );                 // 12
  2080     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2081     JMP_TARGET(doubleprec);
  2082     sh4_x86.tstate = TSTATE_NONE;
  2083 :}
  2085 FRCHG {:  
           /* Emits code that toggles the FPSCR_FR bit in FPSCR, writes FPSCR
            * back and refreshes the FR bank via update_fr_bank. */
  2086     check_fpuen();
  2087     load_spreg( R_ECX, R_FPSCR );
  2088     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2089     store_spreg( R_ECX, R_FPSCR );
  2090     update_fr_bank( R_ECX );
  2091     sh4_x86.tstate = TSTATE_NONE;
  2092 :}
  2093 FSCHG {:  
           /* Emits code that toggles the FPSCR_SZ bit in FPSCR and writes
            * FPSCR back. */
  2094     check_fpuen();
  2095     load_spreg( R_ECX, R_FPSCR );
  2096     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2097     store_spreg( R_ECX, R_FPSCR );
  2098     sh4_x86.tstate = TSTATE_NONE;
  2099 :}
  2101 /* Processor control instructions */
  2102 LDC Rm, SR {:
           /* Loads Rm into SR through sh4_write_sr. Inside a delay slot the
            * rule emits SLOTILLEGAL() instead. After the write the cached
            * priv/fpuen check flags are reset so that later instructions in
            * the block re-emit their checks (presumably because the SR write
            * can change both -- confirm against sh4_write_sr). */
  2103     if( sh4_x86.in_delay_slot ) {
  2104 	SLOTILLEGAL();
  2105     } else {
  2106 	check_priv();
  2107 	load_reg( R_EAX, Rm );
  2108 	call_func1( sh4_write_sr, R_EAX );
  2109 	sh4_x86.priv_checked = FALSE;
  2110 	sh4_x86.fpuen_checked = FALSE;
  2111 	sh4_x86.tstate = TSTATE_NONE;
  2112     }
  2113 :}
  2114 LDC Rm, GBR {: 
           /* Emits a copy of Rm into GBR; no privilege check is emitted. */
  2115     load_reg( R_EAX, Rm );
  2116     store_spreg( R_EAX, R_GBR );
  2117 :}
  2118 LDC Rm, VBR {:  
           /* Emits a privilege check followed by a copy of Rm into VBR. */
  2119     check_priv();
  2120     load_reg( R_EAX, Rm );
  2121     store_spreg( R_EAX, R_VBR );
  2122     sh4_x86.tstate = TSTATE_NONE;
  2123 :}
  2124 LDC Rm, SSR {:  
           /* Emits a privilege check followed by a copy of Rm into SSR. */
  2125     check_priv();
  2126     load_reg( R_EAX, Rm );
  2127     store_spreg( R_EAX, R_SSR );
  2128     sh4_x86.tstate = TSTATE_NONE;
  2129 :}
  2130 LDC Rm, SGR {:  
           /* Emits a privilege check followed by a copy of Rm into SGR. */
  2131     check_priv();
  2132     load_reg( R_EAX, Rm );
  2133     store_spreg( R_EAX, R_SGR );
  2134     sh4_x86.tstate = TSTATE_NONE;
  2135 :}
  2136 LDC Rm, SPC {:  
           /* Emits a privilege check followed by a copy of Rm into SPC. */
  2137     check_priv();
  2138     load_reg( R_EAX, Rm );
  2139     store_spreg( R_EAX, R_SPC );
  2140     sh4_x86.tstate = TSTATE_NONE;
  2141 :}
  2142 LDC Rm, DBR {:  
           /* Emits a privilege check followed by a copy of Rm into DBR. */
  2143     check_priv();
  2144     load_reg( R_EAX, Rm );
  2145     store_spreg( R_EAX, R_DBR );
  2146     sh4_x86.tstate = TSTATE_NONE;
  2147 :}
  2148 LDC Rm, Rn_BANK {:  
           /* Emits a privilege check followed by a copy of Rm into the
            * r_bank[Rn_BANK] slot of the register file. */
  2149     check_priv();
  2150     load_reg( R_EAX, Rm );
  2151     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2152     sh4_x86.tstate = TSTATE_NONE;
  2153 :}
  2154 LDC.L @Rm+, GBR {:  
           /* Emits a 32-bit load from the address in Rm into GBR, with Rm
            * post-incremented by 4. The original address is kept in ECX; note
            * that the incremented Rm is stored back before the read is emitted. */
  2155     load_reg( R_EAX, Rm );
  2156     check_ralign32( R_EAX );
  2157     MOV_r32_r32( R_EAX, R_ECX );
  2158     ADD_imm8s_r32( 4, R_EAX );
  2159     store_reg( R_EAX, Rm );
  2160     MEM_READ_LONG( R_ECX, R_EAX );
  2161     store_spreg( R_EAX, R_GBR );
  2162     sh4_x86.tstate = TSTATE_NONE;
  2163 :}
  2164 LDC.L @Rm+, SR {:
           /* Loads a 32-bit value from the address in Rm (post-incrementing Rm
            * by 4) and passes it to sh4_write_sr. Inside a delay slot the rule
            * emits SLOTILLEGAL() instead. After the write the cached
            * priv/fpuen check flags are reset so that later instructions in
            * the block re-emit their checks (presumably because the SR write
            * can change both -- confirm against sh4_write_sr). */
  2165     if( sh4_x86.in_delay_slot ) {
  2166 	SLOTILLEGAL();
  2167     } else {
  2168 	check_priv();
  2169 	load_reg( R_EAX, Rm );
  2170 	check_ralign32( R_EAX );
  2171 	MOV_r32_r32( R_EAX, R_ECX );
  2172 	ADD_imm8s_r32( 4, R_EAX );
  2173 	store_reg( R_EAX, Rm );
  2174 	MEM_READ_LONG( R_ECX, R_EAX );
  2175 	call_func1( sh4_write_sr, R_EAX );
  2176 	sh4_x86.priv_checked = FALSE;
  2177 	sh4_x86.fpuen_checked = FALSE;
  2178 	sh4_x86.tstate = TSTATE_NONE;
  2179     }
  2180 :}
  2181 LDC.L @Rm+, VBR {:  
           /* Privileged: emits a 32-bit load from the address in Rm into VBR,
            * with Rm post-incremented by 4 (original address kept in ECX). */
  2182     check_priv();
  2183     load_reg( R_EAX, Rm );
  2184     check_ralign32( R_EAX );
  2185     MOV_r32_r32( R_EAX, R_ECX );
  2186     ADD_imm8s_r32( 4, R_EAX );
  2187     store_reg( R_EAX, Rm );
  2188     MEM_READ_LONG( R_ECX, R_EAX );
  2189     store_spreg( R_EAX, R_VBR );
  2190     sh4_x86.tstate = TSTATE_NONE;
  2191 :}
  2192 LDC.L @Rm+, SSR {:
           /* Privileged: emits a 32-bit load from the address in Rm into SSR,
            * with Rm post-incremented by 4 (original address kept in ECX). */
  2193     check_priv();
  2194     load_reg( R_EAX, Rm );
  2195     check_ralign32( R_EAX );
  2196     MOV_r32_r32( R_EAX, R_ECX );
  2197     ADD_imm8s_r32( 4, R_EAX );
  2198     store_reg( R_EAX, Rm );
  2199     MEM_READ_LONG( R_ECX, R_EAX );
  2200     store_spreg( R_EAX, R_SSR );
  2201     sh4_x86.tstate = TSTATE_NONE;
  2202 :}
  2203 LDC.L @Rm+, SGR {:  
           /* Privileged: emits a 32-bit load from the address in Rm into SGR,
            * with Rm post-incremented by 4 (original address kept in ECX). */
  2204     check_priv();
  2205     load_reg( R_EAX, Rm );
  2206     check_ralign32( R_EAX );
  2207     MOV_r32_r32( R_EAX, R_ECX );
  2208     ADD_imm8s_r32( 4, R_EAX );
  2209     store_reg( R_EAX, Rm );
  2210     MEM_READ_LONG( R_ECX, R_EAX );
  2211     store_spreg( R_EAX, R_SGR );
  2212     sh4_x86.tstate = TSTATE_NONE;
  2213 :}
  2214 LDC.L @Rm+, SPC {:  
           /* Privileged: emits a 32-bit load from the address in Rm into SPC,
            * with Rm post-incremented by 4 (original address kept in ECX). */
  2215     check_priv();
  2216     load_reg( R_EAX, Rm );
  2217     check_ralign32( R_EAX );
  2218     MOV_r32_r32( R_EAX, R_ECX );
  2219     ADD_imm8s_r32( 4, R_EAX );
  2220     store_reg( R_EAX, Rm );
  2221     MEM_READ_LONG( R_ECX, R_EAX );
  2222     store_spreg( R_EAX, R_SPC );
  2223     sh4_x86.tstate = TSTATE_NONE;
  2224 :}
  2225 LDC.L @Rm+, DBR {:  
           /* Privileged: emits a 32-bit load from the address in Rm into DBR,
            * with Rm post-incremented by 4 (original address kept in ECX). */
  2226     check_priv();
  2227     load_reg( R_EAX, Rm );
  2228     check_ralign32( R_EAX );
  2229     MOV_r32_r32( R_EAX, R_ECX );
  2230     ADD_imm8s_r32( 4, R_EAX );
  2231     store_reg( R_EAX, Rm );
  2232     MEM_READ_LONG( R_ECX, R_EAX );
  2233     store_spreg( R_EAX, R_DBR );
  2234     sh4_x86.tstate = TSTATE_NONE;
  2235 :}
  2236 LDC.L @Rm+, Rn_BANK {:  
           /* Privileged: emits a 32-bit load from the address in Rm into
            * r_bank[Rn_BANK], with Rm post-incremented by 4 (original address
            * kept in ECX). */
  2237     check_priv();
  2238     load_reg( R_EAX, Rm );
  2239     check_ralign32( R_EAX );
  2240     MOV_r32_r32( R_EAX, R_ECX );
  2241     ADD_imm8s_r32( 4, R_EAX );
  2242     store_reg( R_EAX, Rm );
  2243     MEM_READ_LONG( R_ECX, R_EAX );
  2244     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2245     sh4_x86.tstate = TSTATE_NONE;
  2246 :}
  2247 LDS Rm, FPSCR {:  
           /* Emits a copy of Rm into FPSCR, then update_fr_bank on the new
            * value.
            * NOTE(review): no check_fpuen() is emitted here, unlike the F*
            * arithmetic rules in this file -- confirm that is intended. */
  2248     load_reg( R_EAX, Rm );
  2249     store_spreg( R_EAX, R_FPSCR );
  2250     update_fr_bank( R_EAX );
  2251     sh4_x86.tstate = TSTATE_NONE;
  2252 :}
  2253 LDS.L @Rm+, FPSCR {:  
           /* Emits a 32-bit load from the address in Rm into FPSCR (Rm
            * post-incremented by 4), then update_fr_bank on the new value.
            * NOTE(review): no check_fpuen() is emitted here, unlike the F*
            * arithmetic rules in this file -- confirm that is intended. */
  2254     load_reg( R_EAX, Rm );
  2255     check_ralign32( R_EAX );
  2256     MOV_r32_r32( R_EAX, R_ECX );
  2257     ADD_imm8s_r32( 4, R_EAX );
  2258     store_reg( R_EAX, Rm );
  2259     MEM_READ_LONG( R_ECX, R_EAX );
  2260     store_spreg( R_EAX, R_FPSCR );
  2261     update_fr_bank( R_EAX );
  2262     sh4_x86.tstate = TSTATE_NONE;
  2263 :}
  2264 LDS Rm, FPUL {:  
           /* Emits a copy of Rm into FPUL.
            * NOTE(review): no check_fpuen() is emitted here, unlike the F*
            * arithmetic rules in this file -- confirm that is intended. */
  2265     load_reg( R_EAX, Rm );
  2266     store_spreg( R_EAX, R_FPUL );
  2267 :}
  2268 LDS.L @Rm+, FPUL {:  
           /* Emits a 32-bit load from the address in Rm into FPUL, with Rm
            * post-incremented by 4 (original address kept in ECX).
            * NOTE(review): no check_fpuen() is emitted here, unlike the F*
            * arithmetic rules in this file -- confirm that is intended. */
  2269     load_reg( R_EAX, Rm );
  2270     check_ralign32( R_EAX );
  2271     MOV_r32_r32( R_EAX, R_ECX );
  2272     ADD_imm8s_r32( 4, R_EAX );
  2273     store_reg( R_EAX, Rm );
  2274     MEM_READ_LONG( R_ECX, R_EAX );
  2275     store_spreg( R_EAX, R_FPUL );
  2276     sh4_x86.tstate = TSTATE_NONE;
  2277 :}
  2278 LDS Rm, MACH {: 
           /* Emits a copy of Rm into MACH. */
  2279     load_reg( R_EAX, Rm );
  2280     store_spreg( R_EAX, R_MACH );
  2281 :}
  2282 LDS.L @Rm+, MACH {:  
           /* Emits a 32-bit load from the address in Rm into MACH, with Rm
            * post-incremented by 4 (original address kept in ECX). */
  2283     load_reg( R_EAX, Rm );
  2284     check_ralign32( R_EAX );
  2285     MOV_r32_r32( R_EAX, R_ECX );
  2286     ADD_imm8s_r32( 4, R_EAX );
  2287     store_reg( R_EAX, Rm );
  2288     MEM_READ_LONG( R_ECX, R_EAX );
  2289     store_spreg( R_EAX, R_MACH );
  2290     sh4_x86.tstate = TSTATE_NONE;
  2291 :}
  2292 LDS Rm, MACL {:  
           /* Emits a copy of Rm into MACL. */
  2293     load_reg( R_EAX, Rm );
  2294     store_spreg( R_EAX, R_MACL );
  2295 :}
  2296 LDS.L @Rm+, MACL {:  
           /* Emits a 32-bit load from the address in Rm into MACL, with Rm
            * post-incremented by 4 (original address kept in ECX). */
  2297     load_reg( R_EAX, Rm );
  2298     check_ralign32( R_EAX );
  2299     MOV_r32_r32( R_EAX, R_ECX );
  2300     ADD_imm8s_r32( 4, R_EAX );
  2301     store_reg( R_EAX, Rm );
  2302     MEM_READ_LONG( R_ECX, R_EAX );
  2303     store_spreg( R_EAX, R_MACL );
  2304     sh4_x86.tstate = TSTATE_NONE;
  2305 :}
  2306 LDS Rm, PR {:  
           /* Emits a copy of Rm into PR. */
  2307     load_reg( R_EAX, Rm );
  2308     store_spreg( R_EAX, R_PR );
  2309 :}
  2310 LDS.L @Rm+, PR {:  
           /* Emits a 32-bit load from the address in Rm into PR, with Rm
            * post-incremented by 4 (original address kept in ECX). */
  2311     load_reg( R_EAX, Rm );
  2312     check_ralign32( R_EAX );
  2313     MOV_r32_r32( R_EAX, R_ECX );
  2314     ADD_imm8s_r32( 4, R_EAX );
  2315     store_reg( R_EAX, Rm );
  2316     MEM_READ_LONG( R_ECX, R_EAX );
  2317     store_spreg( R_EAX, R_PR );
  2318     sh4_x86.tstate = TSTATE_NONE;
  2319 :}
  2320 LDTLB {:  
           /* Emits a run-time call to MMU_ldtlb. The called C function does
            * not preserve the host condition flags, so the cached T-state is
            * invalidated afterwards, matching every other call_func* rule in
            * this file (SLEEP, STC SR, PREF, ...). */
  2321     call_func0( MMU_ldtlb );
           sh4_x86.tstate = TSTATE_NONE;
  2322 :}
  2323 OCBI @Rn {: /* Intentionally emits no code. */ :}
  2324 OCBP @Rn {: /* Intentionally emits no code. */ :}
  2325 OCBWB @Rn {: /* Intentionally emits no code. */ :}
  2326 PREF @Rn {:
           /* Emits a run-time test of the address in Rn: when its top six bits
            * (mask 0xFC000000) equal 0xE0000000, sh4_flush_store_queue is
            * called with the full address; otherwise the call is skipped. */
  2327     load_reg( R_EAX, Rn );
           /* Keep the unmasked address in ECX for the call argument. */
  2328     MOV_r32_r32( R_EAX, R_ECX );
  2329     AND_imm32_r32( 0xFC000000, R_EAX );
  2330     CMP_imm32_r32( 0xE0000000, R_EAX );
  2331     JNE_rel8(CALL_FUNC1_SIZE, end);
  2332     call_func1( sh4_flush_store_queue, R_ECX );
  2333     JMP_TARGET(end);
  2334     sh4_x86.tstate = TSTATE_NONE;
  2335 :}
  2336 SLEEP {: 
           /* Privileged: emits a run-time call to sh4_sleep, clears the
            * translator's in_delay_slot flag and returns 2 from the enclosing
            * translation function (every other rule here falls through to the
            * common "return 0" tail). */
  2337     check_priv();
  2338     call_func0( sh4_sleep );
  2339     sh4_x86.tstate = TSTATE_NONE;
  2340     sh4_x86.in_delay_slot = FALSE;
  2341     return 2;
  2342 :}
  2343 STC SR, Rn {:
           /* Privileged: emits a run-time call to sh4_read_sr and stores EAX
            * (presumably the call's return value) into Rn. */
  2344     check_priv();
  2345     call_func0(sh4_read_sr);
  2346     store_reg( R_EAX, Rn );
  2347     sh4_x86.tstate = TSTATE_NONE;
  2348 :}
  2349 STC GBR, Rn {:  
           /* Emits a copy of GBR into Rn; no privilege check is emitted. */
  2350     load_spreg( R_EAX, R_GBR );
  2351     store_reg( R_EAX, Rn );
  2352 :}
  2353 STC VBR, Rn {:  
           /* Emits a privilege check followed by a copy of VBR into Rn. */
  2354     check_priv();
  2355     load_spreg( R_EAX, R_VBR );
  2356     store_reg( R_EAX, Rn );
  2357     sh4_x86.tstate = TSTATE_NONE;
  2358 :}
  2359 STC SSR, Rn {:  
           /* Emits a privilege check followed by a copy of SSR into Rn. */
  2360     check_priv();
  2361     load_spreg( R_EAX, R_SSR );
  2362     store_reg( R_EAX, Rn );
  2363     sh4_x86.tstate = TSTATE_NONE;
  2364 :}
  2365 STC SPC, Rn {:  
           /* Emits a privilege check followed by a copy of SPC into Rn. */
  2366     check_priv();
  2367     load_spreg( R_EAX, R_SPC );
  2368     store_reg( R_EAX, Rn );
  2369     sh4_x86.tstate = TSTATE_NONE;
  2370 :}
  2371 STC SGR, Rn {:  
           /* Emits a privilege check followed by a copy of SGR into Rn. */
  2372     check_priv();
  2373     load_spreg( R_EAX, R_SGR );
  2374     store_reg( R_EAX, Rn );
  2375     sh4_x86.tstate = TSTATE_NONE;
  2376 :}
  2377 STC DBR, Rn {:  
           /* Emits a privilege check followed by a copy of DBR into Rn. */
  2378     check_priv();
  2379     load_spreg( R_EAX, R_DBR );
  2380     store_reg( R_EAX, Rn );
  2381     sh4_x86.tstate = TSTATE_NONE;
  2382 :}
  2383 STC Rm_BANK, Rn {:
           /* Emits a privilege check followed by a copy of r_bank[Rm_BANK]
            * into Rn. */
  2384     check_priv();
  2385     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2386     store_reg( R_EAX, Rn );
  2387     sh4_x86.tstate = TSTATE_NONE;
  2388 :}
  2389 STC.L SR, @-Rn {:
           /* Privileged: emits a call to sh4_read_sr, pre-decrements Rn by 4
            * and writes EAX to the new address. The value is fetched first, so
            * this relies on the following load/align/add macros leaving EAX
            * untouched -- NOTE(review): confirm check_walign32 preserves EAX. */
  2390     check_priv();
  2391     call_func0( sh4_read_sr );
  2392     load_reg( R_ECX, Rn );
  2393     check_walign32( R_ECX );
  2394     ADD_imm8s_r32( -4, R_ECX );
  2395     store_reg( R_ECX, Rn );
  2396     MEM_WRITE_LONG( R_ECX, R_EAX );
  2397     sh4_x86.tstate = TSTATE_NONE;
  2398 :}
  2399 STC.L VBR, @-Rn {:  
           /* Privileged: emits a pre-decrement of Rn by 4 (alignment is checked
            * on the undecremented address) and a 32-bit store of VBR there. */
  2400     check_priv();
  2401     load_reg( R_ECX, Rn );
  2402     check_walign32( R_ECX );
  2403     ADD_imm8s_r32( -4, R_ECX );
  2404     store_reg( R_ECX, Rn );
  2405     load_spreg( R_EAX, R_VBR );
  2406     MEM_WRITE_LONG( R_ECX, R_EAX );
  2407     sh4_x86.tstate = TSTATE_NONE;
  2408 :}
  2409 STC.L SSR, @-Rn {:  
           /* Privileged: emits a pre-decrement of Rn by 4 (alignment is checked
            * on the undecremented address) and a 32-bit store of SSR there. */
  2410     check_priv();
  2411     load_reg( R_ECX, Rn );
  2412     check_walign32( R_ECX );
  2413     ADD_imm8s_r32( -4, R_ECX );
  2414     store_reg( R_ECX, Rn );
  2415     load_spreg( R_EAX, R_SSR );
  2416     MEM_WRITE_LONG( R_ECX, R_EAX );
  2417     sh4_x86.tstate = TSTATE_NONE;
  2418 :}
  2419 STC.L SPC, @-Rn {:
           /* Privileged: emits a pre-decrement of Rn by 4 (alignment is checked
            * on the undecremented address) and a 32-bit store of SPC there. */
  2420     check_priv();
  2421     load_reg( R_ECX, Rn );
  2422     check_walign32( R_ECX );
  2423     ADD_imm8s_r32( -4, R_ECX );
  2424     store_reg( R_ECX, Rn );
  2425     load_spreg( R_EAX, R_SPC );
  2426     MEM_WRITE_LONG( R_ECX, R_EAX );
  2427     sh4_x86.tstate = TSTATE_NONE;
  2428 :}
  2429 STC.L SGR, @-Rn {:  
           /* Privileged: emits a pre-decrement of Rn by 4 (alignment is checked
            * on the undecremented address) and a 32-bit store of SGR there. */
  2430     check_priv();
  2431     load_reg( R_ECX, Rn );
  2432     check_walign32( R_ECX );
  2433     ADD_imm8s_r32( -4, R_ECX );
  2434     store_reg( R_ECX, Rn );
  2435     load_spreg( R_EAX, R_SGR );
  2436     MEM_WRITE_LONG( R_ECX, R_EAX );
  2437     sh4_x86.tstate = TSTATE_NONE;
  2438 :}
  2439 STC.L DBR, @-Rn {:  
           /* Privileged: emits a pre-decrement of Rn by 4 (alignment is checked
            * on the undecremented address) and a 32-bit store of DBR there. */
  2440     check_priv();
  2441     load_reg( R_ECX, Rn );
  2442     check_walign32( R_ECX );
  2443     ADD_imm8s_r32( -4, R_ECX );
  2444     store_reg( R_ECX, Rn );
  2445     load_spreg( R_EAX, R_DBR );
  2446     MEM_WRITE_LONG( R_ECX, R_EAX );
  2447     sh4_x86.tstate = TSTATE_NONE;
  2448 :}
  2449 STC.L Rm_BANK, @-Rn {:  
           /* Privileged: emits a pre-decrement of Rn by 4 (alignment is checked
            * on the undecremented address) and a 32-bit store of
            * r_bank[Rm_BANK] there. */
  2450     check_priv();
  2451     load_reg( R_ECX, Rn );
  2452     check_walign32( R_ECX );
  2453     ADD_imm8s_r32( -4, R_ECX );
  2454     store_reg( R_ECX, Rn );
  2455     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2456     MEM_WRITE_LONG( R_ECX, R_EAX );
  2457     sh4_x86.tstate = TSTATE_NONE;
  2458 :}
  2459 STC.L GBR, @-Rn {:  
           /* Emits a pre-decrement of Rn by 4 (alignment is checked on the
            * undecremented address) and a 32-bit store of GBR there. No
            * privilege check is emitted. */
  2460     load_reg( R_ECX, Rn );
  2461     check_walign32( R_ECX );
  2462     ADD_imm8s_r32( -4, R_ECX );
  2463     store_reg( R_ECX, Rn );
  2464     load_spreg( R_EAX, R_GBR );
  2465     MEM_WRITE_LONG( R_ECX, R_EAX );
  2466     sh4_x86.tstate = TSTATE_NONE;
  2467 :}
  2468 STS FPSCR, Rn {:  
           /* Emits a copy of FPSCR into Rn.
            * NOTE(review): no check_fpuen() is emitted here, unlike the F*
            * arithmetic rules in this file -- confirm that is intended. */
  2469     load_spreg( R_EAX, R_FPSCR );
  2470     store_reg( R_EAX, Rn );
  2471 :}
  2472 STS.L FPSCR, @-Rn {:  
           /* Emits a pre-decrement of Rn by 4 (alignment is checked on the
            * undecremented address) and a 32-bit store of FPSCR there.
            * NOTE(review): no check_fpuen() is emitted here, unlike the F*
            * arithmetic rules in this file -- confirm that is intended. */
  2473     load_reg( R_ECX, Rn );
  2474     check_walign32( R_ECX );
  2475     ADD_imm8s_r32( -4, R_ECX );
  2476     store_reg( R_ECX, Rn );
  2477     load_spreg( R_EAX, R_FPSCR );
  2478     MEM_WRITE_LONG( R_ECX, R_EAX );
  2479     sh4_x86.tstate = TSTATE_NONE;
  2480 :}
  2481 STS FPUL, Rn {:  
           /* Emits a copy of FPUL into Rn.
            * NOTE(review): no check_fpuen() is emitted here, unlike the F*
            * arithmetic rules in this file -- confirm that is intended. */
  2482     load_spreg( R_EAX, R_FPUL );
  2483     store_reg( R_EAX, Rn );
  2484 :}
  2485 STS.L FPUL, @-Rn {:  
           /* Emits a pre-decrement of Rn by 4 (alignment is checked on the
            * undecremented address) and a 32-bit store of FPUL there.
            * NOTE(review): no check_fpuen() is emitted here, unlike the F*
            * arithmetic rules in this file -- confirm that is intended. */
  2486     load_reg( R_ECX, Rn );
  2487     check_walign32( R_ECX );
  2488     ADD_imm8s_r32( -4, R_ECX );
  2489     store_reg( R_ECX, Rn );
  2490     load_spreg( R_EAX, R_FPUL );
  2491     MEM_WRITE_LONG( R_ECX, R_EAX );
  2492     sh4_x86.tstate = TSTATE_NONE;
  2493 :}
  2494 STS MACH, Rn {:  
           /* Emits a copy of MACH into Rn. */
  2495     load_spreg( R_EAX, R_MACH );
  2496     store_reg( R_EAX, Rn );
  2497 :}
  2498 STS.L MACH, @-Rn {:  
           /* Emits a pre-decrement of Rn by 4 (alignment is checked on the
            * undecremented address) and a 32-bit store of MACH there. */
  2499     load_reg( R_ECX, Rn );
  2500     check_walign32( R_ECX );
  2501     ADD_imm8s_r32( -4, R_ECX );
  2502     store_reg( R_ECX, Rn );
  2503     load_spreg( R_EAX, R_MACH );
  2504     MEM_WRITE_LONG( R_ECX, R_EAX );
  2505     sh4_x86.tstate = TSTATE_NONE;
  2506 :}
  2507 STS MACL, Rn {:  
           /* Emits a copy of MACL into Rn. */
  2508     load_spreg( R_EAX, R_MACL );
  2509     store_reg( R_EAX, Rn );
  2510 :}
  2511 STS.L MACL, @-Rn {:  
           /* Emits a pre-decrement of Rn by 4 (alignment is checked on the
            * undecremented address) and a 32-bit store of MACL there. */
  2512     load_reg( R_ECX, Rn );
  2513     check_walign32( R_ECX );
  2514     ADD_imm8s_r32( -4, R_ECX );
  2515     store_reg( R_ECX, Rn );
  2516     load_spreg( R_EAX, R_MACL );
  2517     MEM_WRITE_LONG( R_ECX, R_EAX );
  2518     sh4_x86.tstate = TSTATE_NONE;
  2519 :}
  2520 STS PR, Rn {:  
           /* Emits a copy of PR into Rn. */
  2521     load_spreg( R_EAX, R_PR );
  2522     store_reg( R_EAX, Rn );
  2523 :}
  2524 STS.L PR, @-Rn {:  
           /* Emits a pre-decrement of Rn by 4 (alignment is checked on the
            * undecremented address) and a 32-bit store of PR there. */
  2525     load_reg( R_ECX, Rn );
  2526     check_walign32( R_ECX );
  2527     ADD_imm8s_r32( -4, R_ECX );
  2528     store_reg( R_ECX, Rn );
  2529     load_spreg( R_EAX, R_PR );
  2530     MEM_WRITE_LONG( R_ECX, R_EAX );
  2531     sh4_x86.tstate = TSTATE_NONE;
  2532 :}
  2534 NOP {: /* Intentionally emits no code; an x86 NOP (0x90) could be emitted here but would serve no purpose. */ :}
  2535 %%
  2536     sh4_x86.in_delay_slot = FALSE;
  2537     return 0;
.