lxdream.org :: lxdream/src/sh4/sh4x86.in
filename     src/sh4/sh4x86.in
changeset    569:a1c49e1e8776
prev         561:533f6b478071
next         570:d2893980fbf5
author       nkeynes
date         Fri Jan 04 11:54:17 2008 +0000
branch       lxdream-mmu
permissions  -rw-r--r--
last change  Bring icache partially into line with the mmu, a little less slow with AT off now.
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "sh4/xltcache.h"
#include "sh4/sh4core.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4mmio.h"
#include "sh4/x86op.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096

struct backpatch_record {
    uint32_t *fixup_addr;
    uint32_t fixup_icount;
    uint32_t exc_code;
};

/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    gboolean in_delay_slot;
    gboolean priv_checked; /* true if we've already checked the cpu mode. */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    int tstate;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

#define TSTATE_NONE -1
#define TSTATE_O    0
#define TSTATE_C    2
#define TSTATE_E    4
#define TSTATE_NE   5
#define TSTATE_G    0xF
#define TSTATE_GE   0xD
#define TSTATE_A    7
#define TSTATE_AE   3

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+sh4_x86.tstate); OP(rel8); \
    MARK_JMP(rel8,label)
/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    MARK_JMP(rel8, label)
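
/* Illustrative note: the TSTATE_* values above are chosen to match the x86
 * condition-code numbers (O=0, C/B=2, E/Z=4, NE=5, A=7, AE=3, GE=0xD, G=0xF),
 * so OP(0x70+sh4_x86.tstate) directly forms the matching Jcc rel8 opcode.
 * Worked example: with sh4_x86.tstate == TSTATE_E, JT_rel8(n,label) emits
 * 74 n (JE rel8), and JF_rel8 flips the low bit to emit 75 n (JNE rel8). */
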
static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

void sh4_x86_init()
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
}

static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_addr = (uint32_t *)fixup_addr;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
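
/* Worked example of the icount arithmetic above: for a block with
 * block_start_pc = 0x8C001000, a fixup recorded at fixup_pc = 0x8C001006
 * (not in a delay slot) gives fixup_icount = (0x8C001006 - 0x8C001000)>>1
 * = 3, i.e. the fourth 16-bit instruction of the block. */
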
/**
 * Emit an instruction to load an SH4 reg into a real register
 */
static inline void load_reg( int x86reg, int sh4reg ) 
{
    /* mov [bp+n], reg */
    OP(0x8B);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7);
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}

#define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
#define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
/**
 * Emit an instruction to load an immediate value into a register
 */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg);
    OP32(value);
}
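
/* Encoding note: 0xB8+reg is the x86 "MOV r32, imm32" opcode, so (assuming
 * the usual x86 register numbering with R_ECX == 1) load_imm32( R_ECX,
 * 0x12345678 ) emits the bytes B9 78 56 34 12, i.e. mov ecx, 0x12345678. */
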
/**
 * Load an immediate 64-bit quantity (note: x86-64 only)
 */
static inline void load_imm64( int x86reg, uint64_t value ) {
    /* mov #value, reg */
    REXW();
    OP(0xB8 + x86reg);
    OP64(value);
}

/**
 * Emit an instruction to store an SH4 reg (RN)
 */
static inline void store_reg( int x86reg, int sh4reg ) {
    /* mov reg, [bp+n] */
    OP(0x89);
    OP(0x45 + (x86reg<<3));
    OP(REG_OFFSET(r[sh4reg]));
}

#define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
static inline void load_fr( int bankreg, int x86reg, int frm )
{
    OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
}

/**
 * Store the value of an integer x86 register into an FR register
 * (single-precision floating point, eg for register-to-register moves)
 */
static inline void store_fr( int bankreg, int x86reg, int frn )
{
    OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
}

/**
 * Load a pointer to the back FP bank into the specified x86 register. The
 * bankreg must have been previously loaded with FPSCR.
 * NB: 12 bytes
 */
static inline void load_xf_bank( int bankreg )
{
    NOT_r32( bankreg );
    SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
}
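
/* Step by step: with FPSCR in bankreg, NOT inverts bit 21 (the FR
 * bank-select bit), SHR by 15 moves it down to bit 6, and AND 0x40
 * isolates it. The result is 0x40 (16 floats * 4 bytes, the size of one
 * bank) when FR=0 and 0 when FR=1, which the LEA then adds to the base of
 * the fr array to address the inactive (XF) bank. */
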
/**
 * Update the fr_bank pointer based on the current fpscr value.
 */
static inline void update_fr_bank( int fpscrreg )
{
    SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
}
/**
 * Push FPUL (as a 32-bit float) onto the FPU stack
 */
static inline void push_fpul( )
{
    OP(0xD9); OP(0x45); OP(R_FPUL);
}

/**
 * Pop FPUL (as a 32-bit float) from the FPU stack
 */
static inline void pop_fpul( )
{
    OP(0xD9); OP(0x5D); OP(R_FPUL);
}

/**
 * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_fr( int bankreg, int frm ) 
{
    OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
}

/**
 * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
 * with bankreg previously loaded with the location of the current fp bank.
 */
static inline void pop_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
}

/**
 * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
 * with the location of the current fp bank.
 */
static inline void push_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
}

static inline void pop_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
}
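
/* Note on the frm^1 addressing used by the single-precision accessors
 * above: the fr banks appear to be stored with each pair of floats
 * swapped, so that a 64-bit double (push_dr/pop_dr, which use frm<<2
 * without the XOR) occupies one naturally-ordered 8-byte slot on the
 * little-endian host, while a single FRn lives at byte offset (frn^1)*4
 * within its bank. */
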
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( !sh4_x86.priv_checked ) { \
        sh4_x86.priv_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_MD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JE_exc( EXC_SLOT_ILLEGAL );\
        } else {\
            JE_exc( EXC_ILLEGAL );\
        }\
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        load_spreg( R_EAX, R_SR );\
        AND_imm32_r32( SR_FD, R_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
    }

#define check_ralign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TEST_imm32_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TEST_imm32_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define UNDEF()
#define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
#define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); TEST_r32_r32( R_EDX, R_EDX ); JNE_exc(-1); MEM_RESULT(value_reg)
#define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); TEST_r32_r32( R_EDX, R_EDX ); JNE_exc(-1); MEM_RESULT(value_reg)
#define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); TEST_r32_r32( R_EDX, R_EDX ); JNE_exc(-1); MEM_RESULT(value_reg)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg); TEST_r32_r32( R_EAX, R_EAX ); JNE_exc(-1);
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg); TEST_r32_r32( R_EAX, R_EAX ); JNE_exc(-1);
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg); TEST_r32_r32( R_EAX, R_EAX ); JNE_exc(-1);

#define MEM_READ_SIZE  (CALL_FUNC1_SIZE+8)
#define MEM_WRITE_SIZE (CALL_FUNC2_SIZE+8)
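
/* The read macros test R_EDX and the write macros test R_EAX because the
 * memory functions signal failure through those registers; on a non-zero
 * result the JNE_exc(-1) records a backpatch whose exc_code of -1
 * presumably means "exception already raised by the callee", as opposed
 * to the explicit EXC_* codes used by the alignment checks above. */
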
#define SLOTILLEGAL() JMP_exc(EXC_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;

/****** Import appropriate calling conventions ******/
#if SH4_TRANSLATOR == TARGET_X86_64
#include "sh4/ia64abi.h"
#else /* SH4_TRANSLATOR == TARGET_X86 */
#ifdef APPLE_BUILD
#include "sh4/ia32mac.h"
#else
#include "sh4/ia32abi.h"
#endif
#endif

/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * the delay-slot instruction must be executed before the branch takes
 * effect).
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an illegal instruction).
 */
uint32_t sh4_translate_instruction( sh4addr_t pc )
{
    uint32_t ir;
    /* Read instruction */
    if( IS_IN_ICACHE(pc) ) {
        ir = *(uint16_t *)GET_ICACHE_PTR(pc);
    } else {
        ir = sh4_read_word(pc);
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( imm, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADC_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    AND_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    load_reg( R_EAX, 0 );
    AND_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_EAX, R_ECX );
    PUSH_realigned_r32(R_ECX);
    MEM_READ_BYTE( R_ECX, R_EAX );
    POP_realigned_r32(R_ECX);
    AND_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    load_reg( R_EAX, 0 );
    CMP_imm8s_r32(imm, R_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    CMP_r32_r32( R_EAX, R_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    load_reg( R_EAX, Rn );
    CMP_imm8s_r32( 0, R_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    JE_rel8(13, target1);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JE_rel8(9, target2);
    SHR_imm8_r32( 16, R_EAX ); // 3
    TEST_r8_r8( R_AL, R_AL ); // 2
    JE_rel8(2, target3);
    TEST_r8_r8( R_AH, R_AH ); // 2
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
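/* CMP/STR sets T if any of the four bytes of Rm and Rn are equal. Tracing
 * the code above with R0=0x11223344, R1=0x55663344: the XOR gives
 * 0x44440000, so the first TEST sees AL==0 and sets ZF, the JE skips
 * straight to SETE_t, and T=1. */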
DIV0S Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHR_imm8_r32( 31, R_EAX );
    SHR_imm8_r32( 31, R_ECX );
    store_spreg( R_EAX, R_M );
    store_spreg( R_ECX, R_Q );
    CMP_r32_r32( R_EAX, R_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    XOR_r32_r32( R_EAX, R_EAX );
    store_spreg( R_EAX, R_Q );
    store_spreg( R_EAX, R_M );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(5, mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(3, end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
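/* DIV1 above is one step of the SH4's 1-bit non-restoring division: the
 * dividend bit rotated out of Rn is combined with M and the previous Q to
 * choose between adding and subtracting the divisor, and the new Q and T
 * are then reconstructed by the XOR sequence. A full 32/32-bit divide
 * issues DIV0S or DIV0U once followed by 32 repetitions of DIV1. */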
DMULS.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    IMUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32(R_ECX);
    store_spreg( R_EDX, R_MACH );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    load_reg( R_EAX, Rn );
    ADD_imm8s_r32( -1, R_EAX );
    store_reg( R_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVSX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r8_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOVZX_r16_r32( R_EAX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    load_reg( R_ECX, Rm );
    check_ralign32( R_ECX );
    load_reg( R_ECX, Rn );
    check_ralign32( R_ECX );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
    MEM_READ_LONG( R_ECX, R_EAX );
    PUSH_realigned_r32( R_EAX );
    load_reg( R_ECX, Rm );
    ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    MEM_READ_LONG( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );
    ADD_r32_sh4r( R_EAX, R_MACL );
    ADC_r32_sh4r( R_EDX, R_MACH );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32(R_ECX, R_ECX);
    JE_rel8( CALL_FUNC0_SIZE, nosat );
    call_func0( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    load_reg( R_ECX, Rm );
    check_ralign16( R_ECX );
    load_reg( R_ECX, Rn );
    check_ralign16( R_ECX );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
    MEM_READ_WORD( R_ECX, R_EAX );
    PUSH_realigned_r32( R_EAX );
    load_reg( R_ECX, Rm );
    ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    MEM_READ_WORD( R_ECX, R_EAX );
    POP_realigned_r32( R_ECX );
    IMUL_r32( R_ECX );

    load_spreg( R_ECX, R_S );
    TEST_r32_r32( R_ECX, R_ECX );
    JE_rel8( 47, nosat );

    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    JNO_rel8( 51, end );            // 2
    load_imm32( R_EDX, 1 );         // 5
    store_spreg( R_EDX, R_MACH );   // 6
    JS_rel8( 13, positive );        // 2
    load_imm32( R_EAX, 0x80000000 );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 25, end2 );           // 2

    JMP_TARGET(positive);
    load_imm32( R_EAX, 0x7FFFFFFF );// 5
    store_spreg( R_EAX, R_MACL );   // 6
    JMP_rel8( 12, end3);            // 2

    JMP_TARGET(nosat);
    ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    load_spreg( R_EAX, R_T );
    store_reg( R_EAX, Rn );
:}
MUL.L Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    load_reg16s( R_EAX, Rm );
    load_reg16s( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    load_reg16u( R_EAX, Rm );
    load_reg16u( R_ECX, Rn );
    MUL_r32( R_ECX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    load_reg( R_EAX, Rm );
    NEG_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    load_reg( R_EAX, Rm );
    XOR_r32_r32( R_ECX, R_ECX );
    LDC_t();
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    load_reg( R_EAX, Rm );
    NOT_r32( R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    load_reg( R_EAX, 0 );
    OR_imm32_r32(imm, R_EAX);
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_EAX, R_ECX );
    PUSH_realigned_r32(R_ECX);
    MEM_READ_BYTE( R_ECX, R_EAX );
    POP_realigned_r32(R_ECX);
    OR_imm32_r32(imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    load_reg( R_EAX, Rn );
    ROL1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    load_reg( R_EAX, Rn );
    ROR1_r32( R_EAX );
    store_reg( R_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    /* Annoyingly enough, not directly convertible */
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(16, doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptysar);     // 2
    SAR_r32_CL( R_EAX );       // 2
    JMP_rel8(10, end);          // 2

    JMP_TARGET(emptysar);
    SAR_imm8_r32(31, R_EAX );  // 3
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    load_reg( R_EAX, Rn );
    load_reg( R_ECX, Rm );
    CMP_imm32_r32( 0, R_ECX );
    JGE_rel8(15, doshl);

    NEG_r32( R_ECX );      // 2
    AND_imm8_r8( 0x1F, R_CL ); // 3
    JE_rel8( 4, emptyshr );
    SHR_r32_CL( R_EAX );       // 2
    JMP_rel8(9, end);          // 2

    JMP_TARGET(emptyshr);
    XOR_r32_r32( R_EAX, R_EAX );
    JMP_rel8(5, end2);

    JMP_TARGET(doshl);
    AND_imm8_r8( 0x1F, R_CL ); // 3
    SHL_r32_CL( R_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    load_reg( R_EAX, Rn );
    SAR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    load_reg( R_EAX, Rn );
    SHL1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    load_reg( R_EAX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    load_reg( R_EAX, Rn );
    SHR1_r32( R_EAX );
    SETC_t();
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 2, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 8, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    load_reg( R_EAX, Rn );
    SHR_imm8_r32( 16, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SUB_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    load_reg( R_EAX, Rm );
    XCHG_r8_r8( R_AL, R_AH );
    store_reg( R_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    SHL_imm8_r32( 16, R_ECX );
    SHR_imm8_r32( 16, R_EAX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    load_reg( R_ECX, Rn );
    MEM_READ_BYTE( R_ECX, R_EAX );
    TEST_r8_r8( R_AL, R_AL );
    SETE_t();
    OR_imm8_r8( 0x80, R_AL );
    load_reg( R_ECX, Rn );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    TEST_r32_r32( R_EAX, R_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    load_reg( R_EAX, 0 );
    TEST_imm32_r32( imm, R_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0);
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_EAX, R_ECX );
    MEM_READ_BYTE( R_ECX, R_EAX );
    TEST_imm8_r8( imm, R_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_EAX, R_ECX );
    PUSH_realigned_r32(R_ECX);
    MEM_READ_BYTE(R_ECX, R_EAX);
    POP_realigned_r32(R_ECX);
    XOR_imm32_r32( imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Data move instructions */
MOV Rm, Rn {:
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_imm8s_r32( -1, R_ECX );
    store_reg( R_ECX, Rn );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    load_reg( R_EAX, Rm );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_imm32_r32( disp, R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_imm32_r32( disp, R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    load_reg( R_ECX, Rm );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:
    load_reg( R_ECX, Rm );
    MOV_r32_r32( R_ECX, R_EAX );
    ADD_imm8s_r32( 1, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_EAX, R_ECX );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:
    load_spreg( R_ECX, R_GBR );
    ADD_imm32_r32( disp, R_ECX );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:
    load_reg( R_ECX, Rm );
    ADD_imm32_r32( disp, R_ECX );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    check_walign32(R_ECX);
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @-Rn {:
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    check_walign32( R_ECX );
    load_reg( R_EAX, Rm );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:
    load_spreg( R_ECX, R_GBR );
    load_reg( R_EAX, 0 );
    ADD_imm32_r32( disp, R_ECX );
    check_walign32( R_ECX );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:
    load_reg( R_ECX, Rn );
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_ECX );
    check_walign32( R_ECX );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:
    load_reg( R_ECX, Rm );
    check_ralign32( R_ECX );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_EAX, R_ECX );
    check_ralign32( R_ECX );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    load_spreg( R_ECX, R_GBR );
    ADD_imm32_r32( disp, R_ECX );
    check_ralign32( R_ECX );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            // If the target address is in the same page as the code, it's
            // pretty safe to just ref it directly and circumvent the whole
            // memory subsystem. (this is a big performance win)

            // FIXME: There's a corner-case that's not handled here when
            // the current code-page is in the ITLB but not in the UTLB.
            // (should generate a TLB miss although need to test SH4 
            // behaviour to confirm) Unlikely to be anyone depending on this
            // behaviour though.
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
        } else {
            // Note: we use sh4r.pc for the calc as we could be running at a
            // different virtual address than the translation was done with,
            // but we can safely assume that the low bits are the same.
            load_imm32( R_ECX, (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
            ADD_sh4r_r32( R_PC, R_ECX );
            MEM_READ_LONG( R_ECX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.L @(disp, Rm), Rn {:
    load_reg( R_ECX, Rm );
    ADD_imm8s_r32( disp, R_ECX );
    check_ralign32( R_ECX );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:
    load_reg( R_ECX, Rn );
    check_walign16( R_ECX );
    load_reg( R_EAX, Rm );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:
    load_reg( R_ECX, Rn );
    check_walign16( R_ECX );
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( -2, R_ECX );
    store_reg( R_ECX, Rn );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    check_walign16( R_ECX );
    load_reg( R_EAX, Rm );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:
    load_spreg( R_ECX, R_GBR );
    load_reg( R_EAX, 0 );
    ADD_imm32_r32( disp, R_ECX );
    check_walign16( R_ECX );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:
    load_reg( R_ECX, Rn );
    load_reg( R_EAX, 0 );
    ADD_imm32_r32( disp, R_ECX );
    check_walign16( R_ECX );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:
    load_reg( R_ECX, Rm );
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 2, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_EAX, R_ECX );
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:
    load_spreg( R_ECX, R_GBR );
    ADD_imm32_r32( disp, R_ECX );
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, PC), Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        // See comments for MOV.L @(disp, PC), Rn
        uint32_t target = pc + disp + 4;
        if( IS_IN_ICACHE(target) ) {
            sh4ptr_t ptr = GET_ICACHE_PTR(target);
            MOV_moff32_EAX( ptr );
            MOVSX_r16_r32( R_EAX, R_EAX );
        } else {
            load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 );
            ADD_sh4r_r32( R_PC, R_ECX );
            MEM_READ_WORD( R_ECX, R_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( R_EAX, Rn );
    }
:}
MOV.W @(disp, Rm), R0 {:
    load_reg( R_ECX, Rm );
    ADD_imm32_r32( disp, R_ECX );
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVA @(disp, PC), R0 {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03) );
        ADD_sh4r_r32( R_PC, R_ECX );
        store_reg( R_ECX, 0 );
    }
:}
MOVCA.L R0, @Rn {:
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Control transfer instructions */
BF disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        JT_rel8( EXIT_BLOCK_SIZE, nottaken );
        exit_block( disp + pc + 4, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BF/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = TRUE;
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32: skip to not-taken path if T set
        sh4_translate_instruction(pc+2);
        exit_block( disp + pc + 4, pc+4 );
        // not taken
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_translate_instruction(pc+2);
        return 4;
    }
:}
BRA disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction( pc + 2 );
        exit_block( disp + pc + 4, pc+4 );
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BRAF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_EAX, Rn );
        ADD_imm32_r32( pc + 4, R_EAX );
        store_spreg( R_EAX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction( pc + 2 );
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BSR disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_EAX, pc + 4 );
        store_spreg( R_EAX, R_PR );
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction( pc + 2 );
        exit_block( disp + pc + 4, pc+4 );
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BSRF Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, pc + 4 );
        store_spreg( R_ECX, R_PR );
        ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction( pc + 2 );
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
BT disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        JF_rel8( EXIT_BLOCK_SIZE, nottaken );
        exit_block( disp + pc + 4, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
BT/S disp {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = TRUE;
        if( sh4_x86.tstate == TSTATE_NONE ) {
            CMP_imm8s_sh4r( 1, R_T );
            sh4_x86.tstate = TSTATE_E;
        }
        OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32: skip to not-taken path if T clear
        sh4_translate_instruction(pc+2);
        exit_block( disp + pc + 4, pc+4 );
        // not taken
        *patch = (xlat_output - ((uint8_t *)patch)) - 4;
        sh4_translate_instruction(pc+2);
        return 4;
    }
:}
JMP @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction(pc+2);
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
JSR @Rn {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_EAX, pc + 4 );
        store_spreg( R_EAX, R_PR );
        load_reg( R_ECX, Rn );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction(pc+2);
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
RTE {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        check_priv();
        load_spreg( R_ECX, R_SPC );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        load_spreg( R_EAX, R_SSR );
        call_func1( sh4_write_sr, R_EAX );
        sh4_x86.in_delay_slot = TRUE;
        sh4_x86.priv_checked = FALSE;
        sh4_x86.fpuen_checked = FALSE;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_translate_instruction(pc+2);
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
RTS {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_spreg( R_ECX, R_PR );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        sh4_x86.in_delay_slot = TRUE;
        sh4_translate_instruction(pc+2);
        exit_block_pcset(pc+2);
        sh4_x86.branch_taken = TRUE;
        return 4;
    }
:}
TRAPA #imm {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        load_imm32( R_ECX, pc+2 );
        store_spreg( R_ECX, REG_OFFSET(pc) );
        load_imm32( R_EAX, imm );
        call_func1( sh4_raise_trap, R_EAX );
        sh4_x86.tstate = TSTATE_NONE;
        exit_block_pcset(pc);
        sh4_x86.branch_taken = TRUE;
        return 2;
    }
:}
UNDEF {:
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        JMP_exc(EXC_ILLEGAL);
        return 2;
    }
:}

CLRMAC {:
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRS {:
    CLC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
CLRT {:
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SETS {:
    STC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
SETT {:
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
  1486 /* Floating point moves */
  1487 FMOV FRm, FRn {:  
  1488     /* As horrible as this looks, it's actually covering 5 separate cases:
  1489      * 1. 32-bit fr-to-fr (PR=0)
  1490      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1491      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1492      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1493      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1494      */
  1495     check_fpuen();
  1496     load_spreg( R_ECX, R_FPSCR );
  1497     load_fr_bank( R_EDX );
  1498     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1499     JNE_rel8(8, doublesize);
  1500     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1501     store_fr( R_EDX, R_EAX, FRn );
  1502     if( FRm&1 ) {
  1503 	JMP_rel8(24, end);
  1504 	JMP_TARGET(doublesize);
  1505 	load_xf_bank( R_ECX ); 
  1506 	load_fr( R_ECX, R_EAX, FRm-1 );
  1507 	if( FRn&1 ) {
  1508 	    load_fr( R_ECX, R_EDX, FRm );
  1509 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1510 	    store_fr( R_ECX, R_EDX, FRn );
  1511 	} else /* FRn&1 == 0 */ {
  1512 	    load_fr( R_ECX, R_ECX, FRm );
  1513 	    store_fr( R_EDX, R_EAX, FRn );
  1514 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1516 	JMP_TARGET(end);
  1517     } else /* FRm&1 == 0 */ {
  1518 	if( FRn&1 ) {
  1519 	    JMP_rel8(24, end);
  1520 	    load_xf_bank( R_ECX );
  1521 	    load_fr( R_EDX, R_EAX, FRm );
  1522 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1523 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1524 	    store_fr( R_ECX, R_EDX, FRn );
  1525 	    JMP_TARGET(end);
  1526 	} else /* FRn&1 == 0 */ {
  1527 	    JMP_rel8(12, end);
  1528 	    load_fr( R_EDX, R_EAX, FRm );
  1529 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1530 	    store_fr( R_EDX, R_EAX, FRn );
  1531 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1532 	    JMP_TARGET(end);
  1535     sh4_x86.tstate = TSTATE_NONE;
  1536 :}
  1537 FMOV FRm, @Rn {: 
  1538     check_fpuen();
  1539     load_reg( R_ECX, Rn );
  1540     check_walign32( R_ECX );
  1541     load_spreg( R_EDX, R_FPSCR );
  1542     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1543     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1544     load_fr_bank( R_EDX );
  1545     load_fr( R_EDX, R_EAX, FRm );
  1546     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1547     if( FRm&1 ) {
  1548 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1549 	JMP_TARGET(doublesize);
  1550 	load_xf_bank( R_EDX );
  1551 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1552 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1553 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1554 	JMP_TARGET(end);
  1555     } else {
  1556 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1557 	JMP_TARGET(doublesize);
  1558 	load_fr_bank( R_EDX );
  1559 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1560 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1561 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1562 	JMP_TARGET(end);
  1564     sh4_x86.tstate = TSTATE_NONE;
  1565 :}
  1566 FMOV @Rm, FRn {:  
  1567     check_fpuen();
  1568     load_reg( R_ECX, Rm );
  1569     check_ralign32( R_ECX );
  1570     load_spreg( R_EDX, R_FPSCR );
  1571     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1572     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1573     MEM_READ_LONG( R_ECX, R_EAX );
  1574     load_fr_bank( R_EDX );
  1575     store_fr( R_EDX, R_EAX, FRn );
  1576     if( FRn&1 ) {
  1577 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1578 	JMP_TARGET(doublesize);
  1579 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1580 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1581 	load_xf_bank( R_EDX );
  1582 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1583 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1584 	JMP_TARGET(end);
  1585     } else {
  1586 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1587 	JMP_TARGET(doublesize);
  1588 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1589 	load_fr_bank( R_EDX );
  1590 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1591 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1592 	JMP_TARGET(end);
  1594     sh4_x86.tstate = TSTATE_NONE;
  1595 :}
  1596 FMOV FRm, @-Rn {:  
  1597     check_fpuen();
  1598     load_reg( R_ECX, Rn );
  1599     check_walign32( R_ECX );
  1600     load_spreg( R_EDX, R_FPSCR );
  1601     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1602     JNE_rel8(14 + MEM_WRITE_SIZE, doublesize);
  1603     load_fr_bank( R_EDX );
  1604     load_fr( R_EDX, R_EAX, FRm );
  1605     ADD_imm8s_r32(-4,R_ECX);
  1606     store_reg( R_ECX, Rn );
  1607     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1608     if( FRm&1 ) {
  1609 	JMP_rel8( 24 + MEM_WRITE_DOUBLE_SIZE, end );
  1610 	JMP_TARGET(doublesize);
  1611 	load_xf_bank( R_EDX );
  1612 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1613 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1614 	ADD_imm8s_r32(-8,R_ECX);
  1615 	store_reg( R_ECX, Rn );
  1616 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1617 	JMP_TARGET(end);
  1618     } else {
  1619 	JMP_rel8( 15 + MEM_WRITE_DOUBLE_SIZE, end );
  1620 	JMP_TARGET(doublesize);
  1621 	load_fr_bank( R_EDX );
  1622 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1623 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1624 	ADD_imm8s_r32(-8,R_ECX);
  1625 	store_reg( R_ECX, Rn );
  1626 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1627 	JMP_TARGET(end);
  1629     sh4_x86.tstate = TSTATE_NONE;
  1630 :}
  1631 FMOV @Rm+, FRn {:
  1632     check_fpuen();
  1633     load_reg( R_ECX, Rm );
  1634     check_ralign32( R_ECX );
  1635     MOV_r32_r32( R_ECX, R_EAX );
  1636     load_spreg( R_EDX, R_FPSCR );
  1637     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1638     JNE_rel8(14 + MEM_READ_SIZE, doublesize);
  1639     ADD_imm8s_r32( 4, R_EAX );
  1640     store_reg( R_EAX, Rm );
  1641     MEM_READ_LONG( R_ECX, R_EAX );
  1642     load_fr_bank( R_EDX );
  1643     store_fr( R_EDX, R_EAX, FRn );
  1644     if( FRn&1 ) {
  1645 	JMP_rel8(27 + MEM_READ_DOUBLE_SIZE, end);
  1646 	JMP_TARGET(doublesize);
  1647 	ADD_imm8s_r32( 8, R_EAX );
  1648 	store_reg(R_EAX, Rm);
  1649 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1650 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1651 	load_xf_bank( R_EDX );
  1652 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1653 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1654 	JMP_TARGET(end);
  1655     } else {
  1656 	JMP_rel8(15 + MEM_READ_DOUBLE_SIZE, end);
  1657 	ADD_imm8s_r32( 8, R_EAX );
  1658 	store_reg(R_EAX, Rm);
  1659 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1660 	load_fr_bank( R_EDX );
  1661 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1662 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1663 	JMP_TARGET(end);
  1665     sh4_x86.tstate = TSTATE_NONE;
  1666 :}
  1667 FMOV FRm, @(R0, Rn) {:  
  1668     check_fpuen();
  1669     load_reg( R_ECX, Rn );
  1670     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  1671     check_walign32( R_ECX );
  1672     load_spreg( R_EDX, R_FPSCR );
  1673     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1674     JNE_rel8(8 + MEM_WRITE_SIZE, doublesize);
  1675     load_fr_bank( R_EDX );
  1676     load_fr( R_EDX, R_EAX, FRm );
  1677     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1678     if( FRm&1 ) {
  1679 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1680 	JMP_TARGET(doublesize);
  1681 	load_xf_bank( R_EDX );
  1682 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1683 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1684 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1685 	JMP_TARGET(end);
  1686     } else {
  1687 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1688 	JMP_TARGET(doublesize);
  1689 	load_fr_bank( R_EDX );
  1690 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1691 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1692 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1693 	JMP_TARGET(end);
  1695     sh4_x86.tstate = TSTATE_NONE;
  1696 :}
  1697 FMOV @(R0, Rm), FRn {:  
  1698     check_fpuen();
  1699     load_reg( R_ECX, Rm );
  1700     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  1701     check_ralign32( R_ECX );
  1702     load_spreg( R_EDX, R_FPSCR );
  1703     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1704     JNE_rel8(8 + MEM_READ_SIZE, doublesize);
  1705     MEM_READ_LONG( R_ECX, R_EAX );
  1706     load_fr_bank( R_EDX );
  1707     store_fr( R_EDX, R_EAX, FRn );
  1708     if( FRn&1 ) {
  1709 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1710 	JMP_TARGET(doublesize);
  1711 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1712 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1713 	load_xf_bank( R_EDX );
  1714 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1715 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1716 	JMP_TARGET(end);
  1717     } else {
  1718 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1719 	JMP_TARGET(doublesize);
  1720 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1721 	load_fr_bank( R_EDX );
  1722 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1723 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1724 	JMP_TARGET(end);
   1725     }
   1726     sh4_x86.tstate = TSTATE_NONE;
  1727 :}
  1728 FLDI0 FRn {:  /* IFF PR=0 */
  1729     check_fpuen();
  1730     load_spreg( R_ECX, R_FPSCR );
  1731     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1732     JNE_rel8(8, end);
  1733     XOR_r32_r32( R_EAX, R_EAX );
  1734     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1735     store_fr( R_ECX, R_EAX, FRn );
  1736     JMP_TARGET(end);
  1737     sh4_x86.tstate = TSTATE_NONE;
  1738 :}
  1739 FLDI1 FRn {:  /* IFF PR=0 */
  1740     check_fpuen();
  1741     load_spreg( R_ECX, R_FPSCR );
  1742     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1743     JNE_rel8(11, end);
  1744     load_imm32(R_EAX, 0x3F800000);
  1745     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1746     store_fr( R_ECX, R_EAX, FRn );
  1747     JMP_TARGET(end);
  1748     sh4_x86.tstate = TSTATE_NONE;
  1749 :}
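        /* Annotation: FLDI0/FLDI1 are defined only for PR=0, hence the
         * FPSCR_PR test that skips the store entirely in double-precision
         * mode. XOR_r32_r32 is the usual idiom for zero (0.0f), and
         * 0x3F800000 is the IEEE-754 single-precision encoding of 1.0.
         */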
  1751 FLOAT FPUL, FRn {:  
  1752     check_fpuen();
  1753     load_spreg( R_ECX, R_FPSCR );
  1754     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1755     FILD_sh4r(R_FPUL);
  1756     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1757     JNE_rel8(5, doubleprec);
  1758     pop_fr( R_EDX, FRn );
  1759     JMP_rel8(3, end);
  1760     JMP_TARGET(doubleprec);
  1761     pop_dr( R_EDX, FRn );
  1762     JMP_TARGET(end);
  1763     sh4_x86.tstate = TSTATE_NONE;
  1764 :}
  1765 FTRC FRm, FPUL {:  
  1766     check_fpuen();
  1767     load_spreg( R_ECX, R_FPSCR );
  1768     load_fr_bank( R_EDX );
  1769     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1770     JNE_rel8(5, doubleprec);
  1771     push_fr( R_EDX, FRm );
  1772     JMP_rel8(3, doop);
  1773     JMP_TARGET(doubleprec);
  1774     push_dr( R_EDX, FRm );
  1775     JMP_TARGET( doop );
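            // Out-of-range clamp: SH4 FTRC saturates to the integer limits,
            // whereas a bare x87 FISTP would store the "integer indefinite"
            // value (0x80000000), so the value is compared against max_int
            // and min_int first. The FNSTCW/FLDCW pair below temporarily
            // installs trunc_fcw, presumably a round-toward-zero control
            // word, to match FTRC's truncating semantics.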
  1776     load_imm32( R_ECX, (uint32_t)&max_int );
  1777     FILD_r32ind( R_ECX );
  1778     FCOMIP_st(1);
  1779     JNA_rel8( 32, sat );
  1780     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1781     FILD_r32ind( R_ECX );           // 2
  1782     FCOMIP_st(1);                   // 2
  1783     JAE_rel8( 21, sat2 );            // 2
  1784     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1785     FNSTCW_r32ind( R_EAX );
  1786     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1787     FLDCW_r32ind( R_EDX );
  1788     FISTP_sh4r(R_FPUL);             // 3
  1789     FLDCW_r32ind( R_EAX );
  1790     JMP_rel8( 9, end );             // 2
  1792     JMP_TARGET(sat);
  1793     JMP_TARGET(sat2);
  1794     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1795     store_spreg( R_ECX, R_FPUL );
  1796     FPOP_st();
  1797     JMP_TARGET(end);
  1798     sh4_x86.tstate = TSTATE_NONE;
  1799 :}
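        /* For reference, a C sketch of the conversion the FTRC template above
         * emits (sh4_ftrc_ref is illustrative only and not part of this file;
         * max_int/min_int are assumed to hold INT_MAX/INT_MIN):
         *
         *   int32_t sh4_ftrc_ref( double d ) {
         *       if( !(d < (double)max_int) ) return max_int; // NaN lands here too
         *       if( d <= (double)min_int ) return min_int;
         *       return (int32_t)d; // C truncation == round-toward-zero
         *   }
         */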
  1800 FLDS FRm, FPUL {:  
  1801     check_fpuen();
  1802     load_fr_bank( R_ECX );
  1803     load_fr( R_ECX, R_EAX, FRm );
  1804     store_spreg( R_EAX, R_FPUL );
  1805     sh4_x86.tstate = TSTATE_NONE;
  1806 :}
  1807 FSTS FPUL, FRn {:  
  1808     check_fpuen();
  1809     load_fr_bank( R_ECX );
  1810     load_spreg( R_EAX, R_FPUL );
  1811     store_fr( R_ECX, R_EAX, FRn );
  1812     sh4_x86.tstate = TSTATE_NONE;
  1813 :}
  1814 FCNVDS FRm, FPUL {:  
  1815     check_fpuen();
  1816     load_spreg( R_ECX, R_FPSCR );
  1817     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1818     JE_rel8(9, end); // only when PR=1
  1819     load_fr_bank( R_ECX );
  1820     push_dr( R_ECX, FRm );
  1821     pop_fpul();
  1822     JMP_TARGET(end);
  1823     sh4_x86.tstate = TSTATE_NONE;
  1824 :}
  1825 FCNVSD FPUL, FRn {:  
  1826     check_fpuen();
  1827     load_spreg( R_ECX, R_FPSCR );
  1828     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1829     JE_rel8(9, end); // only when PR=1
  1830     load_fr_bank( R_ECX );
  1831     push_fpul();
  1832     pop_dr( R_ECX, FRn );
  1833     JMP_TARGET(end);
  1834     sh4_x86.tstate = TSTATE_NONE;
  1835 :}
  1837 /* Floating point instructions */
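        /* The arithmetic templates below all share one shape: test FPSCR_PR
         * once, then emit both a single-precision path (push_fr/pop_fr around
         * the x87 operation) and a double-precision path (push_dr/pop_dr),
         * with the PR bit selecting between them at run time. As elsewhere,
         * the JMP_rel8/JNE_rel8 literals are hand-counted byte offsets.
         */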
  1838 FABS FRn {:  
  1839     check_fpuen();
  1840     load_spreg( R_ECX, R_FPSCR );
  1841     load_fr_bank( R_EDX );
  1842     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1843     JNE_rel8(10, doubleprec);
  1844     push_fr(R_EDX, FRn); // 3
  1845     FABS_st0(); // 2
  1846     pop_fr( R_EDX, FRn); //3
  1847     JMP_rel8(8,end); // 2
  1848     JMP_TARGET(doubleprec);
  1849     push_dr(R_EDX, FRn);
  1850     FABS_st0();
  1851     pop_dr(R_EDX, FRn);
  1852     JMP_TARGET(end);
  1853     sh4_x86.tstate = TSTATE_NONE;
  1854 :}
  1855 FADD FRm, FRn {:  
  1856     check_fpuen();
  1857     load_spreg( R_ECX, R_FPSCR );
  1858     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1859     load_fr_bank( R_EDX );
  1860     JNE_rel8(13,doubleprec);
  1861     push_fr(R_EDX, FRm);
  1862     push_fr(R_EDX, FRn);
  1863     FADDP_st(1);
  1864     pop_fr(R_EDX, FRn);
  1865     JMP_rel8(11,end);
  1866     JMP_TARGET(doubleprec);
  1867     push_dr(R_EDX, FRm);
  1868     push_dr(R_EDX, FRn);
  1869     FADDP_st(1);
  1870     pop_dr(R_EDX, FRn);
  1871     JMP_TARGET(end);
  1872     sh4_x86.tstate = TSTATE_NONE;
  1873 :}
  1874 FDIV FRm, FRn {:  
  1875     check_fpuen();
  1876     load_spreg( R_ECX, R_FPSCR );
  1877     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1878     load_fr_bank( R_EDX );
  1879     JNE_rel8(13, doubleprec);
  1880     push_fr(R_EDX, FRn);
  1881     push_fr(R_EDX, FRm);
  1882     FDIVP_st(1);
  1883     pop_fr(R_EDX, FRn);
  1884     JMP_rel8(11, end);
  1885     JMP_TARGET(doubleprec);
  1886     push_dr(R_EDX, FRn);
  1887     push_dr(R_EDX, FRm);
  1888     FDIVP_st(1);
  1889     pop_dr(R_EDX, FRn);
  1890     JMP_TARGET(end);
  1891     sh4_x86.tstate = TSTATE_NONE;
  1892 :}
  1893 FMAC FR0, FRm, FRn {:  
  1894     check_fpuen();
  1895     load_spreg( R_ECX, R_FPSCR );
  1896     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  1897     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1898     JNE_rel8(18, doubleprec);
  1899     push_fr( R_EDX, 0 );
  1900     push_fr( R_EDX, FRm );
  1901     FMULP_st(1);
  1902     push_fr( R_EDX, FRn );
  1903     FADDP_st(1);
  1904     pop_fr( R_EDX, FRn );
  1905     JMP_rel8(16, end);
  1906     JMP_TARGET(doubleprec);
  1907     push_dr( R_EDX, 0 );
  1908     push_dr( R_EDX, FRm );
  1909     FMULP_st(1);
  1910     push_dr( R_EDX, FRn );
  1911     FADDP_st(1);
  1912     pop_dr( R_EDX, FRn );
  1913     JMP_TARGET(end);
  1914     sh4_x86.tstate = TSTATE_NONE;
  1915 :}
  1917 FMUL FRm, FRn {:  
  1918     check_fpuen();
  1919     load_spreg( R_ECX, R_FPSCR );
  1920     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1921     load_fr_bank( R_EDX );
  1922     JNE_rel8(13, doubleprec);
  1923     push_fr(R_EDX, FRm);
  1924     push_fr(R_EDX, FRn);
  1925     FMULP_st(1);
  1926     pop_fr(R_EDX, FRn);
  1927     JMP_rel8(11, end);
  1928     JMP_TARGET(doubleprec);
  1929     push_dr(R_EDX, FRm);
  1930     push_dr(R_EDX, FRn);
  1931     FMULP_st(1);
  1932     pop_dr(R_EDX, FRn);
  1933     JMP_TARGET(end);
  1934     sh4_x86.tstate = TSTATE_NONE;
  1935 :}
  1936 FNEG FRn {:  
  1937     check_fpuen();
  1938     load_spreg( R_ECX, R_FPSCR );
  1939     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1940     load_fr_bank( R_EDX );
  1941     JNE_rel8(10, doubleprec);
  1942     push_fr(R_EDX, FRn);
  1943     FCHS_st0();
  1944     pop_fr(R_EDX, FRn);
  1945     JMP_rel8(8, end);
  1946     JMP_TARGET(doubleprec);
  1947     push_dr(R_EDX, FRn);
  1948     FCHS_st0();
  1949     pop_dr(R_EDX, FRn);
  1950     JMP_TARGET(end);
  1951     sh4_x86.tstate = TSTATE_NONE;
  1952 :}
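        /* FSRRA approximates 1/sqrt(FRn) on real hardware; the FLD1/FSQRT/
         * FDIVP sequence below computes it at full x87 precision instead,
         * which should only ever be more accurate than the hardware result.
         */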
  1953 FSRRA FRn {:  
  1954     check_fpuen();
  1955     load_spreg( R_ECX, R_FPSCR );
  1956     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1957     load_fr_bank( R_EDX );
  1958     JNE_rel8(12, end); // PR=0 only
  1959     FLD1_st0();
  1960     push_fr(R_EDX, FRn);
  1961     FSQRT_st0();
  1962     FDIVP_st(1);
  1963     pop_fr(R_EDX, FRn);
  1964     JMP_TARGET(end);
  1965     sh4_x86.tstate = TSTATE_NONE;
  1966 :}
  1967 FSQRT FRn {:  
  1968     check_fpuen();
  1969     load_spreg( R_ECX, R_FPSCR );
  1970     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1971     load_fr_bank( R_EDX );
  1972     JNE_rel8(10, doubleprec);
  1973     push_fr(R_EDX, FRn);
  1974     FSQRT_st0();
  1975     pop_fr(R_EDX, FRn);
  1976     JMP_rel8(8, end);
  1977     JMP_TARGET(doubleprec);
  1978     push_dr(R_EDX, FRn);
  1979     FSQRT_st0();
  1980     pop_dr(R_EDX, FRn);
  1981     JMP_TARGET(end);
  1982     sh4_x86.tstate = TSTATE_NONE;
  1983 :}
  1984 FSUB FRm, FRn {:  
  1985     check_fpuen();
  1986     load_spreg( R_ECX, R_FPSCR );
  1987     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1988     load_fr_bank( R_EDX );
  1989     JNE_rel8(13, doubleprec);
  1990     push_fr(R_EDX, FRn);
  1991     push_fr(R_EDX, FRm);
  1992     FSUBP_st(1);
  1993     pop_fr(R_EDX, FRn);
  1994     JMP_rel8(11, end);
  1995     JMP_TARGET(doubleprec);
  1996     push_dr(R_EDX, FRn);
  1997     push_dr(R_EDX, FRm);
  1998     FSUBP_st(1);
  1999     pop_dr(R_EDX, FRn);
  2000     JMP_TARGET(end);
  2001     sh4_x86.tstate = TSTATE_NONE;
  2002 :}
  2004 FCMP/EQ FRm, FRn {:  
  2005     check_fpuen();
  2006     load_spreg( R_ECX, R_FPSCR );
  2007     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2008     load_fr_bank( R_EDX );
  2009     JNE_rel8(8, doubleprec);
  2010     push_fr(R_EDX, FRm);
  2011     push_fr(R_EDX, FRn);
  2012     JMP_rel8(6, end);
  2013     JMP_TARGET(doubleprec);
  2014     push_dr(R_EDX, FRm);
  2015     push_dr(R_EDX, FRn);
  2016     JMP_TARGET(end);
  2017     FCOMIP_st(1);
  2018     SETE_t();
  2019     FPOP_st();
  2020     sh4_x86.tstate = TSTATE_NONE;
  2021 :}
  2022 FCMP/GT FRm, FRn {:  
  2023     check_fpuen();
  2024     load_spreg( R_ECX, R_FPSCR );
  2025     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2026     load_fr_bank( R_EDX );
  2027     JNE_rel8(8, doubleprec);
  2028     push_fr(R_EDX, FRm);
  2029     push_fr(R_EDX, FRn);
  2030     JMP_rel8(6, end);
  2031     JMP_TARGET(doubleprec);
  2032     push_dr(R_EDX, FRm);
  2033     push_dr(R_EDX, FRn);
  2034     JMP_TARGET(end);
  2035     FCOMIP_st(1);
  2036     SETA_t();
  2037     FPOP_st();
  2038     sh4_x86.tstate = TSTATE_NONE;
  2039 :}
  2041 FSCA FPUL, FRn {:  
  2042     check_fpuen();
  2043     load_spreg( R_ECX, R_FPSCR );
  2044     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2045     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
  2046     load_fr_bank( R_ECX );
  2047     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2048     load_spreg( R_EDX, R_FPUL );
  2049     call_func2( sh4_fsca, R_EDX, R_ECX );
  2050     JMP_TARGET(doubleprec);
  2051     sh4_x86.tstate = TSTATE_NONE;
  2052 :}
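        /* FIPR accumulates the four products FVm[i]*FVn[i] on the x87 stack
         * and leaves the inner product in the last element of FVn, i.e.
         * FR[(FVn<<2)+3]. The instruction is not defined for PR=1, so the
         * whole body is skipped in that case.
         */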
  2053 FIPR FVm, FVn {:  
  2054     check_fpuen();
  2055     load_spreg( R_ECX, R_FPSCR );
  2056     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2057     JNE_rel8(44, doubleprec);
  2059     load_fr_bank( R_ECX );
  2060     push_fr( R_ECX, FVm<<2 );
  2061     push_fr( R_ECX, FVn<<2 );
  2062     FMULP_st(1);
  2063     push_fr( R_ECX, (FVm<<2)+1);
  2064     push_fr( R_ECX, (FVn<<2)+1);
  2065     FMULP_st(1);
  2066     FADDP_st(1);
  2067     push_fr( R_ECX, (FVm<<2)+2);
  2068     push_fr( R_ECX, (FVn<<2)+2);
  2069     FMULP_st(1);
  2070     FADDP_st(1);
  2071     push_fr( R_ECX, (FVm<<2)+3);
  2072     push_fr( R_ECX, (FVn<<2)+3);
  2073     FMULP_st(1);
  2074     FADDP_st(1);
  2075     pop_fr( R_ECX, (FVn<<2)+3);
  2076     JMP_TARGET(doubleprec);
  2077     sh4_x86.tstate = TSTATE_NONE;
  2078 :}
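        /* FTRV multiplies the back-bank 4x4 matrix XMTRX by the vector FVn in
         * place. The template only computes the two operand pointers (front-
         * bank vector, XF-bank matrix) and calls the C helper sh4_ftrv; the
         * trailing byte counts justify the JNE_rel8 offset of
         * 18 + CALL_FUNC2_SIZE.
         */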
  2079 FTRV XMTRX, FVn {:  
  2080     check_fpuen();
  2081     load_spreg( R_ECX, R_FPSCR );
  2082     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2083     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
  2084     load_fr_bank( R_EDX );                 // 3
  2085     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2086     load_xf_bank( R_ECX );                 // 12
  2087     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2088     JMP_TARGET(doubleprec);
  2089     sh4_x86.tstate = TSTATE_NONE;
  2090 :}
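        /* FRCHG toggles FPSCR.FR, swapping the FR/XF banks, so the cached
         * fr_bank pointer must be refreshed via update_fr_bank. FSCHG only
         * toggles FPSCR.SZ, which the FMOV templates test at run time, so no
         * cached state needs updating there.
         */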
  2092 FRCHG {:  
  2093     check_fpuen();
  2094     load_spreg( R_ECX, R_FPSCR );
  2095     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2096     store_spreg( R_ECX, R_FPSCR );
  2097     update_fr_bank( R_ECX );
  2098     sh4_x86.tstate = TSTATE_NONE;
  2099 :}
  2100 FSCHG {:  
  2101     check_fpuen();
  2102     load_spreg( R_ECX, R_FPSCR );
  2103     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2104     store_spreg( R_ECX, R_FPSCR );
  2105     sh4_x86.tstate = TSTATE_NONE;
  2106 :}
  2108 /* Processor control instructions */
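        /* Writes to SR are routed through sh4_write_sr because they can flip
         * MD, RB and FD (privilege, register bank, FPU enable). The cached
         * priv_checked/fpuen_checked flags are reset afterwards so the next
         * instruction re-emits its checks, and LDC to SR is illegal in a
         * delay slot.
         */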
  2109 LDC Rm, SR {:
  2110     if( sh4_x86.in_delay_slot ) {
  2111 	SLOTILLEGAL();
  2112     } else {
  2113 	check_priv();
  2114 	load_reg( R_EAX, Rm );
  2115 	call_func1( sh4_write_sr, R_EAX );
  2116 	sh4_x86.priv_checked = FALSE;
  2117 	sh4_x86.fpuen_checked = FALSE;
   2118 	sh4_x86.tstate = TSTATE_NONE;
   2119     }
  2120 :}
  2121 LDC Rm, GBR {: 
  2122     load_reg( R_EAX, Rm );
  2123     store_spreg( R_EAX, R_GBR );
  2124 :}
  2125 LDC Rm, VBR {:  
  2126     check_priv();
  2127     load_reg( R_EAX, Rm );
  2128     store_spreg( R_EAX, R_VBR );
  2129     sh4_x86.tstate = TSTATE_NONE;
  2130 :}
  2131 LDC Rm, SSR {:  
  2132     check_priv();
  2133     load_reg( R_EAX, Rm );
  2134     store_spreg( R_EAX, R_SSR );
  2135     sh4_x86.tstate = TSTATE_NONE;
  2136 :}
  2137 LDC Rm, SGR {:  
  2138     check_priv();
  2139     load_reg( R_EAX, Rm );
  2140     store_spreg( R_EAX, R_SGR );
  2141     sh4_x86.tstate = TSTATE_NONE;
  2142 :}
  2143 LDC Rm, SPC {:  
  2144     check_priv();
  2145     load_reg( R_EAX, Rm );
  2146     store_spreg( R_EAX, R_SPC );
  2147     sh4_x86.tstate = TSTATE_NONE;
  2148 :}
  2149 LDC Rm, DBR {:  
  2150     check_priv();
  2151     load_reg( R_EAX, Rm );
  2152     store_spreg( R_EAX, R_DBR );
  2153     sh4_x86.tstate = TSTATE_NONE;
  2154 :}
  2155 LDC Rm, Rn_BANK {:  
  2156     check_priv();
  2157     load_reg( R_EAX, Rm );
  2158     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2159     sh4_x86.tstate = TSTATE_NONE;
  2160 :}
  2161 LDC.L @Rm+, GBR {:  
  2162     load_reg( R_EAX, Rm );
  2163     check_ralign32( R_EAX );
  2164     MOV_r32_r32( R_EAX, R_ECX );
  2165     ADD_imm8s_r32( 4, R_EAX );
  2166     store_reg( R_EAX, Rm );
  2167     MEM_READ_LONG( R_ECX, R_EAX );
  2168     store_spreg( R_EAX, R_GBR );
  2169     sh4_x86.tstate = TSTATE_NONE;
  2170 :}
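        /* The LDC.L/LDS.L templates share one post-increment pattern: keep
         * the original address in ECX for the load, write Rm+4 back first,
         * then perform the read. Note the write-back precedes MEM_READ_LONG,
         * so a faulting read would leave Rm already incremented, which
         * presumably differs from the hardware's restartable behaviour.
         */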
  2171 LDC.L @Rm+, SR {:
  2172     if( sh4_x86.in_delay_slot ) {
  2173 	SLOTILLEGAL();
  2174     } else {
  2175 	check_priv();
  2176 	load_reg( R_EAX, Rm );
  2177 	check_ralign32( R_EAX );
  2178 	MOV_r32_r32( R_EAX, R_ECX );
  2179 	ADD_imm8s_r32( 4, R_EAX );
  2180 	store_reg( R_EAX, Rm );
  2181 	MEM_READ_LONG( R_ECX, R_EAX );
  2182 	call_func1( sh4_write_sr, R_EAX );
  2183 	sh4_x86.priv_checked = FALSE;
  2184 	sh4_x86.fpuen_checked = FALSE;
   2185 	sh4_x86.tstate = TSTATE_NONE;
   2186     }
  2187 :}
  2188 LDC.L @Rm+, VBR {:  
  2189     check_priv();
  2190     load_reg( R_EAX, Rm );
  2191     check_ralign32( R_EAX );
  2192     MOV_r32_r32( R_EAX, R_ECX );
  2193     ADD_imm8s_r32( 4, R_EAX );
  2194     store_reg( R_EAX, Rm );
  2195     MEM_READ_LONG( R_ECX, R_EAX );
  2196     store_spreg( R_EAX, R_VBR );
  2197     sh4_x86.tstate = TSTATE_NONE;
  2198 :}
  2199 LDC.L @Rm+, SSR {:
  2200     check_priv();
  2201     load_reg( R_EAX, Rm );
  2202     check_ralign32( R_EAX );
  2203     MOV_r32_r32( R_EAX, R_ECX );
  2204     ADD_imm8s_r32( 4, R_EAX );
  2205     store_reg( R_EAX, Rm );
  2206     MEM_READ_LONG( R_ECX, R_EAX );
  2207     store_spreg( R_EAX, R_SSR );
  2208     sh4_x86.tstate = TSTATE_NONE;
  2209 :}
  2210 LDC.L @Rm+, SGR {:  
  2211     check_priv();
  2212     load_reg( R_EAX, Rm );
  2213     check_ralign32( R_EAX );
  2214     MOV_r32_r32( R_EAX, R_ECX );
  2215     ADD_imm8s_r32( 4, R_EAX );
  2216     store_reg( R_EAX, Rm );
  2217     MEM_READ_LONG( R_ECX, R_EAX );
  2218     store_spreg( R_EAX, R_SGR );
  2219     sh4_x86.tstate = TSTATE_NONE;
  2220 :}
  2221 LDC.L @Rm+, SPC {:  
  2222     check_priv();
  2223     load_reg( R_EAX, Rm );
  2224     check_ralign32( R_EAX );
  2225     MOV_r32_r32( R_EAX, R_ECX );
  2226     ADD_imm8s_r32( 4, R_EAX );
  2227     store_reg( R_EAX, Rm );
  2228     MEM_READ_LONG( R_ECX, R_EAX );
  2229     store_spreg( R_EAX, R_SPC );
  2230     sh4_x86.tstate = TSTATE_NONE;
  2231 :}
  2232 LDC.L @Rm+, DBR {:  
  2233     check_priv();
  2234     load_reg( R_EAX, Rm );
  2235     check_ralign32( R_EAX );
  2236     MOV_r32_r32( R_EAX, R_ECX );
  2237     ADD_imm8s_r32( 4, R_EAX );
  2238     store_reg( R_EAX, Rm );
  2239     MEM_READ_LONG( R_ECX, R_EAX );
  2240     store_spreg( R_EAX, R_DBR );
  2241     sh4_x86.tstate = TSTATE_NONE;
  2242 :}
  2243 LDC.L @Rm+, Rn_BANK {:  
  2244     check_priv();
  2245     load_reg( R_EAX, Rm );
  2246     check_ralign32( R_EAX );
  2247     MOV_r32_r32( R_EAX, R_ECX );
  2248     ADD_imm8s_r32( 4, R_EAX );
  2249     store_reg( R_EAX, Rm );
  2250     MEM_READ_LONG( R_ECX, R_EAX );
  2251     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2252     sh4_x86.tstate = TSTATE_NONE;
  2253 :}
  2254 LDS Rm, FPSCR {:  
  2255     load_reg( R_EAX, Rm );
  2256     store_spreg( R_EAX, R_FPSCR );
  2257     update_fr_bank( R_EAX );
  2258     sh4_x86.tstate = TSTATE_NONE;
  2259 :}
  2260 LDS.L @Rm+, FPSCR {:  
  2261     load_reg( R_EAX, Rm );
  2262     check_ralign32( R_EAX );
  2263     MOV_r32_r32( R_EAX, R_ECX );
  2264     ADD_imm8s_r32( 4, R_EAX );
  2265     store_reg( R_EAX, Rm );
  2266     MEM_READ_LONG( R_ECX, R_EAX );
  2267     store_spreg( R_EAX, R_FPSCR );
  2268     update_fr_bank( R_EAX );
  2269     sh4_x86.tstate = TSTATE_NONE;
  2270 :}
  2271 LDS Rm, FPUL {:  
  2272     load_reg( R_EAX, Rm );
  2273     store_spreg( R_EAX, R_FPUL );
  2274 :}
  2275 LDS.L @Rm+, FPUL {:  
  2276     load_reg( R_EAX, Rm );
  2277     check_ralign32( R_EAX );
  2278     MOV_r32_r32( R_EAX, R_ECX );
  2279     ADD_imm8s_r32( 4, R_EAX );
  2280     store_reg( R_EAX, Rm );
  2281     MEM_READ_LONG( R_ECX, R_EAX );
  2282     store_spreg( R_EAX, R_FPUL );
  2283     sh4_x86.tstate = TSTATE_NONE;
  2284 :}
  2285 LDS Rm, MACH {: 
  2286     load_reg( R_EAX, Rm );
  2287     store_spreg( R_EAX, R_MACH );
  2288 :}
  2289 LDS.L @Rm+, MACH {:  
  2290     load_reg( R_EAX, Rm );
  2291     check_ralign32( R_EAX );
  2292     MOV_r32_r32( R_EAX, R_ECX );
  2293     ADD_imm8s_r32( 4, R_EAX );
  2294     store_reg( R_EAX, Rm );
  2295     MEM_READ_LONG( R_ECX, R_EAX );
  2296     store_spreg( R_EAX, R_MACH );
  2297     sh4_x86.tstate = TSTATE_NONE;
  2298 :}
  2299 LDS Rm, MACL {:  
  2300     load_reg( R_EAX, Rm );
  2301     store_spreg( R_EAX, R_MACL );
  2302 :}
  2303 LDS.L @Rm+, MACL {:  
  2304     load_reg( R_EAX, Rm );
  2305     check_ralign32( R_EAX );
  2306     MOV_r32_r32( R_EAX, R_ECX );
  2307     ADD_imm8s_r32( 4, R_EAX );
  2308     store_reg( R_EAX, Rm );
  2309     MEM_READ_LONG( R_ECX, R_EAX );
  2310     store_spreg( R_EAX, R_MACL );
  2311     sh4_x86.tstate = TSTATE_NONE;
  2312 :}
  2313 LDS Rm, PR {:  
  2314     load_reg( R_EAX, Rm );
  2315     store_spreg( R_EAX, R_PR );
  2316 :}
  2317 LDS.L @Rm+, PR {:  
  2318     load_reg( R_EAX, Rm );
  2319     check_ralign32( R_EAX );
  2320     MOV_r32_r32( R_EAX, R_ECX );
  2321     ADD_imm8s_r32( 4, R_EAX );
  2322     store_reg( R_EAX, Rm );
  2323     MEM_READ_LONG( R_ECX, R_EAX );
  2324     store_spreg( R_EAX, R_PR );
  2325     sh4_x86.tstate = TSTATE_NONE;
  2326 :}
  2327 LDTLB {:  
  2328     call_func0( MMU_ldtlb );
  2329 :}
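        /* OCBI/OCBP/OCBWB are cache-block operations with no observable
         * effect on the emulated memory model, so the templates deliberately
         * emit nothing.
         */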
  2330 OCBI @Rn {:  :}
  2331 OCBP @Rn {:  :}
  2332 OCBWB @Rn {:  :}
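        /* PREF only has a visible effect for store-queue addresses: masking
         * with 0xFC000000 and comparing against 0xE0000000 tests for the
         * 0xE0000000-0xE3FFFFFF SQ region, in which case the queue is flushed
         * to memory via sh4_flush_store_queue.
         */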
  2333 PREF @Rn {:
  2334     load_reg( R_EAX, Rn );
  2335     MOV_r32_r32( R_EAX, R_ECX );
  2336     AND_imm32_r32( 0xFC000000, R_EAX );
  2337     CMP_imm32_r32( 0xE0000000, R_EAX );
  2338     JNE_rel8(CALL_FUNC1_SIZE, end);
  2339     call_func1( sh4_flush_store_queue, R_ECX );
  2340     JMP_TARGET(end);
  2341     sh4_x86.tstate = TSTATE_NONE;
  2342 :}
  2343 SLEEP {: 
  2344     check_priv();
  2345     call_func0( sh4_sleep );
  2346     sh4_x86.tstate = TSTATE_NONE;
  2347     sh4_x86.in_delay_slot = FALSE;
  2348     return 2;
  2349 :}
  2350 STC SR, Rn {:
  2351     check_priv();
  2352     call_func0(sh4_read_sr);
  2353     store_reg( R_EAX, Rn );
  2354     sh4_x86.tstate = TSTATE_NONE;
  2355 :}
  2356 STC GBR, Rn {:  
  2357     load_spreg( R_EAX, R_GBR );
  2358     store_reg( R_EAX, Rn );
  2359 :}
  2360 STC VBR, Rn {:  
  2361     check_priv();
  2362     load_spreg( R_EAX, R_VBR );
  2363     store_reg( R_EAX, Rn );
  2364     sh4_x86.tstate = TSTATE_NONE;
  2365 :}
  2366 STC SSR, Rn {:  
  2367     check_priv();
  2368     load_spreg( R_EAX, R_SSR );
  2369     store_reg( R_EAX, Rn );
  2370     sh4_x86.tstate = TSTATE_NONE;
  2371 :}
  2372 STC SPC, Rn {:  
  2373     check_priv();
  2374     load_spreg( R_EAX, R_SPC );
  2375     store_reg( R_EAX, Rn );
  2376     sh4_x86.tstate = TSTATE_NONE;
  2377 :}
  2378 STC SGR, Rn {:  
  2379     check_priv();
  2380     load_spreg( R_EAX, R_SGR );
  2381     store_reg( R_EAX, Rn );
  2382     sh4_x86.tstate = TSTATE_NONE;
  2383 :}
  2384 STC DBR, Rn {:  
  2385     check_priv();
  2386     load_spreg( R_EAX, R_DBR );
  2387     store_reg( R_EAX, Rn );
  2388     sh4_x86.tstate = TSTATE_NONE;
  2389 :}
  2390 STC Rm_BANK, Rn {:
  2391     check_priv();
  2392     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2393     store_reg( R_EAX, Rn );
  2394     sh4_x86.tstate = TSTATE_NONE;
  2395 :}
  2396 STC.L SR, @-Rn {:
  2397     check_priv();
  2398     call_func0( sh4_read_sr );
  2399     load_reg( R_ECX, Rn );
  2400     check_walign32( R_ECX );
  2401     ADD_imm8s_r32( -4, R_ECX );
  2402     store_reg( R_ECX, Rn );
  2403     MEM_WRITE_LONG( R_ECX, R_EAX );
  2404     sh4_x86.tstate = TSTATE_NONE;
  2405 :}
  2406 STC.L VBR, @-Rn {:  
  2407     check_priv();
  2408     load_reg( R_ECX, Rn );
  2409     check_walign32( R_ECX );
  2410     ADD_imm8s_r32( -4, R_ECX );
  2411     store_reg( R_ECX, Rn );
  2412     load_spreg( R_EAX, R_VBR );
  2413     MEM_WRITE_LONG( R_ECX, R_EAX );
  2414     sh4_x86.tstate = TSTATE_NONE;
  2415 :}
  2416 STC.L SSR, @-Rn {:  
  2417     check_priv();
  2418     load_reg( R_ECX, Rn );
  2419     check_walign32( R_ECX );
  2420     ADD_imm8s_r32( -4, R_ECX );
  2421     store_reg( R_ECX, Rn );
  2422     load_spreg( R_EAX, R_SSR );
  2423     MEM_WRITE_LONG( R_ECX, R_EAX );
  2424     sh4_x86.tstate = TSTATE_NONE;
  2425 :}
  2426 STC.L SPC, @-Rn {:
  2427     check_priv();
  2428     load_reg( R_ECX, Rn );
  2429     check_walign32( R_ECX );
  2430     ADD_imm8s_r32( -4, R_ECX );
  2431     store_reg( R_ECX, Rn );
  2432     load_spreg( R_EAX, R_SPC );
  2433     MEM_WRITE_LONG( R_ECX, R_EAX );
  2434     sh4_x86.tstate = TSTATE_NONE;
  2435 :}
  2436 STC.L SGR, @-Rn {:  
  2437     check_priv();
  2438     load_reg( R_ECX, Rn );
  2439     check_walign32( R_ECX );
  2440     ADD_imm8s_r32( -4, R_ECX );
  2441     store_reg( R_ECX, Rn );
  2442     load_spreg( R_EAX, R_SGR );
  2443     MEM_WRITE_LONG( R_ECX, R_EAX );
  2444     sh4_x86.tstate = TSTATE_NONE;
  2445 :}
  2446 STC.L DBR, @-Rn {:  
  2447     check_priv();
  2448     load_reg( R_ECX, Rn );
  2449     check_walign32( R_ECX );
  2450     ADD_imm8s_r32( -4, R_ECX );
  2451     store_reg( R_ECX, Rn );
  2452     load_spreg( R_EAX, R_DBR );
  2453     MEM_WRITE_LONG( R_ECX, R_EAX );
  2454     sh4_x86.tstate = TSTATE_NONE;
  2455 :}
  2456 STC.L Rm_BANK, @-Rn {:  
  2457     check_priv();
  2458     load_reg( R_ECX, Rn );
  2459     check_walign32( R_ECX );
  2460     ADD_imm8s_r32( -4, R_ECX );
  2461     store_reg( R_ECX, Rn );
  2462     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2463     MEM_WRITE_LONG( R_ECX, R_EAX );
  2464     sh4_x86.tstate = TSTATE_NONE;
  2465 :}
  2466 STC.L GBR, @-Rn {:  
  2467     load_reg( R_ECX, Rn );
  2468     check_walign32( R_ECX );
  2469     ADD_imm8s_r32( -4, R_ECX );
  2470     store_reg( R_ECX, Rn );
  2471     load_spreg( R_EAX, R_GBR );
  2472     MEM_WRITE_LONG( R_ECX, R_EAX );
  2473     sh4_x86.tstate = TSTATE_NONE;
  2474 :}
  2475 STS FPSCR, Rn {:  
  2476     load_spreg( R_EAX, R_FPSCR );
  2477     store_reg( R_EAX, Rn );
  2478 :}
  2479 STS.L FPSCR, @-Rn {:  
  2480     load_reg( R_ECX, Rn );
  2481     check_walign32( R_ECX );
  2482     ADD_imm8s_r32( -4, R_ECX );
  2483     store_reg( R_ECX, Rn );
  2484     load_spreg( R_EAX, R_FPSCR );
  2485     MEM_WRITE_LONG( R_ECX, R_EAX );
  2486     sh4_x86.tstate = TSTATE_NONE;
  2487 :}
  2488 STS FPUL, Rn {:  
  2489     load_spreg( R_EAX, R_FPUL );
  2490     store_reg( R_EAX, Rn );
  2491 :}
  2492 STS.L FPUL, @-Rn {:  
  2493     load_reg( R_ECX, Rn );
  2494     check_walign32( R_ECX );
  2495     ADD_imm8s_r32( -4, R_ECX );
  2496     store_reg( R_ECX, Rn );
  2497     load_spreg( R_EAX, R_FPUL );
  2498     MEM_WRITE_LONG( R_ECX, R_EAX );
  2499     sh4_x86.tstate = TSTATE_NONE;
  2500 :}
  2501 STS MACH, Rn {:  
  2502     load_spreg( R_EAX, R_MACH );
  2503     store_reg( R_EAX, Rn );
  2504 :}
  2505 STS.L MACH, @-Rn {:  
  2506     load_reg( R_ECX, Rn );
  2507     check_walign32( R_ECX );
  2508     ADD_imm8s_r32( -4, R_ECX );
  2509     store_reg( R_ECX, Rn );
  2510     load_spreg( R_EAX, R_MACH );
  2511     MEM_WRITE_LONG( R_ECX, R_EAX );
  2512     sh4_x86.tstate = TSTATE_NONE;
  2513 :}
  2514 STS MACL, Rn {:  
  2515     load_spreg( R_EAX, R_MACL );
  2516     store_reg( R_EAX, Rn );
  2517 :}
  2518 STS.L MACL, @-Rn {:  
  2519     load_reg( R_ECX, Rn );
  2520     check_walign32( R_ECX );
  2521     ADD_imm8s_r32( -4, R_ECX );
  2522     store_reg( R_ECX, Rn );
  2523     load_spreg( R_EAX, R_MACL );
  2524     MEM_WRITE_LONG( R_ECX, R_EAX );
  2525     sh4_x86.tstate = TSTATE_NONE;
  2526 :}
  2527 STS PR, Rn {:  
  2528     load_spreg( R_EAX, R_PR );
  2529     store_reg( R_EAX, Rn );
  2530 :}
  2531 STS.L PR, @-Rn {:  
  2532     load_reg( R_ECX, Rn );
  2533     check_walign32( R_ECX );
  2534     ADD_imm8s_r32( -4, R_ECX );
  2535     store_reg( R_ECX, Rn );
  2536     load_spreg( R_EAX, R_PR );
  2537     MEM_WRITE_LONG( R_ECX, R_EAX );
  2538     sh4_x86.tstate = TSTATE_NONE;
  2539 :}
  2541 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2542 %%
  2543     sh4_x86.in_delay_slot = FALSE;
  2544     return 0;