Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 550:a27e31340147
prev547:d6e00ffc4adc
next553:4e6166258c22
author nkeynes
date Thu Dec 06 10:43:30 2007 +0000 (12 years ago)
permissions -rw-r--r--
last change Add support for the MMIO side of the TLB (and LDTLB)
view annotate diff log raw
     1 /**
     2  * $Id: sh4x86.in,v 1.20 2007-11-08 11:54:16 nkeynes Exp $
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
    37 /** 
    38  * Struct to manage internal translation state. This state is not saved -
    39  * it is only valid between calls to sh4_translate_begin_block() and
    40  * sh4_translate_end_block()
    41  */
    42 struct sh4_x86_state {
    43     gboolean in_delay_slot;
    44     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    45     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    46     gboolean branch_taken; /* true if we branched unconditionally */
    47     uint32_t block_start_pc;
    48     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    49     int tstate;
    51     /* Allocated memory for the (block-wide) back-patch list */
    52     uint32_t **backpatch_list;
    53     uint32_t backpatch_posn;
    54     uint32_t backpatch_size;
    55 };
    57 #define TSTATE_NONE -1
    58 #define TSTATE_O    0
    59 #define TSTATE_C    2
    60 #define TSTATE_E    4
    61 #define TSTATE_NE   5
    62 #define TSTATE_G    0xF
    63 #define TSTATE_GE   0xD
    64 #define TSTATE_A    7
    65 #define TSTATE_AE   3
    67 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    68 #define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    69 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    70     OP(0x70+sh4_x86.tstate); OP(rel8); \
    71     MARK_JMP(rel8,label)
    72 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    73 #define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    74 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    75     OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    76     MARK_JMP(rel8, label)
    79 #define EXIT_DATA_ADDR_READ 0
    80 #define EXIT_DATA_ADDR_WRITE 7
    81 #define EXIT_ILLEGAL 14
    82 #define EXIT_SLOT_ILLEGAL 21
    83 #define EXIT_FPU_DISABLED 28
    84 #define EXIT_SLOT_FPU_DISABLED 35
    86 static struct sh4_x86_state sh4_x86;
    88 static uint32_t max_int = 0x7FFFFFFF;
    89 static uint32_t min_int = 0x80000000;
    90 static uint32_t save_fcw; /* save value for fpu control word */
    91 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    93 void sh4_x86_init()
    94 {
    95     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    96     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
    97 }
   100 static void sh4_x86_add_backpatch( uint8_t *ptr )
   101 {
   102     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   103 	sh4_x86.backpatch_size <<= 1;
   104 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
   105 	assert( sh4_x86.backpatch_list != NULL );
   106     }
   107     sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
   108 }
   110 static void sh4_x86_do_backpatch( uint8_t *reloc_base )
   111 {
   112     unsigned int i;
   113     for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
   114 	*sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
   115     }
   116 }
   118 /**
   119  * Emit an instruction to load an SH4 reg into a real register
   120  */
static inline void load_reg( int x86reg, int sh4reg ) 
{
    /* mov [bp+n], reg */
    OP(0x8B);                  // MOV r32, r/m32
    OP(0x45 + (x86reg<<3));    // ModRM: mod=01 [ebp+disp8], reg field = x86reg
    OP(REG_OFFSET(r[sh4reg])); // disp8 = offset of sh4r.r[sh4reg] from ebp
}
/* Load the low 16 bits of an SH4 reg into x86reg, sign-extended to 32 bits */
static inline void load_reg16s( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xBF); // MOVSX r32, r/m16
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
/* Load the low 16 bits of an SH4 reg into x86reg, zero-extended to 32 bits */
static inline void load_reg16u( int x86reg, int sh4reg )
{
    OP(0x0F);
    OP(0xB7); // MOVZX r32, r/m16
    MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
}
   144 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   145 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   146 /**
   147  * Emit an instruction to load an immediate value into a register
   148  */
static inline void load_imm32( int x86reg, uint32_t value ) {
    /* mov #value, reg */
    OP(0xB8 + x86reg); // MOV r32, imm32 - opcode byte encodes the register
    OP32(value);
}
   155 /**
   156  * Load an immediate 64-bit quantity (note: x86-64 only)
   157  */
   158 static inline void load_imm64( int x86reg, uint32_t value ) {
   159     /* mov #value, reg */
   160     REXW();
   161     OP(0xB8 + x86reg);
   162     OP64(value);
   163 }
   166 /**
   167  * Emit an instruction to store an SH4 reg (RN)
   168  */
   169 void static inline store_reg( int x86reg, int sh4reg ) {
   170     /* mov reg, [bp+n] */
   171     OP(0x89);
   172     OP(0x45 + (x86reg<<3));
   173     OP(REG_OFFSET(r[sh4reg]));
   174 }
   176 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   178 /**
   179  * Load an FR register (single-precision floating point) into an integer x86
   180  * register (eg for register-to-register moves)
   181  */
   182 void static inline load_fr( int bankreg, int x86reg, int frm )
   183 {
   184     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   185 }
   187 /**
   188  * Store an FR register (single-precision floating point) into an integer x86
   189  * register (eg for register-to-register moves)
   190  */
   191 void static inline store_fr( int bankreg, int x86reg, int frn )
   192 {
   193     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   194 }
   197 /**
   198  * Load a pointer to the back fp back into the specified x86 register. The
   199  * bankreg must have been previously loaded with FPSCR.
   200  * NB: 12 bytes
   201  */
static inline void load_xf_bank( int bankreg )
{
    NOT_r32( bankreg );                // Invert FPSCR so the *other* (back) bank is selected
    SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
}
   210 /**
   211  * Update the fr_bank pointer based on the current fpscr value.
   212  */
/* Recompute sh4r.fr_bank from the FPSCR value in fpscrreg. Clobbers fpscrreg. */
static inline void update_fr_bank( int fpscrreg )
{
    SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
}
   220 /**
   221  * Push FPUL (as a 32-bit float) onto the FPU stack
   222  */
static inline void push_fpul( )
{
    OP(0xD9); OP(0x45); OP(R_FPUL); // FLD.S [ebp+R_FPUL]
}
   228 /**
   229  * Pop FPUL (as a 32-bit float) from the FPU stack
   230  */
static inline void pop_fpul( )
{
    OP(0xD9); OP(0x5D); OP(R_FPUL); // FSTP.S [ebp+R_FPUL] (D9 /3 stores and pops)
}
   236 /**
   237  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   238  * with the location of the current fp bank.
   239  */
static inline void push_fr( int bankreg, int frm ) 
{
    OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
}
   245 /**
   246  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   247  * with bankreg previously loaded with the location of the current fp bank.
   248  */
static inline void pop_fr( int bankreg, int frm )
{
    OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4] (D9 /3 stores and pops)
}
   254 /**
   255  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   256  * with the location of the current fp bank.
   257  */
static inline void push_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
}
/* Pop a 64-bit double from the FPU stack into the fp bank (counterpart of push_dr) */
static inline void pop_dr( int bankreg, int frm )
{
    OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4] (DD /3 stores and pops)
}
   268 /* Exception checks - Note that all exception checks will clobber EAX */
   269 #define precheck() load_imm32(R_EDX, (pc-sh4_x86.block_start_pc-(sh4_x86.in_delay_slot?2:0))>>1)
   271 #define check_priv( ) \
   272     if( !sh4_x86.priv_checked ) { \
   273 	sh4_x86.priv_checked = TRUE;\
   274 	precheck();\
   275 	load_spreg( R_EAX, R_SR );\
   276 	AND_imm32_r32( SR_MD, R_EAX );\
   277 	if( sh4_x86.in_delay_slot ) {\
   278 	    JE_exit( EXIT_SLOT_ILLEGAL );\
   279 	} else {\
   280 	    JE_exit( EXIT_ILLEGAL );\
   281 	}\
   282     }\
/**
 * Emit a privilege check (SR.MD) without the preceding precheck() PC setup
 * (for callers that have already emitted it). Emitted at most once per
 * block via the priv_checked latch. Clobbers EAX.
 */
static void check_priv_no_precheck()
{
    if( !sh4_x86.priv_checked ) {
	sh4_x86.priv_checked = TRUE;
	load_spreg( R_EAX, R_SR );
	AND_imm32_r32( SR_MD, R_EAX ); // SR.MD clear => user mode => illegal instruction
	if( sh4_x86.in_delay_slot ) {
	    JE_exit( EXIT_SLOT_ILLEGAL );
	} else {
	    JE_exit( EXIT_ILLEGAL );
	}
    }
}
   299 #define check_fpuen( ) \
   300     if( !sh4_x86.fpuen_checked ) {\
   301 	sh4_x86.fpuen_checked = TRUE;\
   302 	precheck();\
   303 	load_spreg( R_EAX, R_SR );\
   304 	AND_imm32_r32( SR_FD, R_EAX );\
   305 	if( sh4_x86.in_delay_slot ) {\
   306 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);\
   307 	} else {\
   308 	    JNE_exit(EXIT_FPU_DISABLED);\
   309 	}\
   310     }
/**
 * Emit an FPU-enabled check (SR.FD) without the preceding precheck() PC
 * setup. Emitted at most once per block via the fpuen_checked latch.
 * Clobbers EAX.
 */
static void check_fpuen_no_precheck()
{
    if( !sh4_x86.fpuen_checked ) {
	sh4_x86.fpuen_checked = TRUE;
	load_spreg( R_EAX, R_SR );
	AND_imm32_r32( SR_FD, R_EAX ); // SR.FD set => FPU disabled => raise exception
	if( sh4_x86.in_delay_slot ) {
	    JNE_exit(EXIT_SLOT_FPU_DISABLED);
	} else {
	    JNE_exit(EXIT_FPU_DISABLED);
	}
    }
}
/* Emit a 16-bit read alignment check: exit with a data-address-read error
 * if the address in x86reg is not 2-byte aligned. */
static void check_ralign16( int x86reg )
{
    TEST_imm32_r32( 0x00000001, x86reg );
    JNE_exit(EXIT_DATA_ADDR_READ);
}
/* Emit a 16-bit write alignment check: exit with a data-address-write error
 * if the address in x86reg is not 2-byte aligned. */
static void check_walign16( int x86reg )
{
    TEST_imm32_r32( 0x00000001, x86reg );
    JNE_exit(EXIT_DATA_ADDR_WRITE);
}
/* Emit a 32-bit read alignment check: exit with a data-address-read error
 * if the address in x86reg is not 4-byte aligned. */
static void check_ralign32( int x86reg )
{
    TEST_imm32_r32( 0x00000003, x86reg );
    JNE_exit(EXIT_DATA_ADDR_READ);
}
/* Emit a 32-bit write alignment check: exit with a data-address-write error
 * if the address in x86reg is not 4-byte aligned. */
static void check_walign32( int x86reg )
{
    TEST_imm32_r32( 0x00000003, x86reg );
    JNE_exit(EXIT_DATA_ADDR_WRITE);
}
   350 #define UNDEF()
   351 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   352 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   353 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   354 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   355 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   356 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   357 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   359 #define SLOTILLEGAL() precheck(); JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
   361 extern uint16_t *sh4_icache;
   362 extern uint32_t sh4_icache_addr;
   364 /****** Import appropriate calling conventions ******/
   365 #if SH4_TRANSLATOR == TARGET_X86_64
   366 #include "sh4/ia64abi.h"
   367 #else /* SH4_TRANSLATOR == TARGET_X86 */
   368 #ifdef APPLE_BUILD
   369 #include "sh4/ia32mac.h"
   370 #else
   371 #include "sh4/ia32abi.h"
   372 #endif
   373 #endif
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and its delayed instruction as a single
 * unit (since the delay-slot instruction must execute before the branch
 * takes effect).
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an instruction that otherwise ends the block).
 */
   384 uint32_t sh4_translate_instruction( sh4addr_t pc )
   385 {
   386     uint32_t ir;
   387     /* Read instruction */
   388     uint32_t pageaddr = pc >> 12;
   389     if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
   390 	ir = sh4_icache[(pc&0xFFF)>>1];
   391     } else {
   392 	sh4_icache = (uint16_t *)mem_get_page(pc);
   393 	if( ((uintptr_t)sh4_icache) < MAX_IO_REGIONS ) {
   394 	    /* If someone's actually been so daft as to try to execute out of an IO
   395 	     * region, fallback on the full-blown memory read
   396 	     */
   397 	    sh4_icache = NULL;
   398 	    ir = sh4_read_word(pc);
   399 	} else {
   400 	    sh4_icache_addr = pageaddr;
   401 	    ir = sh4_icache[(pc&0xFFF)>>1];
   402 	}
   403     }
   405 %%
   406 /* ALU operations */
   407 ADD Rm, Rn {:
   408     load_reg( R_EAX, Rm );
   409     load_reg( R_ECX, Rn );
   410     ADD_r32_r32( R_EAX, R_ECX );
   411     store_reg( R_ECX, Rn );
   412     sh4_x86.tstate = TSTATE_NONE;
   413 :}
   414 ADD #imm, Rn {:  
   415     load_reg( R_EAX, Rn );
   416     ADD_imm8s_r32( imm, R_EAX );
   417     store_reg( R_EAX, Rn );
   418     sh4_x86.tstate = TSTATE_NONE;
   419 :}
   420 ADDC Rm, Rn {:
   421     if( sh4_x86.tstate != TSTATE_C ) {
   422 	LDC_t();
   423     }
   424     load_reg( R_EAX, Rm );
   425     load_reg( R_ECX, Rn );
   426     ADC_r32_r32( R_EAX, R_ECX );
   427     store_reg( R_ECX, Rn );
   428     SETC_t();
   429     sh4_x86.tstate = TSTATE_C;
   430 :}
   431 ADDV Rm, Rn {:
   432     load_reg( R_EAX, Rm );
   433     load_reg( R_ECX, Rn );
   434     ADD_r32_r32( R_EAX, R_ECX );
   435     store_reg( R_ECX, Rn );
   436     SETO_t();
   437     sh4_x86.tstate = TSTATE_O;
   438 :}
   439 AND Rm, Rn {:
   440     load_reg( R_EAX, Rm );
   441     load_reg( R_ECX, Rn );
   442     AND_r32_r32( R_EAX, R_ECX );
   443     store_reg( R_ECX, Rn );
   444     sh4_x86.tstate = TSTATE_NONE;
   445 :}
   446 AND #imm, R0 {:  
   447     load_reg( R_EAX, 0 );
   448     AND_imm32_r32(imm, R_EAX); 
   449     store_reg( R_EAX, 0 );
   450     sh4_x86.tstate = TSTATE_NONE;
   451 :}
   452 AND.B #imm, @(R0, GBR) {: 
   453     load_reg( R_EAX, 0 );
   454     load_spreg( R_ECX, R_GBR );
   455     ADD_r32_r32( R_EAX, R_ECX );
   456     PUSH_realigned_r32(R_ECX);
   457     MEM_READ_BYTE( R_ECX, R_EAX );
   458     POP_realigned_r32(R_ECX);
   459     AND_imm32_r32(imm, R_EAX );
   460     MEM_WRITE_BYTE( R_ECX, R_EAX );
   461     sh4_x86.tstate = TSTATE_NONE;
   462 :}
   463 CMP/EQ Rm, Rn {:  
   464     load_reg( R_EAX, Rm );
   465     load_reg( R_ECX, Rn );
   466     CMP_r32_r32( R_EAX, R_ECX );
   467     SETE_t();
   468     sh4_x86.tstate = TSTATE_E;
   469 :}
   470 CMP/EQ #imm, R0 {:  
   471     load_reg( R_EAX, 0 );
   472     CMP_imm8s_r32(imm, R_EAX);
   473     SETE_t();
   474     sh4_x86.tstate = TSTATE_E;
   475 :}
   476 CMP/GE Rm, Rn {:  
   477     load_reg( R_EAX, Rm );
   478     load_reg( R_ECX, Rn );
   479     CMP_r32_r32( R_EAX, R_ECX );
   480     SETGE_t();
   481     sh4_x86.tstate = TSTATE_GE;
   482 :}
   483 CMP/GT Rm, Rn {: 
   484     load_reg( R_EAX, Rm );
   485     load_reg( R_ECX, Rn );
   486     CMP_r32_r32( R_EAX, R_ECX );
   487     SETG_t();
   488     sh4_x86.tstate = TSTATE_G;
   489 :}
   490 CMP/HI Rm, Rn {:  
   491     load_reg( R_EAX, Rm );
   492     load_reg( R_ECX, Rn );
   493     CMP_r32_r32( R_EAX, R_ECX );
   494     SETA_t();
   495     sh4_x86.tstate = TSTATE_A;
   496 :}
   497 CMP/HS Rm, Rn {: 
   498     load_reg( R_EAX, Rm );
   499     load_reg( R_ECX, Rn );
   500     CMP_r32_r32( R_EAX, R_ECX );
   501     SETAE_t();
   502     sh4_x86.tstate = TSTATE_AE;
   503  :}
   504 CMP/PL Rn {: 
   505     load_reg( R_EAX, Rn );
   506     CMP_imm8s_r32( 0, R_EAX );
   507     SETG_t();
   508     sh4_x86.tstate = TSTATE_G;
   509 :}
   510 CMP/PZ Rn {:  
   511     load_reg( R_EAX, Rn );
   512     CMP_imm8s_r32( 0, R_EAX );
   513     SETGE_t();
   514     sh4_x86.tstate = TSTATE_GE;
   515 :}
   516 CMP/STR Rm, Rn {:  
   517     load_reg( R_EAX, Rm );
   518     load_reg( R_ECX, Rn );
   519     XOR_r32_r32( R_ECX, R_EAX );
   520     TEST_r8_r8( R_AL, R_AL );
   521     JE_rel8(13, target1);
   522     TEST_r8_r8( R_AH, R_AH ); // 2
   523     JE_rel8(9, target2);
   524     SHR_imm8_r32( 16, R_EAX ); // 3
   525     TEST_r8_r8( R_AL, R_AL ); // 2
   526     JE_rel8(2, target3);
   527     TEST_r8_r8( R_AH, R_AH ); // 2
   528     JMP_TARGET(target1);
   529     JMP_TARGET(target2);
   530     JMP_TARGET(target3);
   531     SETE_t();
   532     sh4_x86.tstate = TSTATE_E;
   533 :}
   534 DIV0S Rm, Rn {:
   535     load_reg( R_EAX, Rm );
   536     load_reg( R_ECX, Rn );
   537     SHR_imm8_r32( 31, R_EAX );
   538     SHR_imm8_r32( 31, R_ECX );
   539     store_spreg( R_EAX, R_M );
   540     store_spreg( R_ECX, R_Q );
   541     CMP_r32_r32( R_EAX, R_ECX );
   542     SETNE_t();
   543     sh4_x86.tstate = TSTATE_NE;
   544 :}
   545 DIV0U {:  
   546     XOR_r32_r32( R_EAX, R_EAX );
   547     store_spreg( R_EAX, R_Q );
   548     store_spreg( R_EAX, R_M );
   549     store_spreg( R_EAX, R_T );
   550     sh4_x86.tstate = TSTATE_C; // works for DIV1
   551 :}
DIV1 Rm, Rn {:
    /* One step of the SH4 1-bit division algorithm: shift T into Rn,
     * add or subtract Rm depending on whether Q == M, then recompute
     * Q and T from the carry. NOTE(review): jump offsets below are
     * hand-counted emitted-byte lengths -- do not reorder emission. */
    load_spreg( R_ECX, R_M );
    load_reg( R_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
	LDC_t();
    }
    RCL1_r32( R_EAX ); // Rn = (Rn << 1) | T
    SETC_r8( R_DL ); // Q'
    CMP_sh4r_r32( R_Q, R_ECX );
    JE_rel8(5, mqequal);
    ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_rel8(3, end);
    JMP_TARGET(mqequal);
    SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    JMP_TARGET(end);
    store_reg( R_EAX, Rn ); // Done with Rn now
    SETC_r8(R_AL); // tmp1
    XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    store_spreg( R_ECX, R_Q );
    XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    MOVZX_r8_r32( R_AL, R_EAX );
    store_spreg( R_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
   577 DMULS.L Rm, Rn {:  
   578     load_reg( R_EAX, Rm );
   579     load_reg( R_ECX, Rn );
   580     IMUL_r32(R_ECX);
   581     store_spreg( R_EDX, R_MACH );
   582     store_spreg( R_EAX, R_MACL );
   583     sh4_x86.tstate = TSTATE_NONE;
   584 :}
   585 DMULU.L Rm, Rn {:  
   586     load_reg( R_EAX, Rm );
   587     load_reg( R_ECX, Rn );
   588     MUL_r32(R_ECX);
   589     store_spreg( R_EDX, R_MACH );
   590     store_spreg( R_EAX, R_MACL );    
   591     sh4_x86.tstate = TSTATE_NONE;
   592 :}
   593 DT Rn {:  
   594     load_reg( R_EAX, Rn );
   595     ADD_imm8s_r32( -1, R_EAX );
   596     store_reg( R_EAX, Rn );
   597     SETE_t();
   598     sh4_x86.tstate = TSTATE_E;
   599 :}
   600 EXTS.B Rm, Rn {:  
   601     load_reg( R_EAX, Rm );
   602     MOVSX_r8_r32( R_EAX, R_EAX );
   603     store_reg( R_EAX, Rn );
   604 :}
   605 EXTS.W Rm, Rn {:  
   606     load_reg( R_EAX, Rm );
   607     MOVSX_r16_r32( R_EAX, R_EAX );
   608     store_reg( R_EAX, Rn );
   609 :}
   610 EXTU.B Rm, Rn {:  
   611     load_reg( R_EAX, Rm );
   612     MOVZX_r8_r32( R_EAX, R_EAX );
   613     store_reg( R_EAX, Rn );
   614 :}
   615 EXTU.W Rm, Rn {:  
   616     load_reg( R_EAX, Rm );
   617     MOVZX_r16_r32( R_EAX, R_EAX );
   618     store_reg( R_EAX, Rn );
   619 :}
   620 MAC.L @Rm+, @Rn+ {:  
   621     load_reg( R_ECX, Rm );
   622     precheck();
   623     check_ralign32( R_ECX );
   624     load_reg( R_ECX, Rn );
   625     check_ralign32( R_ECX );
   626     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   627     MEM_READ_LONG( R_ECX, R_EAX );
   628     PUSH_realigned_r32( R_EAX );
   629     load_reg( R_ECX, Rm );
   630     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   631     MEM_READ_LONG( R_ECX, R_EAX );
   632     POP_realigned_r32( R_ECX );
   633     IMUL_r32( R_ECX );
   634     ADD_r32_sh4r( R_EAX, R_MACL );
   635     ADC_r32_sh4r( R_EDX, R_MACH );
   637     load_spreg( R_ECX, R_S );
   638     TEST_r32_r32(R_ECX, R_ECX);
   639     JE_rel8( CALL_FUNC0_SIZE, nosat );
   640     call_func0( signsat48 );
   641     JMP_TARGET( nosat );
   642     sh4_x86.tstate = TSTATE_NONE;
   643 :}
   644 MAC.W @Rm+, @Rn+ {:  
   645     load_reg( R_ECX, Rm );
   646     precheck();
   647     check_ralign16( R_ECX );
   648     load_reg( R_ECX, Rn );
   649     check_ralign16( R_ECX );
   650     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   651     MEM_READ_WORD( R_ECX, R_EAX );
   652     PUSH_realigned_r32( R_EAX );
   653     load_reg( R_ECX, Rm );
   654     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   655     MEM_READ_WORD( R_ECX, R_EAX );
   656     POP_realigned_r32( R_ECX );
   657     IMUL_r32( R_ECX );
   659     load_spreg( R_ECX, R_S );
   660     TEST_r32_r32( R_ECX, R_ECX );
   661     JE_rel8( 47, nosat );
   663     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   664     JNO_rel8( 51, end );            // 2
   665     load_imm32( R_EDX, 1 );         // 5
   666     store_spreg( R_EDX, R_MACH );   // 6
   667     JS_rel8( 13, positive );        // 2
   668     load_imm32( R_EAX, 0x80000000 );// 5
   669     store_spreg( R_EAX, R_MACL );   // 6
   670     JMP_rel8( 25, end2 );           // 2
   672     JMP_TARGET(positive);
   673     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   674     store_spreg( R_EAX, R_MACL );   // 6
   675     JMP_rel8( 12, end3);            // 2
   677     JMP_TARGET(nosat);
   678     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   679     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   680     JMP_TARGET(end);
   681     JMP_TARGET(end2);
   682     JMP_TARGET(end3);
   683     sh4_x86.tstate = TSTATE_NONE;
   684 :}
   685 MOVT Rn {:  
   686     load_spreg( R_EAX, R_T );
   687     store_reg( R_EAX, Rn );
   688 :}
   689 MUL.L Rm, Rn {:  
   690     load_reg( R_EAX, Rm );
   691     load_reg( R_ECX, Rn );
   692     MUL_r32( R_ECX );
   693     store_spreg( R_EAX, R_MACL );
   694     sh4_x86.tstate = TSTATE_NONE;
   695 :}
   696 MULS.W Rm, Rn {:
   697     load_reg16s( R_EAX, Rm );
   698     load_reg16s( R_ECX, Rn );
   699     MUL_r32( R_ECX );
   700     store_spreg( R_EAX, R_MACL );
   701     sh4_x86.tstate = TSTATE_NONE;
   702 :}
   703 MULU.W Rm, Rn {:  
   704     load_reg16u( R_EAX, Rm );
   705     load_reg16u( R_ECX, Rn );
   706     MUL_r32( R_ECX );
   707     store_spreg( R_EAX, R_MACL );
   708     sh4_x86.tstate = TSTATE_NONE;
   709 :}
   710 NEG Rm, Rn {:
   711     load_reg( R_EAX, Rm );
   712     NEG_r32( R_EAX );
   713     store_reg( R_EAX, Rn );
   714     sh4_x86.tstate = TSTATE_NONE;
   715 :}
   716 NEGC Rm, Rn {:  
   717     load_reg( R_EAX, Rm );
   718     XOR_r32_r32( R_ECX, R_ECX );
   719     LDC_t();
   720     SBB_r32_r32( R_EAX, R_ECX );
   721     store_reg( R_ECX, Rn );
   722     SETC_t();
   723     sh4_x86.tstate = TSTATE_C;
   724 :}
   725 NOT Rm, Rn {:  
   726     load_reg( R_EAX, Rm );
   727     NOT_r32( R_EAX );
   728     store_reg( R_EAX, Rn );
   729     sh4_x86.tstate = TSTATE_NONE;
   730 :}
   731 OR Rm, Rn {:  
   732     load_reg( R_EAX, Rm );
   733     load_reg( R_ECX, Rn );
   734     OR_r32_r32( R_EAX, R_ECX );
   735     store_reg( R_ECX, Rn );
   736     sh4_x86.tstate = TSTATE_NONE;
   737 :}
   738 OR #imm, R0 {:
   739     load_reg( R_EAX, 0 );
   740     OR_imm32_r32(imm, R_EAX);
   741     store_reg( R_EAX, 0 );
   742     sh4_x86.tstate = TSTATE_NONE;
   743 :}
   744 OR.B #imm, @(R0, GBR) {:  
   745     load_reg( R_EAX, 0 );
   746     load_spreg( R_ECX, R_GBR );
   747     ADD_r32_r32( R_EAX, R_ECX );
   748     PUSH_realigned_r32(R_ECX);
   749     MEM_READ_BYTE( R_ECX, R_EAX );
   750     POP_realigned_r32(R_ECX);
   751     OR_imm32_r32(imm, R_EAX );
   752     MEM_WRITE_BYTE( R_ECX, R_EAX );
   753     sh4_x86.tstate = TSTATE_NONE;
   754 :}
   755 ROTCL Rn {:
   756     load_reg( R_EAX, Rn );
   757     if( sh4_x86.tstate != TSTATE_C ) {
   758 	LDC_t();
   759     }
   760     RCL1_r32( R_EAX );
   761     store_reg( R_EAX, Rn );
   762     SETC_t();
   763     sh4_x86.tstate = TSTATE_C;
   764 :}
   765 ROTCR Rn {:  
   766     load_reg( R_EAX, Rn );
   767     if( sh4_x86.tstate != TSTATE_C ) {
   768 	LDC_t();
   769     }
   770     RCR1_r32( R_EAX );
   771     store_reg( R_EAX, Rn );
   772     SETC_t();
   773     sh4_x86.tstate = TSTATE_C;
   774 :}
   775 ROTL Rn {:  
   776     load_reg( R_EAX, Rn );
   777     ROL1_r32( R_EAX );
   778     store_reg( R_EAX, Rn );
   779     SETC_t();
   780     sh4_x86.tstate = TSTATE_C;
   781 :}
   782 ROTR Rn {:  
   783     load_reg( R_EAX, Rn );
   784     ROR1_r32( R_EAX );
   785     store_reg( R_EAX, Rn );
   786     SETC_t();
   787     sh4_x86.tstate = TSTATE_C;
   788 :}
   789 SHAD Rm, Rn {:
   790     /* Annoyingly enough, not directly convertible */
   791     load_reg( R_EAX, Rn );
   792     load_reg( R_ECX, Rm );
   793     CMP_imm32_r32( 0, R_ECX );
   794     JGE_rel8(16, doshl);
   796     NEG_r32( R_ECX );      // 2
   797     AND_imm8_r8( 0x1F, R_CL ); // 3
   798     JE_rel8( 4, emptysar);     // 2
   799     SAR_r32_CL( R_EAX );       // 2
   800     JMP_rel8(10, end);          // 2
   802     JMP_TARGET(emptysar);
   803     SAR_imm8_r32(31, R_EAX );  // 3
   804     JMP_rel8(5, end2);
   806     JMP_TARGET(doshl);
   807     AND_imm8_r8( 0x1F, R_CL ); // 3
   808     SHL_r32_CL( R_EAX );       // 2
   809     JMP_TARGET(end);
   810     JMP_TARGET(end2);
   811     store_reg( R_EAX, Rn );
   812     sh4_x86.tstate = TSTATE_NONE;
   813 :}
   814 SHLD Rm, Rn {:  
   815     load_reg( R_EAX, Rn );
   816     load_reg( R_ECX, Rm );
   817     CMP_imm32_r32( 0, R_ECX );
   818     JGE_rel8(15, doshl);
   820     NEG_r32( R_ECX );      // 2
   821     AND_imm8_r8( 0x1F, R_CL ); // 3
   822     JE_rel8( 4, emptyshr );
   823     SHR_r32_CL( R_EAX );       // 2
   824     JMP_rel8(9, end);          // 2
   826     JMP_TARGET(emptyshr);
   827     XOR_r32_r32( R_EAX, R_EAX );
   828     JMP_rel8(5, end2);
   830     JMP_TARGET(doshl);
   831     AND_imm8_r8( 0x1F, R_CL ); // 3
   832     SHL_r32_CL( R_EAX );       // 2
   833     JMP_TARGET(end);
   834     JMP_TARGET(end2);
   835     store_reg( R_EAX, Rn );
   836     sh4_x86.tstate = TSTATE_NONE;
   837 :}
   838 SHAL Rn {: 
   839     load_reg( R_EAX, Rn );
   840     SHL1_r32( R_EAX );
   841     SETC_t();
   842     store_reg( R_EAX, Rn );
   843     sh4_x86.tstate = TSTATE_C;
   844 :}
   845 SHAR Rn {:  
   846     load_reg( R_EAX, Rn );
   847     SAR1_r32( R_EAX );
   848     SETC_t();
   849     store_reg( R_EAX, Rn );
   850     sh4_x86.tstate = TSTATE_C;
   851 :}
   852 SHLL Rn {:  
   853     load_reg( R_EAX, Rn );
   854     SHL1_r32( R_EAX );
   855     SETC_t();
   856     store_reg( R_EAX, Rn );
   857     sh4_x86.tstate = TSTATE_C;
   858 :}
   859 SHLL2 Rn {:
   860     load_reg( R_EAX, Rn );
   861     SHL_imm8_r32( 2, R_EAX );
   862     store_reg( R_EAX, Rn );
   863     sh4_x86.tstate = TSTATE_NONE;
   864 :}
   865 SHLL8 Rn {:  
   866     load_reg( R_EAX, Rn );
   867     SHL_imm8_r32( 8, R_EAX );
   868     store_reg( R_EAX, Rn );
   869     sh4_x86.tstate = TSTATE_NONE;
   870 :}
   871 SHLL16 Rn {:  
   872     load_reg( R_EAX, Rn );
   873     SHL_imm8_r32( 16, R_EAX );
   874     store_reg( R_EAX, Rn );
   875     sh4_x86.tstate = TSTATE_NONE;
   876 :}
   877 SHLR Rn {:  
   878     load_reg( R_EAX, Rn );
   879     SHR1_r32( R_EAX );
   880     SETC_t();
   881     store_reg( R_EAX, Rn );
   882     sh4_x86.tstate = TSTATE_C;
   883 :}
   884 SHLR2 Rn {:  
   885     load_reg( R_EAX, Rn );
   886     SHR_imm8_r32( 2, R_EAX );
   887     store_reg( R_EAX, Rn );
   888     sh4_x86.tstate = TSTATE_NONE;
   889 :}
   890 SHLR8 Rn {:  
   891     load_reg( R_EAX, Rn );
   892     SHR_imm8_r32( 8, R_EAX );
   893     store_reg( R_EAX, Rn );
   894     sh4_x86.tstate = TSTATE_NONE;
   895 :}
   896 SHLR16 Rn {:  
   897     load_reg( R_EAX, Rn );
   898     SHR_imm8_r32( 16, R_EAX );
   899     store_reg( R_EAX, Rn );
   900     sh4_x86.tstate = TSTATE_NONE;
   901 :}
   902 SUB Rm, Rn {:  
   903     load_reg( R_EAX, Rm );
   904     load_reg( R_ECX, Rn );
   905     SUB_r32_r32( R_EAX, R_ECX );
   906     store_reg( R_ECX, Rn );
   907     sh4_x86.tstate = TSTATE_NONE;
   908 :}
   909 SUBC Rm, Rn {:  
   910     load_reg( R_EAX, Rm );
   911     load_reg( R_ECX, Rn );
   912     if( sh4_x86.tstate != TSTATE_C ) {
   913 	LDC_t();
   914     }
   915     SBB_r32_r32( R_EAX, R_ECX );
   916     store_reg( R_ECX, Rn );
   917     SETC_t();
   918     sh4_x86.tstate = TSTATE_C;
   919 :}
   920 SUBV Rm, Rn {:  
   921     load_reg( R_EAX, Rm );
   922     load_reg( R_ECX, Rn );
   923     SUB_r32_r32( R_EAX, R_ECX );
   924     store_reg( R_ECX, Rn );
   925     SETO_t();
   926     sh4_x86.tstate = TSTATE_O;
   927 :}
   928 SWAP.B Rm, Rn {:  
   929     load_reg( R_EAX, Rm );
   930     XCHG_r8_r8( R_AL, R_AH );
   931     store_reg( R_EAX, Rn );
   932 :}
   933 SWAP.W Rm, Rn {:  
   934     load_reg( R_EAX, Rm );
   935     MOV_r32_r32( R_EAX, R_ECX );
   936     SHL_imm8_r32( 16, R_ECX );
   937     SHR_imm8_r32( 16, R_EAX );
   938     OR_r32_r32( R_EAX, R_ECX );
   939     store_reg( R_ECX, Rn );
   940     sh4_x86.tstate = TSTATE_NONE;
   941 :}
   942 TAS.B @Rn {:  
   943     load_reg( R_ECX, Rn );
   944     MEM_READ_BYTE( R_ECX, R_EAX );
   945     TEST_r8_r8( R_AL, R_AL );
   946     SETE_t();
   947     OR_imm8_r8( 0x80, R_AL );
   948     load_reg( R_ECX, Rn );
   949     MEM_WRITE_BYTE( R_ECX, R_EAX );
   950     sh4_x86.tstate = TSTATE_NONE;
   951 :}
   952 TST Rm, Rn {:  
   953     load_reg( R_EAX, Rm );
   954     load_reg( R_ECX, Rn );
   955     TEST_r32_r32( R_EAX, R_ECX );
   956     SETE_t();
   957     sh4_x86.tstate = TSTATE_E;
   958 :}
   959 TST #imm, R0 {:  
   960     load_reg( R_EAX, 0 );
   961     TEST_imm32_r32( imm, R_EAX );
   962     SETE_t();
   963     sh4_x86.tstate = TSTATE_E;
   964 :}
   965 TST.B #imm, @(R0, GBR) {:  
   966     load_reg( R_EAX, 0);
   967     load_reg( R_ECX, R_GBR);
   968     ADD_r32_r32( R_EAX, R_ECX );
   969     MEM_READ_BYTE( R_ECX, R_EAX );
   970     TEST_imm8_r8( imm, R_AL );
   971     SETE_t();
   972     sh4_x86.tstate = TSTATE_E;
   973 :}
XOR Rm, Rn {:  
    /* Rn := Rn ^ Rm */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    XOR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:  
    /* R0 := R0 ^ imm */
    load_reg( R_EAX, 0 );
    XOR_imm32_r32( imm, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:  
    /* [R0+GBR] := [R0+GBR] ^ imm. The effective address is preserved
     * across the read call (which may clobber ECX) via push/pop. */
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_r32_r32( R_EAX, R_ECX );
    PUSH_realigned_r32(R_ECX);
    MEM_READ_BYTE(R_ECX, R_EAX);
    POP_realigned_r32(R_ECX);
    XOR_imm32_r32( imm, R_EAX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    /* Rn := (Rm << 16) | (Rn >> 16) — middle 32 bits of the Rm:Rn pair */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    SHL_imm8_r32( 16, R_EAX );
    SHR_imm8_r32( 16, R_ECX );
    OR_r32_r32( R_EAX, R_ECX );
    store_reg( R_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1008 /* Data move instructions */
MOV Rm, Rn {:  
    /* Rn := Rm */
    load_reg( R_EAX, Rm );
    store_reg( R_EAX, Rn );
:}
MOV #imm, Rn {:  
    /* Rn := imm */
    load_imm32( R_EAX, imm );
    store_reg( R_EAX, Rn );
:}
MOV.B Rm, @Rn {:  
    /* [Rn] := Rm (byte) */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:  
    /* Rn := Rn - 1; [Rn] := Rm (pre-decrement byte store).
     * The decremented Rn is committed before the write is issued. */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    ADD_imm8s_r32( -1, R_ECX );
    store_reg( R_ECX, Rn );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:  
    /* [R0 + Rn] := Rm (byte) */
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    load_reg( R_EAX, Rm );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:  
    /* [GBR + disp] := R0 (byte) */
    load_reg( R_EAX, 0 );
    load_spreg( R_ECX, R_GBR );
    ADD_imm32_r32( disp, R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:  
    /* [Rn + disp] := R0 (byte) */
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_imm32_r32( disp, R_ECX );
    MEM_WRITE_BYTE( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:  
    /* Rn := [Rm] (byte) */
    load_reg( R_ECX, Rm );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:  
    /* Rn := [Rm]; Rm := Rm + 1 (post-increment byte load).
     * The incremented Rm is committed before the read is issued. */
    load_reg( R_ECX, Rm );
    MOV_r32_r32( R_ECX, R_EAX );
    ADD_imm8s_r32( 1, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(R0, Rm), Rn {:  
    /* Rn := [R0 + Rm] (byte) */
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_EAX, R_ECX );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, GBR), R0 {:  
    /* R0 := [GBR + disp] (byte) */
    load_spreg( R_ECX, R_GBR );
    ADD_imm32_r32( disp, R_ECX );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @(disp, Rm), R0 {:  
    /* R0 := [Rm + disp] (byte) */
    load_reg( R_ECX, Rm );
    ADD_imm32_r32( disp, R_ECX );
    MEM_READ_BYTE( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @Rn {:
    /* [Rn] := Rm (long). Address must be 4-aligned; precheck() precedes
     * the alignment test so a failure can raise the exception cleanly. */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32(R_ECX);
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @-Rn {:  
    /* Rn := Rn - 4; [Rn] := Rm (pre-decrement long store).
     * Alignment is checked on the original Rn — equivalent, since the
     * decrement by 4 preserves 4-alignment. */
    load_reg( R_EAX, Rm );
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(R0, Rn) {:  
    /* [R0 + Rn] := Rm (long) */
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    precheck();
    check_walign32( R_ECX );
    load_reg( R_EAX, Rm );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L R0, @(disp, GBR) {:  
    /* [GBR + disp] := R0 (long) */
    load_spreg( R_ECX, R_GBR );
    load_reg( R_EAX, 0 );
    ADD_imm32_r32( disp, R_ECX );
    precheck();
    check_walign32( R_ECX );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L Rm, @(disp, Rn) {:  
    /* [Rn + disp] := Rm (long) */
    load_reg( R_ECX, Rn );
    load_reg( R_EAX, Rm );
    ADD_imm32_r32( disp, R_ECX );
    precheck();
    check_walign32( R_ECX );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm, Rn {:  
    /* Rn := [Rm] (long, 4-aligned) */
    load_reg( R_ECX, Rm );
    precheck();
    check_ralign32( R_ECX );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @Rm+, Rn {:  
    /* Rn := [Rm]; Rm := Rm + 4 (post-increment long load) */
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(R0, Rm), Rn {:  
    /* Rn := [R0 + Rm] (long) */
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_EAX, R_ECX );
    precheck();
    check_ralign32( R_ECX );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.L @(disp, GBR), R0 {:
    /* R0 := [GBR + disp] (long) */
    load_spreg( R_ECX, R_GBR );
    ADD_imm32_r32( disp, R_ECX );
    precheck();
    check_ralign32( R_ECX );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1174 MOV.L @(disp, PC), Rn {:  
  1175     if( sh4_x86.in_delay_slot ) {
  1176 	SLOTILLEGAL();
  1177     } else {
  1178 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1179 	sh4ptr_t ptr = mem_get_region(target);
  1180 	if( ptr != NULL ) {
  1181 	    MOV_moff32_EAX( ptr );
  1182 	} else {
  1183 	    load_imm32( R_ECX, target );
  1184 	    MEM_READ_LONG( R_ECX, R_EAX );
  1186 	store_reg( R_EAX, Rn );
  1187 	sh4_x86.tstate = TSTATE_NONE;
  1189 :}
MOV.L @(disp, Rm), Rn {:  
    /* Rn := [Rm + disp] (long). disp fits in a signed byte here
     * (4-bit field scaled by 4), hence the imm8s form. */
    load_reg( R_ECX, Rm );
    ADD_imm8s_r32( disp, R_ECX );
    precheck();
    check_ralign32( R_ECX );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @Rn {:  
    /* [Rn] := Rm (word, 2-aligned) */
    load_reg( R_ECX, Rn );
    precheck();
    check_walign16( R_ECX );
    load_reg( R_EAX, Rm );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @-Rn {:  
    /* Rn := Rn - 2; [Rn] := Rm (pre-decrement word store) */
    load_reg( R_ECX, Rn );
    precheck();
    check_walign16( R_ECX );
    load_reg( R_EAX, Rm );
    ADD_imm8s_r32( -2, R_ECX );
    store_reg( R_ECX, Rn );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W Rm, @(R0, Rn) {:  
    /* [R0 + Rn] := Rm (word) */
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    precheck();
    check_walign16( R_ECX );
    load_reg( R_EAX, Rm );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, GBR) {:  
    /* [GBR + disp] := R0 (word) */
    load_spreg( R_ECX, R_GBR );
    load_reg( R_EAX, 0 );
    ADD_imm32_r32( disp, R_ECX );
    precheck();
    check_walign16( R_ECX );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W R0, @(disp, Rn) {:  
    /* [Rn + disp] := R0 (word) */
    load_reg( R_ECX, Rn );
    load_reg( R_EAX, 0 );
    ADD_imm32_r32( disp, R_ECX );
    precheck();
    check_walign16( R_ECX );
    MEM_WRITE_WORD( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm, Rn {:  
    /* Rn := [Rm] (word, 2-aligned) */
    load_reg( R_ECX, Rm );
    precheck();
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @Rm+, Rn {:  
    /* Rn := [Rm]; Rm := Rm + 2 (post-increment word load) */
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign16( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 2, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(R0, Rm), Rn {:  
    /* Rn := [R0 + Rm] (word) */
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_EAX, R_ECX );
    precheck();
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.W @(disp, GBR), R0 {:  
    /* R0 := [GBR + disp] (word) */
    load_spreg( R_ECX, R_GBR );
    ADD_imm32_r32( disp, R_ECX );
    precheck();
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1283 MOV.W @(disp, PC), Rn {:  
  1284     if( sh4_x86.in_delay_slot ) {
  1285 	SLOTILLEGAL();
  1286     } else {
  1287 	load_imm32( R_ECX, pc + disp + 4 );
  1288 	MEM_READ_WORD( R_ECX, R_EAX );
  1289 	store_reg( R_EAX, Rn );
  1290 	sh4_x86.tstate = TSTATE_NONE;
  1292 :}
MOV.W @(disp, Rm), R0 {:  
    /* R0 := [Rm + disp] (word, 2-aligned) */
    load_reg( R_ECX, Rm );
    ADD_imm32_r32( disp, R_ECX );
    precheck();
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1302 MOVA @(disp, PC), R0 {:  
  1303     if( sh4_x86.in_delay_slot ) {
  1304 	SLOTILLEGAL();
  1305     } else {
  1306 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
  1307 	store_reg( R_ECX, 0 );
  1309 :}
MOVCA.L R0, @Rn {:  
    /* [Rn] := R0 (long). The cache-allocation hint of the real
     * instruction is irrelevant here, so this is emitted as a plain
     * long store. */
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1319 /* Control transfer instructions */
  1320 BF disp {:
  1321     if( sh4_x86.in_delay_slot ) {
  1322 	SLOTILLEGAL();
  1323     } else {
  1324 	JT_rel8( EXIT_BLOCK_SIZE, nottaken );
  1325 	exit_block( disp + pc + 4, pc+2 );
  1326 	JMP_TARGET(nottaken);
  1327 	return 2;
  1329 :}
  1330 BF/S disp {:
  1331     if( sh4_x86.in_delay_slot ) {
  1332 	SLOTILLEGAL();
  1333     } else {
  1334 	sh4_x86.in_delay_slot = TRUE;
  1335 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1336 	    CMP_imm8s_sh4r( 1, R_T );
  1337 	    sh4_x86.tstate = TSTATE_E;
  1339 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JNE rel32
  1340 	sh4_translate_instruction(pc+2);
  1341 	exit_block( disp + pc + 4, pc+4 );
  1342 	// not taken
  1343 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1344 	sh4_translate_instruction(pc+2);
  1345 	return 4;
  1347 :}
  1348 BRA disp {:  
  1349     if( sh4_x86.in_delay_slot ) {
  1350 	SLOTILLEGAL();
  1351     } else {
  1352 	sh4_x86.in_delay_slot = TRUE;
  1353 	sh4_translate_instruction( pc + 2 );
  1354 	exit_block( disp + pc + 4, pc+4 );
  1355 	sh4_x86.branch_taken = TRUE;
  1356 	return 4;
  1358 :}
  1359 BRAF Rn {:  
  1360     if( sh4_x86.in_delay_slot ) {
  1361 	SLOTILLEGAL();
  1362     } else {
  1363 	load_reg( R_EAX, Rn );
  1364 	ADD_imm32_r32( pc + 4, R_EAX );
  1365 	store_spreg( R_EAX, REG_OFFSET(pc) );
  1366 	sh4_x86.in_delay_slot = TRUE;
  1367 	sh4_x86.tstate = TSTATE_NONE;
  1368 	sh4_translate_instruction( pc + 2 );
  1369 	exit_block_pcset(pc+2);
  1370 	sh4_x86.branch_taken = TRUE;
  1371 	return 4;
  1373 :}
  1374 BSR disp {:  
  1375     if( sh4_x86.in_delay_slot ) {
  1376 	SLOTILLEGAL();
  1377     } else {
  1378 	load_imm32( R_EAX, pc + 4 );
  1379 	store_spreg( R_EAX, R_PR );
  1380 	sh4_x86.in_delay_slot = TRUE;
  1381 	sh4_translate_instruction( pc + 2 );
  1382 	exit_block( disp + pc + 4, pc+4 );
  1383 	sh4_x86.branch_taken = TRUE;
  1384 	return 4;
  1386 :}
  1387 BSRF Rn {:  
  1388     if( sh4_x86.in_delay_slot ) {
  1389 	SLOTILLEGAL();
  1390     } else {
  1391 	load_imm32( R_ECX, pc + 4 );
  1392 	store_spreg( R_ECX, R_PR );
  1393 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
  1394 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1395 	sh4_x86.in_delay_slot = TRUE;
  1396 	sh4_x86.tstate = TSTATE_NONE;
  1397 	sh4_translate_instruction( pc + 2 );
  1398 	exit_block_pcset(pc+2);
  1399 	sh4_x86.branch_taken = TRUE;
  1400 	return 4;
  1402 :}
  1403 BT disp {:
  1404     if( sh4_x86.in_delay_slot ) {
  1405 	SLOTILLEGAL();
  1406     } else {
  1407 	JF_rel8( EXIT_BLOCK_SIZE, nottaken );
  1408 	exit_block( disp + pc + 4, pc+2 );
  1409 	JMP_TARGET(nottaken);
  1410 	return 2;
  1412 :}
  1413 BT/S disp {:
  1414     if( sh4_x86.in_delay_slot ) {
  1415 	SLOTILLEGAL();
  1416     } else {
  1417 	sh4_x86.in_delay_slot = TRUE;
  1418 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1419 	    CMP_imm8s_sh4r( 1, R_T );
  1420 	    sh4_x86.tstate = TSTATE_E;
  1422 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // JE rel32
  1423 	sh4_translate_instruction(pc+2);
  1424 	exit_block( disp + pc + 4, pc+4 );
  1425 	// not taken
  1426 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1427 	sh4_translate_instruction(pc+2);
  1428 	return 4;
  1430 :}
  1431 JMP @Rn {:  
  1432     if( sh4_x86.in_delay_slot ) {
  1433 	SLOTILLEGAL();
  1434     } else {
  1435 	load_reg( R_ECX, Rn );
  1436 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1437 	sh4_x86.in_delay_slot = TRUE;
  1438 	sh4_translate_instruction(pc+2);
  1439 	exit_block_pcset(pc+2);
  1440 	sh4_x86.branch_taken = TRUE;
  1441 	return 4;
  1443 :}
  1444 JSR @Rn {:  
  1445     if( sh4_x86.in_delay_slot ) {
  1446 	SLOTILLEGAL();
  1447     } else {
  1448 	load_imm32( R_EAX, pc + 4 );
  1449 	store_spreg( R_EAX, R_PR );
  1450 	load_reg( R_ECX, Rn );
  1451 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1452 	sh4_x86.in_delay_slot = TRUE;
  1453 	sh4_translate_instruction(pc+2);
  1454 	exit_block_pcset(pc+2);
  1455 	sh4_x86.branch_taken = TRUE;
  1456 	return 4;
  1458 :}
  1459 RTE {:  
  1460     if( sh4_x86.in_delay_slot ) {
  1461 	SLOTILLEGAL();
  1462     } else {
  1463 	check_priv();
  1464 	load_spreg( R_ECX, R_SPC );
  1465 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1466 	load_spreg( R_EAX, R_SSR );
  1467 	call_func1( sh4_write_sr, R_EAX );
  1468 	sh4_x86.in_delay_slot = TRUE;
  1469 	sh4_x86.priv_checked = FALSE;
  1470 	sh4_x86.fpuen_checked = FALSE;
  1471 	sh4_x86.tstate = TSTATE_NONE;
  1472 	sh4_translate_instruction(pc+2);
  1473 	exit_block_pcset(pc+2);
  1474 	sh4_x86.branch_taken = TRUE;
  1475 	return 4;
  1477 :}
  1478 RTS {:  
  1479     if( sh4_x86.in_delay_slot ) {
  1480 	SLOTILLEGAL();
  1481     } else {
  1482 	load_spreg( R_ECX, R_PR );
  1483 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1484 	sh4_x86.in_delay_slot = TRUE;
  1485 	sh4_translate_instruction(pc+2);
  1486 	exit_block_pcset(pc+2);
  1487 	sh4_x86.branch_taken = TRUE;
  1488 	return 4;
  1490 :}
  1491 TRAPA #imm {:  
  1492     if( sh4_x86.in_delay_slot ) {
  1493 	SLOTILLEGAL();
  1494     } else {
  1495 	load_imm32( R_ECX, pc+2 );
  1496 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1497 	load_imm32( R_EAX, imm );
  1498 	call_func1( sh4_raise_trap, R_EAX );
  1499 	sh4_x86.tstate = TSTATE_NONE;
  1500 	exit_block_pcset(pc);
  1501 	sh4_x86.branch_taken = TRUE;
  1502 	return 2;
  1504 :}
  1505 UNDEF {:  
  1506     if( sh4_x86.in_delay_slot ) {
  1507 	SLOTILLEGAL();
  1508     } else {
  1509 	precheck();
  1510 	JMP_exit(EXIT_ILLEGAL);
  1511 	return 2;
  1513 :}
CLRMAC {:  
    /* MACL := 0; MACH := 0 */
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
CLRS {:
    /* S := 0 — clear host carry, then store CF into the S flag */
    CLC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
CLRT {:  
    /* T := 0 */
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SETS {:  
    /* S := 1 — set host carry, then store CF into the S flag */
    STC();
    SETC_sh4r(R_S);
    sh4_x86.tstate = TSTATE_C;
:}
SETT {:  
    /* T := 1 */
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
  1542 /* Floating point moves */
  1543 FMOV FRm, FRn {:  
  1544     /* As horrible as this looks, it's actually covering 5 separate cases:
  1545      * 1. 32-bit fr-to-fr (PR=0)
  1546      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1547      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1548      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1549      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1550      */
  1551     check_fpuen();
  1552     load_spreg( R_ECX, R_FPSCR );
  1553     load_fr_bank( R_EDX );
  1554     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1555     JNE_rel8(8, doublesize);
  1556     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1557     store_fr( R_EDX, R_EAX, FRn );
  1558     if( FRm&1 ) {
  1559 	JMP_rel8(24, end);
  1560 	JMP_TARGET(doublesize);
  1561 	load_xf_bank( R_ECX ); 
  1562 	load_fr( R_ECX, R_EAX, FRm-1 );
  1563 	if( FRn&1 ) {
  1564 	    load_fr( R_ECX, R_EDX, FRm );
  1565 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1566 	    store_fr( R_ECX, R_EDX, FRn );
  1567 	} else /* FRn&1 == 0 */ {
  1568 	    load_fr( R_ECX, R_ECX, FRm );
  1569 	    store_fr( R_EDX, R_EAX, FRn );
  1570 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1572 	JMP_TARGET(end);
  1573     } else /* FRm&1 == 0 */ {
  1574 	if( FRn&1 ) {
  1575 	    JMP_rel8(24, end);
  1576 	    load_xf_bank( R_ECX );
  1577 	    load_fr( R_EDX, R_EAX, FRm );
  1578 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1579 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1580 	    store_fr( R_ECX, R_EDX, FRn );
  1581 	    JMP_TARGET(end);
  1582 	} else /* FRn&1 == 0 */ {
  1583 	    JMP_rel8(12, end);
  1584 	    load_fr( R_EDX, R_EAX, FRm );
  1585 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1586 	    store_fr( R_EDX, R_EAX, FRn );
  1587 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1588 	    JMP_TARGET(end);
  1591     sh4_x86.tstate = TSTATE_NONE;
  1592 :}
  1593 FMOV FRm, @Rn {: 
  1594     precheck();
  1595     check_fpuen_no_precheck();
  1596     load_reg( R_ECX, Rn );
  1597     check_walign32( R_ECX );
  1598     load_spreg( R_EDX, R_FPSCR );
  1599     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1600     JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
  1601     load_fr_bank( R_EDX );
  1602     load_fr( R_EDX, R_EAX, FRm );
  1603     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1604     if( FRm&1 ) {
  1605 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1606 	JMP_TARGET(doublesize);
  1607 	load_xf_bank( R_EDX );
  1608 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1609 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1610 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1611 	JMP_TARGET(end);
  1612     } else {
  1613 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1614 	JMP_TARGET(doublesize);
  1615 	load_fr_bank( R_EDX );
  1616 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1617 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1618 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1619 	JMP_TARGET(end);
  1621     sh4_x86.tstate = TSTATE_NONE;
  1622 :}
  1623 FMOV @Rm, FRn {:  
  1624     precheck();
  1625     check_fpuen_no_precheck();
  1626     load_reg( R_ECX, Rm );
  1627     check_ralign32( R_ECX );
  1628     load_spreg( R_EDX, R_FPSCR );
  1629     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1630     JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
  1631     MEM_READ_LONG( R_ECX, R_EAX );
  1632     load_fr_bank( R_EDX );
  1633     store_fr( R_EDX, R_EAX, FRn );
  1634     if( FRn&1 ) {
  1635 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1636 	JMP_TARGET(doublesize);
  1637 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1638 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1639 	load_xf_bank( R_EDX );
  1640 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1641 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1642 	JMP_TARGET(end);
  1643     } else {
  1644 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1645 	JMP_TARGET(doublesize);
  1646 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1647 	load_fr_bank( R_EDX );
  1648 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1649 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1650 	JMP_TARGET(end);
  1652     sh4_x86.tstate = TSTATE_NONE;
  1653 :}
  1654 FMOV FRm, @-Rn {:  
  1655     precheck();
  1656     check_fpuen_no_precheck();
  1657     load_reg( R_ECX, Rn );
  1658     check_walign32( R_ECX );
  1659     load_spreg( R_EDX, R_FPSCR );
  1660     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1661     JNE_rel8(14 + CALL_FUNC2_SIZE, doublesize);
  1662     load_fr_bank( R_EDX );
  1663     load_fr( R_EDX, R_EAX, FRm );
  1664     ADD_imm8s_r32(-4,R_ECX);
  1665     store_reg( R_ECX, Rn );
  1666     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1667     if( FRm&1 ) {
  1668 	JMP_rel8( 24 + MEM_WRITE_DOUBLE_SIZE, end );
  1669 	JMP_TARGET(doublesize);
  1670 	load_xf_bank( R_EDX );
  1671 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1672 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1673 	ADD_imm8s_r32(-8,R_ECX);
  1674 	store_reg( R_ECX, Rn );
  1675 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1676 	JMP_TARGET(end);
  1677     } else {
  1678 	JMP_rel8( 15 + MEM_WRITE_DOUBLE_SIZE, end );
  1679 	JMP_TARGET(doublesize);
  1680 	load_fr_bank( R_EDX );
  1681 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1682 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1683 	ADD_imm8s_r32(-8,R_ECX);
  1684 	store_reg( R_ECX, Rn );
  1685 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1686 	JMP_TARGET(end);
  1688     sh4_x86.tstate = TSTATE_NONE;
  1689 :}
  1690 FMOV @Rm+, FRn {:
  1691     precheck();
  1692     check_fpuen_no_precheck();
  1693     load_reg( R_ECX, Rm );
  1694     check_ralign32( R_ECX );
  1695     MOV_r32_r32( R_ECX, R_EAX );
  1696     load_spreg( R_EDX, R_FPSCR );
  1697     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1698     JNE_rel8(14 + CALL_FUNC1_SIZE, doublesize);
  1699     ADD_imm8s_r32( 4, R_EAX );
  1700     store_reg( R_EAX, Rm );
  1701     MEM_READ_LONG( R_ECX, R_EAX );
  1702     load_fr_bank( R_EDX );
  1703     store_fr( R_EDX, R_EAX, FRn );
  1704     if( FRn&1 ) {
  1705 	JMP_rel8(27 + MEM_READ_DOUBLE_SIZE, end);
  1706 	JMP_TARGET(doublesize);
  1707 	ADD_imm8s_r32( 8, R_EAX );
  1708 	store_reg(R_EAX, Rm);
  1709 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1710 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1711 	load_xf_bank( R_EDX );
  1712 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1713 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1714 	JMP_TARGET(end);
  1715     } else {
  1716 	JMP_rel8(15 + MEM_READ_DOUBLE_SIZE, end);
  1717 	ADD_imm8s_r32( 8, R_EAX );
  1718 	store_reg(R_EAX, Rm);
  1719 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1720 	load_fr_bank( R_EDX );
  1721 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1722 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1723 	JMP_TARGET(end);
  1725     sh4_x86.tstate = TSTATE_NONE;
  1726 :}
  1727 FMOV FRm, @(R0, Rn) {:  
  1728     precheck();
  1729     check_fpuen_no_precheck();
  1730     load_reg( R_ECX, Rn );
  1731     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  1732     check_walign32( R_ECX );
  1733     load_spreg( R_EDX, R_FPSCR );
  1734     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1735     JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
  1736     load_fr_bank( R_EDX );
  1737     load_fr( R_EDX, R_EAX, FRm );
  1738     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1739     if( FRm&1 ) {
  1740 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1741 	JMP_TARGET(doublesize);
  1742 	load_xf_bank( R_EDX );
  1743 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1744 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1745 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1746 	JMP_TARGET(end);
  1747     } else {
  1748 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1749 	JMP_TARGET(doublesize);
  1750 	load_fr_bank( R_EDX );
  1751 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1752 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1753 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1754 	JMP_TARGET(end);
  1756     sh4_x86.tstate = TSTATE_NONE;
  1757 :}
  1758 FMOV @(R0, Rm), FRn {:  
  1759     precheck();
  1760     check_fpuen_no_precheck();
  1761     load_reg( R_ECX, Rm );
  1762     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  1763     check_ralign32( R_ECX );
  1764     load_spreg( R_EDX, R_FPSCR );
  1765     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1766     JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
  1767     MEM_READ_LONG( R_ECX, R_EAX );
  1768     load_fr_bank( R_EDX );
  1769     store_fr( R_EDX, R_EAX, FRn );
  1770     if( FRn&1 ) {
  1771 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1772 	JMP_TARGET(doublesize);
  1773 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1774 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1775 	load_xf_bank( R_EDX );
  1776 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1777 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1778 	JMP_TARGET(end);
  1779     } else {
  1780 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1781 	JMP_TARGET(doublesize);
  1782 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1783 	load_fr_bank( R_EDX );
  1784 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1785 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1786 	JMP_TARGET(end);
  1788     sh4_x86.tstate = TSTATE_NONE;
  1789 :}
FLDI0 FRn {:  /* IFF PR=0 */
    /* FRn := 0.0f — skipped at runtime when FPSCR.PR is set */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(8, end);
    XOR_r32_r32( R_EAX, R_EAX );
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDI1 FRn {:  /* IFF PR=0 */
    /* FRn := 1.0f (0x3F800000) — skipped at runtime when FPSCR.PR is set */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(11, end);
    load_imm32(R_EAX, 0x3F800000);
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLOAT FPUL, FRn {:  
    /* Integer-to-float conversion: load FPUL onto the x87 stack with
     * FILD, then pop as single (PR=0) or double (PR=1) into FRn/DRn. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg(R_EDX, REG_OFFSET(fr_bank));
    FILD_sh4r(R_FPUL);
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(5, doubleprec);
    pop_fr( R_EDX, FRn );
    JMP_rel8(3, end);
    JMP_TARGET(doubleprec);
    pop_dr( R_EDX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FTRC FRm, FPUL {:  
    /* Float-to-integer with truncation: FPUL := (int)FRm (or DRm when
     * PR=1). Values outside [min_int, max_int] are saturated to the
     * corresponding bound. The x87 control word is temporarily switched
     * to trunc_fcw (and restored from save_fcw) so FISTP rounds toward
     * zero. Trailing // numbers are emitted byte counts used by the
     * hand-computed jump offsets. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(5, doubleprec);
    push_fr( R_EDX, FRm );
    JMP_rel8(3, doop);
    JMP_TARGET(doubleprec);
    push_dr( R_EDX, FRm );
    JMP_TARGET( doop );
    load_imm32( R_ECX, (uint32_t)&max_int );
    FILD_r32ind( R_ECX );
    FCOMIP_st(1);
    JNA_rel8( 32, sat );
    load_imm32( R_ECX, (uint32_t)&min_int );  // 5
    FILD_r32ind( R_ECX );           // 2
    FCOMIP_st(1);                   // 2
    JAE_rel8( 21, sat2 );            // 2
    load_imm32( R_EAX, (uint32_t)&save_fcw );
    FNSTCW_r32ind( R_EAX );
    load_imm32( R_EDX, (uint32_t)&trunc_fcw );
    FLDCW_r32ind( R_EDX );
    FISTP_sh4r(R_FPUL);             // 3
    FLDCW_r32ind( R_EAX );
    JMP_rel8( 9, end );             // 2
    JMP_TARGET(sat);
    JMP_TARGET(sat2);
    /* Saturated path: ECX still points at the violated bound — store it
     * directly and discard the pending x87 value. */
    MOV_r32ind_r32( R_ECX, R_ECX ); // 2
    store_spreg( R_ECX, R_FPUL );
    FPOP_st();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FLDS FRm, FPUL {:  
    /* FPUL := FRm (raw 32-bit bit copy, no conversion) */
    check_fpuen();
    load_fr_bank( R_ECX );
    load_fr( R_ECX, R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSTS FPUL, FRn {:  
    /* FRn := FPUL (raw 32-bit bit copy, no conversion) */
    check_fpuen();
    load_fr_bank( R_ECX );
    load_spreg( R_EAX, R_FPUL );
    store_fr( R_ECX, R_EAX, FRn );
    sh4_x86.tstate = TSTATE_NONE;
:}
FCNVDS FRm, FPUL {:  
    /* FPUL := (float)DRm — double-to-single via the x87 stack */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(9, end); // only when PR=1
    load_fr_bank( R_ECX );
    push_dr( R_ECX, FRm );
    pop_fpul();
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FCNVSD FPUL, FRn {:  
    /* DRn := (double)FPUL — single-to-double via the x87 stack */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(9, end); // only when PR=1
    load_fr_bank( R_ECX );
    push_fpul();
    pop_dr( R_ECX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
  1899 /* Floating point instructions */
  1900 FABS FRn {:  
  1901     check_fpuen();
  1902     load_spreg( R_ECX, R_FPSCR );
  1903     load_fr_bank( R_EDX );
  1904     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1905     JNE_rel8(10, doubleprec);
  1906     push_fr(R_EDX, FRn); // 3
  1907     FABS_st0(); // 2
  1908     pop_fr( R_EDX, FRn); //3
  1909     JMP_rel8(8,end); // 2
  1910     JMP_TARGET(doubleprec);
  1911     push_dr(R_EDX, FRn);
  1912     FABS_st0();
  1913     pop_dr(R_EDX, FRn);
  1914     JMP_TARGET(end);
  1915     sh4_x86.tstate = TSTATE_NONE;
  1916 :}
  1917 FADD FRm, FRn {:  
  1918     check_fpuen();
  1919     load_spreg( R_ECX, R_FPSCR );
  1920     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1921     load_fr_bank( R_EDX );
  1922     JNE_rel8(13,doubleprec);
  1923     push_fr(R_EDX, FRm);
  1924     push_fr(R_EDX, FRn);
  1925     FADDP_st(1);
  1926     pop_fr(R_EDX, FRn);
  1927     JMP_rel8(11,end);
  1928     JMP_TARGET(doubleprec);
  1929     push_dr(R_EDX, FRm);
  1930     push_dr(R_EDX, FRn);
  1931     FADDP_st(1);
  1932     pop_dr(R_EDX, FRn);
  1933     JMP_TARGET(end);
  1934     sh4_x86.tstate = TSTATE_NONE;
  1935 :}
  1936 FDIV FRm, FRn {:  
  1937     check_fpuen();
  1938     load_spreg( R_ECX, R_FPSCR );
  1939     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1940     load_fr_bank( R_EDX );
  1941     JNE_rel8(13, doubleprec);
  1942     push_fr(R_EDX, FRn);
  1943     push_fr(R_EDX, FRm);
  1944     FDIVP_st(1);
  1945     pop_fr(R_EDX, FRn);
  1946     JMP_rel8(11, end);
  1947     JMP_TARGET(doubleprec);
  1948     push_dr(R_EDX, FRn);
  1949     push_dr(R_EDX, FRm);
  1950     FDIVP_st(1);
  1951     pop_dr(R_EDX, FRn);
  1952     JMP_TARGET(end);
  1953     sh4_x86.tstate = TSTATE_NONE;
  1954 :}
FMAC FR0, FRm, FRn {:  
    /* FRn = FR0 * FRm + FRn on the x87 stack (multiply, then add FRn).
     * rel8 displacements are hand-counted emitted-byte lengths. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg( R_EDX, REG_OFFSET(fr_bank));
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(18, doubleprec);
    push_fr( R_EDX, 0 );       /* FR0 */
    push_fr( R_EDX, FRm );
    FMULP_st(1);
    push_fr( R_EDX, FRn );
    FADDP_st(1);
    pop_fr( R_EDX, FRn );
    JMP_rel8(16, end);
    JMP_TARGET(doubleprec);
    push_dr( R_EDX, 0 );
    push_dr( R_EDX, FRm );
    FMULP_st(1);
    push_dr( R_EDX, FRn );
    FADDP_st(1);
    pop_dr( R_EDX, FRn );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FMUL FRm, FRn {:  
    /* FRn *= FRm, precision selected by FPSCR.PR. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);  /* hand-counted length of the single path */
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    FMULP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    FMULP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FNEG FRn {:  
    /* FRn = -FRn via x87 FCHS; precision selected by FPSCR.PR. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);  /* hand-counted length of the single path */
    push_fr(R_EDX, FRn);
    FCHS_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FCHS_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSRRA FRn {:  
    /* FRn = 1/sqrt(FRn). Only defined for single precision (PR=0);
     * with PR set the instruction emits nothing. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(12, end); // PR=0 only
    FLD1_st0();                /* 1.0 / ... */
    push_fr(R_EDX, FRn);
    FSQRT_st0();
    FDIVP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSQRT FRn {:  
    /* FRn = sqrt(FRn), precision selected by FPSCR.PR. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);  /* hand-counted length of the single path */
    push_fr(R_EDX, FRn);
    FSQRT_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FSQRT_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FSUB FRm, FRn {:  
    /* FRn -= FRm. Pushed FRn-first so FSUBP computes ST(1)-ST(0) = FRn-FRm. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);  /* hand-counted length of the single path */
    push_fr(R_EDX, FRn);
    push_fr(R_EDX, FRm);
    FSUBP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    push_dr(R_EDX, FRm);
    FSUBP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
FCMP/EQ FRm, FRn {:  
    /* T = (FRn == FRm). Both precision paths just load the operands
     * (FRm first, so FRn ends up in ST(0)); the compare/SETE/pop tail
     * is shared. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(8, doubleprec);   /* hand-counted length of the single path */
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    JMP_rel8(6, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);              /* compare ST(0)=FRn with ST(1)=FRm, pop */
    SETE_t();
    FPOP_st();                 /* discard remaining operand */
    sh4_x86.tstate = TSTATE_NONE;
:}
FCMP/GT FRm, FRn {:  
    /* T = (FRn > FRm). Same shape as FCMP/EQ but uses SETA (unsigned
     * "above" matches FCOMI's flag encoding for ordered greater-than). */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(8, doubleprec);   /* hand-counted length of the single path */
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    JMP_rel8(6, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);              /* compare ST(0)=FRn with ST(1)=FRm, pop */
    SETA_t();
    FPOP_st();
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCA FPUL, FRn {:  
    /* sin/cos of the FPUL angle, written to the even-aligned pair
     * FRn/FRn+1 (FRn&0x0E forces even, <<2 converts to a byte offset).
     * No operation when PR is set - the branch skips the whole body. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
    load_fr_bank( R_ECX );
    ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );   /* &fr_bank[FRn & ~1] */
    load_spreg( R_EDX, R_FPUL );
    call_func2( sh4_fsca, R_EDX, R_ECX );
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FIPR FVm, FVn {:  
    /* Inner product of vectors FVm and FVn; the scalar result is stored
     * in the last element of FVn ((FVn<<2)+3). Single precision only -
     * with PR set the branch skips the 44-byte body entirely. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(44, doubleprec);
    load_fr_bank( R_ECX );
    push_fr( R_ECX, FVm<<2 );
    push_fr( R_ECX, FVn<<2 );
    FMULP_st(1);
    push_fr( R_ECX, (FVm<<2)+1);
    push_fr( R_ECX, (FVn<<2)+1);
    FMULP_st(1);
    FADDP_st(1);
    push_fr( R_ECX, (FVm<<2)+2);
    push_fr( R_ECX, (FVn<<2)+2);
    FMULP_st(1);
    FADDP_st(1);
    push_fr( R_ECX, (FVm<<2)+3);
    push_fr( R_ECX, (FVn<<2)+3);
    FMULP_st(1);
    FADDP_st(1);
    pop_fr( R_ECX, (FVn<<2)+3);
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FTRV XMTRX, FVn {:  
    /* Transform vector FVn by the back-bank matrix XMTRX, via the
     * sh4_ftrv helper. Single precision only; PR set skips the body. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
    load_fr_bank( R_EDX );                 // 3
    ADD_imm8s_r32( FVn<<4, R_EDX );        // 3  &fr_bank[FVn*4]
    load_xf_bank( R_ECX );                 // 12
    call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
    JMP_TARGET(doubleprec);
    sh4_x86.tstate = TSTATE_NONE;
:}
FRCHG {:  
    /* Toggle FPSCR.FR (front/back register bank swap); update_fr_bank
     * recomputes the cached fr_bank pointer to match. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_FR, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    update_fr_bank( R_ECX );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:  
    /* Toggle FPSCR.SZ (32/64-bit transfer size). No bank update needed
     * since SZ doesn't affect the fr_bank pointer. */
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_SZ, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2170 /* Processor control instructions */
  2171 LDC Rm, SR {:
  2172     if( sh4_x86.in_delay_slot ) {
  2173 	SLOTILLEGAL();
  2174     } else {
  2175 	check_priv();
  2176 	load_reg( R_EAX, Rm );
  2177 	call_func1( sh4_write_sr, R_EAX );
  2178 	sh4_x86.priv_checked = FALSE;
  2179 	sh4_x86.fpuen_checked = FALSE;
  2180 	sh4_x86.tstate = TSTATE_NONE;
  2182 :}
LDC Rm, GBR {: 
    /* GBR is user-accessible - no privilege check needed. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );
:}
LDC Rm, VBR {:  
    /* Privileged: VBR = Rm. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:  
    /* Privileged: SSR = Rm. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:  
    /* Privileged: SGR = Rm. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:  
    /* Privileged: SPC = Rm. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:  
    /* Privileged: DBR = Rm. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:  
    /* Privileged: banked register Rn_BANK = Rm. */
    check_priv();
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, GBR {:  
    /* Pop a long from @Rm into GBR (post-increment). User-accessible,
     * but the address must be 32-bit aligned. Rm is incremented before
     * the read; NOTE(review): presumably precheck() saves enough state
     * for the memory exception path to recover - confirm in sh4trans. */
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
  2234 LDC.L @Rm+, SR {:
  2235     if( sh4_x86.in_delay_slot ) {
  2236 	SLOTILLEGAL();
  2237     } else {
  2238 	precheck();
  2239 	check_priv_no_precheck();
  2240 	load_reg( R_EAX, Rm );
  2241 	check_ralign32( R_EAX );
  2242 	MOV_r32_r32( R_EAX, R_ECX );
  2243 	ADD_imm8s_r32( 4, R_EAX );
  2244 	store_reg( R_EAX, Rm );
  2245 	MEM_READ_LONG( R_ECX, R_EAX );
  2246 	call_func1( sh4_write_sr, R_EAX );
  2247 	sh4_x86.priv_checked = FALSE;
  2248 	sh4_x86.fpuen_checked = FALSE;
  2249 	sh4_x86.tstate = TSTATE_NONE;
  2251 :}
LDC.L @Rm+, VBR {:  
    /* Privileged pop: VBR = [Rm], Rm += 4. precheck() is done once up
     * front, so the priv/alignment checks use the no-precheck variants. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    /* Privileged pop: SSR = [Rm], Rm += 4. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:  
    /* Privileged pop: SGR = [Rm], Rm += 4. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:  
    /* Privileged pop: SPC = [Rm], Rm += 4. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:  
    /* Privileged pop: DBR = [Rm], Rm += 4. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:  
    /* Privileged pop: Rn_BANK = [Rm], Rm += 4. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_EAX, Rm );
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPSCR {:  
    /* FPSCR = Rm; update_fr_bank refreshes the cached fr_bank pointer
     * in case the FR bit changed. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPSCR );
    update_fr_bank( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS.L @Rm+, FPSCR {:  
    /* Pop FPSCR from @Rm (post-increment, alignment-checked). */
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_FPSCR );
    update_fr_bank( R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPUL {:  
    /* FPUL = Rm. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:  
    /* Pop FPUL from @Rm (post-increment, alignment-checked). */
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {: 
    /* MACH = Rm. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:  
    /* Pop MACH from @Rm (post-increment, alignment-checked). */
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:  
    /* MACL = Rm. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:  
    /* Pop MACL from @Rm (post-increment, alignment-checked). */
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:  
    /* PR = Rm. */
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );
:}
LDS.L @Rm+, PR {:  
    /* Pop PR from @Rm (post-increment, alignment-checked). */
    load_reg( R_EAX, Rm );
    precheck();
    check_ralign32( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:  
    /* Load the UTLB entry selected by PTEH from the PTE registers. */
    MMU_ldtlb();
:}
OCBI @Rn {: /* no-op: operand cache not modelled here - TODO confirm */ :}
OCBP @Rn {: /* no-op: operand cache not modelled here - TODO confirm */ :}
OCBWB @Rn {: /* no-op: operand cache not modelled here - TODO confirm */ :}
PREF @Rn {:
    /* Only prefetches to the store-queue region (0xE0000000 top 6 bits)
     * do anything: they trigger a store-queue flush of the target line. */
    load_reg( R_EAX, Rn );
    MOV_r32_r32( R_EAX, R_ECX );
    AND_imm32_r32( 0xFC000000, R_EAX );
    CMP_imm32_r32( 0xE0000000, R_EAX );
    JNE_rel8(CALL_FUNC1_SIZE, end);
    call_func1( sh4_flush_store_queue, R_ECX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {: 
    /* Privileged. Puts the CPU into standby via sh4_sleep and ends the
     * translation block (non-zero return; presumably "stop translating
     * here" - confirm against the decoder loop). */
    check_priv();
    call_func0( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = FALSE;
    return 2;
:}
STC SR, Rn {:
    /* Privileged: Rn = SR. SR is read via the sh4_read_sr helper rather
     * than a plain spreg load. */
    check_priv();
    call_func0(sh4_read_sr);
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC GBR, Rn {:  
    /* GBR is user-accessible - no privilege check. */
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );
:}
STC VBR, Rn {:  
    /* Privileged: Rn = VBR. */
    check_priv();
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:  
    /* Privileged: Rn = SSR. */
    check_priv();
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:  
    /* Privileged: Rn = SPC. */
    check_priv();
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:  
    /* Privileged: Rn = SGR. */
    check_priv();
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:  
    /* Privileged: Rn = DBR. */
    check_priv();
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    /* Privileged: Rn = banked register Rm_BANK. */
    check_priv();
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SR, @-Rn {:
    /* Privileged push: [--Rn] = SR (pre-decrement, alignment-checked).
     * precheck() first so the no-precheck variants of the checks apply. */
    precheck();
    check_priv_no_precheck();
    call_func0( sh4_read_sr );
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L VBR, @-Rn {:  
    /* Privileged push: [--Rn] = VBR. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_VBR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:  
    /* Privileged push: [--Rn] = SSR. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_SSR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    /* Privileged push: [--Rn] = SPC. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_SPC );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:  
    /* Privileged push: [--Rn] = SGR. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_SGR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:  
    /* Privileged push: [--Rn] = DBR. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_DBR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:  
    /* Privileged push: [--Rn] = banked register Rm_BANK. */
    precheck();
    check_priv_no_precheck();
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:  
    /* User-accessible push: [--Rn] = GBR (no privilege check). */
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_GBR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPSCR, Rn {:  
    /* Rn = FPSCR. */
    load_spreg( R_EAX, R_FPSCR );
    store_reg( R_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:  
    /* Push FPSCR: [--Rn] = FPSCR (pre-decrement, alignment-checked). */
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_FPSCR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:  
    /* Rn = FPUL. */
    load_spreg( R_EAX, R_FPUL );
    store_reg( R_EAX, Rn );
:}
STS.L FPUL, @-Rn {:  
    /* Push FPUL: [--Rn] = FPUL. */
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_FPUL );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:  
    /* Rn = MACH. */
    load_spreg( R_EAX, R_MACH );
    store_reg( R_EAX, Rn );
:}
STS.L MACH, @-Rn {:  
    /* Push MACH: [--Rn] = MACH. */
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_MACH );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:  
    /* Rn = MACL. */
    load_spreg( R_EAX, R_MACL );
    store_reg( R_EAX, Rn );
:}
STS.L MACL, @-Rn {:  
    /* Push MACL: [--Rn] = MACL. */
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_MACL );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:  
    /* Rn = PR. */
    load_spreg( R_EAX, R_PR );
    store_reg( R_EAX, Rn );
:}
STS.L PR, @-Rn {:  
    /* Push PR: [--Rn] = PR. */
    load_reg( R_ECX, Rn );
    precheck();
    check_walign32( R_ECX );
    ADD_imm8s_r32( -4, R_ECX );
    store_reg( R_ECX, Rn );
    load_spreg( R_EAX, R_PR );
    MEM_WRITE_LONG( R_ECX, R_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2630 %%
  2631     sh4_x86.in_delay_slot = FALSE;
  2632     return 0;
.