Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 417:bd927df302a9
prev416:714df603c869
next502:c4ecae2b1b5e
author nkeynes
date Mon Oct 08 12:09:06 2007 +0000 (16 years ago)
permissions -rw-r--r--
last change Fix compilation warnings
view annotate diff log raw
     1 /**
     2  * $Id: sh4x86.in,v 1.19 2007-10-04 08:47:27 nkeynes Exp $
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
     37 /** 
     38  * Struct to manage internal translation state. This state is not saved -
     39  * it is only valid between calls to sh4_translate_begin_block() and
     40  * sh4_translate_end_block()
     41  */
     42 struct sh4_x86_state {
     43     gboolean in_delay_slot; /* selects the SLOT_* exception exits; cleared at block start */
     44     gboolean priv_checked; /* true if we've already checked the cpu mode. */
     45     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
     46     gboolean branch_taken; /* true if we branched unconditionally */
     47     uint32_t block_start_pc; /* pc handed to sh4_translate_begin_block() */
     48     int tstate; /* one of TSTATE_*: the x86 condition that currently mirrors T, if any */
     50     /* Allocated memory for the (block-wide) back-patch list */
     51     uint32_t **backpatch_list;
     52     uint32_t backpatch_posn; /* number of list entries currently in use */
     53     uint32_t backpatch_size; /* capacity of backpatch_list, counted in entries */
     54 };
     56 #define TSTATE_NONE -1  /* no x86 condition mirrors T; sh4r.t has to be tested instead */
     57 #define TSTATE_O    0   /* overflow. Non-negative values are added to 0x70 by JT_rel8/JF_rel8 */
     58 #define TSTATE_C    2   /* carry */
     59 #define TSTATE_E    4   /* equal / zero */
     60 #define TSTATE_NE   5   /* not equal / non-zero */
     61 #define TSTATE_G    0xF /* signed greater-than */
     62 #define TSTATE_GE   0xD /* signed greater-or-equal */
     63 #define TSTATE_A    7   /* unsigned above */
     64 #define TSTATE_AE   3   /* unsigned above-or-equal */
     66 /** Branch if T is set (either in the current cflags, or in sh4r.t).
         * If no condition is cached (TSTATE_NONE), sh4r.t is first compared with 1
         * and the state becomes TSTATE_E. The jump opcode is 0x70 + tstate followed
         * by the rel8 displacement.
         * NOTE: expands to an if-statement followed by further statements, so it
         * must not be used as the unbraced body of another if/else. */
     67 #define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
     68 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
     69     OP(0x70+sh4_x86.tstate); OP(rel8); \
     70     MARK_JMP(rel8,label)
     71 /** Branch if T is clear (either in the current cflags or in sh4r.t).
         * Same as JT_rel8 except that the low bit of the condition code is flipped
         * (tstate^1), which selects the opposite condition. */
     72 #define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
     73 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
     74     OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
     75     MARK_JMP(rel8, label)
     78 #define EXIT_DATA_ADDR_READ 0      /* values step by 7, in the same order as the PUSH/JMP */
     79 #define EXIT_DATA_ADDR_WRITE 7     /* entries emitted by sh4_translate_end_block(); */
     80 #define EXIT_ILLEGAL 14            /* presumably byte offsets into that jump block - */
     81 #define EXIT_SLOT_ILLEGAL 21       /* TODO confirm against the JE_exit/JNE_exit macros */
     82 #define EXIT_FPU_DISABLED 28
     83 #define EXIT_SLOT_FPU_DISABLED 35
     85 static struct sh4_x86_state sh4_x86; /* the single translator state instance used throughout this file */
     87 static uint32_t max_int = 0x7FFFFFFF; /* largest signed 32-bit value */
     88 static uint32_t min_int = 0x80000000; /* bit pattern of the smallest signed 32-bit value */
     89 static uint32_t save_fcw; /* save value for fpu control word */
     90 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    92 void sh4_x86_init()
    93 {
    94     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    95     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
    96 }
     99 static void sh4_x86_add_backpatch( uint8_t *ptr )
    100 {
            /* Append ptr to the block-wide back-patch list, doubling the list's
             * capacity first when it is full. The recorded locations are fixed up
             * later by sh4_x86_do_backpatch().
             * NOTE(review): the realloc result is only checked via assert(), so a
             * build with NDEBUG would carry on with a NULL list. */
    101     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
    102 	sh4_x86.backpatch_size <<= 1;
    103 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
    104 	assert( sh4_x86.backpatch_list != NULL );
    105     }
    106     sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
    107 }
    109 static void sh4_x86_do_backpatch( uint8_t *reloc_base )
    110 {
            /* For every recorded location, add (reloc_base - location - 4) to the
             * 32-bit value stored there. That turns the stored value into a
             * displacement measured from the end of the 4-byte field to
             * reloc_base plus the original value. */
    111     unsigned int i;
    112     for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
    113 	*sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
    114     }
    115 }
    117 /**
    118  * Emit an instruction to load an SH4 reg into a real register
         * @param x86reg destination x86 register number
         * @param sh4reg index into sh4r.r[]; its offset is emitted as a single byte
    119  */
    120 static inline void load_reg( int x86reg, int sh4reg ) 
    121 {
    122     /* mov [bp+n], reg */
    123     OP(0x8B);
    124     OP(0x45 + (x86reg<<3));
    125     OP(REG_OFFSET(r[sh4reg]));
    126 }
    128 static inline void load_reg16s( int x86reg, int sh4reg )
    129 {
            /* 0F BF is MOVSX r32, r/m16: load the low 16 bits of the SH4
             * register, sign-extended to 32 bits. */
    130     OP(0x0F);
    131     OP(0xBF);
    132     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
    133 }
    135 static inline void load_reg16u( int x86reg, int sh4reg )
    136 {
            /* 0F B7 is MOVZX r32, r/m16: load the low 16 bits of the SH4
             * register, zero-extended to 32 bits. */
    137     OP(0x0F);
    138     OP(0xB7);
    139     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
    141 }
    143 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg ) /* sh4r field at regoff -> x86reg */
    144 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff ) /* x86reg -> sh4r field at regoff */
    145 /**
    146  * Emit an instruction to load an immediate value into a register
         * (opcode 0xB8 + register number, followed by the 32-bit immediate).
    147  */
    148 static inline void load_imm32( int x86reg, uint32_t value ) {
    149     /* mov #value, reg */
    150     OP(0xB8 + x86reg);
    151     OP32(value);
    152 }
   154 /**
   155  * Emit an instruction to store an SH4 reg (RN)
   156  */
   157 void static inline store_reg( int x86reg, int sh4reg ) {
   158     /* mov reg, [bp+n] */
   159     OP(0x89);
   160     OP(0x45 + (x86reg<<3));
   161     OP(REG_OFFSET(r[sh4reg]));
   162 }
    164 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank)) /* bankreg = sh4r.fr_bank */
   166 /**
   167  * Load an FR register (single-precision floating point) into an integer x86
   168  * register (eg for register-to-register moves)
   169  */
   170 void static inline load_fr( int bankreg, int x86reg, int frm )
   171 {
   172     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   173 }
   175 /**
   176  * Store an FR register (single-precision floating point) into an integer x86
   177  * register (eg for register-to-register moves)
   178  */
   179 void static inline store_fr( int bankreg, int x86reg, int frn )
   180 {
   181     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   182 }
    185 /**
    186  * Load a pointer to the back fp bank into the specified x86 register. The
    187  * bankreg must have been previously loaded with FPSCR.
         * The register is inverted first, so the bank selected is the opposite of
         * the one update_fr_bank() computes from the same FPSCR value.
    188  * NB: 12 bytes
    189  */
    190 static inline void load_xf_bank( int bankreg )
    191 {
    192     NOT_r32( bankreg );
    193     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
    194     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
    195     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
    196 }
    198 /**
    199  * Update the fr_bank pointer based on the current fpscr value.
         * @param fpscrreg x86 register holding FPSCR on entry; it is overwritten
         *                 with the computed bank address, which is also stored
         *                 into sh4r.fr_bank.
    200  */
    201 static inline void update_fr_bank( int fpscrreg )
    202 {
    203     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
    204     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
    205     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
    206     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
    207 }
    208 /**
    209  * Push FPUL (as a 32-bit float) onto the FPU stack
    210  */
    211 static inline void push_fpul( )
    212 {
    213     OP(0xD9); OP(0x45); OP(R_FPUL); // FLD.S [ebp + R_FPUL]
    214 }
    216 /**
    217  * Pop FPUL (as a 32-bit float) from the FPU stack
    218  */
    219 static inline void pop_fpul( )
    220 {
    221     OP(0xD9); OP(0x5D); OP(R_FPUL); // FSTP.S [ebp + R_FPUL] (store and pop)
    222 }
    224 /**
    225  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
    226  * with the location of the current fp bank.
         * The slot read is (frm^1)*4 bytes into the bank, as in load_fr().
    227  */
    228 static inline void push_fr( int bankreg, int frm ) 
    229 {
    230     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
    231 }
    233 /**
    234  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
    235  * with bankreg previously loaded with the location of the current fp bank.
    236  */
    237 static inline void pop_fr( int bankreg, int frm )
    238 {
    239     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FSTP.S [bankreg + frm^1*4] (the /3 form: store and pop)
    240 }
    242 /**
    243  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
    244  * with the location of the current fp bank.
         * Unlike the single-precision helpers, the offset is frm*4 with no ^1.
    245  */
    246 static inline void push_dr( int bankreg, int frm )
    247 {
    248     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
    249 }
    251 static inline void pop_dr( int bankreg, int frm )
    252 {
            /* Counterpart of push_dr(): pop a 64-bit double from the FPU stack
             * into the fp bank at offset frm*4. */
    253     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FSTP.D [bankreg + frm*4] (the /3 form: store and pop)
    254 }
    256 /**
         * Emit a call to the function at ptr, passing no arguments.
    257  * Note: clobbers EAX to make the indirect call - this isn't usually
    258  * a problem since the callee will usually clobber it anyway.
         * The pointer is emitted as a 32-bit immediate, so this assumes 32-bit
         * code addresses.
    259  */
    260 static inline void call_func0( void *ptr )
    261 {
    262     load_imm32(R_EAX, (uint32_t)ptr);
    263     CALL_r32(R_EAX);
    264 }
    266 static inline void call_func1( void *ptr, int arg1 )
    267 {
            /* Emit a one-argument call: push the x86 register arg1, call ptr via
             * call_func0() (which clobbers EAX), then drop the 4 bytes of argument
             * from the stack. */
    268     PUSH_r32(arg1);
    269     call_func0(ptr);
    270     ADD_imm8s_r32( 4, R_ESP );
    271 }
    273 static inline void call_func2( void *ptr, int arg1, int arg2 )
    274 {
            /* Emit a two-argument call: push arg2 then arg1 (so arg1 ends up on
             * top of the stack), call ptr, then drop the 8 bytes of arguments. */
    275     PUSH_r32(arg2);
    276     PUSH_r32(arg1);
    277     call_func0(ptr);
    278     ADD_imm8s_r32( 8, R_ESP );
    279 }
    281 /**
    282  * Write a double (64-bit) value into memory, with the first word in arg2a, and
    283  * the second in arg2b
         * Both argument pairs are pushed before either call is made: (addr+4, arg2b)
         * first, then (addr, arg2a) on top, so arg2a is written to addr by the first
         * sh4_write_long call and arg2b to addr+4 by the second. addr holds its
         * original value again once the pushes are done.
    284  * NB: 30 bytes
    285  */
    286 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
    287 {
    288     ADD_imm8s_r32( 4, addr );
    289     PUSH_r32(arg2b);
    290     PUSH_r32(addr);
    291     ADD_imm8s_r32( -4, addr );
    292     PUSH_r32(arg2a);
    293     PUSH_r32(addr);
    294     call_func0(sh4_write_long);
    295     ADD_imm8s_r32( 8, R_ESP );
    296     call_func0(sh4_write_long);
    297     ADD_imm8s_r32( 8, R_ESP );
    298 }
    300 /**
    301  * Read a double (64-bit) value from memory, writing the first word into arg2a
    302  * and the second into arg2b. The addr must not be in EAX
         * The address is recovered after the first call by popping the pushed
         * argument back off the stack, and the first result is parked on the stack
         * across the second call.
    303  * NB: 27 bytes
    304  */
    305 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
    306 {
    307     PUSH_r32(addr);
    308     call_func0(sh4_read_long);
    309     POP_r32(addr);
    310     PUSH_r32(R_EAX);
    311     ADD_imm8s_r32( 4, addr );
    312     PUSH_r32(addr);
    313     call_func0(sh4_read_long);
    314     ADD_imm8s_r32( 4, R_ESP );
    315     MOV_r32_r32( R_EAX, arg2b );
    316     POP_r32(arg2a);
    317 }
    319 /* Exception checks - Note that all exception checks will clobber EAX.
         * precheck() loads EDX with the current instruction's index within the
         * block, (pc - block_start_pc) / 2, using the preceding instruction's pc
         * when in a delay slot. The exception trailer in sh4_translate_end_block()
         * consumes EDX. Relies on a variable named pc being in scope. */
    320 #define precheck() load_imm32(R_EDX, (pc-sh4_x86.block_start_pc-(sh4_x86.in_delay_slot?2:0))>>1)
   322 #define check_priv( ) \
   323     if( !sh4_x86.priv_checked ) { \
   324 	sh4_x86.priv_checked = TRUE;\
   325 	precheck();\
   326 	load_spreg( R_EAX, R_SR );\
   327 	AND_imm32_r32( SR_MD, R_EAX );\
   328 	if( sh4_x86.in_delay_slot ) {\
   329 	    JE_exit( EXIT_SLOT_ILLEGAL );\
   330 	} else {\
   331 	    JE_exit( EXIT_ILLEGAL );\
   332 	}\
   333     }\
    336 static void check_priv_no_precheck()
    337 {
            /* Same check as the check_priv() macro, minus the precheck() call:
             * the caller is expected to have loaded EDX already. */
    338     if( !sh4_x86.priv_checked ) {
    339 	sh4_x86.priv_checked = TRUE;
    340 	load_spreg( R_EAX, R_SR );
    341 	AND_imm32_r32( SR_MD, R_EAX );
    342 	if( sh4_x86.in_delay_slot ) {
    343 	    JE_exit( EXIT_SLOT_ILLEGAL );
    344 	} else {
    345 	    JE_exit( EXIT_ILLEGAL );
    346 	}
    347     }
    348 }
    350 #define check_fpuen( ) \
            /* Once per block: emit a test of SR.FD, exiting via the (slot) */ \
            /* FPU-disabled exception when the bit is set. */ \
    351     if( !sh4_x86.fpuen_checked ) {\
    352 	sh4_x86.fpuen_checked = TRUE;\
    353 	precheck();\
    354 	load_spreg( R_EAX, R_SR );\
    355 	AND_imm32_r32( SR_FD, R_EAX );\
    356 	if( sh4_x86.in_delay_slot ) {\
    357 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);\
    358 	} else {\
    359 	    JNE_exit(EXIT_FPU_DISABLED);\
    360 	}\
    361     }
    363 static void check_fpuen_no_precheck()
    364 {
            /* Same check as the check_fpuen() macro, minus the precheck() call:
             * the caller is expected to have loaded EDX already. */
    365     if( !sh4_x86.fpuen_checked ) {
    366 	sh4_x86.fpuen_checked = TRUE;
    367 	load_spreg( R_EAX, R_SR );
    368 	AND_imm32_r32( SR_FD, R_EAX );
    369 	if( sh4_x86.in_delay_slot ) {
    370 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);
    371 	} else {
    372 	    JNE_exit(EXIT_FPU_DISABLED);
    373 	}
    374     }
    376 }
    378 static void check_ralign16( int x86reg )
    379 {
            /* Emit a read-address-error exit if bit 0 of the address in x86reg
             * is set (not 2-byte aligned). */
    380     TEST_imm32_r32( 0x00000001, x86reg );
    381     JNE_exit(EXIT_DATA_ADDR_READ);
    382 }
    384 static void check_walign16( int x86reg )
    385 {
            /* Emit a write-address-error exit if bit 0 of the address in x86reg
             * is set (not 2-byte aligned). */
    386     TEST_imm32_r32( 0x00000001, x86reg );
    387     JNE_exit(EXIT_DATA_ADDR_WRITE);
    388 }
    390 static void check_ralign32( int x86reg )
    391 {
            /* Emit a read-address-error exit if either of the low two bits of the
             * address in x86reg is set (not 4-byte aligned). */
    392     TEST_imm32_r32( 0x00000003, x86reg );
    393     JNE_exit(EXIT_DATA_ADDR_READ);
    394 }
    395 static void check_walign32( int x86reg )
    396 {
            /* Emit a write-address-error exit if either of the low two bits of
             * the address in x86reg is set (not 4-byte aligned). */
    397     TEST_imm32_r32( 0x00000003, x86reg );
    398     JNE_exit(EXIT_DATA_ADDR_WRITE);
    399 }
    401 #define UNDEF() /* expands to nothing */
    402 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); } /* copy the call result out of EAX when another register was requested */
    403 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg) /* the MEM_READ_* macros expand to two statements */
    404 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
    405 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
    406 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg) /* address is the first call argument, value the second */
    407 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
    408 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
    410 #define SLOTILLEGAL() precheck(); JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1; /* emits the slot-illegal exit, then returns 1 from the enclosing function */
    414 /**
    415  * Emit the 'start of block' assembly. Saves EBP and points it at sh4r, then
    416  * resets the per-block translation state (no other registers are saved here).
         * @param pc SH4 address of the first instruction of the block
    417  */
    418 void sh4_translate_begin_block( sh4addr_t pc ) 
    419 {
    420     PUSH_r32(R_EBP);
    421     /* mov &sh4r, ebp */
    422     load_imm32( R_EBP, (uint32_t)&sh4r );
    424     sh4_x86.in_delay_slot = FALSE;
    425     sh4_x86.priv_checked = FALSE;
    426     sh4_x86.fpuen_checked = FALSE;
    427     sh4_x86.branch_taken = FALSE;
    428     sh4_x86.backpatch_posn = 0;
    429     sh4_x86.block_start_pc = pc;
    430     sh4_x86.tstate = TSTATE_NONE;
    431 }
    433 /**
    434  * Exit the block to an absolute PC
         * Stores pc into sh4r.pc, loads EAX from the translation lookup entry for
         * pc, and adds the cycles for the instructions from the block start up to
         * endpc to sh4r.slice_cycle before restoring EBP and returning.
         * @param pc    SH4 address execution continues at
         * @param endpc SH4 address used to compute the elapsed cycle count
    435  * Bytes: 29
    436  */
    437 void exit_block( sh4addr_t pc, sh4addr_t endpc )
    438 {
    439     load_imm32( R_ECX, pc );                            // 5
    440     store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
    441     MOV_moff32_EAX( (uint32_t)xlat_get_lut_entry(pc) ); // 5
    442     AND_imm8s_r32( 0xFC, R_EAX ); // 3
    443     load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
    444     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
    445     POP_r32(R_EBP);
    446     RET();
    447 }
   449 /**
   450  * Exit the block with sh4r.pc already written
   451  * Bytes: 15
   452  */
   453 void exit_block_pcset( pc )
   454 {
   455     load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   456     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
   457     load_spreg( R_EAX, REG_OFFSET(pc) );
   458     call_func1(xlat_get_code,R_EAX);
   459     POP_r32(R_EBP);
   460     RET();
   461 }
    463 /**
    464  * Write the block trailer (exception handling block)
         * If the block has not already exited unconditionally, a normal exit to pc
         * is emitted first. The trailer is only emitted when at least one
         * back-patch location was recorded; those locations are then fixed up
         * relative to the start of the trailer.
    465  */
    466 void sh4_translate_end_block( sh4addr_t pc ) {
    467     if( sh4_x86.branch_taken == FALSE ) {
    468 	// Didn't exit unconditionally already, so write the termination here
    469 	exit_block( pc, pc );
    470     }
    471     if( sh4_x86.backpatch_posn != 0 ) {
    472 	uint8_t *end_ptr = xlat_output;
    473 	// Exception termination. Jump block for various exception codes:
        	// each entry pushes its exception code and jumps to the common tail;
        	// the jump distances shrink by 7 per entry, the last entry falls through.
    474 	PUSH_imm32( EXC_DATA_ADDR_READ );
    475 	JMP_rel8( 33, target1 );
    476 	PUSH_imm32( EXC_DATA_ADDR_WRITE );
    477 	JMP_rel8( 26, target2 );
    478 	PUSH_imm32( EXC_ILLEGAL );
    479 	JMP_rel8( 19, target3 );
    480 	PUSH_imm32( EXC_SLOT_ILLEGAL ); 
    481 	JMP_rel8( 12, target4 );
    482 	PUSH_imm32( EXC_FPU_DISABLED ); 
    483 	JMP_rel8( 5, target5 );
    484 	PUSH_imm32( EXC_SLOT_FPU_DISABLED );
    485 	// target
    486 	JMP_TARGET(target1);
    487 	JMP_TARGET(target2);
    488 	JMP_TARGET(target3);
    489 	JMP_TARGET(target4);
    490 	JMP_TARGET(target5);
    491 	// Raise exception
        	// EDX holds the instruction index loaded by precheck(): it is added
        	// twice to sh4r.pc (2 bytes per instruction) and multiplied by
        	// sh4_cpu_period to charge the cycles already executed.
    492 	load_spreg( R_ECX, REG_OFFSET(pc) );
    493 	ADD_r32_r32( R_EDX, R_ECX );
    494 	ADD_r32_r32( R_EDX, R_ECX );
    495 	store_spreg( R_ECX, REG_OFFSET(pc) );
    496 	MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
    497 	MUL_r32( R_EDX );
    498 	ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
    500 	load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
    501 	CALL_r32( R_EAX ); // 2
        	// drop the exception code pushed by the jump block entry
    502 	ADD_imm8s_r32( 4, R_ESP );
    503 	load_spreg( R_EAX, REG_OFFSET(pc) );
    504 	call_func1(xlat_get_code,R_EAX);
    505 	POP_r32(R_EBP);
    506 	RET();
    508 	sh4_x86_do_backpatch( end_ptr );
    509     }
    511 }
    514 extern uint16_t *sh4_icache; /* page of instruction words last fetched from, or NULL */
    515 extern uint32_t sh4_icache_addr; /* page number (pc >> 12) that sh4_icache refers to */
    517 /**
    518  * Translate a single instruction. Delayed branches are handled specially
    519  * by translating both branch and delayed instruction as a single unit.
    520  * 
    521  * @param pc SH4 address of the instruction to translate.
    522  * @return true if the instruction marks the end of a basic block
    523  * (eg a branch).
    524  */
   525 uint32_t sh4_x86_translate_instruction( sh4addr_t pc )
   526 {
   527     uint32_t ir;
   528     /* Read instruction */
   529     uint32_t pageaddr = pc >> 12;
   530     if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
   531 	ir = sh4_icache[(pc&0xFFF)>>1];
   532     } else {
   533 	sh4_icache = (uint16_t *)mem_get_page(pc);
   534 	if( ((uint32_t)sh4_icache) < MAX_IO_REGIONS ) {
   535 	    /* If someone's actually been so daft as to try to execute out of an IO
   536 	     * region, fallback on the full-blown memory read
   537 	     */
   538 	    sh4_icache = NULL;
   539 	    ir = sh4_read_word(pc);
   540 	} else {
   541 	    sh4_icache_addr = pageaddr;
   542 	    ir = sh4_icache[(pc&0xFFF)>>1];
   543 	}
   544     }
   546 %%
   547 /* ALU operations */
    548 ADD Rm, Rn {:
            /* Rn += Rm. No x86 condition mirrors T afterwards. */
    549     load_reg( R_EAX, Rm );
    550     load_reg( R_ECX, Rn );
    551     ADD_r32_r32( R_EAX, R_ECX );
    552     store_reg( R_ECX, Rn );
    553     sh4_x86.tstate = TSTATE_NONE;
    554 :}
    555 ADD #imm, Rn {:  
            /* Rn += imm, using the sign-extended 8-bit immediate form of ADD. */
    556     load_reg( R_EAX, Rn );
    557     ADD_imm8s_r32( imm, R_EAX );
    558     store_reg( R_EAX, Rn );
    559     sh4_x86.tstate = TSTATE_NONE;
    560 :}
    561 ADDC Rm, Rn {:
            /* Rn = Rn + Rm + T, with T = carry out. T is loaded into the x86
             * carry flag first unless the flag is already known to hold it. */
    562     if( sh4_x86.tstate != TSTATE_C ) {
    563 	LDC_t();
    564     }
    565     load_reg( R_EAX, Rm );
    566     load_reg( R_ECX, Rn );
    567     ADC_r32_r32( R_EAX, R_ECX );
    568     store_reg( R_ECX, Rn );
    569     SETC_t();
    570     sh4_x86.tstate = TSTATE_C;
    571 :}
    572 ADDV Rm, Rn {:
            /* Rn += Rm, with T = signed overflow of the addition. */
    573     load_reg( R_EAX, Rm );
    574     load_reg( R_ECX, Rn );
    575     ADD_r32_r32( R_EAX, R_ECX );
    576     store_reg( R_ECX, Rn );
    577     SETO_t();
    578     sh4_x86.tstate = TSTATE_O;
    579 :}
    580 AND Rm, Rn {:
            /* Rn &= Rm. No x86 condition mirrors T afterwards. */
    581     load_reg( R_EAX, Rm );
    582     load_reg( R_ECX, Rn );
    583     AND_r32_r32( R_EAX, R_ECX );
    584     store_reg( R_ECX, Rn );
    585     sh4_x86.tstate = TSTATE_NONE;
    586 :}
    587 AND #imm, R0 {:  
            /* R0 &= imm, using the 32-bit immediate form of AND. */
    588     load_reg( R_EAX, 0 );
    589     AND_imm32_r32(imm, R_EAX); 
    590     store_reg( R_EAX, 0 );
    591     sh4_x86.tstate = TSTATE_NONE;
    592 :}
    593 AND.B #imm, @(R0, GBR) {: 
            /* Read-modify-write of the byte at R0+GBR: the address is pushed as
             * the argument to sh4_read_byte and popped back into ECX afterwards
             * so it can be reused for the write. */
    594     load_reg( R_EAX, 0 );
    595     load_spreg( R_ECX, R_GBR );
    596     ADD_r32_r32( R_EAX, R_ECX );
    597     PUSH_r32(R_ECX);
    598     call_func0(sh4_read_byte);
    599     POP_r32(R_ECX);
    600     AND_imm32_r32(imm, R_EAX );
    601     MEM_WRITE_BYTE( R_ECX, R_EAX );
    602     sh4_x86.tstate = TSTATE_NONE;
    603 :}
    604 CMP/EQ Rm, Rn {:  
            /* T = (Rn == Rm); the x86 'equal' condition mirrors T afterwards. */
    605     load_reg( R_EAX, Rm );
    606     load_reg( R_ECX, Rn );
    607     CMP_r32_r32( R_EAX, R_ECX );
    608     SETE_t();
    609     sh4_x86.tstate = TSTATE_E;
    610 :}
    611 CMP/EQ #imm, R0 {:  
            /* T = (R0 == imm), comparing against the sign-extended 8-bit immediate. */
    612     load_reg( R_EAX, 0 );
    613     CMP_imm8s_r32(imm, R_EAX);
    614     SETE_t();
    615     sh4_x86.tstate = TSTATE_E;
    616 :}
    617 CMP/GE Rm, Rn {:  
            /* T = (Rn >= Rm), signed comparison. */
    618     load_reg( R_EAX, Rm );
    619     load_reg( R_ECX, Rn );
    620     CMP_r32_r32( R_EAX, R_ECX );
    621     SETGE_t();
    622     sh4_x86.tstate = TSTATE_GE;
    623 :}
    624 CMP/GT Rm, Rn {: 
            /* T = (Rn > Rm), signed comparison. */
    625     load_reg( R_EAX, Rm );
    626     load_reg( R_ECX, Rn );
    627     CMP_r32_r32( R_EAX, R_ECX );
    628     SETG_t();
    629     sh4_x86.tstate = TSTATE_G;
    630 :}
    631 CMP/HI Rm, Rn {:  
            /* T = (Rn > Rm), unsigned comparison. */
    632     load_reg( R_EAX, Rm );
    633     load_reg( R_ECX, Rn );
    634     CMP_r32_r32( R_EAX, R_ECX );
    635     SETA_t();
    636     sh4_x86.tstate = TSTATE_A;
    637 :}
    638 CMP/HS Rm, Rn {: 
            /* T = (Rn >= Rm), unsigned comparison. */
    639     load_reg( R_EAX, Rm );
    640     load_reg( R_ECX, Rn );
    641     CMP_r32_r32( R_EAX, R_ECX );
    642     SETAE_t();
    643     sh4_x86.tstate = TSTATE_AE;
    644  :}
    645 CMP/PL Rn {: 
            /* T = (Rn > 0), signed comparison against zero. */
    646     load_reg( R_EAX, Rn );
    647     CMP_imm8s_r32( 0, R_EAX );
    648     SETG_t();
    649     sh4_x86.tstate = TSTATE_G;
    650 :}
    651 CMP/PZ Rn {:  
            /* T = (Rn >= 0), signed comparison against zero. */
    652     load_reg( R_EAX, Rn );
    653     CMP_imm8s_r32( 0, R_EAX );
    654     SETGE_t();
    655     sh4_x86.tstate = TSTATE_GE;
    656 :}
    657 CMP/STR Rm, Rn {:  
            /* T = 1 if any of the four bytes of Rm equals the corresponding byte
             * of Rn. The registers are XORed so a matching byte becomes zero; each
             * byte is tested in turn and the first zero byte jumps straight to the
             * SETE with the zero flag still set. */
    658     load_reg( R_EAX, Rm );
    659     load_reg( R_ECX, Rn );
    660     XOR_r32_r32( R_ECX, R_EAX );
    661     TEST_r8_r8( R_AL, R_AL );
    662     JE_rel8(13, target1);
    663     TEST_r8_r8( R_AH, R_AH ); // 2
    664     JE_rel8(9, target2);
    665     SHR_imm8_r32( 16, R_EAX ); // 3
    666     TEST_r8_r8( R_AL, R_AL ); // 2
    667     JE_rel8(2, target3);
    668     TEST_r8_r8( R_AH, R_AH ); // 2
    669     JMP_TARGET(target1);
    670     JMP_TARGET(target2);
    671     JMP_TARGET(target3);
    672     SETE_t();
    673     sh4_x86.tstate = TSTATE_E;
    674 :}
    675 DIV0S Rm, Rn {:
            /* M = sign bit of Rm, Q = sign bit of Rn, T = (M != Q). */
    676     load_reg( R_EAX, Rm );
    677     load_reg( R_ECX, Rn );
    678     SHR_imm8_r32( 31, R_EAX );
    679     SHR_imm8_r32( 31, R_ECX );
    680     store_spreg( R_EAX, R_M );
    681     store_spreg( R_ECX, R_Q );
    682     CMP_r32_r32( R_EAX, R_ECX );
    683     SETNE_t();
    684     sh4_x86.tstate = TSTATE_NE;
    685 :}
    686 DIV0U {:  
            /* Q = M = T = 0. The XOR that zeroes EAX also clears the x86 carry
             * flag, so carry does mirror T here. */
    687     XOR_r32_r32( R_EAX, R_EAX );
    688     store_spreg( R_EAX, R_Q );
    689     store_spreg( R_EAX, R_M );
    690     store_spreg( R_EAX, R_T );
    691     sh4_x86.tstate = TSTATE_C; // works for DIV1
    692 :}
    693 DIV1 Rm, Rn {:
            /* One division step. Rn is rotated left through carry (T shifts in,
             * the old top bit is saved in DL); Rm is then subtracted when the old
             * Q equals M and added otherwise. The carry of that add/subtract is
             * combined with DL and M to form the new Q, and T is set to the
             * inverse of (carry ^ DL), i.e. T = (new Q == M). */
    694     load_spreg( R_ECX, R_M );
    695     load_reg( R_EAX, Rn );
    696     if( sh4_x86.tstate != TSTATE_C ) {
    697 	LDC_t();
    698     }
    699     RCL1_r32( R_EAX );
    700     SETC_r8( R_DL ); // Q'
    701     CMP_sh4r_r32( R_Q, R_ECX );
    702     JE_rel8(5, mqequal);
    703     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    704     JMP_rel8(3, end);
    705     JMP_TARGET(mqequal);
    706     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
    707     JMP_TARGET(end);
    708     store_reg( R_EAX, Rn ); // Done with Rn now
    709     SETC_r8(R_AL); // tmp1
    710     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
    711     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
    712     store_spreg( R_ECX, R_Q );
    713     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
    714     MOVZX_r8_r32( R_AL, R_EAX );
    715     store_spreg( R_EAX, R_T );
    716     sh4_x86.tstate = TSTATE_NONE;
    717 :}
    718 DMULS.L Rm, Rn {:  
            /* Signed 32x32 -> 64 multiply: MACH:MACL = Rm * Rn (EDX:EAX). */
    719     load_reg( R_EAX, Rm );
    720     load_reg( R_ECX, Rn );
    721     IMUL_r32(R_ECX);
    722     store_spreg( R_EDX, R_MACH );
    723     store_spreg( R_EAX, R_MACL );
    724     sh4_x86.tstate = TSTATE_NONE;
    725 :}
    726 DMULU.L Rm, Rn {:  
            /* Unsigned 32x32 -> 64 multiply: MACH:MACL = Rm * Rn (EDX:EAX). */
    727     load_reg( R_EAX, Rm );
    728     load_reg( R_ECX, Rn );
    729     MUL_r32(R_ECX);
    730     store_spreg( R_EDX, R_MACH );
    731     store_spreg( R_EAX, R_MACL );    
    732     sh4_x86.tstate = TSTATE_NONE;
    733 :}
    734 DT Rn {:  
            /* Rn -= 1, T = (Rn == 0). SETE uses the flags left by the ADD. */
    735     load_reg( R_EAX, Rn );
    736     ADD_imm8s_r32( -1, R_EAX );
    737     store_reg( R_EAX, Rn );
    738     SETE_t();
    739     sh4_x86.tstate = TSTATE_E;
    740 :}
    741 EXTS.B Rm, Rn {:  
            /* Rn = low byte of Rm, sign-extended. tstate is left as it was. */
    742     load_reg( R_EAX, Rm );
    743     MOVSX_r8_r32( R_EAX, R_EAX );
    744     store_reg( R_EAX, Rn );
    745 :}
    746 EXTS.W Rm, Rn {:  
            /* Rn = low 16 bits of Rm, sign-extended. tstate is left as it was. */
    747     load_reg( R_EAX, Rm );
    748     MOVSX_r16_r32( R_EAX, R_EAX );
    749     store_reg( R_EAX, Rn );
    750 :}
    751 EXTU.B Rm, Rn {:  
            /* Rn = low byte of Rm, zero-extended. tstate is left as it was. */
    752     load_reg( R_EAX, Rm );
    753     MOVZX_r8_r32( R_EAX, R_EAX );
    754     store_reg( R_EAX, Rn );
    755 :}
    756 EXTU.W Rm, Rn {:  
            /* Rn = low 16 bits of Rm, zero-extended. tstate is left as it was. */
    757     load_reg( R_EAX, Rm );
    758     MOVZX_r16_r32( R_EAX, R_EAX );
    759     store_reg( R_EAX, Rn );
    760 :}
    761 MAC.L @Rm+, @Rn+ {:  
            /* Both addresses are alignment-checked before anything is modified.
             * The long at @Rn is read first (Rn += 4) and parked on the stack,
             * then the long at @Rm (Rm += 4); their signed 64-bit product is added
             * to MACH:MACL. When S is non-zero, signsat48() is called afterwards. */
    762     load_reg( R_ECX, Rm );
    763     precheck();
    764     check_ralign32( R_ECX );
    765     load_reg( R_ECX, Rn );
    766     check_ralign32( R_ECX );
    767     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
    768     MEM_READ_LONG( R_ECX, R_EAX );
    769     PUSH_r32( R_EAX );
    770     load_reg( R_ECX, Rm );
    771     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
    772     MEM_READ_LONG( R_ECX, R_EAX );
    773     POP_r32( R_ECX );
    774     IMUL_r32( R_ECX );
    775     ADD_r32_sh4r( R_EAX, R_MACL );
    776     ADC_r32_sh4r( R_EDX, R_MACH );
    778     load_spreg( R_ECX, R_S );
    779     TEST_r32_r32(R_ECX, R_ECX);
    780     JE_rel8( 7, nosat );
    781     call_func0( signsat48 );
    782     JMP_TARGET( nosat );
    783     sh4_x86.tstate = TSTATE_NONE;
    784 :}
    785 MAC.W @Rm+, @Rn+ {:  
            /* Word multiply-accumulate. Both addresses are alignment-checked,
             * the words at @Rn and @Rm are read (each register += 2) and
             * multiplied. With S == 0 the 64-bit product is added to MACH:MACL.
             * With S != 0 the product is added to MACL only; on signed overflow
             * MACH is set to 1 and MACL is clamped to 0x7FFFFFFF when the wrapped
             * sum is negative, or to 0x80000000 otherwise. */
    786     load_reg( R_ECX, Rm );
    787     precheck();
    788     check_ralign16( R_ECX );
    789     load_reg( R_ECX, Rn );
    790     check_ralign16( R_ECX );
    791     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
    792     MEM_READ_WORD( R_ECX, R_EAX );
    793     PUSH_r32( R_EAX );
    794     load_reg( R_ECX, Rm );
    795     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
    796     MEM_READ_WORD( R_ECX, R_EAX );
    797     POP_r32( R_ECX );
    798     IMUL_r32( R_ECX );
    800     load_spreg( R_ECX, R_S );
    801     TEST_r32_r32( R_ECX, R_ECX );
    802     JE_rel8( 47, nosat );
    804     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    805     JNO_rel8( 51, end );            // 2
    806     load_imm32( R_EDX, 1 );         // 5
    807     store_spreg( R_EDX, R_MACH );   // 6
    808     JS_rel8( 13, positive );        // 2
    809     load_imm32( R_EAX, 0x80000000 );// 5
    810     store_spreg( R_EAX, R_MACL );   // 6
    811     JMP_rel8( 25, end2 );           // 2
    813     JMP_TARGET(positive);
    814     load_imm32( R_EAX, 0x7FFFFFFF );// 5
    815     store_spreg( R_EAX, R_MACL );   // 6
    816     JMP_rel8( 12, end3);            // 2
    818     JMP_TARGET(nosat);
    819     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
    820     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
    821     JMP_TARGET(end);
    822     JMP_TARGET(end2);
    823     JMP_TARGET(end3);
    824     sh4_x86.tstate = TSTATE_NONE;
    825 :}
    826 MOVT Rn {:  
            /* Rn = T, copied from sh4r.t. tstate is left as it was. */
    827     load_spreg( R_EAX, R_T );
    828     store_reg( R_EAX, Rn );
    829 :}
    830 MUL.L Rm, Rn {:  
            /* MACL = low 32 bits of Rm * Rn; the high half in EDX is discarded. */
    831     load_reg( R_EAX, Rm );
    832     load_reg( R_ECX, Rn );
    833     MUL_r32( R_ECX );
    834     store_spreg( R_EAX, R_MACL );
    835     sh4_x86.tstate = TSTATE_NONE;
    836 :}
    837 MULS.W Rm, Rn {:
            /* MACL = product of the sign-extended low words of Rm and Rn. Only
             * the low 32 bits (EAX) of the multiply are kept, which do not depend
             * on whether MUL or IMUL is used. */
    838     load_reg16s( R_EAX, Rm );
    839     load_reg16s( R_ECX, Rn );
    840     MUL_r32( R_ECX );
    841     store_spreg( R_EAX, R_MACL );
    842     sh4_x86.tstate = TSTATE_NONE;
    843 :}
    844 MULU.W Rm, Rn {:  
            /* MACL = product of the zero-extended low words of Rm and Rn. */
    845     load_reg16u( R_EAX, Rm );
    846     load_reg16u( R_ECX, Rn );
    847     MUL_r32( R_ECX );
    848     store_spreg( R_EAX, R_MACL );
    849     sh4_x86.tstate = TSTATE_NONE;
    850 :}
    851 NEG Rm, Rn {:
            /* Rn = -Rm. No x86 condition mirrors T afterwards. */
    852     load_reg( R_EAX, Rm );
    853     NEG_r32( R_EAX );
    854     store_reg( R_EAX, Rn );
    855     sh4_x86.tstate = TSTATE_NONE;
    856 :}
    857 NEGC Rm, Rn {:  
            /* Rn = 0 - Rm - T, with T = borrow out. T is always reloaded into the
             * carry flag because the XOR that zeroes ECX changes the flags. */
    858     load_reg( R_EAX, Rm );
    859     XOR_r32_r32( R_ECX, R_ECX );
    860     LDC_t();
    861     SBB_r32_r32( R_EAX, R_ECX );
    862     store_reg( R_ECX, Rn );
    863     SETC_t();
    864     sh4_x86.tstate = TSTATE_C;
    865 :}
    866 NOT Rm, Rn {:  
            /* Rn = ~Rm. */
    867     load_reg( R_EAX, Rm );
    868     NOT_r32( R_EAX );
    869     store_reg( R_EAX, Rn );
    870     sh4_x86.tstate = TSTATE_NONE;
    871 :}
    872 OR Rm, Rn {:  
            /* Rn |= Rm. No x86 condition mirrors T afterwards. */
    873     load_reg( R_EAX, Rm );
    874     load_reg( R_ECX, Rn );
    875     OR_r32_r32( R_EAX, R_ECX );
    876     store_reg( R_ECX, Rn );
    877     sh4_x86.tstate = TSTATE_NONE;
    878 :}
    879 OR #imm, R0 {:
            /* R0 |= imm, using the 32-bit immediate form of OR. */
    880     load_reg( R_EAX, 0 );
    881     OR_imm32_r32(imm, R_EAX);
    882     store_reg( R_EAX, 0 );
    883     sh4_x86.tstate = TSTATE_NONE;
    884 :}
    885 OR.B #imm, @(R0, GBR) {:  
            /* Read-modify-write of the byte at R0+GBR: the address is pushed as
             * the argument to sh4_read_byte and popped back into ECX afterwards
             * so it can be reused for the write. */
    886     load_reg( R_EAX, 0 );
    887     load_spreg( R_ECX, R_GBR );
    888     ADD_r32_r32( R_EAX, R_ECX );
    889     PUSH_r32(R_ECX);
    890     call_func0(sh4_read_byte);
    891     POP_r32(R_ECX);
    892     OR_imm32_r32(imm, R_EAX );
    893     MEM_WRITE_BYTE( R_ECX, R_EAX );
    894     sh4_x86.tstate = TSTATE_NONE;
    895 :}
    896 ROTCL Rn {:
            /* Rotate Rn left by one through T: T shifts into bit 0 and the old
             * top bit becomes the new T. */
    897     load_reg( R_EAX, Rn );
    898     if( sh4_x86.tstate != TSTATE_C ) {
    899 	LDC_t();
    900     }
    901     RCL1_r32( R_EAX );
    902     store_reg( R_EAX, Rn );
    903     SETC_t();
    904     sh4_x86.tstate = TSTATE_C;
    905 :}
    906 ROTCR Rn {:  
            /* Rotate Rn right by one through T: T shifts into the top bit and the
             * old bit 0 becomes the new T. */
    907     load_reg( R_EAX, Rn );
    908     if( sh4_x86.tstate != TSTATE_C ) {
    909 	LDC_t();
    910     }
    911     RCR1_r32( R_EAX );
    912     store_reg( R_EAX, Rn );
    913     SETC_t();
    914     sh4_x86.tstate = TSTATE_C;
    915 :}
    916 ROTL Rn {:  
            /* Rotate Rn left by one; T = the x86 carry flag after the rotate. */
    917     load_reg( R_EAX, Rn );
    918     ROL1_r32( R_EAX );
    919     store_reg( R_EAX, Rn );
    920     SETC_t();
    921     sh4_x86.tstate = TSTATE_C;
    922 :}
    923 ROTR Rn {:  
            /* Rotate Rn right by one; T = the x86 carry flag after the rotate. */
    924     load_reg( R_EAX, Rn );
    925     ROR1_r32( R_EAX );
    926     store_reg( R_EAX, Rn );
    927     SETC_t();
    928     sh4_x86.tstate = TSTATE_C;
    929 :}
    930 SHAD Rm, Rn {:
    931     /* Annoyingly enough, not directly convertible */
            /* Rm >= 0: Rn <<= (Rm & 0x1F).
             * Rm <  0: Rn is shifted right arithmetically by (-Rm & 0x1F); when
             *          that count is zero the shift is by 31 instead, leaving
             *          only copies of the sign bit. */
    932     load_reg( R_EAX, Rn );
    933     load_reg( R_ECX, Rm );
    934     CMP_imm32_r32( 0, R_ECX );
    935     JGE_rel8(16, doshl);
    937     NEG_r32( R_ECX );      // 2
    938     AND_imm8_r8( 0x1F, R_CL ); // 3
    939     JE_rel8( 4, emptysar);     // 2
    940     SAR_r32_CL( R_EAX );       // 2
    941     JMP_rel8(10, end);          // 2
    943     JMP_TARGET(emptysar);
    944     SAR_imm8_r32(31, R_EAX );  // 3
    945     JMP_rel8(5, end2);
    947     JMP_TARGET(doshl);
    948     AND_imm8_r8( 0x1F, R_CL ); // 3
    949     SHL_r32_CL( R_EAX );       // 2
    950     JMP_TARGET(end);
    951     JMP_TARGET(end2);
    952     store_reg( R_EAX, Rn );
    953     sh4_x86.tstate = TSTATE_NONE;
    954 :}
    955 SHLD Rm, Rn {:  
            /* Rm >= 0: Rn <<= (Rm & 0x1F).
             * Rm <  0: Rn is shifted right logically by (-Rm & 0x1F); when that
             *          count is zero the result is 0 instead. */
    956     load_reg( R_EAX, Rn );
    957     load_reg( R_ECX, Rm );
    958     CMP_imm32_r32( 0, R_ECX );
    959     JGE_rel8(15, doshl);
    961     NEG_r32( R_ECX );      // 2
    962     AND_imm8_r8( 0x1F, R_CL ); // 3
    963     JE_rel8( 4, emptyshr );
    964     SHR_r32_CL( R_EAX );       // 2
    965     JMP_rel8(9, end);          // 2
    967     JMP_TARGET(emptyshr);
    968     XOR_r32_r32( R_EAX, R_EAX );
    969     JMP_rel8(5, end2);
    971     JMP_TARGET(doshl);
    972     AND_imm8_r8( 0x1F, R_CL ); // 3
    973     SHL_r32_CL( R_EAX );       // 2
    974     JMP_TARGET(end);
    975     JMP_TARGET(end2);
    976     store_reg( R_EAX, Rn );
    977     sh4_x86.tstate = TSTATE_NONE;
    978 :}
   979 SHAL Rn {: /* Emit: EAX = Rn, SHL1 on EAX, SETC_t(), then write EAX back to Rn. */
   980     load_reg( R_EAX, Rn );
   981     SHL1_r32( R_EAX );
   982     SETC_t(); // emitted directly after the shift, before the store
   983     store_reg( R_EAX, Rn );
   984     sh4_x86.tstate = TSTATE_C;
   985 :}
   986 SHAR Rn {:  /* Emit: EAX = Rn, SAR1 on EAX, SETC_t(), then write EAX back to Rn. */
   987     load_reg( R_EAX, Rn );
   988     SAR1_r32( R_EAX );
   989     SETC_t(); // emitted directly after the shift, before the store
   990     store_reg( R_EAX, Rn );
   991     sh4_x86.tstate = TSTATE_C;
   992 :}
   993 SHLL Rn {:  /* Emit: EAX = Rn, SHL1 on EAX, SETC_t(), then write EAX back to Rn (same sequence as SHAL). */
   994     load_reg( R_EAX, Rn );
   995     SHL1_r32( R_EAX );
   996     SETC_t(); // emitted directly after the shift, before the store
   997     store_reg( R_EAX, Rn );
   998     sh4_x86.tstate = TSTATE_C;
   999 :}
  1000 SHLL2 Rn {: /* Emit: Rn = Rn shifted left by the immediate 2; T is not written. */
  1001     load_reg( R_EAX, Rn );
  1002     SHL_imm8_r32( 2, R_EAX );
  1003     store_reg( R_EAX, Rn );
  1004     sh4_x86.tstate = TSTATE_NONE; // the shift is not followed by a SETx_t(), so no T-state is tracked
  1005 :}
  1006 SHLL8 Rn {:  /* Emit: Rn = Rn shifted left by the immediate 8; T is not written. */
  1007     load_reg( R_EAX, Rn );
  1008     SHL_imm8_r32( 8, R_EAX );
  1009     store_reg( R_EAX, Rn );
  1010     sh4_x86.tstate = TSTATE_NONE; // the shift is not followed by a SETx_t(), so no T-state is tracked
  1011 :}
  1012 SHLL16 Rn {:  /* Emit: Rn = Rn shifted left by the immediate 16; T is not written. */
  1013     load_reg( R_EAX, Rn );
  1014     SHL_imm8_r32( 16, R_EAX );
  1015     store_reg( R_EAX, Rn );
  1016     sh4_x86.tstate = TSTATE_NONE; // the shift is not followed by a SETx_t(), so no T-state is tracked
  1017 :}
  1018 SHLR Rn {:  /* Emit: EAX = Rn, SHR1 on EAX, SETC_t(), then write EAX back to Rn. */
  1019     load_reg( R_EAX, Rn );
  1020     SHR1_r32( R_EAX );
  1021     SETC_t(); // emitted directly after the shift, before the store
  1022     store_reg( R_EAX, Rn );
  1023     sh4_x86.tstate = TSTATE_C;
  1024 :}
  1025 SHLR2 Rn {:  /* Emit: Rn = Rn shifted right (SHR) by the immediate 2; T is not written. */
  1026     load_reg( R_EAX, Rn );
  1027     SHR_imm8_r32( 2, R_EAX );
  1028     store_reg( R_EAX, Rn );
  1029     sh4_x86.tstate = TSTATE_NONE; // the shift is not followed by a SETx_t(), so no T-state is tracked
  1030 :}
  1031 SHLR8 Rn {:  /* Emit: Rn = Rn shifted right (SHR) by the immediate 8; T is not written. */
  1032     load_reg( R_EAX, Rn );
  1033     SHR_imm8_r32( 8, R_EAX );
  1034     store_reg( R_EAX, Rn );
  1035     sh4_x86.tstate = TSTATE_NONE; // the shift is not followed by a SETx_t(), so no T-state is tracked
  1036 :}
  1037 SHLR16 Rn {:  /* Emit: Rn = Rn shifted right (SHR) by the immediate 16; T is not written. */
  1038     load_reg( R_EAX, Rn );
  1039     SHR_imm8_r32( 16, R_EAX );
  1040     store_reg( R_EAX, Rn );
  1041     sh4_x86.tstate = TSTATE_NONE; // the shift is not followed by a SETx_t(), so no T-state is tracked
  1042 :}
  1043 SUB Rm, Rn {:  /* Emit: EAX = Rm, ECX = Rn, SUB EAX from ECX, store ECX to Rn. */
  1044     load_reg( R_EAX, Rm );
  1045     load_reg( R_ECX, Rn );
  1046     SUB_r32_r32( R_EAX, R_ECX ); // the result is taken from the second operand (ECX)
  1047     store_reg( R_ECX, Rn );
  1048     sh4_x86.tstate = TSTATE_NONE;
  1049 :}
  1050 SUBC Rm, Rn {:  /* Emit: EAX = Rm, ECX = Rn, SBB EAX from ECX, store ECX to Rn, then SETC_t(). */
  1051     load_reg( R_EAX, Rm );
  1052     load_reg( R_ECX, Rn );
  1053     if( sh4_x86.tstate != TSTATE_C ) {
  1054 	LDC_t(); // only emitted when the tracked T-state is not already TSTATE_C
  1056     SBB_r32_r32( R_EAX, R_ECX );
  1057     store_reg( R_ECX, Rn );
  1058     SETC_t();
  1059     sh4_x86.tstate = TSTATE_C;
  1060 :}
  1061 SUBV Rm, Rn {:  /* Emit: EAX = Rm, ECX = Rn, SUB EAX from ECX, store ECX to Rn, then SETO_t(). */
  1062     load_reg( R_EAX, Rm );
  1063     load_reg( R_ECX, Rn );
  1064     SUB_r32_r32( R_EAX, R_ECX );
  1065     store_reg( R_ECX, Rn );
  1066     SETO_t(); // by its name, records the overflow condition into T - confirm in x86op.h
  1067     sh4_x86.tstate = TSTATE_O;
  1068 :}
  1069 SWAP.B Rm, Rn {:  /* Emit: EAX = Rm, exchange AL with AH, store EAX to Rn. */
  1070     load_reg( R_EAX, Rm );
  1071     XCHG_r8_r8( R_AL, R_AH ); // swaps the two low bytes; the upper 16 bits of EAX are untouched
  1072     store_reg( R_EAX, Rn ); // note: this rule leaves sh4_x86.tstate as it was
  1073 :}
  1074 SWAP.W Rm, Rn {:  /* Emit: Rn = (Rm << 16) | (Rm >> 16), i.e. the two 16-bit halves of Rm exchanged. */
  1075     load_reg( R_EAX, Rm );
  1076     MOV_r32_r32( R_EAX, R_ECX ); // ECX = copy of Rm
  1077     SHL_imm8_r32( 16, R_ECX ); // ECX = low half moved up
  1078     SHR_imm8_r32( 16, R_EAX ); // EAX = high half moved down
  1079     OR_r32_r32( R_EAX, R_ECX );
  1080     store_reg( R_ECX, Rn );
  1081     sh4_x86.tstate = TSTATE_NONE;
  1082 :}
  1083 TAS.B @Rn {:  /* Emit: read the byte at @Rn, SETE_t() on a zero test of it, then write it back OR'ed with 0x80. */
  1084     load_reg( R_ECX, Rn );
  1085     MEM_READ_BYTE( R_ECX, R_EAX );
  1086     TEST_r8_r8( R_AL, R_AL );
  1087     SETE_t();
  1088     OR_imm8_r8( 0x80, R_AL );
  1089     load_reg( R_ECX, Rn ); // address is reloaded rather than reused - presumably MEM_READ_BYTE may clobber ECX; confirm
  1090     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1091     sh4_x86.tstate = TSTATE_NONE;
  1092 :}
  1093 TST Rm, Rn {:  /* Emit: TEST Rm against Rn (neither register is written), then SETE_t(). */
  1094     load_reg( R_EAX, Rm );
  1095     load_reg( R_ECX, Rn );
  1096     TEST_r32_r32( R_EAX, R_ECX );
  1097     SETE_t();
  1098     sh4_x86.tstate = TSTATE_E;
  1099 :}
  1100 TST #imm, R0 {:  /* Emit: TEST the immediate against R0 (R0 is not written), then SETE_t(). */
  1101     load_reg( R_EAX, 0 );
  1102     TEST_imm32_r32( imm, R_EAX );
  1103     SETE_t();
  1104     sh4_x86.tstate = TSTATE_E;
  1105 :}
  1106 TST.B #imm, @(R0, GBR) {:  /* Emit: read the byte at R0+GBR, TEST it against imm, then SETE_t(). Memory is not written. */
  1107     load_reg( R_EAX, 0);
  1108     load_spreg( R_ECX, R_GBR ); // GBR is fetched with load_spreg, as in the other GBR-relative rules; load_reg takes a general-register index
  1109     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + GBR
  1110     MEM_READ_BYTE( R_ECX, R_EAX );
  1111     TEST_imm8_r8( imm, R_AL );
  1112     SETE_t();
  1113     sh4_x86.tstate = TSTATE_E;
  1114 :}
  1115 XOR Rm, Rn {:  /* Emit: EAX = Rm, ECX = Rn, XOR EAX into ECX, store ECX to Rn. */
  1116     load_reg( R_EAX, Rm );
  1117     load_reg( R_ECX, Rn );
  1118     XOR_r32_r32( R_EAX, R_ECX );
  1119     store_reg( R_ECX, Rn );
  1120     sh4_x86.tstate = TSTATE_NONE;
  1121 :}
  1122 XOR #imm, R0 {:  /* Emit: R0 = R0 XOR imm. */
  1123     load_reg( R_EAX, 0 );
  1124     XOR_imm32_r32( imm, R_EAX );
  1125     store_reg( R_EAX, 0 );
  1126     sh4_x86.tstate = TSTATE_NONE;
  1127 :}
  1128 XOR.B #imm, @(R0, GBR) {:  /* Emit: read the byte at R0+GBR, XOR it with imm, write it back to the same address. */
  1129     load_reg( R_EAX, 0 );
  1130     load_spreg( R_ECX, R_GBR );
  1131     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + GBR
  1132     PUSH_r32(R_ECX); // pushed before the call and popped after it, so ECX holds the address again for the write
  1133     call_func0(sh4_read_byte); // presumably the pushed address doubles as the call argument - confirm calling convention
  1134     POP_r32(R_ECX);
  1135     XOR_imm32_r32( imm, R_EAX );
  1136     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1137     sh4_x86.tstate = TSTATE_NONE;
  1138 :}
  1139 XTRCT Rm, Rn {: /* Emit: Rn = (Rm << 16) | (Rn >> 16). */
  1140     load_reg( R_EAX, Rm );
  1141     load_reg( R_ECX, Rn );
  1142     SHL_imm8_r32( 16, R_EAX );
  1143     SHR_imm8_r32( 16, R_ECX );
  1144     OR_r32_r32( R_EAX, R_ECX );
  1145     store_reg( R_ECX, Rn );
  1146     sh4_x86.tstate = TSTATE_NONE;
  1147 :}
  1149 /* Data move instructions */
  1150 MOV Rm, Rn {:  /* Emit: copy Rm to Rn through EAX. */
  1151     load_reg( R_EAX, Rm );
  1152     store_reg( R_EAX, Rn ); // sh4_x86.tstate is left unchanged by this rule
  1153 :}
  1154 MOV #imm, Rn {:  /* Emit: load the immediate into EAX and store it to Rn. */
  1155     load_imm32( R_EAX, imm );
  1156     store_reg( R_EAX, Rn ); // sh4_x86.tstate is left unchanged by this rule
  1157 :}
  1158 MOV.B Rm, @Rn {:  /* Emit: byte write of Rm (in EAX) to the address in Rn (in ECX). */
  1159     load_reg( R_EAX, Rm );
  1160     load_reg( R_ECX, Rn );
  1161     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1162     sh4_x86.tstate = TSTATE_NONE; // T-state tracking is dropped after every memory access in this file
  1163 :}
  1164 MOV.B Rm, @-Rn {:  /* Emit: decrement Rn by 1, store it back, then byte-write Rm to the new address. */
  1165     load_reg( R_EAX, Rm ); // Rm is read before Rn is updated
  1166     load_reg( R_ECX, Rn );
  1167     ADD_imm8s_r32( -1, R_ECX );
  1168     store_reg( R_ECX, Rn );
  1169     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1170     sh4_x86.tstate = TSTATE_NONE;
  1171 :}
  1172 MOV.B Rm, @(R0, Rn) {:  /* Emit: byte write of Rm to the address R0 + Rn. */
  1173     load_reg( R_EAX, 0 );
  1174     load_reg( R_ECX, Rn );
  1175     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + Rn
  1176     load_reg( R_EAX, Rm ); // EAX is reused for the value once the address is formed
  1177     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1178     sh4_x86.tstate = TSTATE_NONE;
  1179 :}
  1180 MOV.B R0, @(disp, GBR) {:  /* Emit: byte write of R0 to the address GBR + disp. */
  1181     load_reg( R_EAX, 0 );
  1182     load_spreg( R_ECX, R_GBR );
  1183     ADD_imm32_r32( disp, R_ECX );
  1184     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1185     sh4_x86.tstate = TSTATE_NONE;
  1186 :}
  1187 MOV.B R0, @(disp, Rn) {:  /* Emit: byte write of R0 to the address Rn + disp. */
  1188     load_reg( R_EAX, 0 );
  1189     load_reg( R_ECX, Rn );
  1190     ADD_imm32_r32( disp, R_ECX );
  1191     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1192     sh4_x86.tstate = TSTATE_NONE;
  1193 :}
  1194 MOV.B @Rm, Rn {:  /* Emit: byte read from the address in Rm, result stored to Rn. */
  1195     load_reg( R_ECX, Rm );
  1196     MEM_READ_BYTE( R_ECX, R_EAX ); // NOTE(review): any sign extension would have to happen inside MEM_READ_BYTE - confirm
  1197     store_reg( R_EAX, Rn );
  1198     sh4_x86.tstate = TSTATE_NONE;
  1199 :}
  1200 MOV.B @Rm+, Rn {:  /* Emit: byte read from the address in Rm into Rn, with Rm post-incremented by 1. */
  1201     load_reg( R_ECX, Rm );
  1202     MOV_r32_r32( R_ECX, R_EAX );
  1203     ADD_imm8s_r32( 1, R_EAX );
  1204     store_reg( R_EAX, Rm ); // Rm is updated before the read is emitted; ECX still holds the original address
  1205     MEM_READ_BYTE( R_ECX, R_EAX );
  1206     store_reg( R_EAX, Rn ); // stored last, so the loaded value wins when Rm and Rn are the same register
  1207     sh4_x86.tstate = TSTATE_NONE;
  1208 :}
  1209 MOV.B @(R0, Rm), Rn {:  /* Emit: byte read from the address R0 + Rm, result stored to Rn. */
  1210     load_reg( R_EAX, 0 );
  1211     load_reg( R_ECX, Rm );
  1212     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + Rm
  1213     MEM_READ_BYTE( R_ECX, R_EAX );
  1214     store_reg( R_EAX, Rn );
  1215     sh4_x86.tstate = TSTATE_NONE;
  1216 :}
  1217 MOV.B @(disp, GBR), R0 {:  /* Emit: byte read from the address GBR + disp, result stored to R0. */
  1218     load_spreg( R_ECX, R_GBR );
  1219     ADD_imm32_r32( disp, R_ECX );
  1220     MEM_READ_BYTE( R_ECX, R_EAX );
  1221     store_reg( R_EAX, 0 );
  1222     sh4_x86.tstate = TSTATE_NONE;
  1223 :}
  1224 MOV.B @(disp, Rm), R0 {:  /* Emit: byte read from the address Rm + disp, result stored to R0. */
  1225     load_reg( R_ECX, Rm );
  1226     ADD_imm32_r32( disp, R_ECX );
  1227     MEM_READ_BYTE( R_ECX, R_EAX );
  1228     store_reg( R_EAX, 0 );
  1229     sh4_x86.tstate = TSTATE_NONE;
  1230 :}
  1231 MOV.L Rm, @Rn {: /* Emit: long write of Rm to the address in Rn, preceded by a write-alignment check. */
  1232     load_reg( R_EAX, Rm );
  1233     load_reg( R_ECX, Rn );
  1234     precheck(); // always emitted immediately before the check_*align* macros in this file - presumably required by them; confirm
  1235     check_walign32(R_ECX);
  1236     MEM_WRITE_LONG( R_ECX, R_EAX );
  1237     sh4_x86.tstate = TSTATE_NONE;
  1238 :}
  1239 MOV.L Rm, @-Rn {:  /* Emit: decrement Rn by 4, store it back, then long-write Rm to the new address. */
  1240     load_reg( R_EAX, Rm ); // Rm is read before Rn is updated
  1241     load_reg( R_ECX, Rn );
  1242     precheck();
  1243     check_walign32( R_ECX ); // checked on the pre-decrement address; subtracting 4 does not change 4-byte alignment
  1244     ADD_imm8s_r32( -4, R_ECX );
  1245     store_reg( R_ECX, Rn );
  1246     MEM_WRITE_LONG( R_ECX, R_EAX );
  1247     sh4_x86.tstate = TSTATE_NONE;
  1248 :}
  1249 MOV.L Rm, @(R0, Rn) {:  /* Emit: long write of Rm to the address R0 + Rn, preceded by a write-alignment check. */
  1250     load_reg( R_EAX, 0 );
  1251     load_reg( R_ECX, Rn );
  1252     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + Rn
  1253     precheck();
  1254     check_walign32( R_ECX );
  1255     load_reg( R_EAX, Rm ); // EAX is reused for the value once the address is formed
  1256     MEM_WRITE_LONG( R_ECX, R_EAX );
  1257     sh4_x86.tstate = TSTATE_NONE;
  1258 :}
  1259 MOV.L R0, @(disp, GBR) {:  /* Emit: long write of R0 to the address GBR + disp, preceded by a write-alignment check. */
  1260     load_spreg( R_ECX, R_GBR );
  1261     load_reg( R_EAX, 0 );
  1262     ADD_imm32_r32( disp, R_ECX );
  1263     precheck();
  1264     check_walign32( R_ECX );
  1265     MEM_WRITE_LONG( R_ECX, R_EAX );
  1266     sh4_x86.tstate = TSTATE_NONE;
  1267 :}
  1268 MOV.L Rm, @(disp, Rn) {:  /* Emit: long write of Rm to the address Rn + disp, preceded by a write-alignment check. */
  1269     load_reg( R_ECX, Rn );
  1270     load_reg( R_EAX, Rm );
  1271     ADD_imm32_r32( disp, R_ECX );
  1272     precheck();
  1273     check_walign32( R_ECX );
  1274     MEM_WRITE_LONG( R_ECX, R_EAX );
  1275     sh4_x86.tstate = TSTATE_NONE;
  1276 :}
  1277 MOV.L @Rm, Rn {:  /* Emit: long read from the address in Rm into Rn, preceded by a read-alignment check. */
  1278     load_reg( R_ECX, Rm );
  1279     precheck();
  1280     check_ralign32( R_ECX );
  1281     MEM_READ_LONG( R_ECX, R_EAX );
  1282     store_reg( R_EAX, Rn );
  1283     sh4_x86.tstate = TSTATE_NONE;
  1284 :}
  1285 MOV.L @Rm+, Rn {:  /* Emit: long read from the address in Rm into Rn, with Rm post-incremented by 4. */
  1286     load_reg( R_EAX, Rm );
  1287     precheck();
  1288     check_ralign32( R_EAX );
  1289     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  1290     ADD_imm8s_r32( 4, R_EAX );
  1291     store_reg( R_EAX, Rm ); // Rm is updated before the read is emitted
  1292     MEM_READ_LONG( R_ECX, R_EAX );
  1293     store_reg( R_EAX, Rn ); // stored last, so the loaded value wins when Rm and Rn are the same register
  1294     sh4_x86.tstate = TSTATE_NONE;
  1295 :}
  1296 MOV.L @(R0, Rm), Rn {:  /* Emit: long read from the address R0 + Rm into Rn, preceded by a read-alignment check. */
  1297     load_reg( R_EAX, 0 );
  1298     load_reg( R_ECX, Rm );
  1299     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + Rm
  1300     precheck();
  1301     check_ralign32( R_ECX );
  1302     MEM_READ_LONG( R_ECX, R_EAX );
  1303     store_reg( R_EAX, Rn );
  1304     sh4_x86.tstate = TSTATE_NONE;
  1305 :}
  1306 MOV.L @(disp, GBR), R0 {: /* Emit: long read from the address GBR + disp into R0, preceded by a read-alignment check. */
  1307     load_spreg( R_ECX, R_GBR );
  1308     ADD_imm32_r32( disp, R_ECX );
  1309     precheck();
  1310     check_ralign32( R_ECX );
  1311     MEM_READ_LONG( R_ECX, R_EAX );
  1312     store_reg( R_EAX, 0 );
  1313     sh4_x86.tstate = TSTATE_NONE;
  1314 :}
  1315 MOV.L @(disp, PC), Rn {:  /* PC-relative long load into Rn; the address is computed at translation time. Illegal in a delay slot. */
  1316     if( sh4_x86.in_delay_slot ) {
  1317 	SLOTILLEGAL();
  1318     } else {
  1319 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4; // pc with its low two bits cleared, plus disp + 4
  1320 	char *ptr = mem_get_region(target);
  1321 	if( ptr != NULL ) {
  1322 	    MOV_moff32_EAX( (uint32_t)ptr ); // direct load from the host address returned by mem_get_region; the cast assumes 32-bit pointers
  1323 	} else {
  1324 	    load_imm32( R_ECX, target ); // no host pointer available: fall back to the generic read path
  1325 	    MEM_READ_LONG( R_ECX, R_EAX );
  1327 	store_reg( R_EAX, Rn );
  1328 	sh4_x86.tstate = TSTATE_NONE;
  1330 :}
  1331 MOV.L @(disp, Rm), Rn {:  /* Emit: long read from the address Rm + disp into Rn, preceded by a read-alignment check. */
  1332     load_reg( R_ECX, Rm );
  1333     ADD_imm8s_r32( disp, R_ECX ); // NOTE(review): uses the imm8s form where sibling rules use ADD_imm32_r32 - assumes disp fits a signed byte; confirm
  1334     precheck();
  1335     check_ralign32( R_ECX );
  1336     MEM_READ_LONG( R_ECX, R_EAX );
  1337     store_reg( R_EAX, Rn );
  1338     sh4_x86.tstate = TSTATE_NONE;
  1339 :}
  1340 MOV.W Rm, @Rn {:  /* Emit: word write of Rm to the address in Rn, preceded by a 16-bit write-alignment check. */
  1341     load_reg( R_ECX, Rn );
  1342     precheck();
  1343     check_walign16( R_ECX );
  1344     load_reg( R_EAX, Rm ); // value is loaded after the alignment check here
  1345     MEM_WRITE_WORD( R_ECX, R_EAX );
  1346     sh4_x86.tstate = TSTATE_NONE;
  1347 :}
  1348 MOV.W Rm, @-Rn {:  /* Emit: decrement Rn by 2, store it back, then word-write Rm to the new address. */
  1349     load_reg( R_ECX, Rn );
  1350     precheck();
  1351     check_walign16( R_ECX ); // checked on the pre-decrement address; subtracting 2 does not change 2-byte alignment
  1352     load_reg( R_EAX, Rm ); // Rm is read before Rn is written back below
  1353     ADD_imm8s_r32( -2, R_ECX );
  1354     store_reg( R_ECX, Rn );
  1355     MEM_WRITE_WORD( R_ECX, R_EAX );
  1356     sh4_x86.tstate = TSTATE_NONE;
  1357 :}
  1358 MOV.W Rm, @(R0, Rn) {:  /* Emit: word write of Rm to the address R0 + Rn, preceded by a 16-bit write-alignment check. */
  1359     load_reg( R_EAX, 0 );
  1360     load_reg( R_ECX, Rn );
  1361     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + Rn
  1362     precheck();
  1363     check_walign16( R_ECX );
  1364     load_reg( R_EAX, Rm ); // EAX is reused for the value once the address is formed
  1365     MEM_WRITE_WORD( R_ECX, R_EAX );
  1366     sh4_x86.tstate = TSTATE_NONE;
  1367 :}
  1368 MOV.W R0, @(disp, GBR) {:  /* Emit: word write of R0 to the address GBR + disp, preceded by a 16-bit write-alignment check. */
  1369     load_spreg( R_ECX, R_GBR );
  1370     load_reg( R_EAX, 0 );
  1371     ADD_imm32_r32( disp, R_ECX );
  1372     precheck();
  1373     check_walign16( R_ECX );
  1374     MEM_WRITE_WORD( R_ECX, R_EAX );
  1375     sh4_x86.tstate = TSTATE_NONE;
  1376 :}
  1377 MOV.W R0, @(disp, Rn) {:  /* Emit: word write of R0 to the address Rn + disp, preceded by a 16-bit write-alignment check. */
  1378     load_reg( R_ECX, Rn );
  1379     load_reg( R_EAX, 0 );
  1380     ADD_imm32_r32( disp, R_ECX );
  1381     precheck();
  1382     check_walign16( R_ECX );
  1383     MEM_WRITE_WORD( R_ECX, R_EAX );
  1384     sh4_x86.tstate = TSTATE_NONE;
  1385 :}
  1386 MOV.W @Rm, Rn {:  /* Emit: word read from the address in Rm into Rn, preceded by a 16-bit read-alignment check. */
  1387     load_reg( R_ECX, Rm );
  1388     precheck();
  1389     check_ralign16( R_ECX );
  1390     MEM_READ_WORD( R_ECX, R_EAX ); // NOTE(review): any sign extension would have to happen inside MEM_READ_WORD - confirm
  1391     store_reg( R_EAX, Rn );
  1392     sh4_x86.tstate = TSTATE_NONE;
  1393 :}
  1394 MOV.W @Rm+, Rn {:  /* Emit: word read from the address in Rm into Rn, with Rm post-incremented by 2. */
  1395     load_reg( R_EAX, Rm );
  1396     precheck();
  1397     check_ralign16( R_EAX );
  1398     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  1399     ADD_imm8s_r32( 2, R_EAX );
  1400     store_reg( R_EAX, Rm ); // Rm is updated before the read is emitted
  1401     MEM_READ_WORD( R_ECX, R_EAX );
  1402     store_reg( R_EAX, Rn ); // stored last, so the loaded value wins when Rm and Rn are the same register
  1403     sh4_x86.tstate = TSTATE_NONE;
  1404 :}
  1405 MOV.W @(R0, Rm), Rn {:  /* Emit: word read from the address R0 + Rm into Rn, preceded by a 16-bit read-alignment check. */
  1406     load_reg( R_EAX, 0 );
  1407     load_reg( R_ECX, Rm );
  1408     ADD_r32_r32( R_EAX, R_ECX ); // ECX = R0 + Rm
  1409     precheck();
  1410     check_ralign16( R_ECX );
  1411     MEM_READ_WORD( R_ECX, R_EAX );
  1412     store_reg( R_EAX, Rn );
  1413     sh4_x86.tstate = TSTATE_NONE;
  1414 :}
  1415 MOV.W @(disp, GBR), R0 {:  /* Emit: word read from the address GBR + disp into R0, preceded by a 16-bit read-alignment check. */
  1416     load_spreg( R_ECX, R_GBR );
  1417     ADD_imm32_r32( disp, R_ECX );
  1418     precheck();
  1419     check_ralign16( R_ECX );
  1420     MEM_READ_WORD( R_ECX, R_EAX );
  1421     store_reg( R_EAX, 0 );
  1422     sh4_x86.tstate = TSTATE_NONE;
  1423 :}
  1424 MOV.W @(disp, PC), Rn {:  /* PC-relative word load into Rn; the address is a translation-time constant. Illegal in a delay slot. */
  1425     if( sh4_x86.in_delay_slot ) {
  1426 	SLOTILLEGAL();
  1427     } else {
  1428 	load_imm32( R_ECX, pc + disp + 4 ); // unlike the MOV.L form, pc is not masked to a 4-byte boundary here
  1429 	MEM_READ_WORD( R_ECX, R_EAX ); // no alignment check is emitted for this constant address
  1430 	store_reg( R_EAX, Rn );
  1431 	sh4_x86.tstate = TSTATE_NONE;
  1433 :}
  1434 MOV.W @(disp, Rm), R0 {:  /* Emit: word read from the address Rm + disp into R0, preceded by a 16-bit read-alignment check. */
  1435     load_reg( R_ECX, Rm );
  1436     ADD_imm32_r32( disp, R_ECX );
  1437     precheck();
  1438     check_ralign16( R_ECX );
  1439     MEM_READ_WORD( R_ECX, R_EAX );
  1440     store_reg( R_EAX, 0 );
  1441     sh4_x86.tstate = TSTATE_NONE;
  1442 :}
  1443 MOVA @(disp, PC), R0 {:  /* Store the translation-time constant (pc & ~3) + disp + 4 into R0. Illegal in a delay slot. */
  1444     if( sh4_x86.in_delay_slot ) {
  1445 	SLOTILLEGAL();
  1446     } else {
  1447 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 ); // no memory access: only the address itself is produced
  1448 	store_reg( R_ECX, 0 );
  1450 :}
  1451 MOVCA.L R0, @Rn {:  /* Emit: long write of R0 to the address in Rn; the emitted sequence matches a plain MOV.L store. */
  1452     load_reg( R_EAX, 0 );
  1453     load_reg( R_ECX, Rn );
  1454     precheck();
  1455     check_walign32( R_ECX );
  1456     MEM_WRITE_LONG( R_ECX, R_EAX );
  1457     sh4_x86.tstate = TSTATE_NONE;
  1458 :}
  1460 /* Control transfer instructions */
  1461 BF disp {: /* Conditional branch without delay slot: exits the block to disp+pc+4 unless the JT jump skips it. */
  1462     if( sh4_x86.in_delay_slot ) {
  1463 	SLOTILLEGAL();
  1464     } else {
  1465 	JT_rel8( 29, nottaken ); // hard-coded rel8: must equal the size of the code exit_block() emits below
  1466 	exit_block( disp + pc + 4, pc+2 );
  1467 	JMP_TARGET(nottaken);
  1468 	return 2; // presumably the number of SH4 bytes consumed by this rule - confirm against the caller
  1470 :}
  1471 BF/S disp {: /* Conditional branch with delay slot: the delay-slot instruction at pc+2 is translated twice, once per path. */
  1472     if( sh4_x86.in_delay_slot ) {
  1473 	SLOTILLEGAL();
  1474     } else {
  1475 	sh4_x86.in_delay_slot = TRUE;
  1476 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1477 	    CMP_imm8s_sh4r( 1, R_T ); // no condition tracked: compare T with 1 so that TSTATE_E stands for "T set"
  1478 	    sh4_x86.tstate = TSTATE_E;
  1480 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32 on the tracked T condition (skips to the not-taken path); NOTE(review): the original comment said JNE - confirm TSTATE_* encoding
  1481 	sh4_x86_translate_instruction(pc+2);
  1482 	exit_block( disp + pc + 4, pc+4 );
  1483 	// not taken
  1484 	*patch = (xlat_output - ((uint8_t *)patch)) - 4; // back-patch the rel32: distance from the end of the 4-byte displacement to here
  1485 	sh4_x86_translate_instruction(pc+2);
  1486 	return 4;
  1488 :}
  1489 BRA disp {:  /* Unconditional branch with delay slot: translate pc+2, then exit the block to disp+pc+4. */
  1490     if( sh4_x86.in_delay_slot ) {
  1491 	SLOTILLEGAL();
  1492     } else {
  1493 	sh4_x86.in_delay_slot = TRUE;
  1494 	sh4_x86_translate_instruction( pc + 2 ); // delay-slot instruction is emitted before the block exit
  1495 	exit_block( disp + pc + 4, pc+4 );
  1496 	sh4_x86.branch_taken = TRUE;
  1497 	return 4;
  1499 :}
  1500 BRAF Rn {:  /* Register-relative branch with delay slot: new pc = Rn + pc + 4, written before the delay slot is translated. */
  1501     if( sh4_x86.in_delay_slot ) {
  1502 	SLOTILLEGAL();
  1503     } else {
  1504 	load_reg( R_EAX, Rn );
  1505 	ADD_imm32_r32( pc + 4, R_EAX );
  1506 	store_spreg( R_EAX, REG_OFFSET(pc) ); // target is stored first, so a delay-slot write to Rn cannot affect it
  1507 	sh4_x86.in_delay_slot = TRUE;
  1508 	sh4_x86.tstate = TSTATE_NONE; // the ADD above emitted a flag-modifying x86 instruction
  1509 	sh4_x86_translate_instruction( pc + 2 );
  1510 	exit_block_pcset(pc+2);
  1511 	sh4_x86.branch_taken = TRUE;
  1512 	return 4;
  1514 :}
  1515 BSR disp {:  /* Subroutine branch with delay slot: PR = pc + 4, translate pc+2, then exit the block to disp+pc+4. */
  1516     if( sh4_x86.in_delay_slot ) {
  1517 	SLOTILLEGAL();
  1518     } else {
  1519 	load_imm32( R_EAX, pc + 4 );
  1520 	store_spreg( R_EAX, R_PR ); // PR is written before the delay-slot instruction is emitted
  1521 	sh4_x86.in_delay_slot = TRUE;
  1522 	sh4_x86_translate_instruction( pc + 2 );
  1523 	exit_block( disp + pc + 4, pc+4 );
  1524 	sh4_x86.branch_taken = TRUE;
  1525 	return 4;
  1527 :}
  1528 BSRF Rn {:  /* Register-relative subroutine branch with delay slot: PR = pc + 4, new pc = Rn + pc + 4. */
  1529     if( sh4_x86.in_delay_slot ) {
  1530 	SLOTILLEGAL();
  1531     } else {
  1532 	load_imm32( R_ECX, pc + 4 );
  1533 	store_spreg( R_ECX, R_PR );
  1534 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX ); // ECX already holds pc + 4; Rn is added straight from the register file
  1535 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1536 	sh4_x86.in_delay_slot = TRUE;
  1537 	sh4_x86.tstate = TSTATE_NONE; // the ADD above emitted a flag-modifying x86 instruction
  1538 	sh4_x86_translate_instruction( pc + 2 );
  1539 	exit_block_pcset(pc+2);
  1540 	sh4_x86.branch_taken = TRUE;
  1541 	return 4;
  1543 :}
  1544 BT disp {: /* Conditional branch without delay slot: exits the block to disp+pc+4 unless the JF jump skips it. */
  1545     if( sh4_x86.in_delay_slot ) {
  1546 	SLOTILLEGAL();
  1547     } else {
  1548 	JF_rel8( 29, nottaken ); // hard-coded rel8: must equal the size of the code exit_block() emits below
  1549 	exit_block( disp + pc + 4, pc+2 );
  1550 	JMP_TARGET(nottaken);
  1551 	return 2; // presumably the number of SH4 bytes consumed by this rule - confirm against the caller
  1553 :}
  1554 BT/S disp {: /* Conditional branch with delay slot: the delay-slot instruction at pc+2 is translated twice, once per path. */
  1555     if( sh4_x86.in_delay_slot ) {
  1556 	SLOTILLEGAL();
  1557     } else {
  1558 	sh4_x86.in_delay_slot = TRUE;
  1559 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1560 	    CMP_imm8s_sh4r( 1, R_T ); // no condition tracked: compare T with 1 so that TSTATE_E stands for "T set"
  1561 	    sh4_x86.tstate = TSTATE_E;
  1563 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32 on the inverse (^1) of the tracked T condition (skips to the not-taken path); NOTE(review): the original comment said JE - confirm TSTATE_* encoding
  1564 	sh4_x86_translate_instruction(pc+2);
  1565 	exit_block( disp + pc + 4, pc+4 );
  1566 	// not taken
  1567 	*patch = (xlat_output - ((uint8_t *)patch)) - 4; // back-patch the rel32: distance from the end of the 4-byte displacement to here
  1568 	sh4_x86_translate_instruction(pc+2);
  1569 	return 4;
  1571 :}
  1572 JMP @Rn {:  /* Indirect jump with delay slot: new pc = Rn, written before the delay slot is translated. */
  1573     if( sh4_x86.in_delay_slot ) {
  1574 	SLOTILLEGAL();
  1575     } else {
  1576 	load_reg( R_ECX, Rn );
  1577 	store_spreg( R_ECX, REG_OFFSET(pc) ); // target is stored first, so a delay-slot write to Rn cannot affect it
  1578 	sh4_x86.in_delay_slot = TRUE;
  1579 	sh4_x86_translate_instruction(pc+2);
  1580 	exit_block_pcset(pc+2);
  1581 	sh4_x86.branch_taken = TRUE;
  1582 	return 4;
  1584 :}
  1585 JSR @Rn {:  /* Indirect subroutine call with delay slot: PR = pc + 4, new pc = Rn. */
  1586     if( sh4_x86.in_delay_slot ) {
  1587 	SLOTILLEGAL();
  1588     } else {
  1589 	load_imm32( R_EAX, pc + 4 );
  1590 	store_spreg( R_EAX, R_PR );
  1591 	load_reg( R_ECX, Rn );
  1592 	store_spreg( R_ECX, REG_OFFSET(pc) ); // both PR and pc are written before the delay-slot instruction is emitted
  1593 	sh4_x86.in_delay_slot = TRUE;
  1594 	sh4_x86_translate_instruction(pc+2);
  1595 	exit_block_pcset(pc+2);
  1596 	sh4_x86.branch_taken = TRUE;
  1597 	return 4;
  1599 :}
  1600 RTE {:  /* Return from exception with delay slot: pc = SPC, SR = SSR via sh4_write_sr(), after a privilege check. */
  1601     if( sh4_x86.in_delay_slot ) {
  1602 	SLOTILLEGAL();
  1603     } else {
  1604 	check_priv();
  1605 	load_spreg( R_ECX, R_SPC );
  1606 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1607 	load_spreg( R_EAX, R_SSR );
  1608 	call_func1( sh4_write_sr, R_EAX );
  1609 	sh4_x86.in_delay_slot = TRUE;
  1610 	sh4_x86.priv_checked = FALSE; // cached checks are dropped after SR is rewritten - presumably SR controls both; confirm
  1611 	sh4_x86.fpuen_checked = FALSE;
  1612 	sh4_x86.tstate = TSTATE_NONE;
  1613 	sh4_x86_translate_instruction(pc+2); // the delay slot is translated after the SR write has been emitted
  1614 	exit_block_pcset(pc+2);
  1615 	sh4_x86.branch_taken = TRUE;
  1616 	return 4;
  1618 :}
  1619 RTS {:  /* Return from subroutine with delay slot: new pc = PR, written before the delay slot is translated. */
  1620     if( sh4_x86.in_delay_slot ) {
  1621 	SLOTILLEGAL();
  1622     } else {
  1623 	load_spreg( R_ECX, R_PR );
  1624 	store_spreg( R_ECX, REG_OFFSET(pc) ); // target is stored first, so a delay-slot write to PR cannot affect it
  1625 	sh4_x86.in_delay_slot = TRUE;
  1626 	sh4_x86_translate_instruction(pc+2);
  1627 	exit_block_pcset(pc+2);
  1628 	sh4_x86.branch_taken = TRUE;
  1629 	return 4;
  1631 :}
  1632 TRAPA #imm {:  /* Emit a call to sh4_raise_trap with imm pushed on the stack, then exit the block. Illegal in a delay slot. */
  1633     if( sh4_x86.in_delay_slot ) {
  1634 	SLOTILLEGAL();
  1635     } else {
  1636 	PUSH_imm32( imm );
  1637 	call_func0( sh4_raise_trap );
  1638 	ADD_imm8s_r32( 4, R_ESP ); // pop the 4-byte argument pushed above
  1639 	sh4_x86.tstate = TSTATE_NONE;
  1640 	exit_block_pcset(pc); // pcset variant: presumably sh4_raise_trap has already set the new pc - confirm
  1641 	sh4_x86.branch_taken = TRUE;
  1642 	return 2;
  1644 :}
  1645 UNDEF {:  /* Undefined opcode: emit a jump to the EXIT_ILLEGAL exit, or SLOTILLEGAL() when in a delay slot. */
  1646     if( sh4_x86.in_delay_slot ) {
  1647 	SLOTILLEGAL();
  1648     } else {
  1649 	precheck();
  1650 	JMP_exit(EXIT_ILLEGAL);
  1651 	return 2;
  1653 :}
  1655 CLRMAC {:  /* Emit: zero EAX and store it to both MACL and MACH. */
  1656     XOR_r32_r32(R_EAX, R_EAX);
  1657     store_spreg( R_EAX, R_MACL );
  1658     store_spreg( R_EAX, R_MACH );
  1659     sh4_x86.tstate = TSTATE_NONE; // the XOR emitted above modifies the x86 flags
  1660 :}
  1661 CLRS {: /* Emit: clear the x86 carry flag and copy it into the S bit. T is not written. */
  1662     CLC();
  1663     SETC_sh4r(R_S);
  1664     sh4_x86.tstate = TSTATE_NONE; // only S was written: the carry flag (now 0) does not necessarily mirror T, so no T-state may be claimed
  1665 :}
  1666 CLRT {:  /* Emit: clear the x86 carry flag, then SETC_t() to copy it into T. */
  1667     CLC();
  1668     SETC_t();
  1669     sh4_x86.tstate = TSTATE_C; // carry and T were set from the same value just above
  1670 :}
  1671 SETS {:  /* Emit: set the x86 carry flag and copy it into the S bit. T is not written. */
  1672     STC();
  1673     SETC_sh4r(R_S);
  1674     sh4_x86.tstate = TSTATE_NONE; // only S was written: the carry flag (now 1) does not necessarily mirror T, so no T-state may be claimed
  1675 :}
  1676 SETT {:  /* Emit: set the x86 carry flag, then SETC_t() to copy it into T. */
  1677     STC();
  1678     SETC_t();
  1679     sh4_x86.tstate = TSTATE_C; // carry and T were set from the same value just above
  1680 :}
  1682 /* Floating point moves */
  1683 FMOV FRm, FRn {:  
  1684     /* As horrible as this looks, it's actually covering 5 separate cases
  1685      * (selected by the FPSCR_SZ test below): 1. 32-bit fr-to-fr (SZ=0)
  1686      * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
  1687      * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
  1688      * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
  1689      * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
  1690      */
  1691     check_fpuen();
  1692     load_spreg( R_ECX, R_FPSCR );
  1693     load_fr_bank( R_EDX );
  1694     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1695     JNE_rel8(8, doublesize); // hard-coded rel8: skips the single-size move and the JMP that follows it
  1696     load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
  1697     store_fr( R_EDX, R_EAX, FRn );
  1698     if( FRm&1 ) { // FRm/FRn parity is known at translation time, so only one 64-bit variant is emitted
  1699 	JMP_rel8(24, end);
  1700 	JMP_TARGET(doublesize);
  1701 	load_xf_bank( R_ECX ); // ECX held FPSCR on entry to this path
  1702 	load_fr( R_ECX, R_EAX, FRm-1 );
  1703 	if( FRn&1 ) {
  1704 	    load_fr( R_ECX, R_EDX, FRm );
  1705 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1706 	    store_fr( R_ECX, R_EDX, FRn );
  1707 	} else /* FRn&1 == 0 */ {
  1708 	    load_fr( R_ECX, R_ECX, FRm ); // overwrites the xf bank pointer; stores below go through EDX (fr bank)
  1709 	    store_fr( R_EDX, R_EAX, FRn );
  1710 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1712 	JMP_TARGET(end);
  1713     } else /* FRm&1 == 0 */ {
  1714 	if( FRn&1 ) {
  1715 	    JMP_rel8(24, end); // NOTE(review): no JMP_TARGET(doublesize) follows in this branch, unlike the FRm&1 case
  1716 	    load_xf_bank( R_ECX );
  1717 	    load_fr( R_EDX, R_EAX, FRm );
  1718 	    load_fr( R_EDX, R_EDX, FRm+1 ); // overwrites the fr bank pointer; stores below go through ECX (xf bank)
  1719 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1720 	    store_fr( R_ECX, R_EDX, FRn );
  1721 	    JMP_TARGET(end);
  1722 	} else /* FRn&1 == 0 */ {
  1723 	    JMP_rel8(12, end); // NOTE(review): no JMP_TARGET(doublesize) follows in this branch either
  1724 	    load_fr( R_EDX, R_EAX, FRm );
  1725 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1726 	    store_fr( R_EDX, R_EAX, FRn );
  1727 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1728 	    JMP_TARGET(end);
  1731     sh4_x86.tstate = TSTATE_NONE;
  1732 :}
  1733 FMOV FRm, @Rn {: /* Store FRm to @Rn: a long write when FPSCR_SZ is clear, a double write of the register pair when set. */
  1734     precheck();
  1735     check_fpuen_no_precheck();
  1736     load_reg( R_ECX, Rn );
  1737     check_walign32( R_ECX );
  1738     load_spreg( R_EDX, R_FPSCR );
  1739     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1740     JNE_rel8(20, doublesize); // hard-coded rel8: skips the single-size store and the JMP that follows it
  1741     load_fr_bank( R_EDX );
  1742     load_fr( R_EDX, R_EAX, FRm );
  1743     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1744     if( FRm&1 ) { // odd FRm: the pair is read via load_xf_bank; even FRm: via load_fr_bank
  1745 	JMP_rel8( 48, end );
  1746 	JMP_TARGET(doublesize);
  1747 	load_xf_bank( R_EDX ); // EDX still holds FPSCR on this path
  1748 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1749 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1750 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1751 	JMP_TARGET(end);
  1752     } else {
  1753 	JMP_rel8( 39, end );
  1754 	JMP_TARGET(doublesize);
  1755 	load_fr_bank( R_EDX );
  1756 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1757 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1758 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1759 	JMP_TARGET(end);
  1761     sh4_x86.tstate = TSTATE_NONE;
  1762 :}
  1763 FMOV @Rm, FRn {:  /* Load FRn from @Rm: a long read when FPSCR_SZ is clear, a double read into the register pair when set. */
  1764     precheck();
  1765     check_fpuen_no_precheck();
  1766     load_reg( R_ECX, Rm );
  1767     check_ralign32( R_ECX );
  1768     load_spreg( R_EDX, R_FPSCR );
  1769     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1770     JNE_rel8(19, doublesize); // hard-coded rel8: skips the single-size load and the JMP that follows it
  1771     MEM_READ_LONG( R_ECX, R_EAX );
  1772     load_fr_bank( R_EDX );
  1773     store_fr( R_EDX, R_EAX, FRn );
  1774     if( FRn&1 ) { // odd FRn: the pair is written via load_xf_bank; even FRn: via load_fr_bank
  1775 	JMP_rel8(48, end);
  1776 	JMP_TARGET(doublesize);
  1777 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1778 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1779 	load_xf_bank( R_EDX );
  1780 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1781 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1782 	JMP_TARGET(end);
  1783     } else {
  1784 	JMP_rel8(36, end);
  1785 	JMP_TARGET(doublesize);
  1786 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1787 	load_fr_bank( R_EDX );
  1788 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1789 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1790 	JMP_TARGET(end);
  1792     sh4_x86.tstate = TSTATE_NONE;
  1793 :}
  1794 FMOV FRm, @-Rn {:  /* Pre-decrement store of FRm: Rn -= 4 and long write when FPSCR_SZ is clear, Rn -= 8 and double write when set. */
  1795     precheck();
  1796     check_fpuen_no_precheck();
  1797     load_reg( R_ECX, Rn );
  1798     check_walign32( R_ECX ); // checked on the pre-decrement address, with the 32-bit check in both size modes
  1799     load_spreg( R_EDX, R_FPSCR );
  1800     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1801     JNE_rel8(26, doublesize); // hard-coded rel8: skips the single-size store and the JMP that follows it
  1802     load_fr_bank( R_EDX );
  1803     load_fr( R_EDX, R_EAX, FRm );
  1804     ADD_imm8s_r32(-4,R_ECX);
  1805     store_reg( R_ECX, Rn );
  1806     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1807     if( FRm&1 ) { // odd FRm: the pair is read via load_xf_bank; even FRm: via load_fr_bank
  1808 	JMP_rel8( 54, end );
  1809 	JMP_TARGET(doublesize);
  1810 	load_xf_bank( R_EDX ); // EDX still holds FPSCR on this path
  1811 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1812 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1813 	ADD_imm8s_r32(-8,R_ECX);
  1814 	store_reg( R_ECX, Rn );
  1815 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1816 	JMP_TARGET(end);
  1817     } else {
  1818 	JMP_rel8( 45, end );
  1819 	JMP_TARGET(doublesize);
  1820 	load_fr_bank( R_EDX );
  1821 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1822 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1823 	ADD_imm8s_r32(-8,R_ECX);
  1824 	store_reg( R_ECX, Rn );
  1825 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1826 	JMP_TARGET(end);
  1828     sh4_x86.tstate = TSTATE_NONE;
  1829 :}
  1830 FMOV @Rm+, FRn {: /* Post-increment load of FRn: long read and Rm += 4 when FPSCR_SZ is clear, double read and Rm += 8 when set. */
  1831     precheck();
  1832     check_fpuen_no_precheck();
  1833     load_reg( R_ECX, Rm );
  1834     check_ralign32( R_ECX );
  1835     MOV_r32_r32( R_ECX, R_EAX ); // EAX = copy of the address, used to compute the incremented Rm
  1836     load_spreg( R_EDX, R_FPSCR );
  1837     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1838     JNE_rel8(25, doublesize); // hard-coded rel8: skips the single-size load and the JMP that follows it
  1839     ADD_imm8s_r32( 4, R_EAX );
  1840     store_reg( R_EAX, Rm ); // Rm is updated before the read is emitted
  1841     MEM_READ_LONG( R_ECX, R_EAX );
  1842     load_fr_bank( R_EDX );
  1843     store_fr( R_EDX, R_EAX, FRn );
  1844     if( FRn&1 ) { // odd FRn: the pair is written via load_xf_bank; even FRn: via load_fr_bank
  1845 	JMP_rel8(54, end);
  1846 	JMP_TARGET(doublesize);
  1847 	ADD_imm8s_r32( 8, R_EAX );
  1848 	store_reg(R_EAX, Rm);
  1849 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1850 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1851 	load_xf_bank( R_EDX );
  1852 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1853 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1854 	JMP_TARGET(end);
  1855     } else {
  1856 	JMP_rel8(42, end); // NOTE(review): no JMP_TARGET(doublesize) follows in this branch, unlike the FRn&1 case
  1857 	ADD_imm8s_r32( 8, R_EAX );
  1858 	store_reg(R_EAX, Rm);
  1859 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1860 	load_fr_bank( R_EDX );
  1861 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1862 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1863 	JMP_TARGET(end);
  1865     sh4_x86.tstate = TSTATE_NONE;
  1866 :}
  1867 FMOV FRm, @(R0, Rn) {:  /* Store FRm to the address R0 + Rn: long write when FPSCR_SZ is clear, double write when set. */
  1868     precheck();
  1869     check_fpuen_no_precheck();
  1870     load_reg( R_ECX, Rn );
  1871     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX ); // ECX = Rn + R0
  1872     check_walign32( R_ECX );
  1873     load_spreg( R_EDX, R_FPSCR );
  1874     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1875     JNE_rel8(20, doublesize); // hard-coded rel8: skips the single-size store and the JMP that follows it
  1876     load_fr_bank( R_EDX );
  1877     load_fr( R_EDX, R_EAX, FRm );
  1878     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1879     if( FRm&1 ) { // odd FRm: the pair is read via load_xf_bank; even FRm: via load_fr_bank
  1880 	JMP_rel8( 48, end );
  1881 	JMP_TARGET(doublesize);
  1882 	load_xf_bank( R_EDX ); // EDX still holds FPSCR on this path
  1883 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1884 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1885 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1886 	JMP_TARGET(end);
  1887     } else {
  1888 	JMP_rel8( 39, end );
  1889 	JMP_TARGET(doublesize);
  1890 	load_fr_bank( R_EDX );
  1891 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1892 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1893 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1894 	JMP_TARGET(end);
  1896     sh4_x86.tstate = TSTATE_NONE;
  1897 :}
  1898 FMOV @(R0, Rm), FRn {:  /* Load FRn from the address R0 + Rm: long read when FPSCR_SZ is clear, double read when set. */
  1899     precheck();
  1900     check_fpuen_no_precheck();
  1901     load_reg( R_ECX, Rm );
  1902     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX ); // ECX = Rm + R0
  1903     check_ralign32( R_ECX );
  1904     load_spreg( R_EDX, R_FPSCR );
  1905     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1906     JNE_rel8(19, doublesize); // hard-coded rel8: skips the single-size load and the JMP that follows it
  1907     MEM_READ_LONG( R_ECX, R_EAX );
  1908     load_fr_bank( R_EDX );
  1909     store_fr( R_EDX, R_EAX, FRn );
  1910     if( FRn&1 ) { // odd FRn: the pair is written via load_xf_bank; even FRn: via load_fr_bank
  1911 	JMP_rel8(48, end);
  1912 	JMP_TARGET(doublesize);
  1913 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1914 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1915 	load_xf_bank( R_EDX );
  1916 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1917 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1918 	JMP_TARGET(end);
  1919     } else {
  1920 	JMP_rel8(36, end);
  1921 	JMP_TARGET(doublesize);
  1922 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1923 	load_fr_bank( R_EDX );
  1924 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1925 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1926 	JMP_TARGET(end);
  1928     sh4_x86.tstate = TSTATE_NONE;
  1929 :}
  1930 FLDI0 FRn {:  /* IFF PR=0: store the all-zero bit pattern to FRn; the store is skipped when FPSCR_PR is set */
  1931     check_fpuen();
  1932     load_spreg( R_ECX, R_FPSCR );
  1933     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1934     JNE_rel8(8, end); // hard-coded rel8: must match the size of the three operations below
  1935     XOR_r32_r32( R_EAX, R_EAX );
  1936     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1937     store_fr( R_ECX, R_EAX, FRn );
  1938     JMP_TARGET(end);
  1939     sh4_x86.tstate = TSTATE_NONE;
  1940 :}
  1941 FLDI1 FRn {:  /* IFF PR=0: store 0x3F800000 to FRn; the store is skipped when FPSCR_PR is set */
  1942     check_fpuen();
  1943     load_spreg( R_ECX, R_FPSCR );
  1944     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1945     JNE_rel8(11, end); // hard-coded rel8: must match the size of the three operations below
  1946     load_imm32(R_EAX, 0x3F800000); // IEEE-754 single-precision encoding of 1.0
  1947     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1948     store_fr( R_ECX, R_EAX, FRn );
  1949     JMP_TARGET(end);
  1950     sh4_x86.tstate = TSTATE_NONE;
  1951 :}
  1953 FLOAT FPUL, FRn {:  /* Emit: FILD of FPUL, then pop it into FRn as a single (pop_fr) or, when FPSCR_PR is set, a double (pop_dr). */
  1954     check_fpuen();
  1955     load_spreg( R_ECX, R_FPSCR );
  1956     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1957     FILD_sh4r(R_FPUL); // pushed before the precision test; both paths below pop it
  1958     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1959     JNE_rel8(5, doubleprec);
  1960     pop_fr( R_EDX, FRn );
  1961     JMP_rel8(3, end);
  1962     JMP_TARGET(doubleprec);
  1963     pop_dr( R_EDX, FRn );
  1964     JMP_TARGET(end);
  1965     sh4_x86.tstate = TSTATE_NONE;
  1966 :}
  1967 FTRC FRm, FPUL {:  /* Emit: convert FRm (single or double per FPSCR_PR) to an integer in FPUL, substituting max_int/min_int when out of range. */
  1968     check_fpuen();
  1969     load_spreg( R_ECX, R_FPSCR );
  1970     load_fr_bank( R_EDX );
  1971     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1972     JNE_rel8(5, doubleprec);
  1973     push_fr( R_EDX, FRm );
  1974     JMP_rel8(3, doop);
  1975     JMP_TARGET(doubleprec);
  1976     push_dr( R_EDX, FRm );
  1977     JMP_TARGET( doop );
  1978     load_imm32( R_ECX, (uint32_t)&max_int ); // ECX = address of the upper bound; the cast assumes 32-bit pointers
  1979     FILD_r32ind( R_ECX );
  1980     FCOMIP_st(1);
  1981     JNA_rel8( 32, sat );
  1982     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1983     FILD_r32ind( R_ECX );           // 2
  1984     FCOMIP_st(1);                   // 2
  1985     JAE_rel8( 21, sat2 );            // 2
  1986     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1987     FNSTCW_r32ind( R_EAX ); // save the current FPU control word to save_fcw
  1988     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1989     FLDCW_r32ind( R_EDX ); // switch to the control word in trunc_fcw for the conversion
  1990     FISTP_sh4r(R_FPUL);             // 3
  1991     FLDCW_r32ind( R_EAX ); // restore the saved control word
  1992     JMP_rel8( 9, end );             // 2
  1994     JMP_TARGET(sat);
  1995     JMP_TARGET(sat2); // both saturation jumps land here; ECX still points at the bound that was just compared
  1996     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1997     store_spreg( R_ECX, R_FPUL );
  1998     FPOP_st(); // discard the value pushed by push_fr/push_dr
  1999     JMP_TARGET(end);
  2000     sh4_x86.tstate = TSTATE_NONE;
  2001 :}
  2002 FLDS FRm, FPUL {:  /* Emit: copy FRm into FPUL through EAX (integer move, no x87 conversion) */
  2003     check_fpuen();
  2004     load_fr_bank( R_ECX );
  2005     load_fr( R_ECX, R_EAX, FRm );
  2006     store_spreg( R_EAX, R_FPUL );
  2007     sh4_x86.tstate = TSTATE_NONE;
  2008 :}
  2009 FSTS FPUL, FRn {:  /* Emit: copy FPUL into FRn through EAX (integer move, no x87 conversion) */
  2010     check_fpuen();
  2011     load_fr_bank( R_ECX );
  2012     load_spreg( R_EAX, R_FPUL );
  2013     store_fr( R_ECX, R_EAX, FRn );
  2014     sh4_x86.tstate = TSTATE_NONE;
  2015 :}
  2016 FCNVDS FRm, FPUL {:  /* Emit: push FRm as a double onto the x87 stack and pop it into FPUL; nothing is done when PR is clear */
  2017     check_fpuen();
  2018     load_spreg( R_ECX, R_FPSCR );
  2019     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2020     JE_rel8(9, end); // only when PR=1
  2021     load_fr_bank( R_ECX ); // ECX is reused: the FPSCR copy is no longer needed
  2022     push_dr( R_ECX, FRm );
  2023     pop_fpul();
  2024     JMP_TARGET(end);
  2025     sh4_x86.tstate = TSTATE_NONE;
  2026 :}
  2027 FCNVSD FPUL, FRn {:  /* Emit: push FPUL onto the x87 stack and pop it into FRn as a double; nothing is done when PR is clear */
  2028     check_fpuen();
  2029     load_spreg( R_ECX, R_FPSCR );
  2030     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2031     JE_rel8(9, end); // only when PR=1
  2032     load_fr_bank( R_ECX ); // ECX is reused: the FPSCR copy is no longer needed
  2033     push_fpul();
  2034     pop_dr( R_ECX, FRn );
  2035     JMP_TARGET(end);
  2036     sh4_x86.tstate = TSTATE_NONE;
  2037 :}
  2039 /* Floating point instructions */
  2040 FABS FRn {:  /* Emit: FRn = |FRn| via the x87 stack, using the single or double path according to FPSCR_PR */
  2041     check_fpuen();
  2042     load_spreg( R_ECX, R_FPSCR );
  2043     load_fr_bank( R_EDX );
  2044     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2045     JNE_rel8(10, doubleprec); // 10 = 3+2+3+2, the byte counts noted on the four lines below
  2046     push_fr(R_EDX, FRn); // 3
  2047     FABS_st0(); // 2
  2048     pop_fr( R_EDX, FRn); //3
  2049     JMP_rel8(8,end); // 2
  2050     JMP_TARGET(doubleprec);
  2051     push_dr(R_EDX, FRn);
  2052     FABS_st0();
  2053     pop_dr(R_EDX, FRn);
  2054     JMP_TARGET(end);
  2055     sh4_x86.tstate = TSTATE_NONE;
  2056 :}
  2057 FADD FRm, FRn {:  /* Emit: FRn = FRm + FRn via the x87 stack, single or double path according to FPSCR_PR */
  2058     check_fpuen();
  2059     load_spreg( R_ECX, R_FPSCR );
  2060     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2061     load_fr_bank( R_EDX ); // sits between TEST and JNE - presumably a plain load that leaves the flags intact; confirm
  2062     JNE_rel8(13,doubleprec);
  2063     push_fr(R_EDX, FRm);
  2064     push_fr(R_EDX, FRn);
  2065     FADDP_st(1);
  2066     pop_fr(R_EDX, FRn);
  2067     JMP_rel8(11,end);
  2068     JMP_TARGET(doubleprec);
  2069     push_dr(R_EDX, FRm);
  2070     push_dr(R_EDX, FRn);
  2071     FADDP_st(1);
  2072     pop_dr(R_EDX, FRn);
  2073     JMP_TARGET(end);
  2074     sh4_x86.tstate = TSTATE_NONE;
  2075 :}
  2076 FDIV FRm, FRn {:  /* Emit: divide FRn by FRm via the x87 stack and store the result in FRn; single or double path according to FPSCR_PR */
  2077     check_fpuen();
  2078     load_spreg( R_ECX, R_FPSCR );
  2079     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2080     load_fr_bank( R_EDX );
  2081     JNE_rel8(13, doubleprec);
  2082     push_fr(R_EDX, FRn); // FRn is pushed first here (FADD/FMUL push FRm first): operand order matters for division
  2083     push_fr(R_EDX, FRm);
  2084     FDIVP_st(1);
  2085     pop_fr(R_EDX, FRn);
  2086     JMP_rel8(11, end);
  2087     JMP_TARGET(doubleprec);
  2088     push_dr(R_EDX, FRn);
  2089     push_dr(R_EDX, FRm);
  2090     FDIVP_st(1);
  2091     pop_dr(R_EDX, FRn);
  2092     JMP_TARGET(end);
  2093     sh4_x86.tstate = TSTATE_NONE;
  2094 :}
  2095 FMAC FR0, FRm, FRn {:  /* Emit: FRn = FR0 * FRm + FRn via the x87 stack (separate multiply then add); single or double path according to FPSCR_PR */
  2096     check_fpuen();
  2097     load_spreg( R_ECX, R_FPSCR );
  2098     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  2099     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2100     JNE_rel8(18, doubleprec);
  2101     push_fr( R_EDX, 0 ); // register index 0, i.e. FR0
  2102     push_fr( R_EDX, FRm );
  2103     FMULP_st(1);
  2104     push_fr( R_EDX, FRn );
  2105     FADDP_st(1);
  2106     pop_fr( R_EDX, FRn );
  2107     JMP_rel8(16, end);
  2108     JMP_TARGET(doubleprec);
  2109     push_dr( R_EDX, 0 );
  2110     push_dr( R_EDX, FRm );
  2111     FMULP_st(1);
  2112     push_dr( R_EDX, FRn );
  2113     FADDP_st(1);
  2114     pop_dr( R_EDX, FRn );
  2115     JMP_TARGET(end);
  2116     sh4_x86.tstate = TSTATE_NONE;
  2117 :}
  2119 FMUL FRm, FRn {:  /* Emit: FRn = FRm * FRn via the x87 stack, single or double path according to FPSCR_PR */
  2120     check_fpuen();
  2121     load_spreg( R_ECX, R_FPSCR );
  2122     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2123     load_fr_bank( R_EDX );
  2124     JNE_rel8(13, doubleprec);
  2125     push_fr(R_EDX, FRm);
  2126     push_fr(R_EDX, FRn);
  2127     FMULP_st(1);
  2128     pop_fr(R_EDX, FRn);
  2129     JMP_rel8(11, end);
  2130     JMP_TARGET(doubleprec);
  2131     push_dr(R_EDX, FRm);
  2132     push_dr(R_EDX, FRn);
  2133     FMULP_st(1);
  2134     pop_dr(R_EDX, FRn);
  2135     JMP_TARGET(end);
  2136     sh4_x86.tstate = TSTATE_NONE;
  2137 :}
  2138 FNEG FRn {:  /* Emit: FRn = -FRn (x87 FCHS), single or double path according to FPSCR_PR */
  2139     check_fpuen();
  2140     load_spreg( R_ECX, R_FPSCR );
  2141     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2142     load_fr_bank( R_EDX );
  2143     JNE_rel8(10, doubleprec);
  2144     push_fr(R_EDX, FRn);
  2145     FCHS_st0();
  2146     pop_fr(R_EDX, FRn);
  2147     JMP_rel8(8, end);
  2148     JMP_TARGET(doubleprec);
  2149     push_dr(R_EDX, FRn);
  2150     FCHS_st0();
  2151     pop_dr(R_EDX, FRn);
  2152     JMP_TARGET(end);
  2153     sh4_x86.tstate = TSTATE_NONE;
  2154 :}
  2155 FSRRA FRn {:  /* Emit: FRn = 1 / sqrt(FRn) via the x87 stack; nothing is done when FPSCR_PR is set */
  2156     check_fpuen();
  2157     load_spreg( R_ECX, R_FPSCR );
  2158     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2159     load_fr_bank( R_EDX );
  2160     JNE_rel8(12, end); // PR=0 only
  2161     FLD1_st0(); // numerator: constant 1.0
  2162     push_fr(R_EDX, FRn);
  2163     FSQRT_st0();
  2164     FDIVP_st(1); // divide the 1.0 pushed earlier by the square root
  2165     pop_fr(R_EDX, FRn);
  2166     JMP_TARGET(end);
  2167     sh4_x86.tstate = TSTATE_NONE;
  2168 :}
  2169 FSQRT FRn {:  /* Emit: FRn = sqrt(FRn) via the x87 stack, single or double path according to FPSCR_PR */
  2170     check_fpuen();
  2171     load_spreg( R_ECX, R_FPSCR );
  2172     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2173     load_fr_bank( R_EDX );
  2174     JNE_rel8(10, doubleprec);
  2175     push_fr(R_EDX, FRn);
  2176     FSQRT_st0();
  2177     pop_fr(R_EDX, FRn);
  2178     JMP_rel8(8, end);
  2179     JMP_TARGET(doubleprec);
  2180     push_dr(R_EDX, FRn);
  2181     FSQRT_st0();
  2182     pop_dr(R_EDX, FRn);
  2183     JMP_TARGET(end);
  2184     sh4_x86.tstate = TSTATE_NONE;
  2185 :}
  2186 FSUB FRm, FRn {:  /* Emit: subtract FRm from FRn via the x87 stack and store the result in FRn; single or double path according to FPSCR_PR */
  2187     check_fpuen();
  2188     load_spreg( R_ECX, R_FPSCR );
  2189     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2190     load_fr_bank( R_EDX );
  2191     JNE_rel8(13, doubleprec);
  2192     push_fr(R_EDX, FRn); // FRn is pushed first here (FADD/FMUL push FRm first): operand order matters for subtraction
  2193     push_fr(R_EDX, FRm);
  2194     FSUBP_st(1);
  2195     pop_fr(R_EDX, FRn);
  2196     JMP_rel8(11, end);
  2197     JMP_TARGET(doubleprec);
  2198     push_dr(R_EDX, FRn);
  2199     push_dr(R_EDX, FRm);
  2200     FSUBP_st(1);
  2201     pop_dr(R_EDX, FRn);
  2202     JMP_TARGET(end);
  2203     sh4_x86.tstate = TSTATE_NONE;
  2204 :}
  2206 FCMP/EQ FRm, FRn {:  /* Emit: compare FRn with FRm on the x87 stack and set T from the "equal" result (SETE_t) */
  2207     check_fpuen();
  2208     load_spreg( R_ECX, R_FPSCR );
  2209     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2210     load_fr_bank( R_EDX );
  2211     JNE_rel8(8, doubleprec);
  2212     push_fr(R_EDX, FRm);
  2213     push_fr(R_EDX, FRn);
  2214     JMP_rel8(6, end);
  2215     JMP_TARGET(doubleprec);
  2216     push_dr(R_EDX, FRm);
  2217     push_dr(R_EDX, FRn);
  2218     JMP_TARGET(end); // "end" is only the join point of the two precision paths; the comparison follows
  2219     FCOMIP_st(1); // compares and pops FRn, leaving FRm on the stack
  2220     SETE_t();
  2221     FPOP_st(); // discard the remaining operand
  2222     sh4_x86.tstate = TSTATE_NONE;
  2223 :}
  2224 FCMP/GT FRm, FRn {:  /* Emit: compare FRn with FRm on the x87 stack and set T from the "above" result (SETA_t) */
  2225     check_fpuen();
  2226     load_spreg( R_ECX, R_FPSCR );
  2227     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2228     load_fr_bank( R_EDX );
  2229     JNE_rel8(8, doubleprec);
  2230     push_fr(R_EDX, FRm);
  2231     push_fr(R_EDX, FRn);
  2232     JMP_rel8(6, end);
  2233     JMP_TARGET(doubleprec);
  2234     push_dr(R_EDX, FRm);
  2235     push_dr(R_EDX, FRn);
  2236     JMP_TARGET(end); // "end" is only the join point of the two precision paths; the comparison follows
  2237     FCOMIP_st(1); // compares and pops FRn, leaving FRm on the stack
  2238     SETA_t();
  2239     FPOP_st(); // discard the remaining operand
  2240     sh4_x86.tstate = TSTATE_NONE;
  2241 :}
  2243 FSCA FPUL, FRn {:  /* Emit: call sh4_fsca with FPUL and a pointer to the even-aligned register pair containing FRn; nothing is done when PR is set */
  2244     check_fpuen();
  2245     load_spreg( R_ECX, R_FPSCR );
  2246     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2247     JNE_rel8( 21, doubleprec ); // PR set: skip the whole operation
  2248     load_fr_bank( R_ECX );
  2249     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX ); // byte offset of the even register of the pair (index with bit 0 cleared, times 4)
  2250     load_spreg( R_EDX, R_FPUL );
  2251     call_func2( sh4_fsca, R_EDX, R_ECX );
  2252     JMP_TARGET(doubleprec);
  2253     sh4_x86.tstate = TSTATE_NONE;
  2254 :}
  2255 FIPR FVm, FVn {:  /* Emit: 4-element inner product of vectors FVm and FVn, stored in the last element of FVn; nothing is done when PR is set */
  2256     check_fpuen();
  2257     load_spreg( R_ECX, R_FPSCR );
  2258     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2259     JNE_rel8(44, doubleprec); // PR set: skip the whole operation
  2261     load_fr_bank( R_ECX );
  2262     push_fr( R_ECX, FVm<<2 ); // vector index * 4 = index of the vector's first register
  2263     push_fr( R_ECX, FVn<<2 );
  2264     FMULP_st(1);
  2265     push_fr( R_ECX, (FVm<<2)+1);
  2266     push_fr( R_ECX, (FVn<<2)+1);
  2267     FMULP_st(1);
  2268     FADDP_st(1); // running sum of the products
  2269     push_fr( R_ECX, (FVm<<2)+2);
  2270     push_fr( R_ECX, (FVn<<2)+2);
  2271     FMULP_st(1);
  2272     FADDP_st(1);
  2273     push_fr( R_ECX, (FVm<<2)+3);
  2274     push_fr( R_ECX, (FVn<<2)+3);
  2275     FMULP_st(1);
  2276     FADDP_st(1);
  2277     pop_fr( R_ECX, (FVn<<2)+3); // result overwrites the fourth element of FVn
  2278     JMP_TARGET(doubleprec);
  2279     sh4_x86.tstate = TSTATE_NONE;
  2280 :}
  2281 FTRV XMTRX, FVn {:  /* Emit: call sh4_ftrv with a pointer to vector FVn and the value from load_xf_bank; nothing is done when PR is set */
  2282     check_fpuen();
  2283     load_spreg( R_ECX, R_FPSCR );
  2284     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2285     JNE_rel8( 30, doubleprec ); // 30 = 3+3+12+12, the byte counts noted below
  2286     load_fr_bank( R_EDX );                 // 3
  2287     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3 - byte offset of the vector (16 bytes per vector)
  2288     load_xf_bank( R_ECX );                 // 12
  2289     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2290     JMP_TARGET(doubleprec);
  2291     sh4_x86.tstate = TSTATE_NONE;
  2292 :}
  2294 FRCHG {:  /* Emit: toggle the FPSCR_FR bit in FPSCR, then refresh fr_bank from the new value */
  2295     check_fpuen();
  2296     load_spreg( R_ECX, R_FPSCR );
  2297     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2298     store_spreg( R_ECX, R_FPSCR );
  2299     update_fr_bank( R_ECX ); // ECX still holds the updated FPSCR value
  2300     sh4_x86.tstate = TSTATE_NONE;
  2301 :}
  2302 FSCHG {:  /* Emit: toggle the FPSCR_SZ bit in FPSCR */
  2303     check_fpuen();
  2304     load_spreg( R_ECX, R_FPSCR );
  2305     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2306     store_spreg( R_ECX, R_FPSCR );
  2307     sh4_x86.tstate = TSTATE_NONE;
  2308 :}
  2310 /* Processor control instructions */
  2311 LDC Rm, SR {: /* Emit: write Rm to SR through sh4_write_sr(); raises SLOTILLEGAL when translated inside a delay slot */
  2312     if( sh4_x86.in_delay_slot ) {
  2313 	SLOTILLEGAL();
  2314     } else {
  2315 	check_priv();
  2316 	load_reg( R_EAX, Rm );
  2317 	call_func1( sh4_write_sr, R_EAX );
  2318 	sh4_x86.priv_checked = FALSE; // presumably: SR has just changed, so the cached privilege and FPU-enable checks must be redone - confirm
  2319 	sh4_x86.fpuen_checked = FALSE;
  2320 	sh4_x86.tstate = TSTATE_NONE;
  2321     }
  2322 :}
  2323 LDC Rm, GBR {: /* Emit: GBR = Rm (no privilege check is emitted for GBR) */
  2324     load_reg( R_EAX, Rm );
  2325     store_spreg( R_EAX, R_GBR );
  2326 :}
  2327 LDC Rm, VBR {:  /* Emit: VBR = Rm, after the privilege check */
  2328     check_priv();
  2329     load_reg( R_EAX, Rm );
  2330     store_spreg( R_EAX, R_VBR );
  2331     sh4_x86.tstate = TSTATE_NONE;
  2332 :}
  2333 LDC Rm, SSR {:  /* Emit: SSR = Rm, after the privilege check */
  2334     check_priv();
  2335     load_reg( R_EAX, Rm );
  2336     store_spreg( R_EAX, R_SSR );
  2337     sh4_x86.tstate = TSTATE_NONE;
  2338 :}
  2339 LDC Rm, SGR {:  /* Emit: SGR = Rm, after the privilege check */
  2340     check_priv();
  2341     load_reg( R_EAX, Rm );
  2342     store_spreg( R_EAX, R_SGR );
  2343     sh4_x86.tstate = TSTATE_NONE;
  2344 :}
  2345 LDC Rm, SPC {:  /* Emit: SPC = Rm, after the privilege check */
  2346     check_priv();
  2347     load_reg( R_EAX, Rm );
  2348     store_spreg( R_EAX, R_SPC );
  2349     sh4_x86.tstate = TSTATE_NONE;
  2350 :}
  2351 LDC Rm, DBR {:  /* Emit: DBR = Rm, after the privilege check */
  2352     check_priv();
  2353     load_reg( R_EAX, Rm );
  2354     store_spreg( R_EAX, R_DBR );
  2355     sh4_x86.tstate = TSTATE_NONE;
  2356 :}
  2357 LDC Rm, Rn_BANK {:  /* Emit: r_bank[Rn_BANK] = Rm, after the privilege check */
  2358     check_priv();
  2359     load_reg( R_EAX, Rm );
  2360     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2361     sh4_x86.tstate = TSTATE_NONE;
  2362 :}
  2363 LDC.L @Rm+, GBR {:  /* Emit: read the long at address Rm into GBR and post-increment Rm by 4 (no privilege check is emitted for GBR) */
  2364     load_reg( R_EAX, Rm );
  2365     precheck();
  2366     check_ralign32( R_EAX );
  2367     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2368     ADD_imm8s_r32( 4, R_EAX );
  2369     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2370     MEM_READ_LONG( R_ECX, R_EAX );
  2371     store_spreg( R_EAX, R_GBR );
  2372     sh4_x86.tstate = TSTATE_NONE;
  2373 :}
  2374 LDC.L @Rm+, SR {: /* Emit: read the long at address Rm, post-increment Rm by 4 and write the value to SR through sh4_write_sr(); raises SLOTILLEGAL inside a delay slot */
  2375     if( sh4_x86.in_delay_slot ) {
  2376 	SLOTILLEGAL();
  2377     } else {
  2378 	precheck();
  2379 	check_priv_no_precheck();
  2380 	load_reg( R_EAX, Rm );
  2381 	check_ralign32( R_EAX );
  2382 	MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2383 	ADD_imm8s_r32( 4, R_EAX );
  2384 	store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2385 	MEM_READ_LONG( R_ECX, R_EAX );
  2386 	call_func1( sh4_write_sr, R_EAX );
  2387 	sh4_x86.priv_checked = FALSE; // presumably: SR has just changed, so the cached privilege and FPU-enable checks must be redone - confirm
  2388 	sh4_x86.fpuen_checked = FALSE;
  2389 	sh4_x86.tstate = TSTATE_NONE;
  2390     }
  2391 :}
  2392 LDC.L @Rm+, VBR {:  /* Emit: privileged read of the long at address Rm into VBR, post-incrementing Rm by 4 */
  2393     precheck();
  2394     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2395     load_reg( R_EAX, Rm );
  2396     check_ralign32( R_EAX );
  2397     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2398     ADD_imm8s_r32( 4, R_EAX );
  2399     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2400     MEM_READ_LONG( R_ECX, R_EAX );
  2401     store_spreg( R_EAX, R_VBR );
  2402     sh4_x86.tstate = TSTATE_NONE;
  2403 :}
  2404 LDC.L @Rm+, SSR {: /* Emit: privileged read of the long at address Rm into SSR, post-incrementing Rm by 4 */
  2405     precheck();
  2406     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2407     load_reg( R_EAX, Rm );
  2408     check_ralign32( R_EAX );
  2409     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2410     ADD_imm8s_r32( 4, R_EAX );
  2411     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2412     MEM_READ_LONG( R_ECX, R_EAX );
  2413     store_spreg( R_EAX, R_SSR );
  2414     sh4_x86.tstate = TSTATE_NONE;
  2415 :}
  2416 LDC.L @Rm+, SGR {:  /* Emit: privileged read of the long at address Rm into SGR, post-incrementing Rm by 4 */
  2417     precheck();
  2418     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2419     load_reg( R_EAX, Rm );
  2420     check_ralign32( R_EAX );
  2421     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2422     ADD_imm8s_r32( 4, R_EAX );
  2423     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2424     MEM_READ_LONG( R_ECX, R_EAX );
  2425     store_spreg( R_EAX, R_SGR );
  2426     sh4_x86.tstate = TSTATE_NONE;
  2427 :}
  2428 LDC.L @Rm+, SPC {:  /* Emit: privileged read of the long at address Rm into SPC, post-incrementing Rm by 4 */
  2429     precheck();
  2430     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2431     load_reg( R_EAX, Rm );
  2432     check_ralign32( R_EAX );
  2433     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2434     ADD_imm8s_r32( 4, R_EAX );
  2435     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2436     MEM_READ_LONG( R_ECX, R_EAX );
  2437     store_spreg( R_EAX, R_SPC );
  2438     sh4_x86.tstate = TSTATE_NONE;
  2439 :}
  2440 LDC.L @Rm+, DBR {:  /* Emit: privileged read of the long at address Rm into DBR, post-incrementing Rm by 4 */
  2441     precheck();
  2442     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2443     load_reg( R_EAX, Rm );
  2444     check_ralign32( R_EAX );
  2445     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2446     ADD_imm8s_r32( 4, R_EAX );
  2447     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2448     MEM_READ_LONG( R_ECX, R_EAX );
  2449     store_spreg( R_EAX, R_DBR );
  2450     sh4_x86.tstate = TSTATE_NONE;
  2451 :}
  2452 LDC.L @Rm+, Rn_BANK {:  /* Emit: privileged read of the long at address Rm into r_bank[Rn_BANK], post-incrementing Rm by 4 */
  2453     precheck();
  2454     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2455     load_reg( R_EAX, Rm );
  2456     check_ralign32( R_EAX );
  2457     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2458     ADD_imm8s_r32( 4, R_EAX );
  2459     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2460     MEM_READ_LONG( R_ECX, R_EAX );
  2461     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2462     sh4_x86.tstate = TSTATE_NONE;
  2463 :}
  2464 LDS Rm, FPSCR {:  /* Emit: FPSCR = Rm, then refresh fr_bank from the new value. NOTE(review): unlike the FPU arithmetic rules, no check_fpuen() is emitted here - confirm this is intended */
  2465     load_reg( R_EAX, Rm );
  2466     store_spreg( R_EAX, R_FPSCR );
  2467     update_fr_bank( R_EAX ); // EAX still holds the new FPSCR value
  2468     sh4_x86.tstate = TSTATE_NONE;
  2469 :}
  2470 LDS.L @Rm+, FPSCR {:  /* Emit: read the long at address Rm into FPSCR, post-increment Rm by 4, then refresh fr_bank. NOTE(review): no check_fpuen() is emitted here - confirm this is intended */
  2471     load_reg( R_EAX, Rm );
  2472     precheck();
  2473     check_ralign32( R_EAX );
  2474     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2475     ADD_imm8s_r32( 4, R_EAX );
  2476     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2477     MEM_READ_LONG( R_ECX, R_EAX );
  2478     store_spreg( R_EAX, R_FPSCR );
  2479     update_fr_bank( R_EAX ); // EAX still holds the new FPSCR value
  2480     sh4_x86.tstate = TSTATE_NONE;
  2481 :}
  2482 LDS Rm, FPUL {:  /* Emit: FPUL = Rm. NOTE(review): no check_fpuen() is emitted here - confirm this is intended */
  2483     load_reg( R_EAX, Rm );
  2484     store_spreg( R_EAX, R_FPUL );
  2485 :}
  2486 LDS.L @Rm+, FPUL {:  /* Emit: read the long at address Rm into FPUL and post-increment Rm by 4. NOTE(review): no check_fpuen() is emitted here - confirm this is intended */
  2487     load_reg( R_EAX, Rm );
  2488     precheck();
  2489     check_ralign32( R_EAX );
  2490     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2491     ADD_imm8s_r32( 4, R_EAX );
  2492     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2493     MEM_READ_LONG( R_ECX, R_EAX );
  2494     store_spreg( R_EAX, R_FPUL );
  2495     sh4_x86.tstate = TSTATE_NONE;
  2496 :}
  2497 LDS Rm, MACH {: /* Emit: MACH = Rm */
  2498     load_reg( R_EAX, Rm );
  2499     store_spreg( R_EAX, R_MACH );
  2500 :}
  2501 LDS.L @Rm+, MACH {:  /* Emit: read the long at address Rm into MACH and post-increment Rm by 4 */
  2502     load_reg( R_EAX, Rm );
  2503     precheck();
  2504     check_ralign32( R_EAX );
  2505     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2506     ADD_imm8s_r32( 4, R_EAX );
  2507     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2508     MEM_READ_LONG( R_ECX, R_EAX );
  2509     store_spreg( R_EAX, R_MACH );
  2510     sh4_x86.tstate = TSTATE_NONE;
  2511 :}
  2512 LDS Rm, MACL {:  /* Emit: MACL = Rm */
  2513     load_reg( R_EAX, Rm );
  2514     store_spreg( R_EAX, R_MACL );
  2515 :}
  2516 LDS.L @Rm+, MACL {:  /* Emit: read the long at address Rm into MACL and post-increment Rm by 4 */
  2517     load_reg( R_EAX, Rm );
  2518     precheck();
  2519     check_ralign32( R_EAX );
  2520     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2521     ADD_imm8s_r32( 4, R_EAX );
  2522     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2523     MEM_READ_LONG( R_ECX, R_EAX );
  2524     store_spreg( R_EAX, R_MACL );
  2525     sh4_x86.tstate = TSTATE_NONE;
  2526 :}
  2527 LDS Rm, PR {:  /* Emit: PR = Rm */
  2528     load_reg( R_EAX, Rm );
  2529     store_spreg( R_EAX, R_PR );
  2530 :}
  2531 LDS.L @Rm+, PR {:  /* Emit: read the long at address Rm into PR and post-increment Rm by 4 */
  2532     load_reg( R_EAX, Rm );
  2533     precheck();
  2534     check_ralign32( R_EAX );
  2535     MOV_r32_r32( R_EAX, R_ECX ); // ECX keeps the original address for the read
  2536     ADD_imm8s_r32( 4, R_EAX );
  2537     store_reg( R_EAX, Rm ); // Rm is written back before the memory read is emitted
  2538     MEM_READ_LONG( R_ECX, R_EAX );
  2539     store_spreg( R_EAX, R_PR );
  2540     sh4_x86.tstate = TSTATE_NONE;
  2541 :}
  2542 LDTLB {: /* Empty action: the translator emits no code for this instruction */ :}
  2543 OCBI @Rn {: /* Empty action: the translator emits no code for this instruction */ :}
  2544 OCBP @Rn {: /* Empty action: the translator emits no code for this instruction */ :}
  2545 OCBWB @Rn {: /* Empty action: the translator emits no code for this instruction */ :}
  2546 PREF @Rn {: /* Emit: call sh4_flush_store_queue only when the top 6 bits of Rn equal 0xE0000000; otherwise do nothing */
  2547     load_reg( R_EAX, Rn );
  2548     PUSH_r32( R_EAX ); // pushed unconditionally - presumably the argument for sh4_flush_store_queue; confirm
  2549     AND_imm32_r32( 0xFC000000, R_EAX );
  2550     CMP_imm32_r32( 0xE0000000, R_EAX );
  2551     JNE_rel8(7, end); // address outside the 0xE0000000 region: skip the call
  2552     call_func0( sh4_flush_store_queue );
  2553     JMP_TARGET(end);
  2554     ADD_imm8s_r32( 4, R_ESP ); // drop the value pushed above, on both paths
  2555     sh4_x86.tstate = TSTATE_NONE;
  2556 :}
  2557 SLEEP {: /* Emit: privilege check followed by a call to sh4_sleep; returns 2 from the translate function instead of falling through */
  2558     check_priv();
  2559     call_func0( sh4_sleep );
  2560     sh4_x86.tstate = TSTATE_NONE;
  2561     sh4_x86.in_delay_slot = FALSE;
  2562     return 2; // NOTE(review): the meaning of the value 2 is defined by the caller, which is not visible here
  2563 :}
  2564 STC SR, Rn {: /* Emit: Rn = value returned by sh4_read_sr(), after the privilege check */
  2565     check_priv();
  2566     call_func0(sh4_read_sr);
  2567     store_reg( R_EAX, Rn ); // the call's result is taken from EAX
  2568     sh4_x86.tstate = TSTATE_NONE;
  2569 :}
  2570 STC GBR, Rn {:  /* Emit: Rn = GBR (no privilege check is emitted for GBR) */
  2571     load_spreg( R_EAX, R_GBR );
  2572     store_reg( R_EAX, Rn );
  2573 :}
  2574 STC VBR, Rn {:  /* Emit: Rn = VBR, after the privilege check */
  2575     check_priv();
  2576     load_spreg( R_EAX, R_VBR );
  2577     store_reg( R_EAX, Rn );
  2578     sh4_x86.tstate = TSTATE_NONE;
  2579 :}
  2580 STC SSR, Rn {:  /* Emit: Rn = SSR, after the privilege check */
  2581     check_priv();
  2582     load_spreg( R_EAX, R_SSR );
  2583     store_reg( R_EAX, Rn );
  2584     sh4_x86.tstate = TSTATE_NONE;
  2585 :}
  2586 STC SPC, Rn {:  /* Emit: Rn = SPC, after the privilege check */
  2587     check_priv();
  2588     load_spreg( R_EAX, R_SPC );
  2589     store_reg( R_EAX, Rn );
  2590     sh4_x86.tstate = TSTATE_NONE;
  2591 :}
  2592 STC SGR, Rn {:  /* Emit: Rn = SGR, after the privilege check */
  2593     check_priv();
  2594     load_spreg( R_EAX, R_SGR );
  2595     store_reg( R_EAX, Rn );
  2596     sh4_x86.tstate = TSTATE_NONE;
  2597 :}
  2598 STC DBR, Rn {:  /* Emit: Rn = DBR, after the privilege check */
  2599     check_priv();
  2600     load_spreg( R_EAX, R_DBR );
  2601     store_reg( R_EAX, Rn );
  2602     sh4_x86.tstate = TSTATE_NONE;
  2603 :}
  2604 STC Rm_BANK, Rn {: /* Emit: Rn = r_bank[Rm_BANK], after the privilege check */
  2605     check_priv();
  2606     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2607     store_reg( R_EAX, Rn );
  2608     sh4_x86.tstate = TSTATE_NONE;
  2609 :}
  2610 STC.L SR, @-Rn {: /* Emit: privileged pre-decrement store - Rn -= 4, then write the value from sh4_read_sr() to the long at address Rn */
  2611     precheck();
  2612     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2613     call_func0( sh4_read_sr ); // result is used from EAX below; ECX is loaded only after the call
  2614     load_reg( R_ECX, Rn );
  2615     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2616     ADD_imm8s_r32( -4, R_ECX );
  2617     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2618     MEM_WRITE_LONG( R_ECX, R_EAX );
  2619     sh4_x86.tstate = TSTATE_NONE;
  2620 :}
  2621 STC.L VBR, @-Rn {:  /* Emit: privileged pre-decrement store - Rn -= 4, then write VBR to the long at address Rn */
  2622     precheck();
  2623     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2624     load_reg( R_ECX, Rn );
  2625     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2626     ADD_imm8s_r32( -4, R_ECX );
  2627     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2628     load_spreg( R_EAX, R_VBR );
  2629     MEM_WRITE_LONG( R_ECX, R_EAX );
  2630     sh4_x86.tstate = TSTATE_NONE;
  2631 :}
  2632 STC.L SSR, @-Rn {:  /* Emit: privileged pre-decrement store - Rn -= 4, then write SSR to the long at address Rn */
  2633     precheck();
  2634     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2635     load_reg( R_ECX, Rn );
  2636     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2637     ADD_imm8s_r32( -4, R_ECX );
  2638     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2639     load_spreg( R_EAX, R_SSR );
  2640     MEM_WRITE_LONG( R_ECX, R_EAX );
  2641     sh4_x86.tstate = TSTATE_NONE;
  2642 :}
  2643 STC.L SPC, @-Rn {: /* Emit: privileged pre-decrement store - Rn -= 4, then write SPC to the long at address Rn */
  2644     precheck();
  2645     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2646     load_reg( R_ECX, Rn );
  2647     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2648     ADD_imm8s_r32( -4, R_ECX );
  2649     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2650     load_spreg( R_EAX, R_SPC );
  2651     MEM_WRITE_LONG( R_ECX, R_EAX );
  2652     sh4_x86.tstate = TSTATE_NONE;
  2653 :}
  2654 STC.L SGR, @-Rn {:  /* Emit: privileged pre-decrement store - Rn -= 4, then write SGR to the long at address Rn */
  2655     precheck();
  2656     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2657     load_reg( R_ECX, Rn );
  2658     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2659     ADD_imm8s_r32( -4, R_ECX );
  2660     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2661     load_spreg( R_EAX, R_SGR );
  2662     MEM_WRITE_LONG( R_ECX, R_EAX );
  2663     sh4_x86.tstate = TSTATE_NONE;
  2664 :}
  2665 STC.L DBR, @-Rn {:  /* Emit: privileged pre-decrement store - Rn -= 4, then write DBR to the long at address Rn */
  2666     precheck();
  2667     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2668     load_reg( R_ECX, Rn );
  2669     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2670     ADD_imm8s_r32( -4, R_ECX );
  2671     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2672     load_spreg( R_EAX, R_DBR );
  2673     MEM_WRITE_LONG( R_ECX, R_EAX );
  2674     sh4_x86.tstate = TSTATE_NONE;
  2675 :}
  2676 STC.L Rm_BANK, @-Rn {:  /* Emit: privileged pre-decrement store - Rn -= 4, then write r_bank[Rm_BANK] to the long at address Rn */
  2677     precheck();
  2678     check_priv_no_precheck(); // helper name suggests it relies on the precheck() emitted just above - confirm
  2679     load_reg( R_ECX, Rn );
  2680     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2681     ADD_imm8s_r32( -4, R_ECX );
  2682     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2683     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2684     MEM_WRITE_LONG( R_ECX, R_EAX );
  2685     sh4_x86.tstate = TSTATE_NONE;
  2686 :}
  2687 STC.L GBR, @-Rn {:  /* Emit: pre-decrement store - Rn -= 4, then write GBR to the long at address Rn (no privilege check is emitted for GBR) */
  2688     load_reg( R_ECX, Rn );
  2689     precheck();
  2690     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2691     ADD_imm8s_r32( -4, R_ECX );
  2692     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2693     load_spreg( R_EAX, R_GBR );
  2694     MEM_WRITE_LONG( R_ECX, R_EAX );
  2695     sh4_x86.tstate = TSTATE_NONE;
  2696 :}
  2697 STS FPSCR, Rn {:  /* Emit: Rn = FPSCR. NOTE(review): no check_fpuen() is emitted here - confirm this is intended */
  2698     load_spreg( R_EAX, R_FPSCR );
  2699     store_reg( R_EAX, Rn );
  2700 :}
  2701 STS.L FPSCR, @-Rn {:  /* Emit: pre-decrement store - Rn -= 4, then write FPSCR to the long at address Rn. NOTE(review): no check_fpuen() is emitted here - confirm this is intended */
  2702     load_reg( R_ECX, Rn );
  2703     precheck();
  2704     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2705     ADD_imm8s_r32( -4, R_ECX );
  2706     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2707     load_spreg( R_EAX, R_FPSCR );
  2708     MEM_WRITE_LONG( R_ECX, R_EAX );
  2709     sh4_x86.tstate = TSTATE_NONE;
  2710 :}
  2711 STS FPUL, Rn {:  /* Emit: Rn = FPUL. NOTE(review): no check_fpuen() is emitted here - confirm this is intended */
  2712     load_spreg( R_EAX, R_FPUL );
  2713     store_reg( R_EAX, Rn );
  2714 :}
  2715 STS.L FPUL, @-Rn {:  /* Emit: pre-decrement store - Rn -= 4, then write FPUL to the long at address Rn. NOTE(review): no check_fpuen() is emitted here - confirm this is intended */
  2716     load_reg( R_ECX, Rn );
  2717     precheck();
  2718     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2719     ADD_imm8s_r32( -4, R_ECX );
  2720     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2721     load_spreg( R_EAX, R_FPUL );
  2722     MEM_WRITE_LONG( R_ECX, R_EAX );
  2723     sh4_x86.tstate = TSTATE_NONE;
  2724 :}
  2725 STS MACH, Rn {:  /* Emit: Rn = MACH */
  2726     load_spreg( R_EAX, R_MACH );
  2727     store_reg( R_EAX, Rn );
  2728 :}
  2729 STS.L MACH, @-Rn {:  /* Emit: pre-decrement store - Rn -= 4, then write MACH to the long at address Rn */
  2730     load_reg( R_ECX, Rn );
  2731     precheck();
  2732     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2733     ADD_imm8s_r32( -4, R_ECX );
  2734     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2735     load_spreg( R_EAX, R_MACH );
  2736     MEM_WRITE_LONG( R_ECX, R_EAX );
  2737     sh4_x86.tstate = TSTATE_NONE;
  2738 :}
  2739 STS MACL, Rn {:  /* Emit: Rn = MACL */
  2740     load_spreg( R_EAX, R_MACL );
  2741     store_reg( R_EAX, Rn );
  2742 :}
  2743 STS.L MACL, @-Rn {:  /* Emit: pre-decrement store - Rn -= 4, then write MACL to the long at address Rn */
  2744     load_reg( R_ECX, Rn );
  2745     precheck();
  2746     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2747     ADD_imm8s_r32( -4, R_ECX );
  2748     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2749     load_spreg( R_EAX, R_MACL );
  2750     MEM_WRITE_LONG( R_ECX, R_EAX );
  2751     sh4_x86.tstate = TSTATE_NONE;
  2752 :}
  2753 STS PR, Rn {:  /* Emit: Rn = PR */
  2754     load_spreg( R_EAX, R_PR );
  2755     store_reg( R_EAX, Rn );
  2756 :}
  2757 STS.L PR, @-Rn {:  /* Emit: pre-decrement store - Rn -= 4, then write PR to the long at address Rn */
  2758     load_reg( R_ECX, Rn );
  2759     precheck();
  2760     check_walign32( R_ECX ); // alignment is checked on the address before the -4 adjustment
  2761     ADD_imm8s_r32( -4, R_ECX );
  2762     store_reg( R_ECX, Rn ); // Rn is written back before the memory write is emitted
  2763     load_spreg( R_EAX, R_PR );
  2764     MEM_WRITE_LONG( R_ECX, R_EAX );
  2765     sh4_x86.tstate = TSTATE_NONE;
  2766 :}
  2768 NOP {: /* Empty action: no code is emitted. An x86 NOP (0x90) could be emitted instead, but it would serve no purpose. */ :}
  2769 %%
  2770     sh4_x86.in_delay_slot = FALSE;
  2771     return 0;
.