lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 533:9764673fd4a5
prev 532:43653e748030
next 539:75f3e594d4a7
author nkeynes
date Tue Nov 20 08:31:34 2007 +0000 (16 years ago)
permissions -rw-r--r--
last change Fix TRAPA (translator) and add test case
     1 /**
     2  * $Id: sh4x86.in,v 1.20 2007-11-08 11:54:16 nkeynes Exp $
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/xltcache.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4trans.h"
    31 #include "sh4/sh4mmio.h"
    32 #include "sh4/x86op.h"
    33 #include "clock.h"
    35 #define DEFAULT_BACKPATCH_SIZE 4096
    37 /** 
    38  * Struct to manage internal translation state. This state is not saved -
    39  * it is only valid between calls to sh4_translate_begin_block() and
    40  * sh4_translate_end_block()
    41  */
    42 struct sh4_x86_state {
    43     gboolean in_delay_slot;
    44     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    45     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    46     gboolean branch_taken; /* true if we branched unconditionally */
    47     uint32_t block_start_pc;
    48     int tstate;
    50     /* Allocated memory for the (block-wide) back-patch list */
    51     uint32_t **backpatch_list;
    52     uint32_t backpatch_posn;
    53     uint32_t backpatch_size;
    54 };
    56 #define TSTATE_NONE -1
    57 #define TSTATE_O    0
    58 #define TSTATE_C    2
    59 #define TSTATE_E    4
    60 #define TSTATE_NE   5
    61 #define TSTATE_G    0xF
    62 #define TSTATE_GE   0xD
    63 #define TSTATE_A    7
    64 #define TSTATE_AE   3
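/* Annotation (not part of the original source): the TSTATE_* values are x86
 * condition-code numbers for the test that last computed T, which lets the
 * emitters synthesize the matching instructions arithmetically:
 *
 *     OP(0x70 + sh4_x86.tstate);            // Jcc rel8  (jump if T set)
 *     OP(0x0F); OP(0x80 + sh4_x86.tstate);  // Jcc rel32
 *     OP(0x0F); OP(0x90 + sh4_x86.tstate);  // SETcc r/m8
 *
 * XORing the low bit (tstate^1) inverts the condition, e.g. E (4) <-> NE (5),
 * which is how JF_rel8 below derives the "T clear" branch from the same state.
 */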
    66 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
    67 #define JT_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    68 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    69     OP(0x70+sh4_x86.tstate); OP(rel8); \
    70     MARK_JMP(rel8,label)
    71 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
    72 #define JF_rel8(rel8,label) if( sh4_x86.tstate == TSTATE_NONE ) { \
    73 	CMP_imm8s_sh4r( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    74     OP(0x70+ (sh4_x86.tstate^1)); OP(rel8); \
    75     MARK_JMP(rel8, label)
    78 #define EXIT_DATA_ADDR_READ 0
    79 #define EXIT_DATA_ADDR_WRITE 7
    80 #define EXIT_ILLEGAL 14
    81 #define EXIT_SLOT_ILLEGAL 21
    82 #define EXIT_FPU_DISABLED 28
    83 #define EXIT_SLOT_FPU_DISABLED 35
    85 static struct sh4_x86_state sh4_x86;
    87 static uint32_t max_int = 0x7FFFFFFF;
    88 static uint32_t min_int = 0x80000000;
    89 static uint32_t save_fcw; /* save value for fpu control word */
    90 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    92 void sh4_x86_init()
    93 {
    94     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    95     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
    96 }
    99 static void sh4_x86_add_backpatch( uint8_t *ptr )
   100 {
   101     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   102 	sh4_x86.backpatch_size <<= 1;
   103 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
   104 	assert( sh4_x86.backpatch_list != NULL );
   105     }
   106     sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
   107 }
   109 static void sh4_x86_do_backpatch( uint8_t *reloc_base )
   110 {
   111     unsigned int i;
   112     for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
   113 	*sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
   114     }
   115 }
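/* Annotation: a worked example of the back-patch arithmetic, assuming the
 * JE_exit()/JMP_exit() emitters store the EXIT_* stanza offset in the rel32
 * field and record the field's address here. If a check at site S emits a
 * Jcc rel32 whose field holds EXIT_ILLEGAL (14), then after
 * sh4_x86_do_backpatch(end_ptr) the displacement becomes
 * 14 + (end_ptr - S - 4), i.e. exactly 14 bytes into the exception trailer,
 * which is where the EXC_ILLEGAL stanza starts.
 */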
   117 /**
   118  * Emit an instruction to load an SH4 reg into a real register
   119  */
   120 static inline void load_reg( int x86reg, int sh4reg ) 
   121 {
   122     /* mov [bp+n], reg */
   123     OP(0x8B);
   124     OP(0x45 + (x86reg<<3));
   125     OP(REG_OFFSET(r[sh4reg]));
   126 }
   128 static inline void load_reg16s( int x86reg, int sh4reg )
   129 {
   130     OP(0x0F);
   131     OP(0xBF);
   132     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   133 }
   135 static inline void load_reg16u( int x86reg, int sh4reg )
   136 {
   137     OP(0x0F);
   138     OP(0xB7);
   139     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   141 }
   143 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   144 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   145 /**
   146  * Emit an instruction to load an immediate value into a register
   147  */
   148 static inline void load_imm32( int x86reg, uint32_t value ) {
   149     /* mov #value, reg */
   150     OP(0xB8 + x86reg);
   151     OP32(value);
   152 }
   154 /**
   155  * Load an immediate 64-bit quantity (note: x86-64 only)
   156  */
    157 static inline void load_imm64( int x86reg, uint64_t value ) {
   158     /* mov #value, reg */
   159     REXW();
   160     OP(0xB8 + x86reg);
   161     OP64(value);
   162 }
   165 /**
   166  * Emit an instruction to store an SH4 reg (RN)
   167  */
    168 static inline void store_reg( int x86reg, int sh4reg ) {
   169     /* mov reg, [bp+n] */
   170     OP(0x89);
   171     OP(0x45 + (x86reg<<3));
   172     OP(REG_OFFSET(r[sh4reg]));
   173 }
   175 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   177 /**
   178  * Load an FR register (single-precision floating point) into an integer x86
   179  * register (eg for register-to-register moves)
   180  */
    181 static inline void load_fr( int bankreg, int x86reg, int frm )
   182 {
   183     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   184 }
   186 /**
    187  * Store an integer x86 register into an FR register (single-precision
    188  * floating point), eg for register-to-register moves
   189  */
    190 static inline void store_fr( int bankreg, int x86reg, int frn )
   191 {
   192     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   193 }
   196 /**
    197  * Load a pointer to the back floating-point bank into the specified x86 register. The
   198  * bankreg must have been previously loaded with FPSCR.
   199  * NB: 12 bytes
   200  */
   201 static inline void load_xf_bank( int bankreg )
   202 {
   203     NOT_r32( bankreg );
   204     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   205     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   206     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   207 }
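/* Annotation: worked through, with FPSCR.FR at bit 21 and 16 floats
 * (64 bytes) per bank, the sequence above computes the offset of the
 * *inactive* bank:
 *
 *     offset  = (~fpscr >> 15) & 0x40;    // 64 if FR==0, 0 if FR==1
 *     bankreg = (char *)&sh4r + REG_OFFSET(fr) + offset;
 *
 * The initial NOT is what selects the opposite bank to the one fr_bank
 * points at (compare update_fr_bank below, which omits it).
 */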
   209 /**
   210  * Update the fr_bank pointer based on the current fpscr value.
   211  */
   212 static inline void update_fr_bank( int fpscrreg )
   213 {
   214     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
   215     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
   216     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
   217     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
   218 }
   219 /**
   220  * Push FPUL (as a 32-bit float) onto the FPU stack
   221  */
   222 static inline void push_fpul( )
   223 {
   224     OP(0xD9); OP(0x45); OP(R_FPUL);
   225 }
   227 /**
   228  * Pop FPUL (as a 32-bit float) from the FPU stack
   229  */
   230 static inline void pop_fpul( )
   231 {
   232     OP(0xD9); OP(0x5D); OP(R_FPUL);
   233 }
   235 /**
   236  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   237  * with the location of the current fp bank.
   238  */
   239 static inline void push_fr( int bankreg, int frm ) 
   240 {
   241     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   242 }
   244 /**
   245  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   246  * with bankreg previously loaded with the location of the current fp bank.
   247  */
   248 static inline void pop_fr( int bankreg, int frm )
   249 {
   250     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
   251 }
   253 /**
   254  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   255  * with the location of the current fp bank.
   256  */
   257 static inline void push_dr( int bankreg, int frm )
   258 {
   259     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   260 }
   262 static inline void pop_dr( int bankreg, int frm )
   263 {
   264     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
   265 }
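/* Annotation: the (frm^1)<<2 addressing in the single-precision helpers,
 * versus the unswapped frm<<2 in push_dr/pop_dr, suggests each FR pair is
 * stored word-swapped in sh4r so that a 64-bit DR access reads a complete
 * IEEE double directly on the little-endian host; an individual FR register
 * therefore lives at the opposite 4-byte slot of its pair.
 */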
   267 #if SH4_TRANSLATOR == TARGET_X86_64
   268 /* X86-64 has different calling conventions... */
   270 #define load_ptr( reg, ptr ) load_imm64( reg, (uint64_t)ptr );
   272 /**
   273  * Note: clobbers EAX to make the indirect call - this isn't usually
   274  * a problem since the callee will usually clobber it anyway.
   275  * Size: 12 bytes
   276  */
   277 #define CALL_FUNC0_SIZE 12
   278 static inline void call_func0( void *ptr )
   279 {
   280     load_imm64(R_EAX, (uint64_t)ptr);
   281     CALL_r32(R_EAX);
   282 }
   284 #define CALL_FUNC1_SIZE 14
   285 static inline void call_func1( void *ptr, int arg1 )
   286 {
   287     MOV_r32_r32(arg1, R_EDI);
   288     call_func0(ptr);
   289 }
   291 #define CALL_FUNC2_SIZE 16
   292 static inline void call_func2( void *ptr, int arg1, int arg2 )
   293 {
   294     MOV_r32_r32(arg1, R_EDI);
   295     MOV_r32_r32(arg2, R_ESI);
   296     call_func0(ptr);
   297 }
   299 #define MEM_WRITE_DOUBLE_SIZE 39
   300 /**
   301  * Write a double (64-bit) value into memory, with the first word in arg2a, and
   302  * the second in arg2b
   303  */
   304 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
   305 {
   306 /*
   307     MOV_r32_r32( addr, R_EDI );
   308     MOV_r32_r32( arg2b, R_ESI );
   309     REXW(); SHL_imm8_r32( 32, R_ESI );
   310     REXW(); MOVZX_r16_r32( arg2a, arg2a );
   311     REXW(); OR_r32_r32( arg2a, R_ESI );
   312     call_func0(sh4_write_quad);
   313 */
   314     PUSH_r32(arg2b);
   315     PUSH_r32(addr);
   316     call_func2(sh4_write_long, addr, arg2a);
   317     POP_r32(addr);
   318     POP_r32(arg2b);
   319     ADD_imm8s_r32(4, addr);
   320     call_func2(sh4_write_long, addr, arg2b);
   321 }
   323 #define MEM_READ_DOUBLE_SIZE 35
   324 /**
   325  * Read a double (64-bit) value from memory, writing the first word into arg2a
   326  * and the second into arg2b. The addr must not be in EAX
   327  */
   328 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
   329 {
   330 /*
   331     MOV_r32_r32( addr, R_EDI );
   332     call_func0(sh4_read_quad);
   333     REXW(); MOV_r32_r32( R_EAX, arg2a );
   334     REXW(); MOV_r32_r32( R_EAX, arg2b );
   335     REXW(); SHR_imm8_r32( 32, arg2b );
   336 */
   337     PUSH_r32(addr);
   338     call_func1(sh4_read_long, addr);
   339     POP_r32(R_EDI);
   340     PUSH_r32(R_EAX);
   341     ADD_imm8s_r32(4, R_EDI);
   342     call_func0(sh4_read_long);
   343     MOV_r32_r32(R_EAX, arg2b);
   344     POP_r32(arg2a);
   345 }
   347 #define EXIT_BLOCK_SIZE 35
   348 /**
   349  * Exit the block to an absolute PC
   350  */
   351 void exit_block( sh4addr_t pc, sh4addr_t endpc )
   352 {
   353     load_imm32( R_ECX, pc );                            // 5
   354     store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
   355     REXW(); MOV_moff32_EAX( xlat_get_lut_entry(pc) );
   356     REXW(); AND_imm8s_r32( 0xFC, R_EAX ); // 3
   357     load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   358     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   359     POP_r32(R_EBP);
   360     RET();
   361 }
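/* Annotation: the cycle accounting above relies on SH4 instructions being
 * 2 bytes each, so (endpc - block_start_pc) >> 1 is the number of
 * instructions executed on this path. For example, a block entered at
 * 0x8C001000 that exits at 0x8C001010 charges 8 * sh4_cpu_period to
 * sh4r.slice_cycle.
 */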
   364 /**
   365  * Write the block trailer (exception handling block)
   366  */
   367 void sh4_translate_end_block( sh4addr_t pc ) {
   368     if( sh4_x86.branch_taken == FALSE ) {
   369 	// Didn't exit unconditionally already, so write the termination here
   370 	exit_block( pc, pc );
   371     }
   372     if( sh4_x86.backpatch_posn != 0 ) {
   373 	uint8_t *end_ptr = xlat_output;
   374 	// Exception termination. Jump block for various exception codes:
   375 	load_imm32( R_EDI, EXC_DATA_ADDR_READ );
   376 	JMP_rel8( 33, target1 );
   377 	load_imm32( R_EDI, EXC_DATA_ADDR_WRITE );
   378 	JMP_rel8( 26, target2 );
   379 	load_imm32( R_EDI, EXC_ILLEGAL );
   380 	JMP_rel8( 19, target3 );
   381 	load_imm32( R_EDI, EXC_SLOT_ILLEGAL ); 
   382 	JMP_rel8( 12, target4 );
   383 	load_imm32( R_EDI, EXC_FPU_DISABLED ); 
   384 	JMP_rel8( 5, target5 );
   385 	load_imm32( R_EDI, EXC_SLOT_FPU_DISABLED );
   386 	// target
   387 	JMP_TARGET(target1);
   388 	JMP_TARGET(target2);
   389 	JMP_TARGET(target3);
   390 	JMP_TARGET(target4);
   391 	JMP_TARGET(target5);
   392 	// Raise exception
   393 	load_spreg( R_ECX, REG_OFFSET(pc) );
   394 	ADD_r32_r32( R_EDX, R_ECX );
   395 	ADD_r32_r32( R_EDX, R_ECX );
   396 	store_spreg( R_ECX, REG_OFFSET(pc) );
   397 	MOV_moff32_EAX( &sh4_cpu_period );
   398 	MUL_r32( R_EDX );
   399 	ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
   401 	call_func0( sh4_raise_exception );
   402 	load_spreg( R_EAX, REG_OFFSET(pc) );
   403 	call_func1(xlat_get_code,R_EAX);
   404 	POP_r32(R_EBP);
   405 	RET();
   407 	sh4_x86_do_backpatch( end_ptr );
   408     }
   409 }
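/* Annotation: each stanza in the jump block above is 7 bytes (a 5-byte
 * load_imm32 plus a 2-byte JMP rel8), so the jump distances step down
 * 33, 26, 19, 12, 5 (n*7 + 5 for the final stanza, which has no jump),
 * and the EXIT_* constants at the top of the file advance in multiples of 7
 * to index the same stanzas from the back-patched branches.
 */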
   411 #else /* SH4_TRANSLATOR == TARGET_X86 */
   413 #define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)ptr );
   415 /**
   416  * Note: clobbers EAX to make the indirect call - this isn't usually
   417  * a problem since the callee will usually clobber it anyway.
   418  */
   419 #define CALL_FUNC0_SIZE 7
   420 static inline void call_func0( void *ptr )
   421 {
   422     load_imm32(R_EAX, (uint32_t)ptr);
   423     CALL_r32(R_EAX);
   424 }
   426 #define CALL_FUNC1_SIZE 11
   427 static inline void call_func1( void *ptr, int arg1 )
   428 {
   429     PUSH_r32(arg1);
   430     call_func0(ptr);
   431     ADD_imm8s_r32( 4, R_ESP );
   432 }
   434 #define CALL_FUNC2_SIZE 12
   435 static inline void call_func2( void *ptr, int arg1, int arg2 )
   436 {
   437     PUSH_r32(arg2);
   438     PUSH_r32(arg1);
   439     call_func0(ptr);
   440     ADD_imm8s_r32( 8, R_ESP );
   441 }
   443 /**
   444  * Write a double (64-bit) value into memory, with the first word in arg2a, and
   445  * the second in arg2b
   446  * NB: 30 bytes
   447  */
   448 #define MEM_WRITE_DOUBLE_SIZE 30
   449 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
   450 {
   451     ADD_imm8s_r32( 4, addr );
   452     PUSH_r32(arg2b);
   453     PUSH_r32(addr);
   454     ADD_imm8s_r32( -4, addr );
   455     PUSH_r32(arg2a);
   456     PUSH_r32(addr);
   457     call_func0(sh4_write_long);
   458     ADD_imm8s_r32( 8, R_ESP );
   459     call_func0(sh4_write_long);
   460     ADD_imm8s_r32( 8, R_ESP );
   461 }
   463 /**
   464  * Read a double (64-bit) value from memory, writing the first word into arg2a
   465  * and the second into arg2b. The addr must not be in EAX
   466  * NB: 27 bytes
   467  */
   468 #define MEM_READ_DOUBLE_SIZE 27
   469 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
   470 {
   471     PUSH_r32(addr);
   472     call_func0(sh4_read_long);
   473     POP_r32(addr);
   474     PUSH_r32(R_EAX);
   475     ADD_imm8s_r32( 4, addr );
   476     PUSH_r32(addr);
   477     call_func0(sh4_read_long);
   478     ADD_imm8s_r32( 4, R_ESP );
   479     MOV_r32_r32( R_EAX, arg2b );
   480     POP_r32(arg2a);
   481 }
   483 #define EXIT_BLOCK_SIZE 29
   484 /**
   485  * Exit the block to an absolute PC
   486  */
   487 void exit_block( sh4addr_t pc, sh4addr_t endpc )
   488 {
   489     load_imm32( R_ECX, pc );                            // 5
   490     store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
   491     MOV_moff32_EAX( xlat_get_lut_entry(pc) ); // 5
   492     AND_imm8s_r32( 0xFC, R_EAX ); // 3
   493     load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   494     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
   495     POP_r32(R_EBP);
   496     RET();
   497 }
   499 /**
   500  * Write the block trailer (exception handling block)
   501  */
   502 void sh4_translate_end_block( sh4addr_t pc ) {
   503     if( sh4_x86.branch_taken == FALSE ) {
   504 	// Didn't exit unconditionally already, so write the termination here
   505 	exit_block( pc, pc );
   506     }
   507     if( sh4_x86.backpatch_posn != 0 ) {
   508 	uint8_t *end_ptr = xlat_output;
   509 	// Exception termination. Jump block for various exception codes:
   510 	PUSH_imm32( EXC_DATA_ADDR_READ );
   511 	JMP_rel8( 33, target1 );
   512 	PUSH_imm32( EXC_DATA_ADDR_WRITE );
   513 	JMP_rel8( 26, target2 );
   514 	PUSH_imm32( EXC_ILLEGAL );
   515 	JMP_rel8( 19, target3 );
   516 	PUSH_imm32( EXC_SLOT_ILLEGAL ); 
   517 	JMP_rel8( 12, target4 );
   518 	PUSH_imm32( EXC_FPU_DISABLED ); 
   519 	JMP_rel8( 5, target5 );
   520 	PUSH_imm32( EXC_SLOT_FPU_DISABLED );
   521 	// target
   522 	JMP_TARGET(target1);
   523 	JMP_TARGET(target2);
   524 	JMP_TARGET(target3);
   525 	JMP_TARGET(target4);
   526 	JMP_TARGET(target5);
   527 	// Raise exception
   528 	load_spreg( R_ECX, REG_OFFSET(pc) );
   529 	ADD_r32_r32( R_EDX, R_ECX );
   530 	ADD_r32_r32( R_EDX, R_ECX );
   531 	store_spreg( R_ECX, REG_OFFSET(pc) );
   532 	MOV_moff32_EAX( &sh4_cpu_period );
   533 	MUL_r32( R_EDX );
   534 	ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
   536 	call_func0( sh4_raise_exception );
   537 	ADD_imm8s_r32( 4, R_ESP );
   538 	load_spreg( R_EAX, REG_OFFSET(pc) );
   539 	call_func1(xlat_get_code,R_EAX);
   540 	POP_r32(R_EBP);
   541 	RET();
   543 	sh4_x86_do_backpatch( end_ptr );
   544     }
   545 }
   546 #endif
   548 /* Exception checks - Note that all exception checks will clobber EAX */
   549 #define precheck() load_imm32(R_EDX, (pc-sh4_x86.block_start_pc-(sh4_x86.in_delay_slot?2:0))>>1)
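/* Annotation: precheck() leaves the zero-based index of the current
 * instruction in EDX (delay-slot instructions are charged to their branch).
 * The exception trailer consumes it twice: pc is advanced by 2*EDX to
 * recover the faulting PC, and slice_cycle by EDX * sh4_cpu_period to
 * account for the partially executed block. E.g. a fault in the third
 * instruction of a block starting at 0x8C001000 sets EDX = (4 >> 1) = 2.
 */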
   551 #define check_priv( ) \
   552     if( !sh4_x86.priv_checked ) { \
   553 	sh4_x86.priv_checked = TRUE;\
   554 	precheck();\
   555 	load_spreg( R_EAX, R_SR );\
   556 	AND_imm32_r32( SR_MD, R_EAX );\
   557 	if( sh4_x86.in_delay_slot ) {\
   558 	    JE_exit( EXIT_SLOT_ILLEGAL );\
   559 	} else {\
   560 	    JE_exit( EXIT_ILLEGAL );\
   561 	}\
   562     }\
   565 static void check_priv_no_precheck()
   566 {
   567     if( !sh4_x86.priv_checked ) {
   568 	sh4_x86.priv_checked = TRUE;
   569 	load_spreg( R_EAX, R_SR );
   570 	AND_imm32_r32( SR_MD, R_EAX );
   571 	if( sh4_x86.in_delay_slot ) {
   572 	    JE_exit( EXIT_SLOT_ILLEGAL );
   573 	} else {
   574 	    JE_exit( EXIT_ILLEGAL );
   575 	}
   576     }
   577 }
   579 #define check_fpuen( ) \
   580     if( !sh4_x86.fpuen_checked ) {\
   581 	sh4_x86.fpuen_checked = TRUE;\
   582 	precheck();\
   583 	load_spreg( R_EAX, R_SR );\
   584 	AND_imm32_r32( SR_FD, R_EAX );\
   585 	if( sh4_x86.in_delay_slot ) {\
   586 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);\
   587 	} else {\
   588 	    JNE_exit(EXIT_FPU_DISABLED);\
   589 	}\
   590     }
   592 static void check_fpuen_no_precheck()
   593 {
   594     if( !sh4_x86.fpuen_checked ) {
   595 	sh4_x86.fpuen_checked = TRUE;
   596 	load_spreg( R_EAX, R_SR );
   597 	AND_imm32_r32( SR_FD, R_EAX );
   598 	if( sh4_x86.in_delay_slot ) {
   599 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);
   600 	} else {
   601 	    JNE_exit(EXIT_FPU_DISABLED);
   602 	}
   603     }
   605 }
   607 static void check_ralign16( int x86reg )
   608 {
   609     TEST_imm32_r32( 0x00000001, x86reg );
   610     JNE_exit(EXIT_DATA_ADDR_READ);
   611 }
   613 static void check_walign16( int x86reg )
   614 {
   615     TEST_imm32_r32( 0x00000001, x86reg );
   616     JNE_exit(EXIT_DATA_ADDR_WRITE);
   617 }
   619 static void check_ralign32( int x86reg )
   620 {
   621     TEST_imm32_r32( 0x00000003, x86reg );
   622     JNE_exit(EXIT_DATA_ADDR_READ);
   623 }
   624 static void check_walign32( int x86reg )
   625 {
   626     TEST_imm32_r32( 0x00000003, x86reg );
   627     JNE_exit(EXIT_DATA_ADDR_WRITE);
   628 }
   630 #define UNDEF()
   631 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   632 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   633 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   634 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   635 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   636 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   637 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   639 #define SLOTILLEGAL() precheck(); JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
   643 /**
    644  * Emit the 'start of block' assembly. Sets up the stack frame and saves
    645  * SI/DI as required.
   646  */
   647 void sh4_translate_begin_block( sh4addr_t pc ) 
   648 {
   649     PUSH_r32(R_EBP);
   650     /* mov &sh4r, ebp */
   651     load_ptr( R_EBP, &sh4r );
   653     sh4_x86.in_delay_slot = FALSE;
   654     sh4_x86.priv_checked = FALSE;
   655     sh4_x86.fpuen_checked = FALSE;
   656     sh4_x86.branch_taken = FALSE;
   657     sh4_x86.backpatch_posn = 0;
   658     sh4_x86.block_start_pc = pc;
   659     sh4_x86.tstate = TSTATE_NONE;
   660 }
   662 /**
   663  * Exit the block with sh4r.pc already written
   664  * Bytes: 15
   665  */
    666 void exit_block_pcset( sh4addr_t pc )
   667 {
   668     load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
   669     ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
   670     load_spreg( R_EAX, REG_OFFSET(pc) );
   671     call_func1(xlat_get_code,R_EAX);
   672     POP_r32(R_EBP);
   673     RET();
   674 }
   676 extern uint16_t *sh4_icache;
   677 extern uint32_t sh4_icache_addr;
    679 /**
    680  * Translate a single instruction. Delayed branches are handled specially
    681  * by translating both the branch and the delayed instruction as a single
    682  * unit.
    683  *
    684  * @return true if the instruction marks the end of a basic block
    685  * (eg a branch or an illegal instruction)
    686  */
   687 uint32_t sh4_translate_instruction( sh4addr_t pc )
   688 {
   689     uint32_t ir;
   690     /* Read instruction */
   691     uint32_t pageaddr = pc >> 12;
   692     if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
   693 	ir = sh4_icache[(pc&0xFFF)>>1];
   694     } else {
   695 	sh4_icache = (uint16_t *)mem_get_page(pc);
   696 	if( ((uintptr_t)sh4_icache) < MAX_IO_REGIONS ) {
   697 	    /* If someone's actually been so daft as to try to execute out of an IO
    698 	     * region, fall back on the full-blown memory read
   699 	     */
   700 	    sh4_icache = NULL;
   701 	    ir = sh4_read_word(pc);
   702 	} else {
   703 	    sh4_icache_addr = pageaddr;
   704 	    ir = sh4_icache[(pc&0xFFF)>>1];
   705 	}
   706     }
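    /* Annotation: sh4_icache caches the host pointer for the most recently
     * fetched 4KB page, so straight-line translation costs one
     * mem_get_page() per page rather than one address-space lookup per
     * 16-bit opcode; values below MAX_IO_REGIONS are region tokens rather
     * than host pointers, hence the fall-back to sh4_read_word() there.
     */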
   708 %%
   709 /* ALU operations */
   710 ADD Rm, Rn {:
   711     load_reg( R_EAX, Rm );
   712     load_reg( R_ECX, Rn );
   713     ADD_r32_r32( R_EAX, R_ECX );
   714     store_reg( R_ECX, Rn );
   715     sh4_x86.tstate = TSTATE_NONE;
   716 :}
   717 ADD #imm, Rn {:  
   718     load_reg( R_EAX, Rn );
   719     ADD_imm8s_r32( imm, R_EAX );
   720     store_reg( R_EAX, Rn );
   721     sh4_x86.tstate = TSTATE_NONE;
   722 :}
   723 ADDC Rm, Rn {:
   724     if( sh4_x86.tstate != TSTATE_C ) {
   725 	LDC_t();
   726     }
   727     load_reg( R_EAX, Rm );
   728     load_reg( R_ECX, Rn );
   729     ADC_r32_r32( R_EAX, R_ECX );
   730     store_reg( R_ECX, Rn );
   731     SETC_t();
   732     sh4_x86.tstate = TSTATE_C;
   733 :}
   734 ADDV Rm, Rn {:
   735     load_reg( R_EAX, Rm );
   736     load_reg( R_ECX, Rn );
   737     ADD_r32_r32( R_EAX, R_ECX );
   738     store_reg( R_ECX, Rn );
   739     SETO_t();
   740     sh4_x86.tstate = TSTATE_O;
   741 :}
   742 AND Rm, Rn {:
   743     load_reg( R_EAX, Rm );
   744     load_reg( R_ECX, Rn );
   745     AND_r32_r32( R_EAX, R_ECX );
   746     store_reg( R_ECX, Rn );
   747     sh4_x86.tstate = TSTATE_NONE;
   748 :}
   749 AND #imm, R0 {:  
   750     load_reg( R_EAX, 0 );
   751     AND_imm32_r32(imm, R_EAX); 
   752     store_reg( R_EAX, 0 );
   753     sh4_x86.tstate = TSTATE_NONE;
   754 :}
   755 AND.B #imm, @(R0, GBR) {: 
   756     load_reg( R_EAX, 0 );
   757     load_spreg( R_ECX, R_GBR );
   758     ADD_r32_r32( R_EAX, R_ECX );
   759     PUSH_r32(R_ECX);
   760     MEM_READ_BYTE( R_ECX, R_EAX );
   761     POP_r32(R_ECX);
   762     AND_imm32_r32(imm, R_EAX );
   763     MEM_WRITE_BYTE( R_ECX, R_EAX );
   764     sh4_x86.tstate = TSTATE_NONE;
   765 :}
   766 CMP/EQ Rm, Rn {:  
   767     load_reg( R_EAX, Rm );
   768     load_reg( R_ECX, Rn );
   769     CMP_r32_r32( R_EAX, R_ECX );
   770     SETE_t();
   771     sh4_x86.tstate = TSTATE_E;
   772 :}
   773 CMP/EQ #imm, R0 {:  
   774     load_reg( R_EAX, 0 );
   775     CMP_imm8s_r32(imm, R_EAX);
   776     SETE_t();
   777     sh4_x86.tstate = TSTATE_E;
   778 :}
   779 CMP/GE Rm, Rn {:  
   780     load_reg( R_EAX, Rm );
   781     load_reg( R_ECX, Rn );
   782     CMP_r32_r32( R_EAX, R_ECX );
   783     SETGE_t();
   784     sh4_x86.tstate = TSTATE_GE;
   785 :}
   786 CMP/GT Rm, Rn {: 
   787     load_reg( R_EAX, Rm );
   788     load_reg( R_ECX, Rn );
   789     CMP_r32_r32( R_EAX, R_ECX );
   790     SETG_t();
   791     sh4_x86.tstate = TSTATE_G;
   792 :}
   793 CMP/HI Rm, Rn {:  
   794     load_reg( R_EAX, Rm );
   795     load_reg( R_ECX, Rn );
   796     CMP_r32_r32( R_EAX, R_ECX );
   797     SETA_t();
   798     sh4_x86.tstate = TSTATE_A;
   799 :}
   800 CMP/HS Rm, Rn {: 
   801     load_reg( R_EAX, Rm );
   802     load_reg( R_ECX, Rn );
   803     CMP_r32_r32( R_EAX, R_ECX );
   804     SETAE_t();
   805     sh4_x86.tstate = TSTATE_AE;
   806  :}
   807 CMP/PL Rn {: 
   808     load_reg( R_EAX, Rn );
   809     CMP_imm8s_r32( 0, R_EAX );
   810     SETG_t();
   811     sh4_x86.tstate = TSTATE_G;
   812 :}
   813 CMP/PZ Rn {:  
   814     load_reg( R_EAX, Rn );
   815     CMP_imm8s_r32( 0, R_EAX );
   816     SETGE_t();
   817     sh4_x86.tstate = TSTATE_GE;
   818 :}
   819 CMP/STR Rm, Rn {:  
   820     load_reg( R_EAX, Rm );
   821     load_reg( R_ECX, Rn );
   822     XOR_r32_r32( R_ECX, R_EAX );
   823     TEST_r8_r8( R_AL, R_AL );
   824     JE_rel8(13, target1);
   825     TEST_r8_r8( R_AH, R_AH ); // 2
   826     JE_rel8(9, target2);
   827     SHR_imm8_r32( 16, R_EAX ); // 3
   828     TEST_r8_r8( R_AL, R_AL ); // 2
   829     JE_rel8(2, target3);
   830     TEST_r8_r8( R_AH, R_AH ); // 2
   831     JMP_TARGET(target1);
   832     JMP_TARGET(target2);
   833     JMP_TARGET(target3);
   834     SETE_t();
   835     sh4_x86.tstate = TSTATE_E;
   836 :}
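/* Annotation: a reference model for the CMP/STR sequence above - T is set
 * when any of the four corresponding bytes of Rm and Rn are equal, which
 * the XOR plus per-byte zero tests implement, short-circuiting to SETE_t():
 *
 *     uint32_t x = Rm ^ Rn;
 *     T = ((x & 0x000000FF) == 0) || ((x & 0x0000FF00) == 0) ||
 *         ((x & 0x00FF0000) == 0) || ((x & 0xFF000000) == 0);
 */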
   837 DIV0S Rm, Rn {:
   838     load_reg( R_EAX, Rm );
   839     load_reg( R_ECX, Rn );
   840     SHR_imm8_r32( 31, R_EAX );
   841     SHR_imm8_r32( 31, R_ECX );
   842     store_spreg( R_EAX, R_M );
   843     store_spreg( R_ECX, R_Q );
   844     CMP_r32_r32( R_EAX, R_ECX );
   845     SETNE_t();
   846     sh4_x86.tstate = TSTATE_NE;
   847 :}
   848 DIV0U {:  
   849     XOR_r32_r32( R_EAX, R_EAX );
   850     store_spreg( R_EAX, R_Q );
   851     store_spreg( R_EAX, R_M );
   852     store_spreg( R_EAX, R_T );
   853     sh4_x86.tstate = TSTATE_C; // works for DIV1
   854 :}
   855 DIV1 Rm, Rn {:
   856     load_spreg( R_ECX, R_M );
   857     load_reg( R_EAX, Rn );
   858     if( sh4_x86.tstate != TSTATE_C ) {
   859 	LDC_t();
   860     }
   861     RCL1_r32( R_EAX );
   862     SETC_r8( R_DL ); // Q'
   863     CMP_sh4r_r32( R_Q, R_ECX );
   864     JE_rel8(5, mqequal);
   865     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   866     JMP_rel8(3, end);
   867     JMP_TARGET(mqequal);
   868     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   869     JMP_TARGET(end);
   870     store_reg( R_EAX, Rn ); // Done with Rn now
   871     SETC_r8(R_AL); // tmp1
   872     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   873     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   874     store_spreg( R_ECX, R_Q );
   875     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   876     MOVZX_r8_r32( R_AL, R_EAX );
   877     store_spreg( R_EAX, R_T );
   878     sh4_x86.tstate = TSTATE_NONE;
   879 :}
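/* Annotation: DIV1 is one step of SH4's non-restoring division. What the
 * sequence above computes, per the SH7750 manual: shift the next dividend
 * bit in through T (RCL), capture the shifted-out bit as Q', then subtract
 * Rm if Q == M or add Rm if Q != M; the new Q is (Q' ^ carry) ^ M and T
 * becomes !(Q' ^ carry). Repeated 32 times after DIV0S/DIV0U setup, this
 * produces a 32-bit quotient one bit at a time.
 */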
   880 DMULS.L Rm, Rn {:  
   881     load_reg( R_EAX, Rm );
   882     load_reg( R_ECX, Rn );
   883     IMUL_r32(R_ECX);
   884     store_spreg( R_EDX, R_MACH );
   885     store_spreg( R_EAX, R_MACL );
   886     sh4_x86.tstate = TSTATE_NONE;
   887 :}
   888 DMULU.L Rm, Rn {:  
   889     load_reg( R_EAX, Rm );
   890     load_reg( R_ECX, Rn );
   891     MUL_r32(R_ECX);
   892     store_spreg( R_EDX, R_MACH );
   893     store_spreg( R_EAX, R_MACL );    
   894     sh4_x86.tstate = TSTATE_NONE;
   895 :}
   896 DT Rn {:  
   897     load_reg( R_EAX, Rn );
   898     ADD_imm8s_r32( -1, R_EAX );
   899     store_reg( R_EAX, Rn );
   900     SETE_t();
   901     sh4_x86.tstate = TSTATE_E;
   902 :}
   903 EXTS.B Rm, Rn {:  
   904     load_reg( R_EAX, Rm );
   905     MOVSX_r8_r32( R_EAX, R_EAX );
   906     store_reg( R_EAX, Rn );
   907 :}
   908 EXTS.W Rm, Rn {:  
   909     load_reg( R_EAX, Rm );
   910     MOVSX_r16_r32( R_EAX, R_EAX );
   911     store_reg( R_EAX, Rn );
   912 :}
   913 EXTU.B Rm, Rn {:  
   914     load_reg( R_EAX, Rm );
   915     MOVZX_r8_r32( R_EAX, R_EAX );
   916     store_reg( R_EAX, Rn );
   917 :}
   918 EXTU.W Rm, Rn {:  
   919     load_reg( R_EAX, Rm );
   920     MOVZX_r16_r32( R_EAX, R_EAX );
   921     store_reg( R_EAX, Rn );
   922 :}
   923 MAC.L @Rm+, @Rn+ {:  
   924     load_reg( R_ECX, Rm );
   925     precheck();
   926     check_ralign32( R_ECX );
   927     load_reg( R_ECX, Rn );
   928     check_ralign32( R_ECX );
   929     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   930     MEM_READ_LONG( R_ECX, R_EAX );
   931     PUSH_r32( R_EAX );
   932     load_reg( R_ECX, Rm );
   933     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   934     MEM_READ_LONG( R_ECX, R_EAX );
   935     POP_r32( R_ECX );
   936     IMUL_r32( R_ECX );
   937     ADD_r32_sh4r( R_EAX, R_MACL );
   938     ADC_r32_sh4r( R_EDX, R_MACH );
   940     load_spreg( R_ECX, R_S );
   941     TEST_r32_r32(R_ECX, R_ECX);
   942     JE_rel8( CALL_FUNC0_SIZE, nosat );
   943     call_func0( signsat48 );
   944     JMP_TARGET( nosat );
   945     sh4_x86.tstate = TSTATE_NONE;
   946 :}
   947 MAC.W @Rm+, @Rn+ {:  
   948     load_reg( R_ECX, Rm );
   949     precheck();
   950     check_ralign16( R_ECX );
   951     load_reg( R_ECX, Rn );
   952     check_ralign16( R_ECX );
   953     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   954     MEM_READ_WORD( R_ECX, R_EAX );
   955     PUSH_r32( R_EAX );
   956     load_reg( R_ECX, Rm );
   957     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   958     MEM_READ_WORD( R_ECX, R_EAX );
   959     POP_r32( R_ECX );
   960     IMUL_r32( R_ECX );
   962     load_spreg( R_ECX, R_S );
   963     TEST_r32_r32( R_ECX, R_ECX );
   964     JE_rel8( 47, nosat );
   966     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   967     JNO_rel8( 51, end );            // 2
   968     load_imm32( R_EDX, 1 );         // 5
   969     store_spreg( R_EDX, R_MACH );   // 6
   970     JS_rel8( 13, positive );        // 2
   971     load_imm32( R_EAX, 0x80000000 );// 5
   972     store_spreg( R_EAX, R_MACL );   // 6
   973     JMP_rel8( 25, end2 );           // 2
   975     JMP_TARGET(positive);
   976     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   977     store_spreg( R_EAX, R_MACL );   // 6
   978     JMP_rel8( 12, end3);            // 2
   980     JMP_TARGET(nosat);
   981     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   982     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   983     JMP_TARGET(end);
   984     JMP_TARGET(end2);
   985     JMP_TARGET(end3);
   986     sh4_x86.tstate = TSTATE_NONE;
   987 :}
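/* Annotation: the two MAC.W paths above implement the S-flag semantics -
 * with S clear the 16x16 product accumulates into the full 64-bit
 * MACH:MACL pair (the nosat path), while with S set the accumulation
 * saturates to 32 bits: on signed overflow MACL is clamped to 0x80000000
 * or 0x7FFFFFFF and MACH is set to 1 to flag that saturation occurred.
 */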
   988 MOVT Rn {:  
   989     load_spreg( R_EAX, R_T );
   990     store_reg( R_EAX, Rn );
   991 :}
   992 MUL.L Rm, Rn {:  
   993     load_reg( R_EAX, Rm );
   994     load_reg( R_ECX, Rn );
   995     MUL_r32( R_ECX );
   996     store_spreg( R_EAX, R_MACL );
   997     sh4_x86.tstate = TSTATE_NONE;
   998 :}
   999 MULS.W Rm, Rn {:
  1000     load_reg16s( R_EAX, Rm );
  1001     load_reg16s( R_ECX, Rn );
  1002     MUL_r32( R_ECX );
  1003     store_spreg( R_EAX, R_MACL );
  1004     sh4_x86.tstate = TSTATE_NONE;
  1005 :}
  1006 MULU.W Rm, Rn {:  
  1007     load_reg16u( R_EAX, Rm );
  1008     load_reg16u( R_ECX, Rn );
  1009     MUL_r32( R_ECX );
  1010     store_spreg( R_EAX, R_MACL );
  1011     sh4_x86.tstate = TSTATE_NONE;
  1012 :}
  1013 NEG Rm, Rn {:
  1014     load_reg( R_EAX, Rm );
  1015     NEG_r32( R_EAX );
  1016     store_reg( R_EAX, Rn );
  1017     sh4_x86.tstate = TSTATE_NONE;
  1018 :}
  1019 NEGC Rm, Rn {:  
  1020     load_reg( R_EAX, Rm );
  1021     XOR_r32_r32( R_ECX, R_ECX );
  1022     LDC_t();
  1023     SBB_r32_r32( R_EAX, R_ECX );
  1024     store_reg( R_ECX, Rn );
  1025     SETC_t();
  1026     sh4_x86.tstate = TSTATE_C;
  1027 :}
  1028 NOT Rm, Rn {:  
  1029     load_reg( R_EAX, Rm );
  1030     NOT_r32( R_EAX );
  1031     store_reg( R_EAX, Rn );
  1032     sh4_x86.tstate = TSTATE_NONE;
  1033 :}
  1034 OR Rm, Rn {:  
  1035     load_reg( R_EAX, Rm );
  1036     load_reg( R_ECX, Rn );
  1037     OR_r32_r32( R_EAX, R_ECX );
  1038     store_reg( R_ECX, Rn );
  1039     sh4_x86.tstate = TSTATE_NONE;
  1040 :}
  1041 OR #imm, R0 {:
  1042     load_reg( R_EAX, 0 );
  1043     OR_imm32_r32(imm, R_EAX);
  1044     store_reg( R_EAX, 0 );
  1045     sh4_x86.tstate = TSTATE_NONE;
  1046 :}
  1047 OR.B #imm, @(R0, GBR) {:  
  1048     load_reg( R_EAX, 0 );
  1049     load_spreg( R_ECX, R_GBR );
  1050     ADD_r32_r32( R_EAX, R_ECX );
  1051     PUSH_r32(R_ECX);
  1052     MEM_READ_BYTE( R_ECX, R_EAX );
  1053     POP_r32(R_ECX);
  1054     OR_imm32_r32(imm, R_EAX );
  1055     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1056     sh4_x86.tstate = TSTATE_NONE;
  1057 :}
  1058 ROTCL Rn {:
  1059     load_reg( R_EAX, Rn );
  1060     if( sh4_x86.tstate != TSTATE_C ) {
   1061 	LDC_t();
   1062     }
   1063     RCL1_r32( R_EAX );
  1064     store_reg( R_EAX, Rn );
  1065     SETC_t();
  1066     sh4_x86.tstate = TSTATE_C;
  1067 :}
  1068 ROTCR Rn {:  
  1069     load_reg( R_EAX, Rn );
  1070     if( sh4_x86.tstate != TSTATE_C ) {
   1071 	LDC_t();
   1072     }
   1073     RCR1_r32( R_EAX );
  1074     store_reg( R_EAX, Rn );
  1075     SETC_t();
  1076     sh4_x86.tstate = TSTATE_C;
  1077 :}
  1078 ROTL Rn {:  
  1079     load_reg( R_EAX, Rn );
  1080     ROL1_r32( R_EAX );
  1081     store_reg( R_EAX, Rn );
  1082     SETC_t();
  1083     sh4_x86.tstate = TSTATE_C;
  1084 :}
  1085 ROTR Rn {:  
  1086     load_reg( R_EAX, Rn );
  1087     ROR1_r32( R_EAX );
  1088     store_reg( R_EAX, Rn );
  1089     SETC_t();
  1090     sh4_x86.tstate = TSTATE_C;
  1091 :}
  1092 SHAD Rm, Rn {:
  1093     /* Annoyingly enough, not directly convertible */
  1094     load_reg( R_EAX, Rn );
  1095     load_reg( R_ECX, Rm );
  1096     CMP_imm32_r32( 0, R_ECX );
  1097     JGE_rel8(16, doshl);
  1099     NEG_r32( R_ECX );      // 2
  1100     AND_imm8_r8( 0x1F, R_CL ); // 3
  1101     JE_rel8( 4, emptysar);     // 2
  1102     SAR_r32_CL( R_EAX );       // 2
  1103     JMP_rel8(10, end);          // 2
  1105     JMP_TARGET(emptysar);
  1106     SAR_imm8_r32(31, R_EAX );  // 3
  1107     JMP_rel8(5, end2);
  1109     JMP_TARGET(doshl);
  1110     AND_imm8_r8( 0x1F, R_CL ); // 3
  1111     SHL_r32_CL( R_EAX );       // 2
  1112     JMP_TARGET(end);
  1113     JMP_TARGET(end2);
  1114     store_reg( R_EAX, Rn );
  1115     sh4_x86.tstate = TSTATE_NONE;
  1116 :}
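/* Annotation: a reference model for the SHAD sequence above, including the
 * emptysar corner case where a negative count with zero low bits (e.g.
 * Rm = -32) must still produce an all-sign-bits result:
 *
 *     if( (int32_t)Rm >= 0 )       Rn <<= (Rm & 31);
 *     else if( ((-Rm) & 31) == 0 ) Rn = ((int32_t)Rn) >> 31;
 *     else                         Rn = ((int32_t)Rn) >> ((-Rm) & 31);
 */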
  1117 SHLD Rm, Rn {:  
  1118     load_reg( R_EAX, Rn );
  1119     load_reg( R_ECX, Rm );
  1120     CMP_imm32_r32( 0, R_ECX );
  1121     JGE_rel8(15, doshl);
  1123     NEG_r32( R_ECX );      // 2
  1124     AND_imm8_r8( 0x1F, R_CL ); // 3
  1125     JE_rel8( 4, emptyshr );
  1126     SHR_r32_CL( R_EAX );       // 2
  1127     JMP_rel8(9, end);          // 2
  1129     JMP_TARGET(emptyshr);
  1130     XOR_r32_r32( R_EAX, R_EAX );
  1131     JMP_rel8(5, end2);
  1133     JMP_TARGET(doshl);
  1134     AND_imm8_r8( 0x1F, R_CL ); // 3
  1135     SHL_r32_CL( R_EAX );       // 2
  1136     JMP_TARGET(end);
  1137     JMP_TARGET(end2);
  1138     store_reg( R_EAX, Rn );
  1139     sh4_x86.tstate = TSTATE_NONE;
  1140 :}
  1141 SHAL Rn {: 
  1142     load_reg( R_EAX, Rn );
  1143     SHL1_r32( R_EAX );
  1144     SETC_t();
  1145     store_reg( R_EAX, Rn );
  1146     sh4_x86.tstate = TSTATE_C;
  1147 :}
  1148 SHAR Rn {:  
  1149     load_reg( R_EAX, Rn );
  1150     SAR1_r32( R_EAX );
  1151     SETC_t();
  1152     store_reg( R_EAX, Rn );
  1153     sh4_x86.tstate = TSTATE_C;
  1154 :}
  1155 SHLL Rn {:  
  1156     load_reg( R_EAX, Rn );
  1157     SHL1_r32( R_EAX );
  1158     SETC_t();
  1159     store_reg( R_EAX, Rn );
  1160     sh4_x86.tstate = TSTATE_C;
  1161 :}
  1162 SHLL2 Rn {:
  1163     load_reg( R_EAX, Rn );
  1164     SHL_imm8_r32( 2, R_EAX );
  1165     store_reg( R_EAX, Rn );
  1166     sh4_x86.tstate = TSTATE_NONE;
  1167 :}
  1168 SHLL8 Rn {:  
  1169     load_reg( R_EAX, Rn );
  1170     SHL_imm8_r32( 8, R_EAX );
  1171     store_reg( R_EAX, Rn );
  1172     sh4_x86.tstate = TSTATE_NONE;
  1173 :}
  1174 SHLL16 Rn {:  
  1175     load_reg( R_EAX, Rn );
  1176     SHL_imm8_r32( 16, R_EAX );
  1177     store_reg( R_EAX, Rn );
  1178     sh4_x86.tstate = TSTATE_NONE;
  1179 :}
  1180 SHLR Rn {:  
  1181     load_reg( R_EAX, Rn );
  1182     SHR1_r32( R_EAX );
  1183     SETC_t();
  1184     store_reg( R_EAX, Rn );
  1185     sh4_x86.tstate = TSTATE_C;
  1186 :}
  1187 SHLR2 Rn {:  
  1188     load_reg( R_EAX, Rn );
  1189     SHR_imm8_r32( 2, R_EAX );
  1190     store_reg( R_EAX, Rn );
  1191     sh4_x86.tstate = TSTATE_NONE;
  1192 :}
  1193 SHLR8 Rn {:  
  1194     load_reg( R_EAX, Rn );
  1195     SHR_imm8_r32( 8, R_EAX );
  1196     store_reg( R_EAX, Rn );
  1197     sh4_x86.tstate = TSTATE_NONE;
  1198 :}
  1199 SHLR16 Rn {:  
  1200     load_reg( R_EAX, Rn );
  1201     SHR_imm8_r32( 16, R_EAX );
  1202     store_reg( R_EAX, Rn );
  1203     sh4_x86.tstate = TSTATE_NONE;
  1204 :}
  1205 SUB Rm, Rn {:  
  1206     load_reg( R_EAX, Rm );
  1207     load_reg( R_ECX, Rn );
  1208     SUB_r32_r32( R_EAX, R_ECX );
  1209     store_reg( R_ECX, Rn );
  1210     sh4_x86.tstate = TSTATE_NONE;
  1211 :}
  1212 SUBC Rm, Rn {:  
  1213     load_reg( R_EAX, Rm );
  1214     load_reg( R_ECX, Rn );
  1215     if( sh4_x86.tstate != TSTATE_C ) {
   1216 	LDC_t();
   1217     }
   1218     SBB_r32_r32( R_EAX, R_ECX );
  1219     store_reg( R_ECX, Rn );
  1220     SETC_t();
  1221     sh4_x86.tstate = TSTATE_C;
  1222 :}
  1223 SUBV Rm, Rn {:  
  1224     load_reg( R_EAX, Rm );
  1225     load_reg( R_ECX, Rn );
  1226     SUB_r32_r32( R_EAX, R_ECX );
  1227     store_reg( R_ECX, Rn );
  1228     SETO_t();
  1229     sh4_x86.tstate = TSTATE_O;
  1230 :}
  1231 SWAP.B Rm, Rn {:  
  1232     load_reg( R_EAX, Rm );
  1233     XCHG_r8_r8( R_AL, R_AH );
  1234     store_reg( R_EAX, Rn );
  1235 :}
  1236 SWAP.W Rm, Rn {:  
  1237     load_reg( R_EAX, Rm );
  1238     MOV_r32_r32( R_EAX, R_ECX );
  1239     SHL_imm8_r32( 16, R_ECX );
  1240     SHR_imm8_r32( 16, R_EAX );
  1241     OR_r32_r32( R_EAX, R_ECX );
  1242     store_reg( R_ECX, Rn );
  1243     sh4_x86.tstate = TSTATE_NONE;
  1244 :}
  1245 TAS.B @Rn {:  
  1246     load_reg( R_ECX, Rn );
  1247     MEM_READ_BYTE( R_ECX, R_EAX );
  1248     TEST_r8_r8( R_AL, R_AL );
  1249     SETE_t();
  1250     OR_imm8_r8( 0x80, R_AL );
  1251     load_reg( R_ECX, Rn );
  1252     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1253     sh4_x86.tstate = TSTATE_NONE;
  1254 :}
  1255 TST Rm, Rn {:  
  1256     load_reg( R_EAX, Rm );
  1257     load_reg( R_ECX, Rn );
  1258     TEST_r32_r32( R_EAX, R_ECX );
  1259     SETE_t();
  1260     sh4_x86.tstate = TSTATE_E;
  1261 :}
  1262 TST #imm, R0 {:  
  1263     load_reg( R_EAX, 0 );
  1264     TEST_imm32_r32( imm, R_EAX );
  1265     SETE_t();
  1266     sh4_x86.tstate = TSTATE_E;
  1267 :}
  1268 TST.B #imm, @(R0, GBR) {:  
  1269     load_reg( R_EAX, 0);
   1270     load_spreg( R_ECX, R_GBR );
  1271     ADD_r32_r32( R_EAX, R_ECX );
  1272     MEM_READ_BYTE( R_ECX, R_EAX );
  1273     TEST_imm8_r8( imm, R_AL );
  1274     SETE_t();
  1275     sh4_x86.tstate = TSTATE_E;
  1276 :}
  1277 XOR Rm, Rn {:  
  1278     load_reg( R_EAX, Rm );
  1279     load_reg( R_ECX, Rn );
  1280     XOR_r32_r32( R_EAX, R_ECX );
  1281     store_reg( R_ECX, Rn );
  1282     sh4_x86.tstate = TSTATE_NONE;
  1283 :}
  1284 XOR #imm, R0 {:  
  1285     load_reg( R_EAX, 0 );
  1286     XOR_imm32_r32( imm, R_EAX );
  1287     store_reg( R_EAX, 0 );
  1288     sh4_x86.tstate = TSTATE_NONE;
  1289 :}
  1290 XOR.B #imm, @(R0, GBR) {:  
  1291     load_reg( R_EAX, 0 );
  1292     load_spreg( R_ECX, R_GBR );
  1293     ADD_r32_r32( R_EAX, R_ECX );
  1294     PUSH_r32(R_ECX);
  1295     MEM_READ_BYTE(R_ECX, R_EAX);
  1296     POP_r32(R_ECX);
  1297     XOR_imm32_r32( imm, R_EAX );
  1298     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1299     sh4_x86.tstate = TSTATE_NONE;
  1300 :}
  1301 XTRCT Rm, Rn {:
  1302     load_reg( R_EAX, Rm );
  1303     load_reg( R_ECX, Rn );
  1304     SHL_imm8_r32( 16, R_EAX );
  1305     SHR_imm8_r32( 16, R_ECX );
  1306     OR_r32_r32( R_EAX, R_ECX );
  1307     store_reg( R_ECX, Rn );
  1308     sh4_x86.tstate = TSTATE_NONE;
  1309 :}
  1311 /* Data move instructions */
  1312 MOV Rm, Rn {:  
  1313     load_reg( R_EAX, Rm );
  1314     store_reg( R_EAX, Rn );
  1315 :}
  1316 MOV #imm, Rn {:  
  1317     load_imm32( R_EAX, imm );
  1318     store_reg( R_EAX, Rn );
  1319 :}
  1320 MOV.B Rm, @Rn {:  
  1321     load_reg( R_EAX, Rm );
  1322     load_reg( R_ECX, Rn );
  1323     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1324     sh4_x86.tstate = TSTATE_NONE;
  1325 :}
  1326 MOV.B Rm, @-Rn {:  
  1327     load_reg( R_EAX, Rm );
  1328     load_reg( R_ECX, Rn );
  1329     ADD_imm8s_r32( -1, R_ECX );
  1330     store_reg( R_ECX, Rn );
  1331     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1332     sh4_x86.tstate = TSTATE_NONE;
  1333 :}
  1334 MOV.B Rm, @(R0, Rn) {:  
  1335     load_reg( R_EAX, 0 );
  1336     load_reg( R_ECX, Rn );
  1337     ADD_r32_r32( R_EAX, R_ECX );
  1338     load_reg( R_EAX, Rm );
  1339     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1340     sh4_x86.tstate = TSTATE_NONE;
  1341 :}
  1342 MOV.B R0, @(disp, GBR) {:  
  1343     load_reg( R_EAX, 0 );
  1344     load_spreg( R_ECX, R_GBR );
  1345     ADD_imm32_r32( disp, R_ECX );
  1346     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1347     sh4_x86.tstate = TSTATE_NONE;
  1348 :}
  1349 MOV.B R0, @(disp, Rn) {:  
  1350     load_reg( R_EAX, 0 );
  1351     load_reg( R_ECX, Rn );
  1352     ADD_imm32_r32( disp, R_ECX );
  1353     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1354     sh4_x86.tstate = TSTATE_NONE;
  1355 :}
  1356 MOV.B @Rm, Rn {:  
  1357     load_reg( R_ECX, Rm );
  1358     MEM_READ_BYTE( R_ECX, R_EAX );
  1359     store_reg( R_EAX, Rn );
  1360     sh4_x86.tstate = TSTATE_NONE;
  1361 :}
  1362 MOV.B @Rm+, Rn {:  
  1363     load_reg( R_ECX, Rm );
  1364     MOV_r32_r32( R_ECX, R_EAX );
  1365     ADD_imm8s_r32( 1, R_EAX );
  1366     store_reg( R_EAX, Rm );
  1367     MEM_READ_BYTE( R_ECX, R_EAX );
  1368     store_reg( R_EAX, Rn );
  1369     sh4_x86.tstate = TSTATE_NONE;
  1370 :}
  1371 MOV.B @(R0, Rm), Rn {:  
  1372     load_reg( R_EAX, 0 );
  1373     load_reg( R_ECX, Rm );
  1374     ADD_r32_r32( R_EAX, R_ECX );
  1375     MEM_READ_BYTE( R_ECX, R_EAX );
  1376     store_reg( R_EAX, Rn );
  1377     sh4_x86.tstate = TSTATE_NONE;
  1378 :}
  1379 MOV.B @(disp, GBR), R0 {:  
  1380     load_spreg( R_ECX, R_GBR );
  1381     ADD_imm32_r32( disp, R_ECX );
  1382     MEM_READ_BYTE( R_ECX, R_EAX );
  1383     store_reg( R_EAX, 0 );
  1384     sh4_x86.tstate = TSTATE_NONE;
  1385 :}
  1386 MOV.B @(disp, Rm), R0 {:  
  1387     load_reg( R_ECX, Rm );
  1388     ADD_imm32_r32( disp, R_ECX );
  1389     MEM_READ_BYTE( R_ECX, R_EAX );
  1390     store_reg( R_EAX, 0 );
  1391     sh4_x86.tstate = TSTATE_NONE;
  1392 :}
  1393 MOV.L Rm, @Rn {:
  1394     load_reg( R_EAX, Rm );
  1395     load_reg( R_ECX, Rn );
  1396     precheck();
  1397     check_walign32(R_ECX);
  1398     MEM_WRITE_LONG( R_ECX, R_EAX );
  1399     sh4_x86.tstate = TSTATE_NONE;
  1400 :}
  1401 MOV.L Rm, @-Rn {:  
  1402     load_reg( R_EAX, Rm );
  1403     load_reg( R_ECX, Rn );
  1404     precheck();
  1405     check_walign32( R_ECX );
  1406     ADD_imm8s_r32( -4, R_ECX );
  1407     store_reg( R_ECX, Rn );
  1408     MEM_WRITE_LONG( R_ECX, R_EAX );
  1409     sh4_x86.tstate = TSTATE_NONE;
  1410 :}
  1411 MOV.L Rm, @(R0, Rn) {:  
  1412     load_reg( R_EAX, 0 );
  1413     load_reg( R_ECX, Rn );
  1414     ADD_r32_r32( R_EAX, R_ECX );
  1415     precheck();
  1416     check_walign32( R_ECX );
  1417     load_reg( R_EAX, Rm );
  1418     MEM_WRITE_LONG( R_ECX, R_EAX );
  1419     sh4_x86.tstate = TSTATE_NONE;
  1420 :}
  1421 MOV.L R0, @(disp, GBR) {:  
  1422     load_spreg( R_ECX, R_GBR );
  1423     load_reg( R_EAX, 0 );
  1424     ADD_imm32_r32( disp, R_ECX );
  1425     precheck();
  1426     check_walign32( R_ECX );
  1427     MEM_WRITE_LONG( R_ECX, R_EAX );
  1428     sh4_x86.tstate = TSTATE_NONE;
  1429 :}
  1430 MOV.L Rm, @(disp, Rn) {:  
  1431     load_reg( R_ECX, Rn );
  1432     load_reg( R_EAX, Rm );
  1433     ADD_imm32_r32( disp, R_ECX );
  1434     precheck();
  1435     check_walign32( R_ECX );
  1436     MEM_WRITE_LONG( R_ECX, R_EAX );
  1437     sh4_x86.tstate = TSTATE_NONE;
  1438 :}
  1439 MOV.L @Rm, Rn {:  
  1440     load_reg( R_ECX, Rm );
  1441     precheck();
  1442     check_ralign32( R_ECX );
  1443     MEM_READ_LONG( R_ECX, R_EAX );
  1444     store_reg( R_EAX, Rn );
  1445     sh4_x86.tstate = TSTATE_NONE;
  1446 :}
  1447 MOV.L @Rm+, Rn {:  
  1448     load_reg( R_EAX, Rm );
  1449     precheck();
  1450     check_ralign32( R_EAX );
  1451     MOV_r32_r32( R_EAX, R_ECX );
  1452     ADD_imm8s_r32( 4, R_EAX );
  1453     store_reg( R_EAX, Rm );
  1454     MEM_READ_LONG( R_ECX, R_EAX );
  1455     store_reg( R_EAX, Rn );
  1456     sh4_x86.tstate = TSTATE_NONE;
  1457 :}
  1458 MOV.L @(R0, Rm), Rn {:  
  1459     load_reg( R_EAX, 0 );
  1460     load_reg( R_ECX, Rm );
  1461     ADD_r32_r32( R_EAX, R_ECX );
  1462     precheck();
  1463     check_ralign32( R_ECX );
  1464     MEM_READ_LONG( R_ECX, R_EAX );
  1465     store_reg( R_EAX, Rn );
  1466     sh4_x86.tstate = TSTATE_NONE;
  1467 :}
  1468 MOV.L @(disp, GBR), R0 {:
  1469     load_spreg( R_ECX, R_GBR );
  1470     ADD_imm32_r32( disp, R_ECX );
  1471     precheck();
  1472     check_ralign32( R_ECX );
  1473     MEM_READ_LONG( R_ECX, R_EAX );
  1474     store_reg( R_EAX, 0 );
  1475     sh4_x86.tstate = TSTATE_NONE;
  1476 :}
  1477 MOV.L @(disp, PC), Rn {:  
  1478     if( sh4_x86.in_delay_slot ) {
  1479 	SLOTILLEGAL();
  1480     } else {
  1481 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1482 	sh4ptr_t ptr = mem_get_region(target);
  1483 	if( ptr != NULL ) {
  1484 	    MOV_moff32_EAX( ptr );
  1485 	} else {
  1486 	    load_imm32( R_ECX, target );
   1487 	    MEM_READ_LONG( R_ECX, R_EAX );
   1488 	}
   1489 	store_reg( R_EAX, Rn );
   1490 	sh4_x86.tstate = TSTATE_NONE;
   1491     }
   1492 :}
  1493 MOV.L @(disp, Rm), Rn {:  
  1494     load_reg( R_ECX, Rm );
  1495     ADD_imm8s_r32( disp, R_ECX );
  1496     precheck();
  1497     check_ralign32( R_ECX );
  1498     MEM_READ_LONG( R_ECX, R_EAX );
  1499     store_reg( R_EAX, Rn );
  1500     sh4_x86.tstate = TSTATE_NONE;
  1501 :}
  1502 MOV.W Rm, @Rn {:  
  1503     load_reg( R_ECX, Rn );
  1504     precheck();
  1505     check_walign16( R_ECX );
  1506     load_reg( R_EAX, Rm );
  1507     MEM_WRITE_WORD( R_ECX, R_EAX );
  1508     sh4_x86.tstate = TSTATE_NONE;
  1509 :}
  1510 MOV.W Rm, @-Rn {:  
  1511     load_reg( R_ECX, Rn );
  1512     precheck();
  1513     check_walign16( R_ECX );
  1514     load_reg( R_EAX, Rm );
  1515     ADD_imm8s_r32( -2, R_ECX );
  1516     store_reg( R_ECX, Rn );
  1517     MEM_WRITE_WORD( R_ECX, R_EAX );
  1518     sh4_x86.tstate = TSTATE_NONE;
  1519 :}
  1520 MOV.W Rm, @(R0, Rn) {:  
  1521     load_reg( R_EAX, 0 );
  1522     load_reg( R_ECX, Rn );
  1523     ADD_r32_r32( R_EAX, R_ECX );
  1524     precheck();
  1525     check_walign16( R_ECX );
  1526     load_reg( R_EAX, Rm );
  1527     MEM_WRITE_WORD( R_ECX, R_EAX );
  1528     sh4_x86.tstate = TSTATE_NONE;
  1529 :}
  1530 MOV.W R0, @(disp, GBR) {:  
  1531     load_spreg( R_ECX, R_GBR );
  1532     load_reg( R_EAX, 0 );
  1533     ADD_imm32_r32( disp, R_ECX );
  1534     precheck();
  1535     check_walign16( R_ECX );
  1536     MEM_WRITE_WORD( R_ECX, R_EAX );
  1537     sh4_x86.tstate = TSTATE_NONE;
  1538 :}
  1539 MOV.W R0, @(disp, Rn) {:  
  1540     load_reg( R_ECX, Rn );
  1541     load_reg( R_EAX, 0 );
  1542     ADD_imm32_r32( disp, R_ECX );
  1543     precheck();
  1544     check_walign16( R_ECX );
  1545     MEM_WRITE_WORD( R_ECX, R_EAX );
  1546     sh4_x86.tstate = TSTATE_NONE;
  1547 :}
  1548 MOV.W @Rm, Rn {:  
  1549     load_reg( R_ECX, Rm );
  1550     precheck();
  1551     check_ralign16( R_ECX );
  1552     MEM_READ_WORD( R_ECX, R_EAX );
  1553     store_reg( R_EAX, Rn );
  1554     sh4_x86.tstate = TSTATE_NONE;
  1555 :}
  1556 MOV.W @Rm+, Rn {:  
  1557     load_reg( R_EAX, Rm );
  1558     precheck();
  1559     check_ralign16( R_EAX );
  1560     MOV_r32_r32( R_EAX, R_ECX );
  1561     ADD_imm8s_r32( 2, R_EAX );
  1562     store_reg( R_EAX, Rm );
  1563     MEM_READ_WORD( R_ECX, R_EAX );
  1564     store_reg( R_EAX, Rn );
  1565     sh4_x86.tstate = TSTATE_NONE;
  1566 :}
  1567 MOV.W @(R0, Rm), Rn {:  
  1568     load_reg( R_EAX, 0 );
  1569     load_reg( R_ECX, Rm );
  1570     ADD_r32_r32( R_EAX, R_ECX );
  1571     precheck();
  1572     check_ralign16( R_ECX );
  1573     MEM_READ_WORD( R_ECX, R_EAX );
  1574     store_reg( R_EAX, Rn );
  1575     sh4_x86.tstate = TSTATE_NONE;
  1576 :}
  1577 MOV.W @(disp, GBR), R0 {:  
  1578     load_spreg( R_ECX, R_GBR );
  1579     ADD_imm32_r32( disp, R_ECX );
  1580     precheck();
  1581     check_ralign16( R_ECX );
  1582     MEM_READ_WORD( R_ECX, R_EAX );
  1583     store_reg( R_EAX, 0 );
  1584     sh4_x86.tstate = TSTATE_NONE;
  1585 :}
  1586 MOV.W @(disp, PC), Rn {:  
  1587     if( sh4_x86.in_delay_slot ) {
  1588 	SLOTILLEGAL();
  1589     } else {
  1590 	load_imm32( R_ECX, pc + disp + 4 );
  1591 	MEM_READ_WORD( R_ECX, R_EAX );
  1592 	store_reg( R_EAX, Rn );
   1593 	sh4_x86.tstate = TSTATE_NONE;
   1594     }
   1595 :}
  1596 MOV.W @(disp, Rm), R0 {:  
  1597     load_reg( R_ECX, Rm );
  1598     ADD_imm32_r32( disp, R_ECX );
  1599     precheck();
  1600     check_ralign16( R_ECX );
  1601     MEM_READ_WORD( R_ECX, R_EAX );
  1602     store_reg( R_EAX, 0 );
  1603     sh4_x86.tstate = TSTATE_NONE;
  1604 :}
  1605 MOVA @(disp, PC), R0 {:  
  1606     if( sh4_x86.in_delay_slot ) {
  1607 	SLOTILLEGAL();
  1608     } else {
  1609 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
   1610 	store_reg( R_ECX, 0 );
   1611     }
   1612 :}
  1613 MOVCA.L R0, @Rn {:  
  1614     load_reg( R_EAX, 0 );
  1615     load_reg( R_ECX, Rn );
  1616     precheck();
  1617     check_walign32( R_ECX );
  1618     MEM_WRITE_LONG( R_ECX, R_EAX );
  1619     sh4_x86.tstate = TSTATE_NONE;
  1620 :}
  1622 /* Control transfer instructions */
  1623 BF disp {:
  1624     if( sh4_x86.in_delay_slot ) {
  1625 	SLOTILLEGAL();
  1626     } else {
  1627 	JT_rel8( EXIT_BLOCK_SIZE, nottaken );
  1628 	exit_block( disp + pc + 4, pc+2 );
  1629 	JMP_TARGET(nottaken);
   1630 	return 2;
   1631     }
   1632 :}
  1633 BF/S disp {:
  1634     if( sh4_x86.in_delay_slot ) {
  1635 	SLOTILLEGAL();
  1636     } else {
  1637 	sh4_x86.in_delay_slot = TRUE;
  1638 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1639 	    CMP_imm8s_sh4r( 1, R_T );
   1640 	    sh4_x86.tstate = TSTATE_E;
   1641 	}
   1642 	OP(0x0F); OP(0x80+sh4_x86.tstate); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32: skip the taken path when T is set
  1643 	sh4_translate_instruction(pc+2);
  1644 	exit_block( disp + pc + 4, pc+4 );
  1645 	// not taken
  1646 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1647 	sh4_translate_instruction(pc+2);
   1648 	return 4;
   1649     }
   1650 :}
  1651 BRA disp {:  
  1652     if( sh4_x86.in_delay_slot ) {
  1653 	SLOTILLEGAL();
  1654     } else {
  1655 	sh4_x86.in_delay_slot = TRUE;
  1656 	sh4_translate_instruction( pc + 2 );
  1657 	exit_block( disp + pc + 4, pc+4 );
  1658 	sh4_x86.branch_taken = TRUE;
   1659 	return 4;
   1660     }
   1661 :}
  1662 BRAF Rn {:  
  1663     if( sh4_x86.in_delay_slot ) {
  1664 	SLOTILLEGAL();
  1665     } else {
  1666 	load_reg( R_EAX, Rn );
  1667 	ADD_imm32_r32( pc + 4, R_EAX );
  1668 	store_spreg( R_EAX, REG_OFFSET(pc) );
  1669 	sh4_x86.in_delay_slot = TRUE;
  1670 	sh4_x86.tstate = TSTATE_NONE;
  1671 	sh4_translate_instruction( pc + 2 );
  1672 	exit_block_pcset(pc+2);
  1673 	sh4_x86.branch_taken = TRUE;
   1674 	return 4;
   1675     }
   1676 :}
  1677 BSR disp {:  
  1678     if( sh4_x86.in_delay_slot ) {
  1679 	SLOTILLEGAL();
  1680     } else {
  1681 	load_imm32( R_EAX, pc + 4 );
  1682 	store_spreg( R_EAX, R_PR );
  1683 	sh4_x86.in_delay_slot = TRUE;
  1684 	sh4_translate_instruction( pc + 2 );
  1685 	exit_block( disp + pc + 4, pc+4 );
  1686 	sh4_x86.branch_taken = TRUE;
   1687 	return 4;
   1688     }
   1689 :}
  1690 BSRF Rn {:  
  1691     if( sh4_x86.in_delay_slot ) {
  1692 	SLOTILLEGAL();
  1693     } else {
  1694 	load_imm32( R_ECX, pc + 4 );
  1695 	store_spreg( R_ECX, R_PR );
  1696 	ADD_sh4r_r32( REG_OFFSET(r[Rn]), R_ECX );
  1697 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1698 	sh4_x86.in_delay_slot = TRUE;
  1699 	sh4_x86.tstate = TSTATE_NONE;
  1700 	sh4_translate_instruction( pc + 2 );
  1701 	exit_block_pcset(pc+2);
  1702 	sh4_x86.branch_taken = TRUE;
   1703 	return 4;
   1704     }
   1705 :}
  1706 BT disp {:
  1707     if( sh4_x86.in_delay_slot ) {
  1708 	SLOTILLEGAL();
  1709     } else {
  1710 	JF_rel8( EXIT_BLOCK_SIZE, nottaken );
  1711 	exit_block( disp + pc + 4, pc+2 );
  1712 	JMP_TARGET(nottaken);
   1713 	return 2;
   1714     }
   1715 :}
  1716 BT/S disp {:
  1717     if( sh4_x86.in_delay_slot ) {
  1718 	SLOTILLEGAL();
  1719     } else {
  1720 	sh4_x86.in_delay_slot = TRUE;
  1721 	if( sh4_x86.tstate == TSTATE_NONE ) {
  1722 	    CMP_imm8s_sh4r( 1, R_T );
   1723 	    sh4_x86.tstate = TSTATE_E;
   1724 	}
   1725 	OP(0x0F); OP(0x80+(sh4_x86.tstate^1)); uint32_t *patch = (uint32_t *)xlat_output; OP32(0); // Jcc rel32: skip the taken path when T is clear
  1726 	sh4_translate_instruction(pc+2);
  1727 	exit_block( disp + pc + 4, pc+4 );
  1728 	// not taken
  1729 	*patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1730 	sh4_translate_instruction(pc+2);
   1731 	return 4;
   1732     }
   1733 :}
  1734 JMP @Rn {:  
  1735     if( sh4_x86.in_delay_slot ) {
  1736 	SLOTILLEGAL();
  1737     } else {
  1738 	load_reg( R_ECX, Rn );
  1739 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1740 	sh4_x86.in_delay_slot = TRUE;
  1741 	sh4_translate_instruction(pc+2);
  1742 	exit_block_pcset(pc+2);
  1743 	sh4_x86.branch_taken = TRUE;
   1744 	return 4;
   1745     }
   1746 :}
  1747 JSR @Rn {:  
  1748     if( sh4_x86.in_delay_slot ) {
  1749 	SLOTILLEGAL();
  1750     } else {
  1751 	load_imm32( R_EAX, pc + 4 );
  1752 	store_spreg( R_EAX, R_PR );
  1753 	load_reg( R_ECX, Rn );
  1754 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1755 	sh4_x86.in_delay_slot = TRUE;
  1756 	sh4_translate_instruction(pc+2);
  1757 	exit_block_pcset(pc+2);
  1758 	sh4_x86.branch_taken = TRUE;
   1759 	return 4;
   1760     }
   1761 :}
  1762 RTE {:  
  1763     if( sh4_x86.in_delay_slot ) {
  1764 	SLOTILLEGAL();
  1765     } else {
  1766 	check_priv();
  1767 	load_spreg( R_ECX, R_SPC );
  1768 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1769 	load_spreg( R_EAX, R_SSR );
  1770 	call_func1( sh4_write_sr, R_EAX );
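       	// sh4_write_sr may change the MD and FD bits, so the cached
       	// privilege and FPU-enabled checks are invalidated below.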
  1771 	sh4_x86.in_delay_slot = TRUE;
  1772 	sh4_x86.priv_checked = FALSE;
  1773 	sh4_x86.fpuen_checked = FALSE;
  1774 	sh4_x86.tstate = TSTATE_NONE;
  1775 	sh4_translate_instruction(pc+2);
  1776 	exit_block_pcset(pc+2);
  1777 	sh4_x86.branch_taken = TRUE;
  1778 	return 4;
  1779     }
  1780 :}
  1781 RTS {:  
  1782     if( sh4_x86.in_delay_slot ) {
  1783 	SLOTILLEGAL();
  1784     } else {
  1785 	load_spreg( R_ECX, R_PR );
  1786 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1787 	sh4_x86.in_delay_slot = TRUE;
  1788 	sh4_translate_instruction(pc+2);
  1789 	exit_block_pcset(pc+2);
  1790 	sh4_x86.branch_taken = TRUE;
  1791 	return 4;
  1792     }
  1793 :}
  1794 TRAPA #imm {:  
  1795     if( sh4_x86.in_delay_slot ) {
  1796 	SLOTILLEGAL();
  1797     } else {
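       	// Commit the address of the following instruction first;
       	// sh4_raise_trap() is presumed to pick up sh4r.pc when it saves
       	// SPC (TRAPA saves PC+2 on the SH4).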
  1798 	load_imm32( R_ECX, pc+2 );
  1799 	store_spreg( R_ECX, REG_OFFSET(pc) );
  1800 	load_imm32( R_EAX, imm );
  1801 	call_func1( sh4_raise_trap, R_EAX );
  1802 	sh4_x86.tstate = TSTATE_NONE;
  1803 	exit_block_pcset(pc);
  1804 	sh4_x86.branch_taken = TRUE;
  1805 	return 2;
  1806     }
  1807 :}
  1808 UNDEF {:  
  1809     if( sh4_x86.in_delay_slot ) {
  1810 	SLOTILLEGAL();
  1811     } else {
  1812 	precheck();
  1813 	JMP_exit(EXIT_ILLEGAL);
  1814 	return 2;
  1815     }
  1816 :}
  1818 CLRMAC {:  
  1819     XOR_r32_r32(R_EAX, R_EAX);
  1820     store_spreg( R_EAX, R_MACL );
  1821     store_spreg( R_EAX, R_MACH );
  1822     sh4_x86.tstate = TSTATE_NONE;
  1823 :}
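       /* The CLC/STC + SETC sequences below materialise the new bit value via
        * the x86 carry flag; for CLRT/SETT this also leaves T in CF, which
        * tstate = TSTATE_C records so a following BT/BF can branch on the
        * flag directly. */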
  1824 CLRS {:
  1825     CLC();
  1826     SETC_sh4r(R_S);
  1827     sh4_x86.tstate = TSTATE_C;
  1828 :}
  1829 CLRT {:  
  1830     CLC();
  1831     SETC_t();
  1832     sh4_x86.tstate = TSTATE_C;
  1833 :}
  1834 SETS {:  
  1835     STC();
  1836     SETC_sh4r(R_S);
  1837     sh4_x86.tstate = TSTATE_C;
  1838 :}
  1839 SETT {:  
  1840     STC();
  1841     SETC_t();
  1842     sh4_x86.tstate = TSTATE_C;
  1843 :}
  1845 /* Floating point moves */
  1846 FMOV FRm, FRn {:  
  1847     /* As horrible as this looks, it's actually covering 5 separate cases:
  1848      * 1. 32-bit fr-to-fr (SZ=0)
  1849      * 2. 64-bit dr-to-dr (SZ=1, FRm&1 == 0, FRn&1 == 0 )
  1850      * 3. 64-bit dr-to-xd (SZ=1, FRm&1 == 0, FRn&1 == 1 )
  1851      * 4. 64-bit xd-to-dr (SZ=1, FRm&1 == 1, FRn&1 == 0 )
  1852      * 5. 64-bit xd-to-xd (SZ=1, FRm&1 == 1, FRn&1 == 1 )
  1853      */
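           /* In assembler terms: case 2 corresponds to e.g. FMOV DR0, DR2,
            * case 3 to FMOV DR0, XD2, and case 4 to FMOV XD0, DR2. */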
  1854     check_fpuen();
  1855     load_spreg( R_ECX, R_FPSCR );
  1856     load_fr_bank( R_EDX );
  1857     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1858     JNE_rel8(8, doublesize);
  1859     load_fr( R_EDX, R_EAX, FRm ); // SZ=0 branch
  1860     store_fr( R_EDX, R_EAX, FRn );
  1861     if( FRm&1 ) {
  1862 	JMP_rel8(24, end);
  1863 	JMP_TARGET(doublesize);
  1864 	load_xf_bank( R_ECX ); 
  1865 	load_fr( R_ECX, R_EAX, FRm-1 );
  1866 	if( FRn&1 ) {
  1867 	    load_fr( R_ECX, R_EDX, FRm );
  1868 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1869 	    store_fr( R_ECX, R_EDX, FRn );
  1870 	} else /* FRn&1 == 0 */ {
  1871 	    load_fr( R_ECX, R_ECX, FRm );
  1872 	    store_fr( R_EDX, R_EAX, FRn );
  1873 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1874 	}
  1875 	JMP_TARGET(end);
  1876     } else /* FRm&1 == 0 */ {
  1877 	if( FRn&1 ) {
  1878 	    JMP_rel8(24, end);
  1879 	    load_xf_bank( R_ECX );
  1880 	    load_fr( R_EDX, R_EAX, FRm );
  1881 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1882 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1883 	    store_fr( R_ECX, R_EDX, FRn );
  1884 	    JMP_TARGET(end);
  1885 	} else /* FRn&1 == 0 */ {
  1886 	    JMP_rel8(12, end);
  1887 	    load_fr( R_EDX, R_EAX, FRm );
  1888 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1889 	    store_fr( R_EDX, R_EAX, FRn );
  1890 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1891 	    JMP_TARGET(end);
  1892 	}
  1893     }
  1894     sh4_x86.tstate = TSTATE_NONE;
  1895 :}
  1896 FMOV FRm, @Rn {: 
  1897     precheck();
  1898     check_fpuen_no_precheck();
  1899     load_reg( R_ECX, Rn );
  1900     check_walign32( R_ECX );
  1901     load_spreg( R_EDX, R_FPSCR );
  1902     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1903     JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
  1904     load_fr_bank( R_EDX );
  1905     load_fr( R_EDX, R_EAX, FRm );
  1906     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1907     if( FRm&1 ) {
  1908 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  1909 	JMP_TARGET(doublesize);
  1910 	load_xf_bank( R_EDX );
  1911 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1912 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1913 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1914 	JMP_TARGET(end);
  1915     } else {
  1916 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  1917 	JMP_TARGET(doublesize);
  1918 	load_fr_bank( R_EDX );
  1919 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1920 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1921 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1922 	JMP_TARGET(end);
  1923     }
  1924     sh4_x86.tstate = TSTATE_NONE;
  1925 :}
  1926 FMOV @Rm, FRn {:  
  1927     precheck();
  1928     check_fpuen_no_precheck();
  1929     load_reg( R_ECX, Rm );
  1930     check_ralign32( R_ECX );
  1931     load_spreg( R_EDX, R_FPSCR );
  1932     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1933     JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
  1934     MEM_READ_LONG( R_ECX, R_EAX );
  1935     load_fr_bank( R_EDX );
  1936     store_fr( R_EDX, R_EAX, FRn );
  1937     if( FRn&1 ) {
  1938 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  1939 	JMP_TARGET(doublesize);
  1940 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1941 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  1942 	load_xf_bank( R_EDX );
  1943 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1944 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1945 	JMP_TARGET(end);
  1946     } else {
  1947 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  1948 	JMP_TARGET(doublesize);
  1949 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  1950 	load_fr_bank( R_EDX );
  1951 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  1952 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  1953 	JMP_TARGET(end);
  1954     }
  1955     sh4_x86.tstate = TSTATE_NONE;
  1956 :}
  1957 FMOV FRm, @-Rn {:  
  1958     precheck();
  1959     check_fpuen_no_precheck();
  1960     load_reg( R_ECX, Rn );
  1961     check_walign32( R_ECX );
  1962     load_spreg( R_EDX, R_FPSCR );
  1963     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  1964     JNE_rel8(14 + CALL_FUNC2_SIZE, doublesize);
  1965     load_fr_bank( R_EDX );
  1966     load_fr( R_EDX, R_EAX, FRm );
  1967     ADD_imm8s_r32(-4,R_ECX);
  1968     store_reg( R_ECX, Rn );
  1969     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  1970     if( FRm&1 ) {
  1971 	JMP_rel8( 24 + MEM_WRITE_DOUBLE_SIZE, end );
  1972 	JMP_TARGET(doublesize);
  1973 	load_xf_bank( R_EDX );
  1974 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1975 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1976 	ADD_imm8s_r32(-8,R_ECX);
  1977 	store_reg( R_ECX, Rn );
  1978 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1979 	JMP_TARGET(end);
  1980     } else {
  1981 	JMP_rel8( 15 + MEM_WRITE_DOUBLE_SIZE, end );
  1982 	JMP_TARGET(doublesize);
  1983 	load_fr_bank( R_EDX );
  1984 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  1985 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  1986 	ADD_imm8s_r32(-8,R_ECX);
  1987 	store_reg( R_ECX, Rn );
  1988 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  1989 	JMP_TARGET(end);
  1990     }
  1991     sh4_x86.tstate = TSTATE_NONE;
  1992 :}
  1993 FMOV @Rm+, FRn {:
  1994     precheck();
  1995     check_fpuen_no_precheck();
  1996     load_reg( R_ECX, Rm );
  1997     check_ralign32( R_ECX );
  1998     MOV_r32_r32( R_ECX, R_EAX );
  1999     load_spreg( R_EDX, R_FPSCR );
  2000     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  2001     JNE_rel8(14 + CALL_FUNC1_SIZE, doublesize);
  2002     ADD_imm8s_r32( 4, R_EAX );
  2003     store_reg( R_EAX, Rm );
  2004     MEM_READ_LONG( R_ECX, R_EAX );
  2005     load_fr_bank( R_EDX );
  2006     store_fr( R_EDX, R_EAX, FRn );
  2007     if( FRn&1 ) {
  2008 	JMP_rel8(27 + MEM_READ_DOUBLE_SIZE, end);
  2009 	JMP_TARGET(doublesize);
  2010 	ADD_imm8s_r32( 8, R_EAX );
  2011 	store_reg(R_EAX, Rm);
  2012 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2013 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  2014 	load_xf_bank( R_EDX );
  2015 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2016 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2017 	JMP_TARGET(end);
  2018     } else {
  2019 	JMP_rel8(15 + MEM_READ_DOUBLE_SIZE, end);
       	JMP_TARGET(doublesize); // label for the JNE above; present in the other FMOV @Rm variants
  2020 	ADD_imm8s_r32( 8, R_EAX );
  2021 	store_reg(R_EAX, Rm);
  2022 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2023 	load_fr_bank( R_EDX );
  2024 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2025 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2026 	JMP_TARGET(end);
  2027     }
  2028     sh4_x86.tstate = TSTATE_NONE;
  2029 :}
  2030 FMOV FRm, @(R0, Rn) {:  
  2031     precheck();
  2032     check_fpuen_no_precheck();
  2033     load_reg( R_ECX, Rn );
  2034     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  2035     check_walign32( R_ECX );
  2036     load_spreg( R_EDX, R_FPSCR );
  2037     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  2038     JNE_rel8(8 + CALL_FUNC2_SIZE, doublesize);
  2039     load_fr_bank( R_EDX );
  2040     load_fr( R_EDX, R_EAX, FRm );
  2041     MEM_WRITE_LONG( R_ECX, R_EAX ); // 12
  2042     if( FRm&1 ) {
  2043 	JMP_rel8( 18 + MEM_WRITE_DOUBLE_SIZE, end );
  2044 	JMP_TARGET(doublesize);
  2045 	load_xf_bank( R_EDX );
  2046 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  2047 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  2048 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  2049 	JMP_TARGET(end);
  2050     } else {
  2051 	JMP_rel8( 9 + MEM_WRITE_DOUBLE_SIZE, end );
  2052 	JMP_TARGET(doublesize);
  2053 	load_fr_bank( R_EDX );
  2054 	load_fr( R_EDX, R_EAX, FRm&0x0E );
  2055 	load_fr( R_EDX, R_EDX, FRm|0x01 );
  2056 	MEM_WRITE_DOUBLE( R_ECX, R_EAX, R_EDX );
  2057 	JMP_TARGET(end);
  2058     }
  2059     sh4_x86.tstate = TSTATE_NONE;
  2060 :}
  2061 FMOV @(R0, Rm), FRn {:  
  2062     precheck();
  2063     check_fpuen_no_precheck();
  2064     load_reg( R_ECX, Rm );
  2065     ADD_sh4r_r32( REG_OFFSET(r[0]), R_ECX );
  2066     check_ralign32( R_ECX );
  2067     load_spreg( R_EDX, R_FPSCR );
  2068     TEST_imm32_r32( FPSCR_SZ, R_EDX );
  2069     JNE_rel8(8 + CALL_FUNC1_SIZE, doublesize);
  2070     MEM_READ_LONG( R_ECX, R_EAX );
  2071     load_fr_bank( R_EDX );
  2072     store_fr( R_EDX, R_EAX, FRn );
  2073     if( FRn&1 ) {
  2074 	JMP_rel8(21 + MEM_READ_DOUBLE_SIZE, end);
  2075 	JMP_TARGET(doublesize);
  2076 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2077 	load_spreg( R_EDX, R_FPSCR ); // assume read_long clobbered it
  2078 	load_xf_bank( R_EDX );
  2079 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2080 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2081 	JMP_TARGET(end);
  2082     } else {
  2083 	JMP_rel8(9 + MEM_READ_DOUBLE_SIZE, end);
  2084 	JMP_TARGET(doublesize);
  2085 	MEM_READ_DOUBLE( R_ECX, R_EAX, R_ECX );
  2086 	load_fr_bank( R_EDX );
  2087 	store_fr( R_EDX, R_EAX, FRn&0x0E );
  2088 	store_fr( R_EDX, R_ECX, FRn|0x01 );
  2089 	JMP_TARGET(end);
  2090     }
  2091     sh4_x86.tstate = TSTATE_NONE;
  2092 :}
  2093 FLDI0 FRn {:  /* IFF PR=0 */
  2094     check_fpuen();
  2095     load_spreg( R_ECX, R_FPSCR );
  2096     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2097     JNE_rel8(8, end);
  2098     XOR_r32_r32( R_EAX, R_EAX );
  2099     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  2100     store_fr( R_ECX, R_EAX, FRn );
  2101     JMP_TARGET(end);
  2102     sh4_x86.tstate = TSTATE_NONE;
  2103 :}
  2104 FLDI1 FRn {:  /* IFF PR=0 */
  2105     check_fpuen();
  2106     load_spreg( R_ECX, R_FPSCR );
  2107     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2108     JNE_rel8(11, end);
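           // 0x3F800000 is 1.0f in IEEE 754 single precision.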
  2109     load_imm32(R_EAX, 0x3F800000);
  2110     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  2111     store_fr( R_ECX, R_EAX, FRn );
  2112     JMP_TARGET(end);
  2113     sh4_x86.tstate = TSTATE_NONE;
  2114 :}
  2116 FLOAT FPUL, FRn {:  
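           // Convert the integer in FPUL with x87 FILD; the PR bit selects
           // whether the result is popped back as a single or a double.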
  2117     check_fpuen();
  2118     load_spreg( R_ECX, R_FPSCR );
  2119     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  2120     FILD_sh4r(R_FPUL);
  2121     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2122     JNE_rel8(5, doubleprec);
  2123     pop_fr( R_EDX, FRn );
  2124     JMP_rel8(3, end);
  2125     JMP_TARGET(doubleprec);
  2126     pop_dr( R_EDX, FRn );
  2127     JMP_TARGET(end);
  2128     sh4_x86.tstate = TSTATE_NONE;
  2129 :}
  2130 FTRC FRm, FPUL {:  
  2131     check_fpuen();
  2132     load_spreg( R_ECX, R_FPSCR );
  2133     load_fr_bank( R_EDX );
  2134     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2135     JNE_rel8(5, doubleprec);
  2136     push_fr( R_EDX, FRm );
  2137     JMP_rel8(3, doop);
  2138     JMP_TARGET(doubleprec);
  2139     push_dr( R_EDX, FRm );
  2140     JMP_TARGET( doop );
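           // FTRC truncates towards zero and saturates at the 32-bit limits:
           // clamp against max_int/min_int, then FISTP with the FPU control
           // word temporarily switched to trunc_fcw (truncation rounding),
           // restoring the saved control word afterwards.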
  2141     load_imm32( R_ECX, (uint32_t)&max_int );
  2142     FILD_r32ind( R_ECX );
  2143     FCOMIP_st(1);
  2144     JNA_rel8( 32, sat );
  2145     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  2146     FILD_r32ind( R_ECX );           // 2
  2147     FCOMIP_st(1);                   // 2
  2148     JAE_rel8( 21, sat2 );            // 2
  2149     load_imm32( R_EAX, (uint32_t)&save_fcw );
  2150     FNSTCW_r32ind( R_EAX );
  2151     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  2152     FLDCW_r32ind( R_EDX );
  2153     FISTP_sh4r(R_FPUL);             // 3
  2154     FLDCW_r32ind( R_EAX );
  2155     JMP_rel8( 9, end );             // 2
  2157     JMP_TARGET(sat);
  2158     JMP_TARGET(sat2);
  2159     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  2160     store_spreg( R_ECX, R_FPUL );
  2161     FPOP_st();
  2162     JMP_TARGET(end);
  2163     sh4_x86.tstate = TSTATE_NONE;
  2164 :}
  2165 FLDS FRm, FPUL {:  
  2166     check_fpuen();
  2167     load_fr_bank( R_ECX );
  2168     load_fr( R_ECX, R_EAX, FRm );
  2169     store_spreg( R_EAX, R_FPUL );
  2170     sh4_x86.tstate = TSTATE_NONE;
  2171 :}
  2172 FSTS FPUL, FRn {:  
  2173     check_fpuen();
  2174     load_fr_bank( R_ECX );
  2175     load_spreg( R_EAX, R_FPUL );
  2176     store_fr( R_ECX, R_EAX, FRn );
  2177     sh4_x86.tstate = TSTATE_NONE;
  2178 :}
  2179 FCNVDS FRm, FPUL {:  
  2180     check_fpuen();
  2181     load_spreg( R_ECX, R_FPSCR );
  2182     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2183     JE_rel8(9, end); // only when PR=1
  2184     load_fr_bank( R_ECX );
  2185     push_dr( R_ECX, FRm );
  2186     pop_fpul();
  2187     JMP_TARGET(end);
  2188     sh4_x86.tstate = TSTATE_NONE;
  2189 :}
  2190 FCNVSD FPUL, FRn {:  
  2191     check_fpuen();
  2192     load_spreg( R_ECX, R_FPSCR );
  2193     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2194     JE_rel8(9, end); // only when PR=1
  2195     load_fr_bank( R_ECX );
  2196     push_fpul();
  2197     pop_dr( R_ECX, FRn );
  2198     JMP_TARGET(end);
  2199     sh4_x86.tstate = TSTATE_NONE;
  2200 :}
  2202 /* Floating point instructions */
  2203 FABS FRn {:  
  2204     check_fpuen();
  2205     load_spreg( R_ECX, R_FPSCR );
  2206     load_fr_bank( R_EDX );
  2207     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2208     JNE_rel8(10, doubleprec);
  2209     push_fr(R_EDX, FRn); // 3
  2210     FABS_st0(); // 2
  2211     pop_fr( R_EDX, FRn); //3
  2212     JMP_rel8(8,end); // 2
  2213     JMP_TARGET(doubleprec);
  2214     push_dr(R_EDX, FRn);
  2215     FABS_st0();
  2216     pop_dr(R_EDX, FRn);
  2217     JMP_TARGET(end);
  2218     sh4_x86.tstate = TSTATE_NONE;
  2219 :}
  2220 FADD FRm, FRn {:  
  2221     check_fpuen();
  2222     load_spreg( R_ECX, R_FPSCR );
  2223     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2224     load_fr_bank( R_EDX );
  2225     JNE_rel8(13,doubleprec);
  2226     push_fr(R_EDX, FRm);
  2227     push_fr(R_EDX, FRn);
  2228     FADDP_st(1);
  2229     pop_fr(R_EDX, FRn);
  2230     JMP_rel8(11,end);
  2231     JMP_TARGET(doubleprec);
  2232     push_dr(R_EDX, FRm);
  2233     push_dr(R_EDX, FRn);
  2234     FADDP_st(1);
  2235     pop_dr(R_EDX, FRn);
  2236     JMP_TARGET(end);
  2237     sh4_x86.tstate = TSTATE_NONE;
  2238 :}
  2239 FDIV FRm, FRn {:  
  2240     check_fpuen();
  2241     load_spreg( R_ECX, R_FPSCR );
  2242     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2243     load_fr_bank( R_EDX );
  2244     JNE_rel8(13, doubleprec);
  2245     push_fr(R_EDX, FRn);
  2246     push_fr(R_EDX, FRm);
  2247     FDIVP_st(1);
  2248     pop_fr(R_EDX, FRn);
  2249     JMP_rel8(11, end);
  2250     JMP_TARGET(doubleprec);
  2251     push_dr(R_EDX, FRn);
  2252     push_dr(R_EDX, FRm);
  2253     FDIVP_st(1);
  2254     pop_dr(R_EDX, FRn);
  2255     JMP_TARGET(end);
  2256     sh4_x86.tstate = TSTATE_NONE;
  2257 :}
  2258 FMAC FR0, FRm, FRn {:  
  2259     check_fpuen();
  2260     load_spreg( R_ECX, R_FPSCR );
  2261     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  2262     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2263     JNE_rel8(18, doubleprec);
  2264     push_fr( R_EDX, 0 );
  2265     push_fr( R_EDX, FRm );
  2266     FMULP_st(1);
  2267     push_fr( R_EDX, FRn );
  2268     FADDP_st(1);
  2269     pop_fr( R_EDX, FRn );
  2270     JMP_rel8(16, end);
  2271     JMP_TARGET(doubleprec);
  2272     push_dr( R_EDX, 0 );
  2273     push_dr( R_EDX, FRm );
  2274     FMULP_st(1);
  2275     push_dr( R_EDX, FRn );
  2276     FADDP_st(1);
  2277     pop_dr( R_EDX, FRn );
  2278     JMP_TARGET(end);
  2279     sh4_x86.tstate = TSTATE_NONE;
  2280 :}
  2282 FMUL FRm, FRn {:  
  2283     check_fpuen();
  2284     load_spreg( R_ECX, R_FPSCR );
  2285     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2286     load_fr_bank( R_EDX );
  2287     JNE_rel8(13, doubleprec);
  2288     push_fr(R_EDX, FRm);
  2289     push_fr(R_EDX, FRn);
  2290     FMULP_st(1);
  2291     pop_fr(R_EDX, FRn);
  2292     JMP_rel8(11, end);
  2293     JMP_TARGET(doubleprec);
  2294     push_dr(R_EDX, FRm);
  2295     push_dr(R_EDX, FRn);
  2296     FMULP_st(1);
  2297     pop_dr(R_EDX, FRn);
  2298     JMP_TARGET(end);
  2299     sh4_x86.tstate = TSTATE_NONE;
  2300 :}
  2301 FNEG FRn {:  
  2302     check_fpuen();
  2303     load_spreg( R_ECX, R_FPSCR );
  2304     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2305     load_fr_bank( R_EDX );
  2306     JNE_rel8(10, doubleprec);
  2307     push_fr(R_EDX, FRn);
  2308     FCHS_st0();
  2309     pop_fr(R_EDX, FRn);
  2310     JMP_rel8(8, end);
  2311     JMP_TARGET(doubleprec);
  2312     push_dr(R_EDX, FRn);
  2313     FCHS_st0();
  2314     pop_dr(R_EDX, FRn);
  2315     JMP_TARGET(end);
  2316     sh4_x86.tstate = TSTATE_NONE;
  2317 :}
  2318 FSRRA FRn {:  
  2319     check_fpuen();
  2320     load_spreg( R_ECX, R_FPSCR );
  2321     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2322     load_fr_bank( R_EDX );
  2323     JNE_rel8(12, end); // PR=0 only
  2324     FLD1_st0();
  2325     push_fr(R_EDX, FRn);
  2326     FSQRT_st0();
  2327     FDIVP_st(1);
  2328     pop_fr(R_EDX, FRn);
  2329     JMP_TARGET(end);
  2330     sh4_x86.tstate = TSTATE_NONE;
  2331 :}
  2332 FSQRT FRn {:  
  2333     check_fpuen();
  2334     load_spreg( R_ECX, R_FPSCR );
  2335     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2336     load_fr_bank( R_EDX );
  2337     JNE_rel8(10, doubleprec);
  2338     push_fr(R_EDX, FRn);
  2339     FSQRT_st0();
  2340     pop_fr(R_EDX, FRn);
  2341     JMP_rel8(8, end);
  2342     JMP_TARGET(doubleprec);
  2343     push_dr(R_EDX, FRn);
  2344     FSQRT_st0();
  2345     pop_dr(R_EDX, FRn);
  2346     JMP_TARGET(end);
  2347     sh4_x86.tstate = TSTATE_NONE;
  2348 :}
  2349 FSUB FRm, FRn {:  
  2350     check_fpuen();
  2351     load_spreg( R_ECX, R_FPSCR );
  2352     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2353     load_fr_bank( R_EDX );
  2354     JNE_rel8(13, doubleprec);
  2355     push_fr(R_EDX, FRn);
  2356     push_fr(R_EDX, FRm);
  2357     FSUBP_st(1);
  2358     pop_fr(R_EDX, FRn);
  2359     JMP_rel8(11, end);
  2360     JMP_TARGET(doubleprec);
  2361     push_dr(R_EDX, FRn);
  2362     push_dr(R_EDX, FRm);
  2363     FSUBP_st(1);
  2364     pop_dr(R_EDX, FRn);
  2365     JMP_TARGET(end);
  2366     sh4_x86.tstate = TSTATE_NONE;
  2367 :}
  2369 FCMP/EQ FRm, FRn {:  
  2370     check_fpuen();
  2371     load_spreg( R_ECX, R_FPSCR );
  2372     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2373     load_fr_bank( R_EDX );
  2374     JNE_rel8(8, doubleprec);
  2375     push_fr(R_EDX, FRm);
  2376     push_fr(R_EDX, FRn);
  2377     JMP_rel8(6, end);
  2378     JMP_TARGET(doubleprec);
  2379     push_dr(R_EDX, FRm);
  2380     push_dr(R_EDX, FRn);
  2381     JMP_TARGET(end);
  2382     FCOMIP_st(1);
  2383     SETE_t();
  2384     FPOP_st();
  2385     sh4_x86.tstate = TSTATE_NONE;
  2386 :}
  2387 FCMP/GT FRm, FRn {:  
  2388     check_fpuen();
  2389     load_spreg( R_ECX, R_FPSCR );
  2390     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2391     load_fr_bank( R_EDX );
  2392     JNE_rel8(8, doubleprec);
  2393     push_fr(R_EDX, FRm);
  2394     push_fr(R_EDX, FRn);
  2395     JMP_rel8(6, end);
  2396     JMP_TARGET(doubleprec);
  2397     push_dr(R_EDX, FRm);
  2398     push_dr(R_EDX, FRn);
  2399     JMP_TARGET(end);
  2400     FCOMIP_st(1);
  2401     SETA_t();
  2402     FPOP_st();
  2403     sh4_x86.tstate = TSTATE_NONE;
  2404 :}
  2406 FSCA FPUL, FRn {:  
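           // Sine/cosine of the FPUL angle via the sh4_fsca helper;
           // (FRn&0x0E)<<2 is the byte offset of the even destination pair
           // (FRn = sine, FRn+1 = cosine). PR=0 only.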
  2407     check_fpuen();
  2408     load_spreg( R_ECX, R_FPSCR );
  2409     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2410     JNE_rel8( CALL_FUNC2_SIZE + 9, doubleprec );
  2411     load_fr_bank( R_ECX );
  2412     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2413     load_spreg( R_EDX, R_FPUL );
  2414     call_func2( sh4_fsca, R_EDX, R_ECX );
  2415     JMP_TARGET(doubleprec);
  2416     sh4_x86.tstate = TSTATE_NONE;
  2417 :}
  2418 FIPR FVm, FVn {:  
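           // Four-element dot product FVm.FVn accumulated on the x87 stack;
           // the result lands in the last element of FVn, FR[(FVn<<2)+3].
           // PR=0 only.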
  2419     check_fpuen();
  2420     load_spreg( R_ECX, R_FPSCR );
  2421     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2422     JNE_rel8(44, doubleprec);
  2424     load_fr_bank( R_ECX );
  2425     push_fr( R_ECX, FVm<<2 );
  2426     push_fr( R_ECX, FVn<<2 );
  2427     FMULP_st(1);
  2428     push_fr( R_ECX, (FVm<<2)+1);
  2429     push_fr( R_ECX, (FVn<<2)+1);
  2430     FMULP_st(1);
  2431     FADDP_st(1);
  2432     push_fr( R_ECX, (FVm<<2)+2);
  2433     push_fr( R_ECX, (FVn<<2)+2);
  2434     FMULP_st(1);
  2435     FADDP_st(1);
  2436     push_fr( R_ECX, (FVm<<2)+3);
  2437     push_fr( R_ECX, (FVn<<2)+3);
  2438     FMULP_st(1);
  2439     FADDP_st(1);
  2440     pop_fr( R_ECX, (FVn<<2)+3);
  2441     JMP_TARGET(doubleprec);
  2442     sh4_x86.tstate = TSTATE_NONE;
  2443 :}
  2444 FTRV XMTRX, FVn {:  
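           // Multiply vector FVn by the XMTRX matrix (held in the other
           // float bank) via the sh4_ftrv helper. PR=0 only.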
  2445     check_fpuen();
  2446     load_spreg( R_ECX, R_FPSCR );
  2447     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2448     JNE_rel8( 18 + CALL_FUNC2_SIZE, doubleprec );
  2449     load_fr_bank( R_EDX );                 // 3
  2450     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2451     load_xf_bank( R_ECX );                 // 12
  2452     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2453     JMP_TARGET(doubleprec);
  2454     sh4_x86.tstate = TSTATE_NONE;
  2455 :}
  2457 FRCHG {:  
  2458     check_fpuen();
  2459     load_spreg( R_ECX, R_FPSCR );
  2460     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2461     store_spreg( R_ECX, R_FPSCR );
  2462     update_fr_bank( R_ECX );
  2463     sh4_x86.tstate = TSTATE_NONE;
  2464 :}
  2465 FSCHG {:  
  2466     check_fpuen();
  2467     load_spreg( R_ECX, R_FPSCR );
  2468     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2469     store_spreg( R_ECX, R_FPSCR );
  2470     sh4_x86.tstate = TSTATE_NONE;
  2471 :}
  2473 /* Processor control instructions */
  2474 LDC Rm, SR {:
  2475     if( sh4_x86.in_delay_slot ) {
  2476 	SLOTILLEGAL();
  2477     } else {
  2478 	check_priv();
  2479 	load_reg( R_EAX, Rm );
  2480 	call_func1( sh4_write_sr, R_EAX );
  2481 	sh4_x86.priv_checked = FALSE;
  2482 	sh4_x86.fpuen_checked = FALSE;
  2483 	sh4_x86.tstate = TSTATE_NONE;
  2484     }
  2485 :}
  2486 LDC Rm, GBR {: 
  2487     load_reg( R_EAX, Rm );
  2488     store_spreg( R_EAX, R_GBR );
  2489 :}
  2490 LDC Rm, VBR {:  
  2491     check_priv();
  2492     load_reg( R_EAX, Rm );
  2493     store_spreg( R_EAX, R_VBR );
  2494     sh4_x86.tstate = TSTATE_NONE;
  2495 :}
  2496 LDC Rm, SSR {:  
  2497     check_priv();
  2498     load_reg( R_EAX, Rm );
  2499     store_spreg( R_EAX, R_SSR );
  2500     sh4_x86.tstate = TSTATE_NONE;
  2501 :}
  2502 LDC Rm, SGR {:  
  2503     check_priv();
  2504     load_reg( R_EAX, Rm );
  2505     store_spreg( R_EAX, R_SGR );
  2506     sh4_x86.tstate = TSTATE_NONE;
  2507 :}
  2508 LDC Rm, SPC {:  
  2509     check_priv();
  2510     load_reg( R_EAX, Rm );
  2511     store_spreg( R_EAX, R_SPC );
  2512     sh4_x86.tstate = TSTATE_NONE;
  2513 :}
  2514 LDC Rm, DBR {:  
  2515     check_priv();
  2516     load_reg( R_EAX, Rm );
  2517     store_spreg( R_EAX, R_DBR );
  2518     sh4_x86.tstate = TSTATE_NONE;
  2519 :}
  2520 LDC Rm, Rn_BANK {:  
  2521     check_priv();
  2522     load_reg( R_EAX, Rm );
  2523     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2524     sh4_x86.tstate = TSTATE_NONE;
  2525 :}
  2526 LDC.L @Rm+, GBR {:  
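           // Post-increment load idiom shared by the LDC.L/LDS.L forms below:
           // keep the original address in ECX, bump Rm by 4, then read the
           // long from the old address.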
  2527     load_reg( R_EAX, Rm );
  2528     precheck();
  2529     check_ralign32( R_EAX );
  2530     MOV_r32_r32( R_EAX, R_ECX );
  2531     ADD_imm8s_r32( 4, R_EAX );
  2532     store_reg( R_EAX, Rm );
  2533     MEM_READ_LONG( R_ECX, R_EAX );
  2534     store_spreg( R_EAX, R_GBR );
  2535     sh4_x86.tstate = TSTATE_NONE;
  2536 :}
  2537 LDC.L @Rm+, SR {:
  2538     if( sh4_x86.in_delay_slot ) {
  2539 	SLOTILLEGAL();
  2540     } else {
  2541 	precheck();
  2542 	check_priv_no_precheck();
  2543 	load_reg( R_EAX, Rm );
  2544 	check_ralign32( R_EAX );
  2545 	MOV_r32_r32( R_EAX, R_ECX );
  2546 	ADD_imm8s_r32( 4, R_EAX );
  2547 	store_reg( R_EAX, Rm );
  2548 	MEM_READ_LONG( R_ECX, R_EAX );
  2549 	call_func1( sh4_write_sr, R_EAX );
  2550 	sh4_x86.priv_checked = FALSE;
  2551 	sh4_x86.fpuen_checked = FALSE;
  2552 	sh4_x86.tstate = TSTATE_NONE;
  2553     }
  2554 :}
  2555 LDC.L @Rm+, VBR {:  
  2556     precheck();
  2557     check_priv_no_precheck();
  2558     load_reg( R_EAX, Rm );
  2559     check_ralign32( R_EAX );
  2560     MOV_r32_r32( R_EAX, R_ECX );
  2561     ADD_imm8s_r32( 4, R_EAX );
  2562     store_reg( R_EAX, Rm );
  2563     MEM_READ_LONG( R_ECX, R_EAX );
  2564     store_spreg( R_EAX, R_VBR );
  2565     sh4_x86.tstate = TSTATE_NONE;
  2566 :}
  2567 LDC.L @Rm+, SSR {:
  2568     precheck();
  2569     check_priv_no_precheck();
  2570     load_reg( R_EAX, Rm );
  2571     check_ralign32( R_EAX );
  2572     MOV_r32_r32( R_EAX, R_ECX );
  2573     ADD_imm8s_r32( 4, R_EAX );
  2574     store_reg( R_EAX, Rm );
  2575     MEM_READ_LONG( R_ECX, R_EAX );
  2576     store_spreg( R_EAX, R_SSR );
  2577     sh4_x86.tstate = TSTATE_NONE;
  2578 :}
  2579 LDC.L @Rm+, SGR {:  
  2580     precheck();
  2581     check_priv_no_precheck();
  2582     load_reg( R_EAX, Rm );
  2583     check_ralign32( R_EAX );
  2584     MOV_r32_r32( R_EAX, R_ECX );
  2585     ADD_imm8s_r32( 4, R_EAX );
  2586     store_reg( R_EAX, Rm );
  2587     MEM_READ_LONG( R_ECX, R_EAX );
  2588     store_spreg( R_EAX, R_SGR );
  2589     sh4_x86.tstate = TSTATE_NONE;
  2590 :}
  2591 LDC.L @Rm+, SPC {:  
  2592     precheck();
  2593     check_priv_no_precheck();
  2594     load_reg( R_EAX, Rm );
  2595     check_ralign32( R_EAX );
  2596     MOV_r32_r32( R_EAX, R_ECX );
  2597     ADD_imm8s_r32( 4, R_EAX );
  2598     store_reg( R_EAX, Rm );
  2599     MEM_READ_LONG( R_ECX, R_EAX );
  2600     store_spreg( R_EAX, R_SPC );
  2601     sh4_x86.tstate = TSTATE_NONE;
  2602 :}
  2603 LDC.L @Rm+, DBR {:  
  2604     precheck();
  2605     check_priv_no_precheck();
  2606     load_reg( R_EAX, Rm );
  2607     check_ralign32( R_EAX );
  2608     MOV_r32_r32( R_EAX, R_ECX );
  2609     ADD_imm8s_r32( 4, R_EAX );
  2610     store_reg( R_EAX, Rm );
  2611     MEM_READ_LONG( R_ECX, R_EAX );
  2612     store_spreg( R_EAX, R_DBR );
  2613     sh4_x86.tstate = TSTATE_NONE;
  2614 :}
  2615 LDC.L @Rm+, Rn_BANK {:  
  2616     precheck();
  2617     check_priv_no_precheck();
  2618     load_reg( R_EAX, Rm );
  2619     check_ralign32( R_EAX );
  2620     MOV_r32_r32( R_EAX, R_ECX );
  2621     ADD_imm8s_r32( 4, R_EAX );
  2622     store_reg( R_EAX, Rm );
  2623     MEM_READ_LONG( R_ECX, R_EAX );
  2624     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2625     sh4_x86.tstate = TSTATE_NONE;
  2626 :}
  2627 LDS Rm, FPSCR {:  
  2628     load_reg( R_EAX, Rm );
  2629     store_spreg( R_EAX, R_FPSCR );
  2630     update_fr_bank( R_EAX );
  2631     sh4_x86.tstate = TSTATE_NONE;
  2632 :}
  2633 LDS.L @Rm+, FPSCR {:  
  2634     load_reg( R_EAX, Rm );
  2635     precheck();
  2636     check_ralign32( R_EAX );
  2637     MOV_r32_r32( R_EAX, R_ECX );
  2638     ADD_imm8s_r32( 4, R_EAX );
  2639     store_reg( R_EAX, Rm );
  2640     MEM_READ_LONG( R_ECX, R_EAX );
  2641     store_spreg( R_EAX, R_FPSCR );
  2642     update_fr_bank( R_EAX );
  2643     sh4_x86.tstate = TSTATE_NONE;
  2644 :}
  2645 LDS Rm, FPUL {:  
  2646     load_reg( R_EAX, Rm );
  2647     store_spreg( R_EAX, R_FPUL );
  2648 :}
  2649 LDS.L @Rm+, FPUL {:  
  2650     load_reg( R_EAX, Rm );
  2651     precheck();
  2652     check_ralign32( R_EAX );
  2653     MOV_r32_r32( R_EAX, R_ECX );
  2654     ADD_imm8s_r32( 4, R_EAX );
  2655     store_reg( R_EAX, Rm );
  2656     MEM_READ_LONG( R_ECX, R_EAX );
  2657     store_spreg( R_EAX, R_FPUL );
  2658     sh4_x86.tstate = TSTATE_NONE;
  2659 :}
  2660 LDS Rm, MACH {: 
  2661     load_reg( R_EAX, Rm );
  2662     store_spreg( R_EAX, R_MACH );
  2663 :}
  2664 LDS.L @Rm+, MACH {:  
  2665     load_reg( R_EAX, Rm );
  2666     precheck();
  2667     check_ralign32( R_EAX );
  2668     MOV_r32_r32( R_EAX, R_ECX );
  2669     ADD_imm8s_r32( 4, R_EAX );
  2670     store_reg( R_EAX, Rm );
  2671     MEM_READ_LONG( R_ECX, R_EAX );
  2672     store_spreg( R_EAX, R_MACH );
  2673     sh4_x86.tstate = TSTATE_NONE;
  2674 :}
  2675 LDS Rm, MACL {:  
  2676     load_reg( R_EAX, Rm );
  2677     store_spreg( R_EAX, R_MACL );
  2678 :}
  2679 LDS.L @Rm+, MACL {:  
  2680     load_reg( R_EAX, Rm );
  2681     precheck();
  2682     check_ralign32( R_EAX );
  2683     MOV_r32_r32( R_EAX, R_ECX );
  2684     ADD_imm8s_r32( 4, R_EAX );
  2685     store_reg( R_EAX, Rm );
  2686     MEM_READ_LONG( R_ECX, R_EAX );
  2687     store_spreg( R_EAX, R_MACL );
  2688     sh4_x86.tstate = TSTATE_NONE;
  2689 :}
  2690 LDS Rm, PR {:  
  2691     load_reg( R_EAX, Rm );
  2692     store_spreg( R_EAX, R_PR );
  2693 :}
  2694 LDS.L @Rm+, PR {:  
  2695     load_reg( R_EAX, Rm );
  2696     precheck();
  2697     check_ralign32( R_EAX );
  2698     MOV_r32_r32( R_EAX, R_ECX );
  2699     ADD_imm8s_r32( 4, R_EAX );
  2700     store_reg( R_EAX, Rm );
  2701     MEM_READ_LONG( R_ECX, R_EAX );
  2702     store_spreg( R_EAX, R_PR );
  2703     sh4_x86.tstate = TSTATE_NONE;
  2704 :}
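       /* The cache and TLB maintenance operations below are treated as
        * no-ops: they emit no code. */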
  2705 LDTLB {:  :}
  2706 OCBI @Rn {:  :}
  2707 OCBP @Rn {:  :}
  2708 OCBWB @Rn {:  :}
  2709 PREF @Rn {:
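           // Only prefetches to the store-queue area (0xE0000000-0xE3FFFFFF)
           // do anything: they flush the corresponding store queue via
           // sh4_flush_store_queue. All other addresses are no-ops here.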
  2710     load_reg( R_EAX, Rn );
  2711     MOV_r32_r32( R_EAX, R_ECX );
  2712     AND_imm32_r32( 0xFC000000, R_EAX );
  2713     CMP_imm32_r32( 0xE0000000, R_EAX );
  2714     JNE_rel8(CALL_FUNC1_SIZE, end);
  2715     call_func1( sh4_flush_store_queue, R_ECX );
  2716     JMP_TARGET(end);
  2717     sh4_x86.tstate = TSTATE_NONE;
  2718 :}
  2719 SLEEP {: 
  2720     check_priv();
  2721     call_func0( sh4_sleep );
  2722     sh4_x86.tstate = TSTATE_NONE;
  2723     sh4_x86.in_delay_slot = FALSE;
  2724     return 2;
  2725 :}
  2726 STC SR, Rn {:
  2727     check_priv();
  2728     call_func0(sh4_read_sr);
  2729     store_reg( R_EAX, Rn );
  2730     sh4_x86.tstate = TSTATE_NONE;
  2731 :}
  2732 STC GBR, Rn {:  
  2733     load_spreg( R_EAX, R_GBR );
  2734     store_reg( R_EAX, Rn );
  2735 :}
  2736 STC VBR, Rn {:  
  2737     check_priv();
  2738     load_spreg( R_EAX, R_VBR );
  2739     store_reg( R_EAX, Rn );
  2740     sh4_x86.tstate = TSTATE_NONE;
  2741 :}
  2742 STC SSR, Rn {:  
  2743     check_priv();
  2744     load_spreg( R_EAX, R_SSR );
  2745     store_reg( R_EAX, Rn );
  2746     sh4_x86.tstate = TSTATE_NONE;
  2747 :}
  2748 STC SPC, Rn {:  
  2749     check_priv();
  2750     load_spreg( R_EAX, R_SPC );
  2751     store_reg( R_EAX, Rn );
  2752     sh4_x86.tstate = TSTATE_NONE;
  2753 :}
  2754 STC SGR, Rn {:  
  2755     check_priv();
  2756     load_spreg( R_EAX, R_SGR );
  2757     store_reg( R_EAX, Rn );
  2758     sh4_x86.tstate = TSTATE_NONE;
  2759 :}
  2760 STC DBR, Rn {:  
  2761     check_priv();
  2762     load_spreg( R_EAX, R_DBR );
  2763     store_reg( R_EAX, Rn );
  2764     sh4_x86.tstate = TSTATE_NONE;
  2765 :}
  2766 STC Rm_BANK, Rn {:
  2767     check_priv();
  2768     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2769     store_reg( R_EAX, Rn );
  2770     sh4_x86.tstate = TSTATE_NONE;
  2771 :}
  2772 STC.L SR, @-Rn {:
  2773     precheck();
  2774     check_priv_no_precheck();
  2775     call_func0( sh4_read_sr );
  2776     load_reg( R_ECX, Rn );
  2777     check_walign32( R_ECX );
  2778     ADD_imm8s_r32( -4, R_ECX );
  2779     store_reg( R_ECX, Rn );
  2780     MEM_WRITE_LONG( R_ECX, R_EAX );
  2781     sh4_x86.tstate = TSTATE_NONE;
  2782 :}
  2783 STC.L VBR, @-Rn {:  
  2784     precheck();
  2785     check_priv_no_precheck();
  2786     load_reg( R_ECX, Rn );
  2787     check_walign32( R_ECX );
  2788     ADD_imm8s_r32( -4, R_ECX );
  2789     store_reg( R_ECX, Rn );
  2790     load_spreg( R_EAX, R_VBR );
  2791     MEM_WRITE_LONG( R_ECX, R_EAX );
  2792     sh4_x86.tstate = TSTATE_NONE;
  2793 :}
  2794 STC.L SSR, @-Rn {:  
  2795     precheck();
  2796     check_priv_no_precheck();
  2797     load_reg( R_ECX, Rn );
  2798     check_walign32( R_ECX );
  2799     ADD_imm8s_r32( -4, R_ECX );
  2800     store_reg( R_ECX, Rn );
  2801     load_spreg( R_EAX, R_SSR );
  2802     MEM_WRITE_LONG( R_ECX, R_EAX );
  2803     sh4_x86.tstate = TSTATE_NONE;
  2804 :}
  2805 STC.L SPC, @-Rn {:
  2806     precheck();
  2807     check_priv_no_precheck();
  2808     load_reg( R_ECX, Rn );
  2809     check_walign32( R_ECX );
  2810     ADD_imm8s_r32( -4, R_ECX );
  2811     store_reg( R_ECX, Rn );
  2812     load_spreg( R_EAX, R_SPC );
  2813     MEM_WRITE_LONG( R_ECX, R_EAX );
  2814     sh4_x86.tstate = TSTATE_NONE;
  2815 :}
  2816 STC.L SGR, @-Rn {:  
  2817     precheck();
  2818     check_priv_no_precheck();
  2819     load_reg( R_ECX, Rn );
  2820     check_walign32( R_ECX );
  2821     ADD_imm8s_r32( -4, R_ECX );
  2822     store_reg( R_ECX, Rn );
  2823     load_spreg( R_EAX, R_SGR );
  2824     MEM_WRITE_LONG( R_ECX, R_EAX );
  2825     sh4_x86.tstate = TSTATE_NONE;
  2826 :}
  2827 STC.L DBR, @-Rn {:  
  2828     precheck();
  2829     check_priv_no_precheck();
  2830     load_reg( R_ECX, Rn );
  2831     check_walign32( R_ECX );
  2832     ADD_imm8s_r32( -4, R_ECX );
  2833     store_reg( R_ECX, Rn );
  2834     load_spreg( R_EAX, R_DBR );
  2835     MEM_WRITE_LONG( R_ECX, R_EAX );
  2836     sh4_x86.tstate = TSTATE_NONE;
  2837 :}
  2838 STC.L Rm_BANK, @-Rn {:  
  2839     precheck();
  2840     check_priv_no_precheck();
  2841     load_reg( R_ECX, Rn );
  2842     check_walign32( R_ECX );
  2843     ADD_imm8s_r32( -4, R_ECX );
  2844     store_reg( R_ECX, Rn );
  2845     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2846     MEM_WRITE_LONG( R_ECX, R_EAX );
  2847     sh4_x86.tstate = TSTATE_NONE;
  2848 :}
  2849 STC.L GBR, @-Rn {:  
  2850     load_reg( R_ECX, Rn );
  2851     precheck();
  2852     check_walign32( R_ECX );
  2853     ADD_imm8s_r32( -4, R_ECX );
  2854     store_reg( R_ECX, Rn );
  2855     load_spreg( R_EAX, R_GBR );
  2856     MEM_WRITE_LONG( R_ECX, R_EAX );
  2857     sh4_x86.tstate = TSTATE_NONE;
  2858 :}
  2859 STS FPSCR, Rn {:  
  2860     load_spreg( R_EAX, R_FPSCR );
  2861     store_reg( R_EAX, Rn );
  2862 :}
  2863 STS.L FPSCR, @-Rn {:  
  2864     load_reg( R_ECX, Rn );
  2865     precheck();
  2866     check_walign32( R_ECX );
  2867     ADD_imm8s_r32( -4, R_ECX );
  2868     store_reg( R_ECX, Rn );
  2869     load_spreg( R_EAX, R_FPSCR );
  2870     MEM_WRITE_LONG( R_ECX, R_EAX );
  2871     sh4_x86.tstate = TSTATE_NONE;
  2872 :}
  2873 STS FPUL, Rn {:  
  2874     load_spreg( R_EAX, R_FPUL );
  2875     store_reg( R_EAX, Rn );
  2876 :}
  2877 STS.L FPUL, @-Rn {:  
  2878     load_reg( R_ECX, Rn );
  2879     precheck();
  2880     check_walign32( R_ECX );
  2881     ADD_imm8s_r32( -4, R_ECX );
  2882     store_reg( R_ECX, Rn );
  2883     load_spreg( R_EAX, R_FPUL );
  2884     MEM_WRITE_LONG( R_ECX, R_EAX );
  2885     sh4_x86.tstate = TSTATE_NONE;
  2886 :}
  2887 STS MACH, Rn {:  
  2888     load_spreg( R_EAX, R_MACH );
  2889     store_reg( R_EAX, Rn );
  2890 :}
  2891 STS.L MACH, @-Rn {:  
  2892     load_reg( R_ECX, Rn );
  2893     precheck();
  2894     check_walign32( R_ECX );
  2895     ADD_imm8s_r32( -4, R_ECX );
  2896     store_reg( R_ECX, Rn );
  2897     load_spreg( R_EAX, R_MACH );
  2898     MEM_WRITE_LONG( R_ECX, R_EAX );
  2899     sh4_x86.tstate = TSTATE_NONE;
  2900 :}
  2901 STS MACL, Rn {:  
  2902     load_spreg( R_EAX, R_MACL );
  2903     store_reg( R_EAX, Rn );
  2904 :}
  2905 STS.L MACL, @-Rn {:  
  2906     load_reg( R_ECX, Rn );
  2907     precheck();
  2908     check_walign32( R_ECX );
  2909     ADD_imm8s_r32( -4, R_ECX );
  2910     store_reg( R_ECX, Rn );
  2911     load_spreg( R_EAX, R_MACL );
  2912     MEM_WRITE_LONG( R_ECX, R_EAX );
  2913     sh4_x86.tstate = TSTATE_NONE;
  2914 :}
  2915 STS PR, Rn {:  
  2916     load_spreg( R_EAX, R_PR );
  2917     store_reg( R_EAX, Rn );
  2918 :}
  2919 STS.L PR, @-Rn {:  
  2920     load_reg( R_ECX, Rn );
  2921     precheck();
  2922     check_walign32( R_ECX );
  2923     ADD_imm8s_r32( -4, R_ECX );
  2924     store_reg( R_ECX, Rn );
  2925     load_spreg( R_EAX, R_PR );
  2926     MEM_WRITE_LONG( R_ECX, R_EAX );
  2927     sh4_x86.tstate = TSTATE_NONE;
  2928 :}
  2930 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2931 %%
  2932     sh4_x86.in_delay_slot = FALSE;
  2933     return 0;