lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 397:640324505325
prev 395:c473acbde186
next 401:f79327f39818
author nkeynes
date Wed Sep 19 11:30:30 2007 +0000
permissions -rw-r--r--
last change Fix SHLL/SHLR/SHAL/SHAR flag setting
     1 /**
     2  * $Id: sh4x86.in,v 1.14 2007-09-19 11:30:30 nkeynes Exp $
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "sh4/sh4core.h"
    29 #include "sh4/sh4trans.h"
    30 #include "sh4/sh4mmio.h"
    31 #include "sh4/x86op.h"
    32 #include "clock.h"
    34 #define DEFAULT_BACKPATCH_SIZE 4096
    36 /** 
    37  * Struct to manage internal translation state. This state is not saved -
    38  * it is only valid between calls to sh4_translate_begin_block() and
    39  * sh4_translate_end_block()
    40  */
    41 struct sh4_x86_state {
    42     gboolean in_delay_slot;
    43     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    44     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    45     int exit_code;
    47     /* Allocated memory for the (block-wide) back-patch list */
    48     uint32_t **backpatch_list;
    49     uint32_t backpatch_posn;
    50     uint32_t backpatch_size;
    51 };
    53 #define EXIT_DATA_ADDR_READ 0
    54 #define EXIT_DATA_ADDR_WRITE 7
    55 #define EXIT_ILLEGAL 14
    56 #define EXIT_SLOT_ILLEGAL 21
    57 #define EXIT_FPU_DISABLED 28
    58 #define EXIT_SLOT_FPU_DISABLED 35
    60 static struct sh4_x86_state sh4_x86;
    62 static uint32_t max_int = 0x7FFFFFFF;
    63 static uint32_t min_int = 0x80000000;
    64 static uint32_t save_fcw; /* save value for fpu control word */
    65 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
    66 void signsat48( void )
    67 {
    68     if( ((int64_t)sh4r.mac) < (int64_t)0xFFFF800000000000LL )
    69 	sh4r.mac = 0xFFFF800000000000LL;
    70     else if( ((int64_t)sh4r.mac) > (int64_t)0x00007FFFFFFFFFFFLL )
    71 	sh4r.mac = 0x00007FFFFFFFFFFFLL;
    72 }
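/* Illustrative note: the comparisons above clamp sh4r.mac to the signed
 * 48-bit range [-2^47, 2^47-1]; for example an accumulate that would reach
 * 0x0000800000000000 (2^47) is stored as 0x00007FFFFFFFFFFF instead. */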
    74 void sh4_fsca( uint32_t anglei, float *fr )
    75 {
    76     float angle = (((float)(anglei&0xFFFF))/65536.0) * 2 * M_PI;
    77     *fr++ = cosf(angle);
    78     *fr = sinf(angle);
    79 }
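/* Worked example (illustrative only): the low 16 bits of the angle are a
 * fraction of a full turn, so anglei = 0x4000 gives
 * (0x4000/65536.0)*2*PI = PI/2, i.e. *fr ~ 0.0 from cosf(PI/2) and
 * *(fr+1) = sinf(PI/2) = 1.0; anglei = 0x10000 wraps to 0 via the & 0xFFFF. */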
    81 void sh4_sleep()
    82 {
    83     if( MMIO_READ( CPG, STBCR ) & 0x80 ) {
    84 	sh4r.sh4_state = SH4_STATE_STANDBY;
    85     } else {
    86 	sh4r.sh4_state = SH4_STATE_SLEEP;
    87     }
    88 }
    90 /**
    91  * Compute the matrix transform of fv given the matrix xf.
    92  * Both fv and xf are word-swapped as per the sh4r.fr banks
    93  */
    94 void sh4_ftrv( float *target, float *xf )
    95 {
    96     float fv[4] = { target[1], target[0], target[3], target[2] };
    97     target[1] = xf[1] * fv[0] + xf[5]*fv[1] +
    98 	xf[9]*fv[2] + xf[13]*fv[3];
    99     target[0] = xf[0] * fv[0] + xf[4]*fv[1] +
   100 	xf[8]*fv[2] + xf[12]*fv[3];
   101     target[3] = xf[3] * fv[0] + xf[7]*fv[1] +
   102 	xf[11]*fv[2] + xf[15]*fv[3];
   103     target[2] = xf[2] * fv[0] + xf[6]*fv[1] +
   104 	xf[10]*fv[2] + xf[14]*fv[3];
   105 }
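/* Note on the swapping above: fv[j] = target[j^1] appears to undo the
 * word-swap so fv holds the vector in logical order, and each result is
 * written back at its swapped index. The arithmetic is then the ordinary
 * column-major 4x4 matrix * vector product, e.g. logical element 0 =
 * xf0*v0 + xf4*v1 + xf8*v2 + xf12*v3. */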
   109 void sh4_x86_init()
   110 {
   111     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   112     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
   113 }
   116 static void sh4_x86_add_backpatch( uint8_t *ptr )
   117 {
   118     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   119 	sh4_x86.backpatch_size <<= 1;
   120 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
   121 	assert( sh4_x86.backpatch_list != NULL );
   122     }
   123     sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
   124 }
   126 static void sh4_x86_do_backpatch( uint8_t *reloc_base )
   127 {
   128     unsigned int i;
   129     for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
   130 	*sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
   131     }
   132 }
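/* How this presumably fits together: the JE_exit/JNE_exit/JMP_exit macros
 * (from x86op.h) emit a jump whose 32-bit displacement field initially holds
 * one of the EXIT_* offsets above and record that field's address via
 * sh4_x86_add_backpatch(). Adding (reloc_base - field - 4) here turns the
 * stored offset into a real rel32, so each patched jump lands at
 * reloc_base + EXIT_*, i.e. on the matching 7-byte PUSH imm32 / JMP rel8
 * pair in the exception block emitted by sh4_translate_end_block(). */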
   134 /**
   135  * Emit an instruction to load an SH4 reg into a real register
   136  */
   137 static inline void load_reg( int x86reg, int sh4reg ) 
   138 {
   139     /* mov [bp+n], reg */
   140     OP(0x8B);
   141     OP(0x45 + (x86reg<<3));
   142     OP(REG_OFFSET(r[sh4reg]));
   143 }
   145 static inline void load_reg16s( int x86reg, int sh4reg )
   146 {
   147     OP(0x0F);
   148     OP(0xBF);
   149     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   150 }
   152 static inline void load_reg16u( int x86reg, int sh4reg )
   153 {
   154     OP(0x0F);
   155     OP(0xB7);
   156     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   158 }
   160 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   161 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   162 /**
   163  * Emit an instruction to load an immediate value into a register
   164  */
   165 static inline void load_imm32( int x86reg, uint32_t value ) {
   166     /* mov #value, reg */
   167     OP(0xB8 + x86reg);
   168     OP32(value);
   169 }
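/* Encoding example (assuming the R_* numbers follow the usual x86 ordering
 * with R_EAX == 0, presumably per x86op.h): load_imm32(R_EAX, 0x12345678)
 * emits B8 78 56 34 12, i.e. opcode 0xB8+reg followed by the little-endian
 * immediate. */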
   171 /**
   172  * Emit an instruction to store an SH4 reg (RN)
   173  */
   174 void static inline store_reg( int x86reg, int sh4reg ) {
   175     /* mov reg, [bp+n] */
   176     OP(0x89);
   177     OP(0x45 + (x86reg<<3));
   178     OP(REG_OFFSET(r[sh4reg]));
   179 }
   181 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   183 /**
   184  * Load an FR register (single-precision floating point) into an integer x86
   185  * register (eg for register-to-register moves)
   186  */
   187 void static inline load_fr( int bankreg, int x86reg, int frm )
   188 {
   189     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   190 }
   192 /**
   193  * Store an FR register (single-precision floating point) into an integer x86
   194  * register (eg for register-to-register moves)
   195  */
   196 void static inline store_fr( int bankreg, int x86reg, int frn )
   197 {
   198     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   199 }
   202 /**
   203  * Load a pointer to the back fp bank into the specified x86 register. The
   204  * bankreg must have been previously loaded with FPSCR.
   205  * NB: 12 bytes
   206  */
   207 static inline void load_xf_bank( int bankreg )
   208 {
   209     NOT_r32( bankreg );
   210     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   211     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   212     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   213 }
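/* Sketch of the trick above: bit 21 of FPSCR is the FR bank-select bit and
 * each bank is 16 singles = 64 bytes. NOT inverts the bit, SHR by 15 moves
 * it down to bit 6 (value 64) and the AND masks everything else, so the LEA
 * leaves bankreg pointing at &sh4r.fr plus either 0 or 64, i.e. the bank
 * *not* currently selected (the XF registers). update_fr_bank() below uses
 * the same sequence without the NOT to locate the active bank. */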
   215 /**
   216  * Update the fr_bank pointer based on the current fpscr value.
   217  */
   218 static inline void update_fr_bank( int fpscrreg )
   219 {
   220     SHR_imm8_r32( (21 - 6), fpscrreg ); // Extract bit 21 then *64 for bank size
   221     AND_imm8s_r32( 0x40, fpscrreg );    // Complete extraction
   222     OP(0x8D); OP(0x44+(fpscrreg<<3)); OP(0x28+fpscrreg); OP(REG_OFFSET(fr)); // LEA [ebp+fpscrreg+disp], fpscrreg
   223     store_spreg( fpscrreg, REG_OFFSET(fr_bank) );
   224 }
   225 /**
   226  * Push FPUL (as a 32-bit float) onto the FPU stack
   227  */
   228 static inline void push_fpul( )
   229 {
   230     OP(0xD9); OP(0x45); OP(R_FPUL);
   231 }
   233 /**
   234  * Pop FPUL (as a 32-bit float) from the FPU stack
   235  */
   236 static inline void pop_fpul( )
   237 {
   238     OP(0xD9); OP(0x5D); OP(R_FPUL);
   239 }
   241 /**
   242  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   243  * with the location of the current fp bank.
   244  */
   245 static inline void push_fr( int bankreg, int frm ) 
   246 {
   247     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   248 }
   250 /**
   251  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   252  * with bankreg previously loaded with the location of the current fp bank.
   253  */
   254 static inline void pop_fr( int bankreg, int frm )
   255 {
   256     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
   257 }
   259 /**
   260  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   261  * with the location of the current fp bank.
   262  */
   263 static inline void push_dr( int bankreg, int frm )
   264 {
   265     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   266 }
   268 static inline void pop_dr( int bankreg, int frm )
   269 {
   270     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
   271 }
   273 /**
   274  * Note: clobbers EAX to make the indirect call - this isn't usually
   275  * a problem since the callee will usually clobber it anyway.
   276  */
   277 static inline void call_func0( void *ptr )
   278 {
   279     load_imm32(R_EAX, (uint32_t)ptr);
   280     CALL_r32(R_EAX);
   281 }
   283 static inline void call_func1( void *ptr, int arg1 )
   284 {
   285     PUSH_r32(arg1);
   286     call_func0(ptr);
   287     ADD_imm8s_r32( 4, R_ESP );
   288 }
   290 static inline void call_func2( void *ptr, int arg1, int arg2 )
   291 {
   292     PUSH_r32(arg2);
   293     PUSH_r32(arg1);
   294     call_func0(ptr);
   295     ADD_imm8s_r32( 8, R_ESP );
   296 }
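/* These helpers use the plain cdecl convention: arguments are pushed right
 * to left, the target address goes into EAX for an indirect CALL, and the
 * caller pops the arguments afterwards (the ADD to ESP). For example the
 * MEM_WRITE_LONG macro below expands to
 * call_func2(sh4_write_long, addr_reg, value_reg), which behaves like the
 * C call sh4_write_long(addr, value). */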
   298 /**
   299  * Write a double (64-bit) value into memory, with the first word in arg2a, and
   300  * the second in arg2b
   301  * NB: 30 bytes
   302  */
   303 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
   304 {
   305     ADD_imm8s_r32( 4, addr );
   306     PUSH_r32(arg2b);
   307     PUSH_r32(addr);
   308     ADD_imm8s_r32( -4, addr );
   309     PUSH_r32(arg2a);
   310     PUSH_r32(addr);
   311     call_func0(sh4_write_long);
   312     ADD_imm8s_r32( 8, R_ESP );
   313     call_func0(sh4_write_long);
   314     ADD_imm8s_r32( 8, R_ESP );
   315 }
   317 /**
   318  * Read a double (64-bit) value from memory, writing the first word into arg2a
   319  * and the second into arg2b. The addr must not be in EAX
   320  * NB: 27 bytes
   321  */
   322 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
   323 {
   324     PUSH_r32(addr);
   325     call_func0(sh4_read_long);
   326     POP_r32(addr);
   327     PUSH_r32(R_EAX);
   328     ADD_imm8s_r32( 4, addr );
   329     PUSH_r32(addr);
   330     call_func0(sh4_read_long);
   331     ADD_imm8s_r32( 4, R_ESP );
   332     MOV_r32_r32( R_EAX, arg2b );
   333     POP_r32(arg2a);
   334 }
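/* Why addr must not be EAX: the first sh4_read_long() returns its result in
 * EAX, which is pushed while addr (popped back from the argument slot) is
 * still needed to address the second word; if addr were EAX the return value
 * would overwrite the address. arg2a ends up holding the word at addr and
 * arg2b the word at addr+4. */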
   336 /* Exception checks - Note that all exception checks will clobber EAX */
   337 static void check_priv( )
   338 {
   339     if( !sh4_x86.priv_checked ) {
   340 	sh4_x86.priv_checked = TRUE;
   341 	load_spreg( R_EAX, R_SR );
   342 	AND_imm32_r32( SR_MD, R_EAX );
   343 	if( sh4_x86.in_delay_slot ) {
   344 	    JE_exit( EXIT_SLOT_ILLEGAL );
   345 	} else {
   346 	    JE_exit( EXIT_ILLEGAL );
   347 	}
   348     }
   349 }
   351 static void check_fpuen( )
   352 {
   353     if( !sh4_x86.fpuen_checked ) {
   354 	sh4_x86.fpuen_checked = TRUE;
   355 	load_spreg( R_EAX, R_SR );
   356 	AND_imm32_r32( SR_FD, R_EAX );
   357 	if( sh4_x86.in_delay_slot ) {
   358 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);
   359 	} else {
   360 	    JNE_exit(EXIT_FPU_DISABLED);
   361 	}
   362     }
   363 }
   365 static void check_ralign16( int x86reg )
   366 {
   367     TEST_imm32_r32( 0x00000001, x86reg );
   368     JNE_exit(EXIT_DATA_ADDR_READ);
   369 }
   371 static void check_walign16( int x86reg )
   372 {
   373     TEST_imm32_r32( 0x00000001, x86reg );
   374     JNE_exit(EXIT_DATA_ADDR_WRITE);
   375 }
   377 static void check_ralign32( int x86reg )
   378 {
   379     TEST_imm32_r32( 0x00000003, x86reg );
   380     JNE_exit(EXIT_DATA_ADDR_READ);
   381 }
   382 static void check_walign32( int x86reg )
   383 {
   384     TEST_imm32_r32( 0x00000003, x86reg );
   385     JNE_exit(EXIT_DATA_ADDR_WRITE);
   386 }
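/* The alignment checks above simply TEST the low address bits (1 for word,
 * 3 for longword accesses): ZF stays set only for an aligned address, so an
 * unaligned access (e.g. a MOV.L through 0x8C000002) takes the JNE path and
 * exits the block with the corresponding data address error code. */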
   388 #define UNDEF()
   389 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   390 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   391 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   392 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   393 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   394 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   395 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   397 #define SLOTILLEGAL() JMP_exit(EXIT_SLOT_ILLEGAL); sh4_x86.in_delay_slot = FALSE; return 1;
   401 /**
   402  * Emit the 'start of block' assembly. Sets up the stack frame and saves
   403  * SI/DI as required.
   404  */
   405 void sh4_translate_begin_block() 
   406 {
   407     PUSH_r32(R_EBP);
   408     /* mov &sh4r, ebp */
   409     load_imm32( R_EBP, (uint32_t)&sh4r );
   410     PUSH_r32(R_EDI);
   411     PUSH_r32(R_ESI);
   412     XOR_r32_r32(R_ESI, R_ESI);
   414     sh4_x86.in_delay_slot = FALSE;
   415     sh4_x86.priv_checked = FALSE;
   416     sh4_x86.fpuen_checked = FALSE;
   417     sh4_x86.backpatch_posn = 0;
   418     sh4_x86.exit_code = 1;
   419 }
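/* Register conventions for a translated block, as far as the code here and
 * in exit_block() suggests: EBP holds &sh4r so SH4 state can be addressed as
 * [ebp+disp], EDI is loaded with the successor PC before the block exits,
 * and ESI counts the instructions executed (multiplied by sh4_cpu_period for
 * cycle accounting, and doubled to rebuild the PC on the exception path). */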
   421 /**
   422  * Exit the block early (ie branch out), conditionally or otherwise
   423  */
   424 void exit_block( )
   425 {
   426     store_spreg( R_EDI, REG_OFFSET(pc) );
   427     MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
   428     load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   429     MUL_r32( R_ESI );
   430     ADD_r32_r32( R_EAX, R_ECX );
   431     store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   432     load_imm32( R_EAX, sh4_x86.exit_code );
   433     POP_r32(R_ESI);
   434     POP_r32(R_EDI);
   435     POP_r32(R_EBP);
   436     RET();
   437 }
   439 /**
   440  * Flush any open regs back to memory, restore SI/DI, update PC, etc.
   441  */
   442 void sh4_translate_end_block( sh4addr_t pc ) {
   443     assert( !sh4_x86.in_delay_slot ); // should never stop here
   444     // Normal termination - save PC, cycle count
   445     exit_block( );
   447     if( sh4_x86.backpatch_posn != 0 ) {
   448 	uint8_t *end_ptr = xlat_output;
   449 	// Exception termination. Jump block for various exception codes:
   450 	PUSH_imm32( EXC_DATA_ADDR_READ );
   451 	JMP_rel8( 33, target1 );
   452 	PUSH_imm32( EXC_DATA_ADDR_WRITE );
   453 	JMP_rel8( 26, target2 );
   454 	PUSH_imm32( EXC_ILLEGAL );
   455 	JMP_rel8( 19, target3 );
   456 	PUSH_imm32( EXC_SLOT_ILLEGAL ); 
   457 	JMP_rel8( 12, target4 );
   458 	PUSH_imm32( EXC_FPU_DISABLED ); 
   459 	JMP_rel8( 5, target5 );
   460 	PUSH_imm32( EXC_SLOT_FPU_DISABLED );
   461 	// target
   462 	JMP_TARGET(target1);
   463 	JMP_TARGET(target2);
   464 	JMP_TARGET(target3);
   465 	JMP_TARGET(target4);
   466 	JMP_TARGET(target5);
   467 	load_spreg( R_ECX, REG_OFFSET(pc) );
   468 	ADD_r32_r32( R_ESI, R_ECX );
   469 	ADD_r32_r32( R_ESI, R_ECX );
   470 	store_spreg( R_ECX, REG_OFFSET(pc) );
   471 	MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
   472 	load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   473 	MUL_r32( R_ESI );
   474 	ADD_r32_r32( R_EAX, R_ECX );
   475 	store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   477 	load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
   478 	CALL_r32( R_EAX ); // 2
   479 	ADD_imm8s_r32( 4, R_ESP );
   480 	POP_r32(R_ESI);
   481 	POP_r32(R_EDI);
   482 	POP_r32(R_EBP);
   483 	RET();
   485 	sh4_x86_do_backpatch( end_ptr );
   486     }
   488 }
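/* Shape of the exception block above: each PUSH_imm32(EXC_*) / JMP_rel8 pair
 * is 7 bytes, matching the EXIT_* offsets (0, 7, 14, ...), so a backpatched
 * exit jump lands on the pair for its exit code. The pair pushes the SH4
 * exception number and jumps (or, for the last entry, falls through) to the
 * common tail, which rewrites PC as pc + 2*ESI, accounts the cycles consumed
 * so far, and finally calls sh4_raise_exception() with the pushed code. */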
   491 extern uint16_t *sh4_icache;
   492 extern uint32_t sh4_icache_addr;
   494 /**
   495  * Translate a single instruction. Delayed branches are handled specially
   496  * by translating both branch and delayed instruction as a single unit (as
   497  * the delay-slot instruction must execute before the branch takes effect).
   498  *
   499  * @return true if the instruction marks the end of a basic block
   500  * (eg a branch or an instruction which otherwise ends the block)
   501  */
   502 uint32_t sh4_x86_translate_instruction( uint32_t pc )
   503 {
   504     uint32_t ir;
   505     /* Read instruction */
   506     uint32_t pageaddr = pc >> 12;
   507     if( sh4_icache != NULL && pageaddr == sh4_icache_addr ) {
   508 	ir = sh4_icache[(pc&0xFFF)>>1];
   509     } else {
   510 	sh4_icache = (uint16_t *)mem_get_page(pc);
   511 	if( ((uint32_t)sh4_icache) < MAX_IO_REGIONS ) {
   512 	    /* If someone's actually been so daft as to try to execute out of an IO
   513 	     * region, fall back on the full-blown memory read
   514 	     */
   515 	    sh4_icache = NULL;
   516 	    ir = sh4_read_word(pc);
   517 	} else {
   518 	    sh4_icache_addr = pageaddr;
   519 	    ir = sh4_icache[(pc&0xFFF)>>1];
   520 	}
   521     }
   523 %%
   524 /* ALU operations */
   525 ADD Rm, Rn {:
   526     load_reg( R_EAX, Rm );
   527     load_reg( R_ECX, Rn );
   528     ADD_r32_r32( R_EAX, R_ECX );
   529     store_reg( R_ECX, Rn );
   530 :}
   531 ADD #imm, Rn {:  
   532     load_reg( R_EAX, Rn );
   533     ADD_imm8s_r32( imm, R_EAX );
   534     store_reg( R_EAX, Rn );
   535 :}
   536 ADDC Rm, Rn {:
   537     load_reg( R_EAX, Rm );
   538     load_reg( R_ECX, Rn );
   539     LDC_t();
   540     ADC_r32_r32( R_EAX, R_ECX );
   541     store_reg( R_ECX, Rn );
   542     SETC_t();
   543 :}
   544 ADDV Rm, Rn {:
   545     load_reg( R_EAX, Rm );
   546     load_reg( R_ECX, Rn );
   547     ADD_r32_r32( R_EAX, R_ECX );
   548     store_reg( R_ECX, Rn );
   549     SETO_t();
   550 :}
   551 AND Rm, Rn {:
   552     load_reg( R_EAX, Rm );
   553     load_reg( R_ECX, Rn );
   554     AND_r32_r32( R_EAX, R_ECX );
   555     store_reg( R_ECX, Rn );
   556 :}
   557 AND #imm, R0 {:  
   558     load_reg( R_EAX, 0 );
   559     AND_imm32_r32(imm, R_EAX); 
   560     store_reg( R_EAX, 0 );
   561 :}
   562 AND.B #imm, @(R0, GBR) {: 
   563     load_reg( R_EAX, 0 );
   564     load_spreg( R_ECX, R_GBR );
   565     ADD_r32_r32( R_EAX, R_ECX );
   566     PUSH_r32(R_ECX);
   567     call_func0(sh4_read_byte);
   568     POP_r32(R_ECX);
   569     AND_imm32_r32(imm, R_EAX );
   570     MEM_WRITE_BYTE( R_ECX, R_EAX );
   571 :}
   572 CMP/EQ Rm, Rn {:  
   573     load_reg( R_EAX, Rm );
   574     load_reg( R_ECX, Rn );
   575     CMP_r32_r32( R_EAX, R_ECX );
   576     SETE_t();
   577 :}
   578 CMP/EQ #imm, R0 {:  
   579     load_reg( R_EAX, 0 );
   580     CMP_imm8s_r32(imm, R_EAX);
   581     SETE_t();
   582 :}
   583 CMP/GE Rm, Rn {:  
   584     load_reg( R_EAX, Rm );
   585     load_reg( R_ECX, Rn );
   586     CMP_r32_r32( R_EAX, R_ECX );
   587     SETGE_t();
   588 :}
   589 CMP/GT Rm, Rn {: 
   590     load_reg( R_EAX, Rm );
   591     load_reg( R_ECX, Rn );
   592     CMP_r32_r32( R_EAX, R_ECX );
   593     SETG_t();
   594 :}
   595 CMP/HI Rm, Rn {:  
   596     load_reg( R_EAX, Rm );
   597     load_reg( R_ECX, Rn );
   598     CMP_r32_r32( R_EAX, R_ECX );
   599     SETA_t();
   600 :}
   601 CMP/HS Rm, Rn {: 
   602     load_reg( R_EAX, Rm );
   603     load_reg( R_ECX, Rn );
   604     CMP_r32_r32( R_EAX, R_ECX );
   605     SETAE_t();
   606  :}
   607 CMP/PL Rn {: 
   608     load_reg( R_EAX, Rn );
   609     CMP_imm8s_r32( 0, R_EAX );
   610     SETG_t();
   611 :}
   612 CMP/PZ Rn {:  
   613     load_reg( R_EAX, Rn );
   614     CMP_imm8s_r32( 0, R_EAX );
   615     SETGE_t();
   616 :}
   617 CMP/STR Rm, Rn {:  
   618     load_reg( R_EAX, Rm );
   619     load_reg( R_ECX, Rn );
   620     XOR_r32_r32( R_ECX, R_EAX );
   621     TEST_r8_r8( R_AL, R_AL );
   622     JE_rel8(13, target1);
   623     TEST_r8_r8( R_AH, R_AH ); // 2
   624     JE_rel8(9, target2);
   625     SHR_imm8_r32( 16, R_EAX ); // 3
   626     TEST_r8_r8( R_AL, R_AL ); // 2
   627     JE_rel8(2, target3);
   628     TEST_r8_r8( R_AH, R_AH ); // 2
   629     JMP_TARGET(target1);
   630     JMP_TARGET(target2);
   631     JMP_TARGET(target3);
   632     SETE_t();
   633 :}
   634 DIV0S Rm, Rn {:
   635     load_reg( R_EAX, Rm );
   636     load_reg( R_ECX, Rn );
   637     SHR_imm8_r32( 31, R_EAX );
   638     SHR_imm8_r32( 31, R_ECX );
   639     store_spreg( R_EAX, R_M );
   640     store_spreg( R_ECX, R_Q );
   641     CMP_r32_r32( R_EAX, R_ECX );
   642     SETNE_t();
   643 :}
   644 DIV0U {:  
   645     XOR_r32_r32( R_EAX, R_EAX );
   646     store_spreg( R_EAX, R_Q );
   647     store_spreg( R_EAX, R_M );
   648     store_spreg( R_EAX, R_T );
   649 :}
   650 DIV1 Rm, Rn {:
   651     load_spreg( R_ECX, R_M );
   652     load_reg( R_EAX, Rn );
   653     LDC_t();
   654     RCL1_r32( R_EAX );
   655     SETC_r8( R_DL ); // Q'
   656     CMP_sh4r_r32( R_Q, R_ECX );
   657     JE_rel8(5, mqequal);
   658     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   659     JMP_rel8(3, end);
   660     JMP_TARGET(mqequal);
   661     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_EAX );
   662     JMP_TARGET(end);
   663     store_reg( R_EAX, Rn ); // Done with Rn now
   664     SETC_r8(R_AL); // tmp1
   665     XOR_r8_r8( R_DL, R_AL ); // Q' = Q ^ tmp1
   666     XOR_r8_r8( R_AL, R_CL ); // Q'' = Q' ^ M
   667     store_spreg( R_ECX, R_Q );
   668     XOR_imm8s_r32( 1, R_AL );   // T = !Q'
   669     MOVZX_r8_r32( R_AL, R_EAX );
   670     store_spreg( R_EAX, R_T );
   671 :}
   672 DMULS.L Rm, Rn {:  
   673     load_reg( R_EAX, Rm );
   674     load_reg( R_ECX, Rn );
   675     IMUL_r32(R_ECX);
   676     store_spreg( R_EDX, R_MACH );
   677     store_spreg( R_EAX, R_MACL );
   678 :}
   679 DMULU.L Rm, Rn {:  
   680     load_reg( R_EAX, Rm );
   681     load_reg( R_ECX, Rn );
   682     MUL_r32(R_ECX);
   683     store_spreg( R_EDX, R_MACH );
   684     store_spreg( R_EAX, R_MACL );    
   685 :}
   686 DT Rn {:  
   687     load_reg( R_EAX, Rn );
   688     ADD_imm8s_r32( -1, R_EAX );
   689     store_reg( R_EAX, Rn );
   690     SETE_t();
   691 :}
   692 EXTS.B Rm, Rn {:  
   693     load_reg( R_EAX, Rm );
   694     MOVSX_r8_r32( R_EAX, R_EAX );
   695     store_reg( R_EAX, Rn );
   696 :}
   697 EXTS.W Rm, Rn {:  
   698     load_reg( R_EAX, Rm );
   699     MOVSX_r16_r32( R_EAX, R_EAX );
   700     store_reg( R_EAX, Rn );
   701 :}
   702 EXTU.B Rm, Rn {:  
   703     load_reg( R_EAX, Rm );
   704     MOVZX_r8_r32( R_EAX, R_EAX );
   705     store_reg( R_EAX, Rn );
   706 :}
   707 EXTU.W Rm, Rn {:  
   708     load_reg( R_EAX, Rm );
   709     MOVZX_r16_r32( R_EAX, R_EAX );
   710     store_reg( R_EAX, Rn );
   711 :}
   712 MAC.L @Rm+, @Rn+ {:  
   713     load_reg( R_ECX, Rm );
   714     check_ralign32( R_ECX );
   715     load_reg( R_ECX, Rn );
   716     check_ralign32( R_ECX );
   717     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rn]) );
   718     MEM_READ_LONG( R_ECX, R_EAX );
   719     PUSH_r32( R_EAX );
   720     load_reg( R_ECX, Rm );
   721     ADD_imm8s_sh4r( 4, REG_OFFSET(r[Rm]) );
   722     MEM_READ_LONG( R_ECX, R_EAX );
   723     POP_r32( R_ECX );
   724     IMUL_r32( R_ECX );
   725     ADD_r32_sh4r( R_EAX, R_MACL );
   726     ADC_r32_sh4r( R_EDX, R_MACH );
   728     load_spreg( R_ECX, R_S );
   729     TEST_r32_r32(R_ECX, R_ECX);
   730     JE_rel8( 7, nosat );
   731     call_func0( signsat48 );
   732     JMP_TARGET( nosat );
   733 :}
   734 MAC.W @Rm+, @Rn+ {:  
   735     load_reg( R_ECX, Rm );
   736     check_ralign16( R_ECX );
   737     load_reg( R_ECX, Rn );
   738     check_ralign16( R_ECX );
   739     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rn]) );
   740     MEM_READ_WORD( R_ECX, R_EAX );
   741     PUSH_r32( R_EAX );
   742     load_reg( R_ECX, Rm );
   743     ADD_imm8s_sh4r( 2, REG_OFFSET(r[Rm]) );
   744     MEM_READ_WORD( R_ECX, R_EAX );
   745     POP_r32( R_ECX );
   746     IMUL_r32( R_ECX );
   748     load_spreg( R_ECX, R_S );
   749     TEST_r32_r32( R_ECX, R_ECX );
   750     JE_rel8( 47, nosat );
   752     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   753     JNO_rel8( 51, end );            // 2
   754     load_imm32( R_EDX, 1 );         // 5
   755     store_spreg( R_EDX, R_MACH );   // 6
   756     JS_rel8( 13, positive );        // 2
   757     load_imm32( R_EAX, 0x80000000 );// 5
   758     store_spreg( R_EAX, R_MACL );   // 6
   759     JMP_rel8( 25, end2 );           // 2
   761     JMP_TARGET(positive);
   762     load_imm32( R_EAX, 0x7FFFFFFF );// 5
   763     store_spreg( R_EAX, R_MACL );   // 6
   764     JMP_rel8( 12, end3);            // 2
   766     JMP_TARGET(nosat);
   767     ADD_r32_sh4r( R_EAX, R_MACL );  // 6
   768     ADC_r32_sh4r( R_EDX, R_MACH );  // 6
   769     JMP_TARGET(end);
   770     JMP_TARGET(end2);
   771     JMP_TARGET(end3);
   772 :}
   773 MOVT Rn {:  
   774     load_spreg( R_EAX, R_T );
   775     store_reg( R_EAX, Rn );
   776 :}
   777 MUL.L Rm, Rn {:  
   778     load_reg( R_EAX, Rm );
   779     load_reg( R_ECX, Rn );
   780     MUL_r32( R_ECX );
   781     store_spreg( R_EAX, R_MACL );
   782 :}
   783 MULS.W Rm, Rn {:
   784     load_reg16s( R_EAX, Rm );
   785     load_reg16s( R_ECX, Rn );
   786     MUL_r32( R_ECX );
   787     store_spreg( R_EAX, R_MACL );
   788 :}
   789 MULU.W Rm, Rn {:  
   790     load_reg16u( R_EAX, Rm );
   791     load_reg16u( R_ECX, Rn );
   792     MUL_r32( R_ECX );
   793     store_spreg( R_EAX, R_MACL );
   794 :}
   795 NEG Rm, Rn {:
   796     load_reg( R_EAX, Rm );
   797     NEG_r32( R_EAX );
   798     store_reg( R_EAX, Rn );
   799 :}
   800 NEGC Rm, Rn {:  
   801     load_reg( R_EAX, Rm );
   802     XOR_r32_r32( R_ECX, R_ECX );
   803     LDC_t();
   804     SBB_r32_r32( R_EAX, R_ECX );
   805     store_reg( R_ECX, Rn );
   806     SETC_t();
   807 :}
   808 NOT Rm, Rn {:  
   809     load_reg( R_EAX, Rm );
   810     NOT_r32( R_EAX );
   811     store_reg( R_EAX, Rn );
   812 :}
   813 OR Rm, Rn {:  
   814     load_reg( R_EAX, Rm );
   815     load_reg( R_ECX, Rn );
   816     OR_r32_r32( R_EAX, R_ECX );
   817     store_reg( R_ECX, Rn );
   818 :}
   819 OR #imm, R0 {:
   820     load_reg( R_EAX, 0 );
   821     OR_imm32_r32(imm, R_EAX);
   822     store_reg( R_EAX, 0 );
   823 :}
   824 OR.B #imm, @(R0, GBR) {:  
   825     load_reg( R_EAX, 0 );
   826     load_spreg( R_ECX, R_GBR );
   827     ADD_r32_r32( R_EAX, R_ECX );
   828     PUSH_r32(R_ECX);
   829     call_func0(sh4_read_byte);
   830     POP_r32(R_ECX);
   831     OR_imm32_r32(imm, R_EAX );
   832     MEM_WRITE_BYTE( R_ECX, R_EAX );
   833 :}
   834 ROTCL Rn {:
   835     load_reg( R_EAX, Rn );
   836     LDC_t();
   837     RCL1_r32( R_EAX );
   838     store_reg( R_EAX, Rn );
   839     SETC_t();
   840 :}
   841 ROTCR Rn {:  
   842     load_reg( R_EAX, Rn );
   843     LDC_t();
   844     RCR1_r32( R_EAX );
   845     store_reg( R_EAX, Rn );
   846     SETC_t();
   847 :}
   848 ROTL Rn {:  
   849     load_reg( R_EAX, Rn );
   850     ROL1_r32( R_EAX );
   851     store_reg( R_EAX, Rn );
   852     SETC_t();
   853 :}
   854 ROTR Rn {:  
   855     load_reg( R_EAX, Rn );
   856     ROR1_r32( R_EAX );
   857     store_reg( R_EAX, Rn );
   858     SETC_t();
   859 :}
   860 SHAD Rm, Rn {:
   861     /* Annoyingly enough, not directly convertible */
   862     load_reg( R_EAX, Rn );
   863     load_reg( R_ECX, Rm );
   864     CMP_imm32_r32( 0, R_ECX );
   865     JGE_rel8(16, doshl);
   867     NEG_r32( R_ECX );      // 2
   868     AND_imm8_r8( 0x1F, R_CL ); // 3
   869     JE_rel8( 4, emptysar);     // 2
   870     SAR_r32_CL( R_EAX );       // 2
   871     JMP_rel8(10, end);          // 2
   873     JMP_TARGET(emptysar);
   874     SAR_imm8_r32(31, R_EAX );  // 3
   875     JMP_rel8(5, end2);
   877     JMP_TARGET(doshl);
   878     AND_imm8_r8( 0x1F, R_CL ); // 3
   879     SHL_r32_CL( R_EAX );       // 2
   880     JMP_TARGET(end);
   881     JMP_TARGET(end2);
   882     store_reg( R_EAX, Rn );
   883 :}
   884 SHLD Rm, Rn {:  
   885     load_reg( R_EAX, Rn );
   886     load_reg( R_ECX, Rm );
   887     CMP_imm32_r32( 0, R_ECX );
   888     JGE_rel8(15, doshl);
   890     NEG_r32( R_ECX );      // 2
   891     AND_imm8_r8( 0x1F, R_CL ); // 3
   892     JE_rel8( 4, emptyshr );
   893     SHR_r32_CL( R_EAX );       // 2
   894     JMP_rel8(9, end);          // 2
   896     JMP_TARGET(emptyshr);
   897     XOR_r32_r32( R_EAX, R_EAX );
   898     JMP_rel8(5, end2);
   900     JMP_TARGET(doshl);
   901     AND_imm8_r8( 0x1F, R_CL ); // 3
   902     SHL_r32_CL( R_EAX );       // 2
   903     JMP_TARGET(end);
   904     JMP_TARGET(end2);
   905     store_reg( R_EAX, Rn );
   906 :}
   907 SHAL Rn {: 
   908     load_reg( R_EAX, Rn );
   909     SHL1_r32( R_EAX );
   910     SETC_t();
   911     store_reg( R_EAX, Rn );
   912 :}
   913 SHAR Rn {:  
   914     load_reg( R_EAX, Rn );
   915     SAR1_r32( R_EAX );
   916     SETC_t();
   917     store_reg( R_EAX, Rn );
   918 :}
   919 SHLL Rn {:  
   920     load_reg( R_EAX, Rn );
   921     SHL1_r32( R_EAX );
   922     SETC_t();
   923     store_reg( R_EAX, Rn );
   924 :}
   925 SHLL2 Rn {:
   926     load_reg( R_EAX, Rn );
   927     SHL_imm8_r32( 2, R_EAX );
   928     store_reg( R_EAX, Rn );
   929 :}
   930 SHLL8 Rn {:  
   931     load_reg( R_EAX, Rn );
   932     SHL_imm8_r32( 8, R_EAX );
   933     store_reg( R_EAX, Rn );
   934 :}
   935 SHLL16 Rn {:  
   936     load_reg( R_EAX, Rn );
   937     SHL_imm8_r32( 16, R_EAX );
   938     store_reg( R_EAX, Rn );
   939 :}
   940 SHLR Rn {:  
   941     load_reg( R_EAX, Rn );
   942     SHR1_r32( R_EAX );
   943     SETC_t();
   944     store_reg( R_EAX, Rn );
   945 :}
   946 SHLR2 Rn {:  
   947     load_reg( R_EAX, Rn );
   948     SHR_imm8_r32( 2, R_EAX );
   949     store_reg( R_EAX, Rn );
   950 :}
   951 SHLR8 Rn {:  
   952     load_reg( R_EAX, Rn );
   953     SHR_imm8_r32( 8, R_EAX );
   954     store_reg( R_EAX, Rn );
   955 :}
   956 SHLR16 Rn {:  
   957     load_reg( R_EAX, Rn );
   958     SHR_imm8_r32( 16, R_EAX );
   959     store_reg( R_EAX, Rn );
   960 :}
   961 SUB Rm, Rn {:  
   962     load_reg( R_EAX, Rm );
   963     load_reg( R_ECX, Rn );
   964     SUB_r32_r32( R_EAX, R_ECX );
   965     store_reg( R_ECX, Rn );
   966 :}
   967 SUBC Rm, Rn {:  
   968     load_reg( R_EAX, Rm );
   969     load_reg( R_ECX, Rn );
   970     LDC_t();
   971     SBB_r32_r32( R_EAX, R_ECX );
   972     store_reg( R_ECX, Rn );
   973     SETC_t();
   974 :}
   975 SUBV Rm, Rn {:  
   976     load_reg( R_EAX, Rm );
   977     load_reg( R_ECX, Rn );
   978     SUB_r32_r32( R_EAX, R_ECX );
   979     store_reg( R_ECX, Rn );
   980     SETO_t();
   981 :}
   982 SWAP.B Rm, Rn {:  
   983     load_reg( R_EAX, Rm );
   984     XCHG_r8_r8( R_AL, R_AH );
   985     store_reg( R_EAX, Rn );
   986 :}
   987 SWAP.W Rm, Rn {:  
   988     load_reg( R_EAX, Rm );
   989     MOV_r32_r32( R_EAX, R_ECX );
   990     SHL_imm8_r32( 16, R_ECX );
   991     SHR_imm8_r32( 16, R_EAX );
   992     OR_r32_r32( R_EAX, R_ECX );
   993     store_reg( R_ECX, Rn );
   994 :}
   995 TAS.B @Rn {:  
   996     load_reg( R_ECX, Rn );
   997     MEM_READ_BYTE( R_ECX, R_EAX );
   998     TEST_r8_r8( R_AL, R_AL );
   999     SETE_t();
  1000     OR_imm8_r8( 0x80, R_AL );
  1001     load_reg( R_ECX, Rn );
  1002     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1003 :}
  1004 TST Rm, Rn {:  
  1005     load_reg( R_EAX, Rm );
  1006     load_reg( R_ECX, Rn );
  1007     TEST_r32_r32( R_EAX, R_ECX );
  1008     SETE_t();
  1009 :}
  1010 TST #imm, R0 {:  
  1011     load_reg( R_EAX, 0 );
  1012     TEST_imm32_r32( imm, R_EAX );
  1013     SETE_t();
  1014 :}
  1015 TST.B #imm, @(R0, GBR) {:  
  1016     load_reg( R_EAX, 0);
  1017     load_reg( R_ECX, R_GBR);
  1018     ADD_r32_r32( R_EAX, R_ECX );
  1019     MEM_READ_BYTE( R_ECX, R_EAX );
  1020     TEST_imm8_r8( imm, R_AL );
  1021     SETE_t();
  1022 :}
  1023 XOR Rm, Rn {:  
  1024     load_reg( R_EAX, Rm );
  1025     load_reg( R_ECX, Rn );
  1026     XOR_r32_r32( R_EAX, R_ECX );
  1027     store_reg( R_ECX, Rn );
  1028 :}
  1029 XOR #imm, R0 {:  
  1030     load_reg( R_EAX, 0 );
  1031     XOR_imm32_r32( imm, R_EAX );
  1032     store_reg( R_EAX, 0 );
  1033 :}
  1034 XOR.B #imm, @(R0, GBR) {:  
  1035     load_reg( R_EAX, 0 );
  1036     load_spreg( R_ECX, R_GBR );
  1037     ADD_r32_r32( R_EAX, R_ECX );
  1038     PUSH_r32(R_ECX);
  1039     call_func0(sh4_read_byte);
  1040     POP_r32(R_ECX);
  1041     XOR_imm32_r32( imm, R_EAX );
  1042     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1043 :}
  1044 XTRCT Rm, Rn {:
  1045     load_reg( R_EAX, Rm );
  1046     load_reg( R_ECX, Rn );
  1047     SHL_imm8_r32( 16, R_EAX );
  1048     SHR_imm8_r32( 16, R_ECX );
  1049     OR_r32_r32( R_EAX, R_ECX );
  1050     store_reg( R_ECX, Rn );
  1051 :}
  1053 /* Data move instructions */
  1054 MOV Rm, Rn {:  
  1055     load_reg( R_EAX, Rm );
  1056     store_reg( R_EAX, Rn );
  1057 :}
  1058 MOV #imm, Rn {:  
  1059     load_imm32( R_EAX, imm );
  1060     store_reg( R_EAX, Rn );
  1061 :}
  1062 MOV.B Rm, @Rn {:  
  1063     load_reg( R_EAX, Rm );
  1064     load_reg( R_ECX, Rn );
  1065     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1066 :}
  1067 MOV.B Rm, @-Rn {:  
  1068     load_reg( R_EAX, Rm );
  1069     load_reg( R_ECX, Rn );
  1070     ADD_imm8s_r32( -1, R_ECX );
  1071     store_reg( R_ECX, Rn );
  1072     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1073 :}
  1074 MOV.B Rm, @(R0, Rn) {:  
  1075     load_reg( R_EAX, 0 );
  1076     load_reg( R_ECX, Rn );
  1077     ADD_r32_r32( R_EAX, R_ECX );
  1078     load_reg( R_EAX, Rm );
  1079     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1080 :}
  1081 MOV.B R0, @(disp, GBR) {:  
  1082     load_reg( R_EAX, 0 );
  1083     load_spreg( R_ECX, R_GBR );
  1084     ADD_imm32_r32( disp, R_ECX );
  1085     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1086 :}
  1087 MOV.B R0, @(disp, Rn) {:  
  1088     load_reg( R_EAX, 0 );
  1089     load_reg( R_ECX, Rn );
  1090     ADD_imm32_r32( disp, R_ECX );
  1091     MEM_WRITE_BYTE( R_ECX, R_EAX );
  1092 :}
  1093 MOV.B @Rm, Rn {:  
  1094     load_reg( R_ECX, Rm );
  1095     MEM_READ_BYTE( R_ECX, R_EAX );
  1096     store_reg( R_EAX, Rn );
  1097 :}
  1098 MOV.B @Rm+, Rn {:  
  1099     load_reg( R_ECX, Rm );
  1100     MOV_r32_r32( R_ECX, R_EAX );
  1101     ADD_imm8s_r32( 1, R_EAX );
  1102     store_reg( R_EAX, Rm );
  1103     MEM_READ_BYTE( R_ECX, R_EAX );
  1104     store_reg( R_EAX, Rn );
  1105 :}
  1106 MOV.B @(R0, Rm), Rn {:  
  1107     load_reg( R_EAX, 0 );
  1108     load_reg( R_ECX, Rm );
  1109     ADD_r32_r32( R_EAX, R_ECX );
  1110     MEM_READ_BYTE( R_ECX, R_EAX );
  1111     store_reg( R_EAX, Rn );
  1112 :}
  1113 MOV.B @(disp, GBR), R0 {:  
  1114     load_spreg( R_ECX, R_GBR );
  1115     ADD_imm32_r32( disp, R_ECX );
  1116     MEM_READ_BYTE( R_ECX, R_EAX );
  1117     store_reg( R_EAX, 0 );
  1118 :}
  1119 MOV.B @(disp, Rm), R0 {:  
  1120     load_reg( R_ECX, Rm );
  1121     ADD_imm32_r32( disp, R_ECX );
  1122     MEM_READ_BYTE( R_ECX, R_EAX );
  1123     store_reg( R_EAX, 0 );
  1124 :}
  1125 MOV.L Rm, @Rn {:
  1126     load_reg( R_EAX, Rm );
  1127     load_reg( R_ECX, Rn );
  1128     check_walign32(R_ECX);
  1129     MEM_WRITE_LONG( R_ECX, R_EAX );
  1130 :}
  1131 MOV.L Rm, @-Rn {:  
  1132     load_reg( R_EAX, Rm );
  1133     load_reg( R_ECX, Rn );
  1134     check_walign32( R_ECX );
  1135     ADD_imm8s_r32( -4, R_ECX );
  1136     store_reg( R_ECX, Rn );
  1137     MEM_WRITE_LONG( R_ECX, R_EAX );
  1138 :}
  1139 MOV.L Rm, @(R0, Rn) {:  
  1140     load_reg( R_EAX, 0 );
  1141     load_reg( R_ECX, Rn );
  1142     ADD_r32_r32( R_EAX, R_ECX );
  1143     check_walign32( R_ECX );
  1144     load_reg( R_EAX, Rm );
  1145     MEM_WRITE_LONG( R_ECX, R_EAX );
  1146 :}
  1147 MOV.L R0, @(disp, GBR) {:  
  1148     load_spreg( R_ECX, R_GBR );
  1149     load_reg( R_EAX, 0 );
  1150     ADD_imm32_r32( disp, R_ECX );
  1151     check_walign32( R_ECX );
  1152     MEM_WRITE_LONG( R_ECX, R_EAX );
  1153 :}
  1154 MOV.L Rm, @(disp, Rn) {:  
  1155     load_reg( R_ECX, Rn );
  1156     load_reg( R_EAX, Rm );
  1157     ADD_imm32_r32( disp, R_ECX );
  1158     check_walign32( R_ECX );
  1159     MEM_WRITE_LONG( R_ECX, R_EAX );
  1160 :}
  1161 MOV.L @Rm, Rn {:  
  1162     load_reg( R_ECX, Rm );
  1163     check_ralign32( R_ECX );
  1164     MEM_READ_LONG( R_ECX, R_EAX );
  1165     store_reg( R_EAX, Rn );
  1166 :}
  1167 MOV.L @Rm+, Rn {:  
  1168     load_reg( R_EAX, Rm );
  1169     check_ralign32( R_EAX );
  1170     MOV_r32_r32( R_EAX, R_ECX );
  1171     ADD_imm8s_r32( 4, R_EAX );
  1172     store_reg( R_EAX, Rm );
  1173     MEM_READ_LONG( R_ECX, R_EAX );
  1174     store_reg( R_EAX, Rn );
  1175 :}
  1176 MOV.L @(R0, Rm), Rn {:  
  1177     load_reg( R_EAX, 0 );
  1178     load_reg( R_ECX, Rm );
  1179     ADD_r32_r32( R_EAX, R_ECX );
  1180     check_ralign32( R_ECX );
  1181     MEM_READ_LONG( R_ECX, R_EAX );
  1182     store_reg( R_EAX, Rn );
  1183 :}
  1184 MOV.L @(disp, GBR), R0 {:
  1185     load_spreg( R_ECX, R_GBR );
  1186     ADD_imm32_r32( disp, R_ECX );
  1187     check_ralign32( R_ECX );
  1188     MEM_READ_LONG( R_ECX, R_EAX );
  1189     store_reg( R_EAX, 0 );
  1190 :}
  1191 MOV.L @(disp, PC), Rn {:  
  1192     if( sh4_x86.in_delay_slot ) {
  1193 	SLOTILLEGAL();
  1194     } else {
  1195 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1196 	char *ptr = mem_get_region(target);
  1197 	if( ptr != NULL ) {
  1198 	    MOV_moff32_EAX( (uint32_t)ptr );
  1199 	} else {
  1200 	    load_imm32( R_ECX, target );
  1201 	    MEM_READ_LONG( R_ECX, R_EAX );
  1202 	}
  1203 	store_reg( R_EAX, Rn );
  1204     }
  1205 :}
  1206 MOV.L @(disp, Rm), Rn {:  
  1207     load_reg( R_ECX, Rm );
  1208     ADD_imm8s_r32( disp, R_ECX );
  1209     check_ralign32( R_ECX );
  1210     MEM_READ_LONG( R_ECX, R_EAX );
  1211     store_reg( R_EAX, Rn );
  1212 :}
  1213 MOV.W Rm, @Rn {:  
  1214     load_reg( R_ECX, Rn );
  1215     check_walign16( R_ECX );
  1216     load_reg( R_EAX, Rm );
  1217     MEM_WRITE_WORD( R_ECX, R_EAX );
  1218 :}
  1219 MOV.W Rm, @-Rn {:  
  1220     load_reg( R_ECX, Rn );
  1221     check_walign16( R_ECX );
  1222     load_reg( R_EAX, Rm );
  1223     ADD_imm8s_r32( -2, R_ECX );
  1224     store_reg( R_ECX, Rn );
  1225     MEM_WRITE_WORD( R_ECX, R_EAX );
  1226 :}
  1227 MOV.W Rm, @(R0, Rn) {:  
  1228     load_reg( R_EAX, 0 );
  1229     load_reg( R_ECX, Rn );
  1230     ADD_r32_r32( R_EAX, R_ECX );
  1231     check_walign16( R_ECX );
  1232     load_reg( R_EAX, Rm );
  1233     MEM_WRITE_WORD( R_ECX, R_EAX );
  1234 :}
  1235 MOV.W R0, @(disp, GBR) {:  
  1236     load_spreg( R_ECX, R_GBR );
  1237     load_reg( R_EAX, 0 );
  1238     ADD_imm32_r32( disp, R_ECX );
  1239     check_walign16( R_ECX );
  1240     MEM_WRITE_WORD( R_ECX, R_EAX );
  1241 :}
  1242 MOV.W R0, @(disp, Rn) {:  
  1243     load_reg( R_ECX, Rn );
  1244     load_reg( R_EAX, 0 );
  1245     ADD_imm32_r32( disp, R_ECX );
  1246     check_walign16( R_ECX );
  1247     MEM_WRITE_WORD( R_ECX, R_EAX );
  1248 :}
  1249 MOV.W @Rm, Rn {:  
  1250     load_reg( R_ECX, Rm );
  1251     check_ralign16( R_ECX );
  1252     MEM_READ_WORD( R_ECX, R_EAX );
  1253     store_reg( R_EAX, Rn );
  1254 :}
  1255 MOV.W @Rm+, Rn {:  
  1256     load_reg( R_EAX, Rm );
  1257     check_ralign16( R_EAX );
  1258     MOV_r32_r32( R_EAX, R_ECX );
  1259     ADD_imm8s_r32( 2, R_EAX );
  1260     store_reg( R_EAX, Rm );
  1261     MEM_READ_WORD( R_ECX, R_EAX );
  1262     store_reg( R_EAX, Rn );
  1263 :}
  1264 MOV.W @(R0, Rm), Rn {:  
  1265     load_reg( R_EAX, 0 );
  1266     load_reg( R_ECX, Rm );
  1267     ADD_r32_r32( R_EAX, R_ECX );
  1268     check_ralign16( R_ECX );
  1269     MEM_READ_WORD( R_ECX, R_EAX );
  1270     store_reg( R_EAX, Rn );
  1271 :}
  1272 MOV.W @(disp, GBR), R0 {:  
  1273     load_spreg( R_ECX, R_GBR );
  1274     ADD_imm32_r32( disp, R_ECX );
  1275     check_ralign16( R_ECX );
  1276     MEM_READ_WORD( R_ECX, R_EAX );
  1277     store_reg( R_EAX, 0 );
  1278 :}
  1279 MOV.W @(disp, PC), Rn {:  
  1280     if( sh4_x86.in_delay_slot ) {
  1281 	SLOTILLEGAL();
  1282     } else {
  1283 	load_imm32( R_ECX, pc + disp + 4 );
  1284 	MEM_READ_WORD( R_ECX, R_EAX );
  1285 	store_reg( R_EAX, Rn );
  1286     }
  1287 :}
  1288 MOV.W @(disp, Rm), R0 {:  
  1289     load_reg( R_ECX, Rm );
  1290     ADD_imm32_r32( disp, R_ECX );
  1291     check_ralign16( R_ECX );
  1292     MEM_READ_WORD( R_ECX, R_EAX );
  1293     store_reg( R_EAX, 0 );
  1294 :}
  1295 MOVA @(disp, PC), R0 {:  
  1296     if( sh4_x86.in_delay_slot ) {
  1297 	SLOTILLEGAL();
  1298     } else {
  1299 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
  1300 	store_reg( R_ECX, 0 );
  1301     }
  1302 :}
  1303 MOVCA.L R0, @Rn {:  
  1304     load_reg( R_EAX, 0 );
  1305     load_reg( R_ECX, Rn );
  1306     check_walign32( R_ECX );
  1307     MEM_WRITE_LONG( R_ECX, R_EAX );
  1308 :}
  1310 /* Control transfer instructions */
  1311 BF disp {:
  1312     if( sh4_x86.in_delay_slot ) {
  1313 	SLOTILLEGAL();
  1314     } else {
  1315 	load_imm32( R_EDI, pc + 2 );
  1316 	CMP_imm8s_sh4r( 0, R_T );
  1317 	JNE_rel8( 5, nottaken );
  1318 	load_imm32( R_EDI, disp + pc + 4 );
  1319 	JMP_TARGET(nottaken);
  1320 	INC_r32(R_ESI);
  1321 	return 1;
  1322     }
  1323 :}
  1324 BF/S disp {:
  1325     if( sh4_x86.in_delay_slot ) {
  1326 	SLOTILLEGAL();
  1327     } else {
  1328 	load_imm32( R_EDI, pc + 4 );
  1329 	CMP_imm8s_sh4r( 0, R_T );
  1330 	JNE_rel8( 5, nottaken );
  1331 	load_imm32( R_EDI, disp + pc + 4 );
  1332 	JMP_TARGET(nottaken);
  1333 	sh4_x86.in_delay_slot = TRUE;
  1334 	return 0;
  1335     }
  1336 :}
  1337 BRA disp {:  
  1338     if( sh4_x86.in_delay_slot ) {
  1339 	SLOTILLEGAL();
  1340     } else {
  1341 	load_imm32( R_EDI, disp + pc + 4 );
  1342 	sh4_x86.in_delay_slot = TRUE;
  1343 	return 0;
  1344     }
  1345 :}
  1346 BRAF Rn {:  
  1347     if( sh4_x86.in_delay_slot ) {
  1348 	SLOTILLEGAL();
  1349     } else {
  1350 	load_reg( R_EDI, Rn );
  1351 	ADD_imm32_r32( pc + 4, R_EDI );
  1352 	sh4_x86.in_delay_slot = TRUE;
  1353 	return 0;
  1354     }
  1355 :}
  1356 BSR disp {:  
  1357     if( sh4_x86.in_delay_slot ) {
  1358 	SLOTILLEGAL();
  1359     } else {
  1360 	load_imm32( R_EAX, pc + 4 );
  1361 	store_spreg( R_EAX, R_PR );
  1362 	load_imm32( R_EDI, disp + pc + 4 );
  1363 	sh4_x86.in_delay_slot = TRUE;
  1364 	return 0;
  1365     }
  1366 :}
  1367 BSRF Rn {:  
  1368     if( sh4_x86.in_delay_slot ) {
  1369 	SLOTILLEGAL();
  1370     } else {
  1371 	load_imm32( R_EAX, pc + 4 );
  1372 	store_spreg( R_EAX, R_PR );
  1373 	load_reg( R_EDI, Rn );
  1374 	ADD_r32_r32( R_EAX, R_EDI );
  1375 	sh4_x86.in_delay_slot = TRUE;
  1376 	return 0;
  1377     }
  1378 :}
  1379 BT disp {:
  1380     if( sh4_x86.in_delay_slot ) {
  1381 	SLOTILLEGAL();
  1382     } else {
  1383 	load_imm32( R_EDI, pc + 2 );
  1384 	CMP_imm8s_sh4r( 0, R_T );
  1385 	JE_rel8( 5, nottaken );
  1386 	load_imm32( R_EDI, disp + pc + 4 );
  1387 	JMP_TARGET(nottaken);
  1388 	INC_r32(R_ESI);
  1389 	return 1;
  1390     }
  1391 :}
  1392 BT/S disp {:
  1393     if( sh4_x86.in_delay_slot ) {
  1394 	SLOTILLEGAL();
  1395     } else {
  1396 	load_imm32( R_EDI, pc + 4 );
  1397 	CMP_imm8s_sh4r( 0, R_T );
  1398 	JE_rel8( 5, nottaken );
  1399 	load_imm32( R_EDI, disp + pc + 4 );
  1400 	JMP_TARGET(nottaken);
  1401 	sh4_x86.in_delay_slot = TRUE;
  1402 	return 0;
  1403     }
  1404 :}
  1405 JMP @Rn {:  
  1406     if( sh4_x86.in_delay_slot ) {
  1407 	SLOTILLEGAL();
  1408     } else {
  1409 	load_reg( R_EDI, Rn );
  1410 	sh4_x86.in_delay_slot = TRUE;
  1411 	return 0;
  1412     }
  1413 :}
  1414 JSR @Rn {:  
  1415     if( sh4_x86.in_delay_slot ) {
  1416 	SLOTILLEGAL();
  1417     } else {
  1418 	load_imm32( R_EAX, pc + 4 );
  1419 	store_spreg( R_EAX, R_PR );
  1420 	load_reg( R_EDI, Rn );
  1421 	sh4_x86.in_delay_slot = TRUE;
  1422 	return 0;
  1423     }
  1424 :}
  1425 RTE {:  
  1426     check_priv();
  1427     if( sh4_x86.in_delay_slot ) {
  1428 	SLOTILLEGAL();
  1429     } else {
  1430 	load_spreg( R_EDI, R_SPC );
  1431 	load_spreg( R_EAX, R_SSR );
  1432 	call_func1( sh4_write_sr, R_EAX );
  1433 	sh4_x86.in_delay_slot = TRUE;
  1434 	sh4_x86.priv_checked = FALSE;
  1435 	sh4_x86.fpuen_checked = FALSE;
  1436 	return 0;
  1437     }
  1438 :}
  1439 RTS {:  
  1440     if( sh4_x86.in_delay_slot ) {
  1441 	SLOTILLEGAL();
  1442     } else {
  1443 	load_spreg( R_EDI, R_PR );
  1444 	sh4_x86.in_delay_slot = TRUE;
  1445 	return 0;
  1446     }
  1447 :}
  1448 TRAPA #imm {:  
  1449     if( sh4_x86.in_delay_slot ) {
  1450 	SLOTILLEGAL();
  1451     } else {
  1452 	PUSH_imm32( imm );
  1453 	call_func0( sh4_raise_trap );
  1454 	ADD_imm8s_r32( 4, R_ESP );
  1455     }
  1456 :}
  1457 UNDEF {:  
  1458     if( sh4_x86.in_delay_slot ) {
  1459 	SLOTILLEGAL();
  1460     } else {
  1461 	JMP_exit(EXIT_ILLEGAL);
  1462 	return 1;
  1463     }
  1464 :}
  1466 CLRMAC {:  
  1467     XOR_r32_r32(R_EAX, R_EAX);
  1468     store_spreg( R_EAX, R_MACL );
  1469     store_spreg( R_EAX, R_MACH );
  1470 :}
  1471 CLRS {:
  1472     CLC();
  1473     SETC_sh4r(R_S);
  1474 :}
  1475 CLRT {:  
  1476     CLC();
  1477     SETC_t();
  1478 :}
  1479 SETS {:  
  1480     STC();
  1481     SETC_sh4r(R_S);
  1482 :}
  1483 SETT {:  
  1484     STC();
  1485     SETC_t();
  1486 :}
  1488 /* Floating point moves */
  1489 FMOV FRm, FRn {:  
  1490     /* As horrible as this looks, it's actually covering 5 separate cases:
  1491      * 1. 32-bit fr-to-fr (PR=0)
  1492      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1493      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1494      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1495      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1496      */
  1497     check_fpuen();
  1498     load_spreg( R_ECX, R_FPSCR );
  1499     load_fr_bank( R_EDX );
  1500     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1501     JNE_rel8(8, doublesize);
  1502     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1503     store_fr( R_EDX, R_EAX, FRn );
  1504     if( FRm&1 ) {
  1505 	JMP_rel8(24, end);
  1506 	JMP_TARGET(doublesize);
  1507 	load_xf_bank( R_ECX ); 
  1508 	load_fr( R_ECX, R_EAX, FRm-1 );
  1509 	if( FRn&1 ) {
  1510 	    load_fr( R_ECX, R_EDX, FRm );
  1511 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1512 	    store_fr( R_ECX, R_EDX, FRn );
  1513 	} else /* FRn&1 == 0 */ {
  1514 	    load_fr( R_ECX, R_ECX, FRm );
  1515 	    store_fr( R_EDX, R_EAX, FRn );
  1516 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1517 	}
  1518 	JMP_TARGET(end);
  1519     } else /* FRm&1 == 0 */ {
  1520 	if( FRn&1 ) {
  1521 	    JMP_rel8(24, end);
  1522 	    load_xf_bank( R_ECX );
  1523 	    load_fr( R_EDX, R_EAX, FRm );
  1524 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1525 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1526 	    store_fr( R_ECX, R_EDX, FRn );
  1527 	    JMP_TARGET(end);
  1528 	} else /* FRn&1 == 0 */ {
  1529 	    JMP_rel8(12, end);
  1530 	    load_fr( R_EDX, R_EAX, FRm );
  1531 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1532 	    store_fr( R_EDX, R_EAX, FRn );
  1533 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1534 	    JMP_TARGET(end);
  1535 	}
  1536     }
  1537 :}
  1538 FMOV FRm, @Rn {:  
  1539     check_fpuen();
  1540     load_reg( R_EDX, Rn );
  1541     check_walign32( R_EDX );
  1542     load_spreg( R_ECX, R_FPSCR );
  1543     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1544     JNE_rel8(20, doublesize);
  1545     load_fr_bank( R_ECX );
  1546     load_fr( R_ECX, R_EAX, FRm );
  1547     MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
  1548     if( FRm&1 ) {
  1549 	JMP_rel8( 48, end );
  1550 	JMP_TARGET(doublesize);
  1551 	load_xf_bank( R_ECX );
  1552 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1553 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1554 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1555 	JMP_TARGET(end);
  1556     } else {
  1557 	JMP_rel8( 39, end );
  1558 	JMP_TARGET(doublesize);
  1559 	load_fr_bank( R_ECX );
  1560 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1561 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1562 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1563 	JMP_TARGET(end);
  1564     }
  1565 :}
  1566 FMOV @Rm, FRn {:  
  1567     check_fpuen();
  1568     load_reg( R_EDX, Rm );
  1569     check_ralign32( R_EDX );
  1570     load_spreg( R_ECX, R_FPSCR );
  1571     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1572     JNE_rel8(19, doublesize);
  1573     MEM_READ_LONG( R_EDX, R_EAX );
  1574     load_fr_bank( R_ECX );
  1575     store_fr( R_ECX, R_EAX, FRn );
  1576     if( FRn&1 ) {
  1577 	JMP_rel8(48, end);
  1578 	JMP_TARGET(doublesize);
  1579 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1580 	load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
  1581 	load_xf_bank( R_ECX );
  1582 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1583 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1584 	JMP_TARGET(end);
  1585     } else {
  1586 	JMP_rel8(36, end);
  1587 	JMP_TARGET(doublesize);
  1588 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1589 	load_fr_bank( R_ECX );
  1590 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1591 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1592 	JMP_TARGET(end);
  1593     }
  1594 :}
  1595 FMOV FRm, @-Rn {:  
  1596     check_fpuen();
  1597     load_reg( R_EDX, Rn );
  1598     check_walign32( R_EDX );
  1599     load_spreg( R_ECX, R_FPSCR );
  1600     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1601     JNE_rel8(26, doublesize);
  1602     load_fr_bank( R_ECX );
  1603     load_fr( R_ECX, R_EAX, FRm );
  1604     ADD_imm8s_r32(-4,R_EDX);
  1605     store_reg( R_EDX, Rn );
  1606     MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
  1607     if( FRm&1 ) {
  1608 	JMP_rel8( 54, end );
  1609 	JMP_TARGET(doublesize);
  1610 	load_xf_bank( R_ECX );
  1611 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1612 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1613 	ADD_imm8s_r32(-8,R_EDX);
  1614 	store_reg( R_EDX, Rn );
  1615 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1616 	JMP_TARGET(end);
  1617     } else {
  1618 	JMP_rel8( 45, end );
  1619 	JMP_TARGET(doublesize);
  1620 	load_fr_bank( R_ECX );
  1621 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1622 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1623 	ADD_imm8s_r32(-8,R_EDX);
  1624 	store_reg( R_EDX, Rn );
  1625 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1626 	JMP_TARGET(end);
  1627     }
  1628 :}
  1629 FMOV @Rm+, FRn {:  
  1630     check_fpuen();
  1631     load_reg( R_EDX, Rm );
  1632     check_ralign32( R_EDX );
  1633     MOV_r32_r32( R_EDX, R_EAX );
  1634     load_spreg( R_ECX, R_FPSCR );
  1635     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1636     JNE_rel8(25, doublesize);
  1637     ADD_imm8s_r32( 4, R_EAX );
  1638     store_reg( R_EAX, Rm );
  1639     MEM_READ_LONG( R_EDX, R_EAX );
  1640     load_fr_bank( R_ECX );
  1641     store_fr( R_ECX, R_EAX, FRn );
  1642     if( FRn&1 ) {
  1643 	JMP_rel8(54, end);
  1644 	JMP_TARGET(doublesize);
  1645 	ADD_imm8s_r32( 8, R_EAX );
  1646 	store_reg(R_EAX, Rm);
  1647 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1648 	load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
  1649 	load_xf_bank( R_ECX );
  1650 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1651 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1652 	JMP_TARGET(end);
  1653     } else {
  1654 	JMP_rel8(42, end);
  1655 	ADD_imm8s_r32( 8, R_EAX );
  1656 	store_reg(R_EAX, Rm);
  1657 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1658 	load_fr_bank( R_ECX );
  1659 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1660 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1661 	JMP_TARGET(end);
  1662     }
  1663 :}
  1664 FMOV FRm, @(R0, Rn) {:  
  1665     check_fpuen();
  1666     load_reg( R_EDX, Rn );
  1667     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
  1668     check_walign32( R_EDX );
  1669     load_spreg( R_ECX, R_FPSCR );
  1670     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1671     JNE_rel8(20, doublesize);
  1672     load_fr_bank( R_ECX );
  1673     load_fr( R_ECX, R_EAX, FRm );
  1674     MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
  1675     if( FRm&1 ) {
  1676 	JMP_rel8( 48, end );
  1677 	JMP_TARGET(doublesize);
  1678 	load_xf_bank( R_ECX );
  1679 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1680 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1681 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1682 	JMP_TARGET(end);
  1683     } else {
  1684 	JMP_rel8( 39, end );
  1685 	JMP_TARGET(doublesize);
  1686 	load_fr_bank( R_ECX );
  1687 	load_fr( R_ECX, R_EAX, FRm&0x0E );
  1688 	load_fr( R_ECX, R_ECX, FRm|0x01 );
  1689 	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
  1690 	JMP_TARGET(end);
  1691     }
  1692 :}
  1693 FMOV @(R0, Rm), FRn {:  
  1694     check_fpuen();
  1695     load_reg( R_EDX, Rm );
  1696     ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
  1697     check_ralign32( R_EDX );
  1698     load_spreg( R_ECX, R_FPSCR );
  1699     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1700     JNE_rel8(19, doublesize);
  1701     MEM_READ_LONG( R_EDX, R_EAX );
  1702     load_fr_bank( R_ECX );
  1703     store_fr( R_ECX, R_EAX, FRn );
  1704     if( FRn&1 ) {
  1705 	JMP_rel8(48, end);
  1706 	JMP_TARGET(doublesize);
  1707 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1708 	load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
  1709 	load_xf_bank( R_ECX );
  1710 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1711 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1712 	JMP_TARGET(end);
  1713     } else {
  1714 	JMP_rel8(36, end);
  1715 	JMP_TARGET(doublesize);
  1716 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1717 	load_fr_bank( R_ECX );
  1718 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1719 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1720 	JMP_TARGET(end);
  1721     }
  1722 :}
  1723 FLDI0 FRn {:  /* IFF PR=0 */
  1724     check_fpuen();
  1725     load_spreg( R_ECX, R_FPSCR );
  1726     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1727     JNE_rel8(8, end);
  1728     XOR_r32_r32( R_EAX, R_EAX );
  1729     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1730     store_fr( R_ECX, R_EAX, FRn );
  1731     JMP_TARGET(end);
  1732 :}
  1733 FLDI1 FRn {:  /* IFF PR=0 */
  1734     check_fpuen();
  1735     load_spreg( R_ECX, R_FPSCR );
  1736     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1737     JNE_rel8(11, end);
  1738     load_imm32(R_EAX, 0x3F800000);
  1739     load_spreg( R_ECX, REG_OFFSET(fr_bank) );
  1740     store_fr( R_ECX, R_EAX, FRn );
  1741     JMP_TARGET(end);
  1742 :}
  1744 FLOAT FPUL, FRn {:  
  1745     check_fpuen();
  1746     load_spreg( R_ECX, R_FPSCR );
  1747     load_spreg(R_EDX, REG_OFFSET(fr_bank));
  1748     FILD_sh4r(R_FPUL);
  1749     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1750     JNE_rel8(5, doubleprec);
  1751     pop_fr( R_EDX, FRn );
  1752     JMP_rel8(3, end);
  1753     JMP_TARGET(doubleprec);
  1754     pop_dr( R_EDX, FRn );
  1755     JMP_TARGET(end);
  1756 :}
  1757 FTRC FRm, FPUL {:  
  1758     check_fpuen();
  1759     load_spreg( R_ECX, R_FPSCR );
  1760     load_fr_bank( R_EDX );
  1761     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1762     JNE_rel8(5, doubleprec);
  1763     push_fr( R_EDX, FRm );
  1764     JMP_rel8(3, doop);
  1765     JMP_TARGET(doubleprec);
  1766     push_dr( R_EDX, FRm );
  1767     JMP_TARGET( doop );
  1768     load_imm32( R_ECX, (uint32_t)&max_int );
  1769     FILD_r32ind( R_ECX );
  1770     FCOMIP_st(1);
  1771     JNA_rel8( 32, sat );
  1772     load_imm32( R_ECX, (uint32_t)&min_int );  // 5
  1773     FILD_r32ind( R_ECX );           // 2
  1774     FCOMIP_st(1);                   // 2
  1775     JAE_rel8( 21, sat2 );            // 2
  1776     load_imm32( R_EAX, (uint32_t)&save_fcw );
  1777     FNSTCW_r32ind( R_EAX );
  1778     load_imm32( R_EDX, (uint32_t)&trunc_fcw );
  1779     FLDCW_r32ind( R_EDX );
  1780     FISTP_sh4r(R_FPUL);             // 3
  1781     FLDCW_r32ind( R_EAX );
  1782     JMP_rel8( 9, end );             // 2
  1784     JMP_TARGET(sat);
  1785     JMP_TARGET(sat2);
  1786     MOV_r32ind_r32( R_ECX, R_ECX ); // 2
  1787     store_spreg( R_ECX, R_FPUL );
  1788     FPOP_st();
  1789     JMP_TARGET(end);
  1790 :}
  1791 FLDS FRm, FPUL {:  
  1792     check_fpuen();
  1793     load_fr_bank( R_ECX );
  1794     load_fr( R_ECX, R_EAX, FRm );
  1795     store_spreg( R_EAX, R_FPUL );
  1796 :}
  1797 FSTS FPUL, FRn {:  
  1798     check_fpuen();
  1799     load_fr_bank( R_ECX );
  1800     load_spreg( R_EAX, R_FPUL );
  1801     store_fr( R_ECX, R_EAX, FRn );
  1802 :}
  1803 FCNVDS FRm, FPUL {:  
  1804     check_fpuen();
  1805     load_spreg( R_ECX, R_FPSCR );
  1806     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1807     JE_rel8(9, end); // only when PR=1
  1808     load_fr_bank( R_ECX );
  1809     push_dr( R_ECX, FRm );
  1810     pop_fpul();
  1811     JMP_TARGET(end);
  1812 :}
  1813 FCNVSD FPUL, FRn {:  
  1814     check_fpuen();
  1815     load_spreg( R_ECX, R_FPSCR );
  1816     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1817     JE_rel8(9, end); // only when PR=1
  1818     load_fr_bank( R_ECX );
  1819     push_fpul();
  1820     pop_dr( R_ECX, FRn );
  1821     JMP_TARGET(end);
  1822 :}
  1824 /* Floating point instructions */
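        /* The arithmetic ops below share a pattern: test FPSCR.PR, fall
         * through to the single-precision x87 sequence, or JNE over it to the
         * double-precision sequence. The rel8 displacements are hand-counted
         * byte lengths of the skipped code (see the byte-count comments in
         * FTRC and FABS); JMP_TARGET marks the intended destination. */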
  1825 FABS FRn {:  
  1826     check_fpuen();
  1827     load_spreg( R_ECX, R_FPSCR );
  1828     load_fr_bank( R_EDX );
  1829     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1830     JNE_rel8(10, doubleprec);
  1831     push_fr(R_EDX, FRn); // 3
  1832     FABS_st0(); // 2
  1833     pop_fr( R_EDX, FRn); //3
  1834     JMP_rel8(8,end); // 2
  1835     JMP_TARGET(doubleprec);
  1836     push_dr(R_EDX, FRn);
  1837     FABS_st0();
  1838     pop_dr(R_EDX, FRn);
  1839     JMP_TARGET(end);
  1840 :}
  1841 FADD FRm, FRn {:  
  1842     check_fpuen();
  1843     load_spreg( R_ECX, R_FPSCR );
  1844     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1845     load_fr_bank( R_EDX );
  1846     JNE_rel8(13,doubleprec);
  1847     push_fr(R_EDX, FRm);
  1848     push_fr(R_EDX, FRn);
  1849     FADDP_st(1);
  1850     pop_fr(R_EDX, FRn);
  1851     JMP_rel8(11,end);
  1852     JMP_TARGET(doubleprec);
  1853     push_dr(R_EDX, FRm);
  1854     push_dr(R_EDX, FRn);
  1855     FADDP_st(1);
  1856     pop_dr(R_EDX, FRn);
  1857     JMP_TARGET(end);
  1858 :}
  1859 FDIV FRm, FRn {:  
  1860     check_fpuen();
  1861     load_spreg( R_ECX, R_FPSCR );
  1862     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1863     load_fr_bank( R_EDX );
  1864     JNE_rel8(13, doubleprec);
  1865     push_fr(R_EDX, FRn);
  1866     push_fr(R_EDX, FRm);
  1867     FDIVP_st(1);
  1868     pop_fr(R_EDX, FRn);
  1869     JMP_rel8(11, end);
  1870     JMP_TARGET(doubleprec);
  1871     push_dr(R_EDX, FRn);
  1872     push_dr(R_EDX, FRm);
  1873     FDIVP_st(1);
  1874     pop_dr(R_EDX, FRn);
  1875     JMP_TARGET(end);
  1876 :}
  1877 FMAC FR0, FRm, FRn {:  
  1878     check_fpuen();
  1879     load_spreg( R_ECX, R_FPSCR );
  1880     load_spreg( R_EDX, REG_OFFSET(fr_bank));
  1881     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1882     JNE_rel8(18, doubleprec);
  1883     push_fr( R_EDX, 0 );
  1884     push_fr( R_EDX, FRm );
  1885     FMULP_st(1);
  1886     push_fr( R_EDX, FRn );
  1887     FADDP_st(1);
  1888     pop_fr( R_EDX, FRn );
  1889     JMP_rel8(16, end);
  1890     JMP_TARGET(doubleprec);
  1891     push_dr( R_EDX, 0 );
  1892     push_dr( R_EDX, FRm );
  1893     FMULP_st(1);
  1894     push_dr( R_EDX, FRn );
  1895     FADDP_st(1);
  1896     pop_dr( R_EDX, FRn );
  1897     JMP_TARGET(end);
  1898 :}
  1900 FMUL FRm, FRn {:  
  1901     check_fpuen();
  1902     load_spreg( R_ECX, R_FPSCR );
  1903     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1904     load_fr_bank( R_EDX );
  1905     JNE_rel8(13, doubleprec);
  1906     push_fr(R_EDX, FRm);
  1907     push_fr(R_EDX, FRn);
  1908     FMULP_st(1);
  1909     pop_fr(R_EDX, FRn);
  1910     JMP_rel8(11, end);
  1911     JMP_TARGET(doubleprec);
  1912     push_dr(R_EDX, FRm);
  1913     push_dr(R_EDX, FRn);
  1914     FMULP_st(1);
  1915     pop_dr(R_EDX, FRn);
  1916     JMP_TARGET(end);
  1917 :}
  1918 FNEG FRn {:  
  1919     check_fpuen();
  1920     load_spreg( R_ECX, R_FPSCR );
  1921     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1922     load_fr_bank( R_EDX );
  1923     JNE_rel8(10, doubleprec);
  1924     push_fr(R_EDX, FRn);
  1925     FCHS_st0();
  1926     pop_fr(R_EDX, FRn);
  1927     JMP_rel8(8, end);
  1928     JMP_TARGET(doubleprec);
  1929     push_dr(R_EDX, FRn);
  1930     FCHS_st0();
  1931     pop_dr(R_EDX, FRn);
  1932     JMP_TARGET(end);
  1933 :}
  1934 FSRRA FRn {:  
  1935     check_fpuen();
  1936     load_spreg( R_ECX, R_FPSCR );
  1937     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1938     load_fr_bank( R_EDX );
  1939     JNE_rel8(12, end); // PR=0 only
  1940     FLD1_st0();
  1941     push_fr(R_EDX, FRn);
  1942     FSQRT_st0();
  1943     FDIVP_st(1);
  1944     pop_fr(R_EDX, FRn);
  1945     JMP_TARGET(end);
  1946 :}
  1947 FSQRT FRn {:  
  1948     check_fpuen();
  1949     load_spreg( R_ECX, R_FPSCR );
  1950     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1951     load_fr_bank( R_EDX );
  1952     JNE_rel8(10, doubleprec);
  1953     push_fr(R_EDX, FRn);
  1954     FSQRT_st0();
  1955     pop_fr(R_EDX, FRn);
  1956     JMP_rel8(8, end);
  1957     JMP_TARGET(doubleprec);
  1958     push_dr(R_EDX, FRn);
  1959     FSQRT_st0();
  1960     pop_dr(R_EDX, FRn);
  1961     JMP_TARGET(end);
  1962 :}
  1963 FSUB FRm, FRn {:  
  1964     check_fpuen();
  1965     load_spreg( R_ECX, R_FPSCR );
  1966     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1967     load_fr_bank( R_EDX );
  1968     JNE_rel8(13, doubleprec);
  1969     push_fr(R_EDX, FRn);
  1970     push_fr(R_EDX, FRm);
  1971     FSUBP_st(1);
  1972     pop_fr(R_EDX, FRn);
  1973     JMP_rel8(11, end);
  1974     JMP_TARGET(doubleprec);
  1975     push_dr(R_EDX, FRn);
  1976     push_dr(R_EDX, FRm);
  1977     FSUBP_st(1);
  1978     pop_dr(R_EDX, FRn);
  1979     JMP_TARGET(end);
  1980 :}
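        /* FCMP/EQ and FCMP/GT push FRm then FRn (so FRn ends up in st(0)),
         * FCOMIP against st(1), and set the T flag with SETE/SETA before
         * popping the remaining operand. */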
  1982 FCMP/EQ FRm, FRn {:  
  1983     check_fpuen();
  1984     load_spreg( R_ECX, R_FPSCR );
  1985     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1986     load_fr_bank( R_EDX );
  1987     JNE_rel8(8, doubleprec);
  1988     push_fr(R_EDX, FRm);
  1989     push_fr(R_EDX, FRn);
  1990     JMP_rel8(6, end);
  1991     JMP_TARGET(doubleprec);
  1992     push_dr(R_EDX, FRm);
  1993     push_dr(R_EDX, FRn);
  1994     JMP_TARGET(end);
  1995     FCOMIP_st(1);
  1996     SETE_t();
  1997     FPOP_st();
  1998 :}
  1999 FCMP/GT FRm, FRn {:  
  2000     check_fpuen();
  2001     load_spreg( R_ECX, R_FPSCR );
  2002     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2003     load_fr_bank( R_EDX );
  2004     JNE_rel8(8, doubleprec);
  2005     push_fr(R_EDX, FRm);
  2006     push_fr(R_EDX, FRn);
  2007     JMP_rel8(6, end);
  2008     JMP_TARGET(doubleprec);
  2009     push_dr(R_EDX, FRm);
  2010     push_dr(R_EDX, FRn);
  2011     JMP_TARGET(end);
  2012     FCOMIP_st(1);
  2013     SETA_t();
  2014     FPOP_st();
  2015 :}
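        /* FSCA (PR=0 only): passes FPUL and a pointer to the even-aligned
         * FRn pair to the sh4_fsca helper, which writes the sine/cosine
         * results. */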
  2017 FSCA FPUL, FRn {:  
  2018     check_fpuen();
  2019     load_spreg( R_ECX, R_FPSCR );
  2020     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2021     JNE_rel8( 21, doubleprec );
  2022     load_fr_bank( R_ECX );
  2023     ADD_imm8s_r32( (FRn&0x0E)<<2, R_ECX );
  2024     load_spreg( R_EDX, R_FPUL );
  2025     call_func2( sh4_fsca, R_EDX, R_ECX );
  2026     JMP_TARGET(doubleprec);
  2027 :}
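        /* FIPR (PR=0 only): inner product of FVm and FVn, accumulated on the
         * x87 stack and stored into the last element of FVn. */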
  2028 FIPR FVm, FVn {:  
  2029     check_fpuen();
  2030     load_spreg( R_ECX, R_FPSCR );
  2031     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2032     JNE_rel8(44, doubleprec);
  2034     load_fr_bank( R_ECX );
  2035     push_fr( R_ECX, FVm<<2 );
  2036     push_fr( R_ECX, FVn<<2 );
  2037     FMULP_st(1);
  2038     push_fr( R_ECX, (FVm<<2)+1);
  2039     push_fr( R_ECX, (FVn<<2)+1);
  2040     FMULP_st(1);
  2041     FADDP_st(1);
  2042     push_fr( R_ECX, (FVm<<2)+2);
  2043     push_fr( R_ECX, (FVn<<2)+2);
  2044     FMULP_st(1);
  2045     FADDP_st(1);
  2046     push_fr( R_ECX, (FVm<<2)+3);
  2047     push_fr( R_ECX, (FVn<<2)+3);
  2048     FMULP_st(1);
  2049     FADDP_st(1);
  2050     pop_fr( R_ECX, (FVn<<2)+3);
  2051     JMP_TARGET(doubleprec);
  2052 :}
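        /* FTRV (PR=0 only): calls the sh4_ftrv helper with pointers to the
         * FVn vector and the XMTRX bank. */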
  2053 FTRV XMTRX, FVn {:  
  2054     check_fpuen();
  2055     load_spreg( R_ECX, R_FPSCR );
  2056     TEST_imm32_r32( FPSCR_PR, R_ECX );
  2057     JNE_rel8( 30, doubleprec );
  2058     load_fr_bank( R_EDX );                 // 3
  2059     ADD_imm8s_r32( FVn<<4, R_EDX );        // 3
  2060     load_xf_bank( R_ECX );                 // 12
  2061     call_func2( sh4_ftrv, R_EDX, R_ECX );  // 12
  2062     JMP_TARGET(doubleprec);
  2063 :}
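        /* FRCHG/FSCHG toggle the FR and SZ bits of FPSCR; FRCHG also
         * refreshes the cached bank pointer via update_fr_bank. */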
  2065 FRCHG {:  
  2066     check_fpuen();
  2067     load_spreg( R_ECX, R_FPSCR );
  2068     XOR_imm32_r32( FPSCR_FR, R_ECX );
  2069     store_spreg( R_ECX, R_FPSCR );
  2070     update_fr_bank( R_ECX );
  2071 :}
  2072 FSCHG {:  
  2073     check_fpuen();
  2074     load_spreg( R_ECX, R_FPSCR );
  2075     XOR_imm32_r32( FPSCR_SZ, R_ECX );
  2076     store_spreg( R_ECX, R_FPSCR );
  2077 :}
  2079 /* Processor control instructions */
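        /* LDC/LDC.L targets other than GBR are privileged. Writes to SR go
         * through sh4_write_sr and invalidate the cached privilege/FPU-enable
         * checks (and are illegal in a delay slot). The @Rm+ forms read a
         * long from @Rm and post-increment Rm by 4. */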
  2080 LDC Rm, SR {:
  2081     if( sh4_x86.in_delay_slot ) {
  2082 	SLOTILLEGAL();
  2083     } else {
  2084 	check_priv();
  2085 	load_reg( R_EAX, Rm );
  2086 	call_func1( sh4_write_sr, R_EAX );
  2087 	sh4_x86.priv_checked = FALSE;
  2088 	sh4_x86.fpuen_checked = FALSE;
  2089     }
  2090 :}
  2091 LDC Rm, GBR {: 
  2092     load_reg( R_EAX, Rm );
  2093     store_spreg( R_EAX, R_GBR );
  2094 :}
  2095 LDC Rm, VBR {:  
  2096     check_priv();
  2097     load_reg( R_EAX, Rm );
  2098     store_spreg( R_EAX, R_VBR );
  2099 :}
  2100 LDC Rm, SSR {:  
  2101     check_priv();
  2102     load_reg( R_EAX, Rm );
  2103     store_spreg( R_EAX, R_SSR );
  2104 :}
  2105 LDC Rm, SGR {:  
  2106     check_priv();
  2107     load_reg( R_EAX, Rm );
  2108     store_spreg( R_EAX, R_SGR );
  2109 :}
  2110 LDC Rm, SPC {:  
  2111     check_priv();
  2112     load_reg( R_EAX, Rm );
  2113     store_spreg( R_EAX, R_SPC );
  2114 :}
  2115 LDC Rm, DBR {:  
  2116     check_priv();
  2117     load_reg( R_EAX, Rm );
  2118     store_spreg( R_EAX, R_DBR );
  2119 :}
  2120 LDC Rm, Rn_BANK {:  
  2121     check_priv();
  2122     load_reg( R_EAX, Rm );
  2123     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2124 :}
  2125 LDC.L @Rm+, GBR {:  
  2126     load_reg( R_EAX, Rm );
  2127     check_ralign32( R_EAX );
  2128     MOV_r32_r32( R_EAX, R_ECX );
  2129     ADD_imm8s_r32( 4, R_EAX );
  2130     store_reg( R_EAX, Rm );
  2131     MEM_READ_LONG( R_ECX, R_EAX );
  2132     store_spreg( R_EAX, R_GBR );
  2133 :}
  2134 LDC.L @Rm+, SR {:
  2135     if( sh4_x86.in_delay_slot ) {
  2136 	SLOTILLEGAL();
  2137     } else {
  2138 	check_priv();
  2139 	load_reg( R_EAX, Rm );
  2140 	check_ralign32( R_EAX );
  2141 	MOV_r32_r32( R_EAX, R_ECX );
  2142 	ADD_imm8s_r32( 4, R_EAX );
  2143 	store_reg( R_EAX, Rm );
  2144 	MEM_READ_LONG( R_ECX, R_EAX );
  2145 	call_func1( sh4_write_sr, R_EAX );
  2146 	sh4_x86.priv_checked = FALSE;
  2147 	sh4_x86.fpuen_checked = FALSE;
  2148     }
  2149 :}
  2150 LDC.L @Rm+, VBR {:  
  2151     check_priv();
  2152     load_reg( R_EAX, Rm );
  2153     check_ralign32( R_EAX );
  2154     MOV_r32_r32( R_EAX, R_ECX );
  2155     ADD_imm8s_r32( 4, R_EAX );
  2156     store_reg( R_EAX, Rm );
  2157     MEM_READ_LONG( R_ECX, R_EAX );
  2158     store_spreg( R_EAX, R_VBR );
  2159 :}
  2160 LDC.L @Rm+, SSR {:
  2161     check_priv();
  2162     load_reg( R_EAX, Rm );
  2163     MOV_r32_r32( R_EAX, R_ECX );
  2164     ADD_imm8s_r32( 4, R_EAX );
  2165     store_reg( R_EAX, Rm );
  2166     MEM_READ_LONG( R_ECX, R_EAX );
  2167     store_spreg( R_EAX, R_SSR );
  2168 :}
  2169 LDC.L @Rm+, SGR {:  
  2170     check_priv();
  2171     load_reg( R_EAX, Rm );
  2172     check_ralign32( R_EAX );
  2173     MOV_r32_r32( R_EAX, R_ECX );
  2174     ADD_imm8s_r32( 4, R_EAX );
  2175     store_reg( R_EAX, Rm );
  2176     MEM_READ_LONG( R_ECX, R_EAX );
  2177     store_spreg( R_EAX, R_SGR );
  2178 :}
  2179 LDC.L @Rm+, SPC {:  
  2180     check_priv();
  2181     load_reg( R_EAX, Rm );
  2182     check_ralign32( R_EAX );
  2183     MOV_r32_r32( R_EAX, R_ECX );
  2184     ADD_imm8s_r32( 4, R_EAX );
  2185     store_reg( R_EAX, Rm );
  2186     MEM_READ_LONG( R_ECX, R_EAX );
  2187     store_spreg( R_EAX, R_SPC );
  2188 :}
  2189 LDC.L @Rm+, DBR {:  
  2190     check_priv();
  2191     load_reg( R_EAX, Rm );
  2192     check_ralign32( R_EAX );
  2193     MOV_r32_r32( R_EAX, R_ECX );
  2194     ADD_imm8s_r32( 4, R_EAX );
  2195     store_reg( R_EAX, Rm );
  2196     MEM_READ_LONG( R_ECX, R_EAX );
  2197     store_spreg( R_EAX, R_DBR );
  2198 :}
  2199 LDC.L @Rm+, Rn_BANK {:  
  2200     check_priv();
  2201     load_reg( R_EAX, Rm );
  2202     check_ralign32( R_EAX );
  2203     MOV_r32_r32( R_EAX, R_ECX );
  2204     ADD_imm8s_r32( 4, R_EAX );
  2205     store_reg( R_EAX, Rm );
  2206     MEM_READ_LONG( R_ECX, R_EAX );
  2207     store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2208 :}
  2209 LDS Rm, FPSCR {:  
  2210     load_reg( R_EAX, Rm );
  2211     store_spreg( R_EAX, R_FPSCR );
  2212     update_fr_bank( R_EAX );
  2213 :}
  2214 LDS.L @Rm+, FPSCR {:  
  2215     load_reg( R_EAX, Rm );
  2216     check_ralign32( R_EAX );
  2217     MOV_r32_r32( R_EAX, R_ECX );
  2218     ADD_imm8s_r32( 4, R_EAX );
  2219     store_reg( R_EAX, Rm );
  2220     MEM_READ_LONG( R_ECX, R_EAX );
  2221     store_spreg( R_EAX, R_FPSCR );
  2222     update_fr_bank( R_EAX );
  2223 :}
  2224 LDS Rm, FPUL {:  
  2225     load_reg( R_EAX, Rm );
  2226     store_spreg( R_EAX, R_FPUL );
  2227 :}
  2228 LDS.L @Rm+, FPUL {:  
  2229     load_reg( R_EAX, Rm );
  2230     check_ralign32( R_EAX );
  2231     MOV_r32_r32( R_EAX, R_ECX );
  2232     ADD_imm8s_r32( 4, R_EAX );
  2233     store_reg( R_EAX, Rm );
  2234     MEM_READ_LONG( R_ECX, R_EAX );
  2235     store_spreg( R_EAX, R_FPUL );
  2236 :}
  2237 LDS Rm, MACH {: 
  2238     load_reg( R_EAX, Rm );
  2239     store_spreg( R_EAX, R_MACH );
  2240 :}
  2241 LDS.L @Rm+, MACH {:  
  2242     load_reg( R_EAX, Rm );
  2243     check_ralign32( R_EAX );
  2244     MOV_r32_r32( R_EAX, R_ECX );
  2245     ADD_imm8s_r32( 4, R_EAX );
  2246     store_reg( R_EAX, Rm );
  2247     MEM_READ_LONG( R_ECX, R_EAX );
  2248     store_spreg( R_EAX, R_MACH );
  2249 :}
  2250 LDS Rm, MACL {:  
  2251     load_reg( R_EAX, Rm );
  2252     store_spreg( R_EAX, R_MACL );
  2253 :}
  2254 LDS.L @Rm+, MACL {:  
  2255     load_reg( R_EAX, Rm );
  2256     check_ralign32( R_EAX );
  2257     MOV_r32_r32( R_EAX, R_ECX );
  2258     ADD_imm8s_r32( 4, R_EAX );
  2259     store_reg( R_EAX, Rm );
  2260     MEM_READ_LONG( R_ECX, R_EAX );
  2261     store_spreg( R_EAX, R_MACL );
  2262 :}
  2263 LDS Rm, PR {:  
  2264     load_reg( R_EAX, Rm );
  2265     store_spreg( R_EAX, R_PR );
  2266 :}
  2267 LDS.L @Rm+, PR {:  
  2268     load_reg( R_EAX, Rm );
  2269     check_ralign32( R_EAX );
  2270     MOV_r32_r32( R_EAX, R_ECX );
  2271     ADD_imm8s_r32( 4, R_EAX );
  2272     store_reg( R_EAX, Rm );
  2273     MEM_READ_LONG( R_ECX, R_EAX );
  2274     store_spreg( R_EAX, R_PR );
  2275 :}
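        /* LDTLB and the cache-control ops (OCBI/OCBP/OCBWB) emit no code. */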
  2276 LDTLB {:  :}
  2277 OCBI @Rn {:  :}
  2278 OCBP @Rn {:  :}
  2279 OCBWB @Rn {:  :}
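        /* PREF only has a visible effect for addresses in the 0xE0000000
         * store-queue region: the address is pushed as an argument and
         * sh4_flush_store_queue is called, then the stack is cleaned up. */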
  2280 PREF @Rn {:
  2281     load_reg( R_EAX, Rn );
  2282     PUSH_r32( R_EAX );
  2283     AND_imm32_r32( 0xFC000000, R_EAX );
  2284     CMP_imm32_r32( 0xE0000000, R_EAX );
  2285     JNE_rel8(7, end);
  2286     call_func0( sh4_flush_store_queue );
  2287     JMP_TARGET(end);
  2288     ADD_imm8s_r32( 4, R_ESP );
  2289 :}
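        /* SLEEP is translated as a call to the sh4_sleep helper; it ends the
         * current translation block immediately (return 1). */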
  2290 SLEEP {: 
  2291     check_priv();
  2292     call_func0( sh4_sleep );
  2293     sh4_x86.exit_code = 0;
  2294     sh4_x86.in_delay_slot = FALSE;
  2295     INC_r32(R_ESI);
  2296     return 1;
  2297 :}
  2298 STC SR, Rn {:
  2299     check_priv();
  2300     call_func0(sh4_read_sr);
  2301     store_reg( R_EAX, Rn );
  2302 :}
  2303 STC GBR, Rn {:  
  2304     load_spreg( R_EAX, R_GBR );
  2305     store_reg( R_EAX, Rn );
  2306 :}
  2307 STC VBR, Rn {:  
  2308     check_priv();
  2309     load_spreg( R_EAX, R_VBR );
  2310     store_reg( R_EAX, Rn );
  2311 :}
  2312 STC SSR, Rn {:  
  2313     check_priv();
  2314     load_spreg( R_EAX, R_SSR );
  2315     store_reg( R_EAX, Rn );
  2316 :}
  2317 STC SPC, Rn {:  
  2318     check_priv();
  2319     load_spreg( R_EAX, R_SPC );
  2320     store_reg( R_EAX, Rn );
  2321 :}
  2322 STC SGR, Rn {:  
  2323     check_priv();
  2324     load_spreg( R_EAX, R_SGR );
  2325     store_reg( R_EAX, Rn );
  2326 :}
  2327 STC DBR, Rn {:  
  2328     check_priv();
  2329     load_spreg( R_EAX, R_DBR );
  2330     store_reg( R_EAX, Rn );
  2331 :}
  2332 STC Rm_BANK, Rn {:
  2333     check_priv();
  2334     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2335     store_reg( R_EAX, Rn );
  2336 :}
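        /* The STC.L/STS.L @-Rn forms check write alignment, pre-decrement Rn
         * by 4, and store the source register with MEM_WRITE_LONG. */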
  2337 STC.L SR, @-Rn {:
  2338     check_priv();
  2339     call_func0( sh4_read_sr );
  2340     load_reg( R_ECX, Rn );
  2341     check_walign32( R_ECX );
  2342     ADD_imm8s_r32( -4, R_ECX );
  2343     store_reg( R_ECX, Rn );
  2344     MEM_WRITE_LONG( R_ECX, R_EAX );
  2345 :}
  2346 STC.L VBR, @-Rn {:  
  2347     check_priv();
  2348     load_reg( R_ECX, Rn );
  2349     check_walign32( R_ECX );
  2350     ADD_imm8s_r32( -4, R_ECX );
  2351     store_reg( R_ECX, Rn );
  2352     load_spreg( R_EAX, R_VBR );
  2353     MEM_WRITE_LONG( R_ECX, R_EAX );
  2354 :}
  2355 STC.L SSR, @-Rn {:  
  2356     check_priv();
  2357     load_reg( R_ECX, Rn );
  2358     check_walign32( R_ECX );
  2359     ADD_imm8s_r32( -4, R_ECX );
  2360     store_reg( R_ECX, Rn );
  2361     load_spreg( R_EAX, R_SSR );
  2362     MEM_WRITE_LONG( R_ECX, R_EAX );
  2363 :}
  2364 STC.L SPC, @-Rn {:  
  2365     check_priv();
  2366     load_reg( R_ECX, Rn );
  2367     check_walign32( R_ECX );
  2368     ADD_imm8s_r32( -4, R_ECX );
  2369     store_reg( R_ECX, Rn );
  2370     load_spreg( R_EAX, R_SPC );
  2371     MEM_WRITE_LONG( R_ECX, R_EAX );
  2372 :}
  2373 STC.L SGR, @-Rn {:  
  2374     check_priv();
  2375     load_reg( R_ECX, Rn );
  2376     check_walign32( R_ECX );
  2377     ADD_imm8s_r32( -4, R_ECX );
  2378     store_reg( R_ECX, Rn );
  2379     load_spreg( R_EAX, R_SGR );
  2380     MEM_WRITE_LONG( R_ECX, R_EAX );
  2381 :}
  2382 STC.L DBR, @-Rn {:  
  2383     check_priv();
  2384     load_reg( R_ECX, Rn );
  2385     check_walign32( R_ECX );
  2386     ADD_imm8s_r32( -4, R_ECX );
  2387     store_reg( R_ECX, Rn );
  2388     load_spreg( R_EAX, R_DBR );
  2389     MEM_WRITE_LONG( R_ECX, R_EAX );
  2390 :}
  2391 STC.L Rm_BANK, @-Rn {:  
  2392     check_priv();
  2393     load_reg( R_ECX, Rn );
  2394     check_walign32( R_ECX );
  2395     ADD_imm8s_r32( -4, R_ECX );
  2396     store_reg( R_ECX, Rn );
  2397     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2398     MEM_WRITE_LONG( R_ECX, R_EAX );
  2399 :}
  2400 STC.L GBR, @-Rn {:  
  2401     load_reg( R_ECX, Rn );
  2402     check_walign32( R_ECX );
  2403     ADD_imm8s_r32( -4, R_ECX );
  2404     store_reg( R_ECX, Rn );
  2405     load_spreg( R_EAX, R_GBR );
  2406     MEM_WRITE_LONG( R_ECX, R_EAX );
  2407 :}
  2408 STS FPSCR, Rn {:  
  2409     load_spreg( R_EAX, R_FPSCR );
  2410     store_reg( R_EAX, Rn );
  2411 :}
  2412 STS.L FPSCR, @-Rn {:  
  2413     load_reg( R_ECX, Rn );
  2414     check_walign32( R_ECX );
  2415     ADD_imm8s_r32( -4, R_ECX );
  2416     store_reg( R_ECX, Rn );
  2417     load_spreg( R_EAX, R_FPSCR );
  2418     MEM_WRITE_LONG( R_ECX, R_EAX );
  2419 :}
  2420 STS FPUL, Rn {:  
  2421     load_spreg( R_EAX, R_FPUL );
  2422     store_reg( R_EAX, Rn );
  2423 :}
  2424 STS.L FPUL, @-Rn {:  
  2425     load_reg( R_ECX, Rn );
  2426     check_walign32( R_ECX );
  2427     ADD_imm8s_r32( -4, R_ECX );
  2428     store_reg( R_ECX, Rn );
  2429     load_spreg( R_EAX, R_FPUL );
  2430     MEM_WRITE_LONG( R_ECX, R_EAX );
  2431 :}
  2432 STS MACH, Rn {:  
  2433     load_spreg( R_EAX, R_MACH );
  2434     store_reg( R_EAX, Rn );
  2435 :}
  2436 STS.L MACH, @-Rn {:  
  2437     load_reg( R_ECX, Rn );
  2438     check_walign32( R_ECX );
  2439     ADD_imm8s_r32( -4, R_ECX );
  2440     store_reg( R_ECX, Rn );
  2441     load_spreg( R_EAX, R_MACH );
  2442     MEM_WRITE_LONG( R_ECX, R_EAX );
  2443 :}
  2444 STS MACL, Rn {:  
  2445     load_spreg( R_EAX, R_MACL );
  2446     store_reg( R_EAX, Rn );
  2447 :}
  2448 STS.L MACL, @-Rn {:  
  2449     load_reg( R_ECX, Rn );
  2450     check_walign32( R_ECX );
  2451     ADD_imm8s_r32( -4, R_ECX );
  2452     store_reg( R_ECX, Rn );
  2453     load_spreg( R_EAX, R_MACL );
  2454     MEM_WRITE_LONG( R_ECX, R_EAX );
  2455 :}
  2456 STS PR, Rn {:  
  2457     load_spreg( R_EAX, R_PR );
  2458     store_reg( R_EAX, Rn );
  2459 :}
  2460 STS.L PR, @-Rn {:  
  2461     load_reg( R_ECX, Rn );
  2462     check_walign32( R_ECX );
  2463     ADD_imm8s_r32( -4, R_ECX );
  2464     store_reg( R_ECX, Rn );
  2465     load_spreg( R_EAX, R_PR );
  2466     MEM_WRITE_LONG( R_ECX, R_EAX );
  2467 :}
  2469 NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2470 %%
  2471     if( sh4_x86.in_delay_slot ) {
  2472 	ADD_imm8s_r32(2,R_ESI);
  2473 	sh4_x86.in_delay_slot = FALSE;
  2474 	return 1;
  2475     } else {
  2476 	INC_r32(R_ESI);
  2477     }
  2478     return 0;