Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 381:aade6c9aca4d
prev 380:2e8166bf6832
next 382:fce3f4da92ab
author nkeynes
date Wed Sep 12 11:41:43 2007 +0000 (16 years ago)
permissions -rw-r--r--
last change Fix default block return value
view annotate diff log raw
     1 /**
     2  * $Id: sh4x86.in,v 1.8 2007-09-12 11:41:43 nkeynes Exp $
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    23 #ifndef NDEBUG
    24 #define DEBUG_JUMPS 1
    25 #endif
    27 #include "sh4/sh4core.h"
    28 #include "sh4/sh4trans.h"
    29 #include "sh4/x86op.h"
    30 #include "clock.h"
    32 #define DEFAULT_BACKPATCH_SIZE 4096
    34 /** 
    35  * Struct to manage internal translation state. This state is not saved -
    36  * it is only valid between calls to sh4_translate_begin_block() and
    37  * sh4_translate_end_block()
    38  */
    39 struct sh4_x86_state {
       /* true while translating the delay slot of a branch */
    40     gboolean in_delay_slot;
    41     gboolean priv_checked; /* true if we've already checked the cpu mode. */
    42     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    44     /* Allocated memory for the (block-wide) back-patch list */
    45     uint32_t **backpatch_list;
       /* number of entries currently in backpatch_list */
    46     uint32_t backpatch_posn;
       /* capacity of backpatch_list, in entries (not bytes) */
    47     uint32_t backpatch_size;
    48 };
       /* Byte offsets into the exception stub table emitted by
        * sh4_translate_end_block(). Each stub is 7 bytes:
        * PUSH imm32 (5) + JMP rel8 (2) - hence the stride of 7. */
    50 #define EXIT_DATA_ADDR_READ 0
    51 #define EXIT_DATA_ADDR_WRITE 7
    52 #define EXIT_ILLEGAL 14
    53 #define EXIT_SLOT_ILLEGAL 21
    54 #define EXIT_FPU_DISABLED 28
    55 #define EXIT_SLOT_FPU_DISABLED 35
    57 static struct sh4_x86_state sh4_x86;
    59 void sh4_x86_init()
    60 {
    61     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    62     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(uint32_t *);
    63 }
    66 static void sh4_x86_add_backpatch( uint8_t *ptr )
    67 {
    68     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
    69 	sh4_x86.backpatch_size <<= 1;
    70 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, sh4_x86.backpatch_size * sizeof(uint32_t *) );
    71 	assert( sh4_x86.backpatch_list != NULL );
    72     }
    73     sh4_x86.backpatch_list[sh4_x86.backpatch_posn++] = (uint32_t *)ptr;
    74 }
       /**
        * Fix up every recorded rel32 field so it is relative to reloc_base
        * (the start of the exception stub table). The stored value at each
        * site is treated as an offset from reloc_base; the -4 accounts for
        * rel32 displacements being relative to the end of the 4-byte field.
        */
    76 static void sh4_x86_do_backpatch( uint8_t *reloc_base )
    77 {
    78     unsigned int i;
    79     for( i=0; i<sh4_x86.backpatch_posn; i++ ) {
    80 	*sh4_x86.backpatch_list[i] += (reloc_base - ((uint8_t *)sh4_x86.backpatch_list[i]) - 4);
    81     }
    82 }
    84 /**
    85  * Emit an instruction to load an SH4 reg into a real register
    86  */
    87 static inline void load_reg( int x86reg, int sh4reg ) 
    88 {
    89     /* mov [bp+n], reg */
    90     OP(0x8B);
       // 0x8B = MOV r32, r/m32; ModRM 0x45+(reg<<3) = [ebp+disp8] (ebp holds &sh4r)
    91     OP(0x45 + (x86reg<<3));
    92     OP(REG_OFFSET(r[sh4reg]));
    93 }
       /* Load an SH4 reg, sign-extended from its low 16 bits (MOVSX = 0F BF) */
    95 static inline void load_reg16s( int x86reg, int sh4reg )
    96 {
    97     OP(0x0F);
    98     OP(0xBF);
    99     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   100 }
       /* Load an SH4 reg, zero-extended from its low 16 bits (MOVZX = 0F B7) */
   102 static inline void load_reg16u( int x86reg, int sh4reg )
   103 {
   104     OP(0x0F);
   105     OP(0xB7);
   106     MODRM_r32_sh4r(x86reg, REG_OFFSET(r[sh4reg]));
   108 }
       /* Load/store a system register (non-GPR field of sh4r) by struct offset */
   110 #define load_spreg( x86reg, regoff ) MOV_sh4r_r32( regoff, x86reg )
   111 #define store_spreg( x86reg, regoff ) MOV_r32_sh4r( x86reg, regoff )
   112 /**
   113  * Emit an instruction to load an immediate value into a register
   114  */
   115 static inline void load_imm32( int x86reg, uint32_t value ) {
   116     /* mov #value, reg */
       // 0xB8+r = MOV r32, imm32
   117     OP(0xB8 + x86reg);
   118     OP32(value);
   119 }
   121 /**
   122  * Emit an instruction to store an SH4 reg (RN)
   123  */
   124 void static inline store_reg( int x86reg, int sh4reg ) {
   125     /* mov reg, [bp+n] */
       // 0x89 = MOV r/m32, r32; ModRM 0x45+(reg<<3) = [ebp+disp8]
   126     OP(0x89);
   127     OP(0x45 + (x86reg<<3));
   128     OP(REG_OFFSET(r[sh4reg]));
   129 }
       /* Load the current FP bank pointer (sh4r.fr_bank) into bankreg */
   131 #define load_fr_bank(bankreg) load_spreg( bankreg, REG_OFFSET(fr_bank))
   133 /**
   134  * Load an FR register (single-precision floating point) into an integer x86
   135  * register (eg for register-to-register moves)
   136  */
   137 void static inline load_fr( int bankreg, int x86reg, int frm )
   138 {
       // MOV x86reg, [bankreg + (frm^1)*4]
       // (frm^1) swaps registers within each pair - presumably matches the
       // in-memory layout of the FP banks; TODO confirm against sh4core.h
   139     OP(0x8B); OP(0x40+bankreg+(x86reg<<3)); OP((frm^1)<<2);
   140 }
   142 /**
   143  * Store an FR register (single-precision floating point) into an integer x86
   144  * register (eg for register-to-register moves)
   145  */
   146 void static inline store_fr( int bankreg, int x86reg, int frn )
   147 {
       // MOV [bankreg + (frn^1)*4], x86reg
   148     OP(0x89);  OP(0x40+bankreg+(x86reg<<3)); OP((frn^1)<<2);
   149 }
   152 /**
   153  * Load a pointer to the back fp back into the specified x86 register. The
   154  * bankreg must have been previously loaded with FPSCR.
   155  * NB: 10 bytes
   156  */
   157 static inline void load_xf_bank( int bankreg )
   158 {
   159     SHR_imm8_r32( (21 - 6), bankreg ); // Extract bit 21 then *64 for bank size
   160     AND_imm8s_r32( 0x40, bankreg );    // Complete extraction
   161     OP(0x8D); OP(0x44+(bankreg<<3)); OP(0x28+bankreg); OP(REG_OFFSET(fr)); // LEA [ebp+bankreg+disp], bankreg
   162 }
   164 /**
   165  * Push FPUL (as a 32-bit float) onto the FPU stack
   166  */
   167 static inline void push_fpul( )
   168 {
       // FLD dword [ebp + R_FPUL]
   169     OP(0xD9); OP(0x45); OP(R_FPUL);
   170 }
   172 /**
   173  * Pop FPUL (as a 32-bit float) from the FPU stack
   174  */
   175 static inline void pop_fpul( )
   176 {
       // D9 /3 = FSTP dword [ebp + R_FPUL] (store and pop)
   177     OP(0xD9); OP(0x5D); OP(R_FPUL);
   178 }
   180 /**
   181  * Push a 32-bit float onto the FPU stack, with bankreg previously loaded
   182  * with the location of the current fp bank.
   183  */
   184 static inline void push_fr( int bankreg, int frm ) 
   185 {
   186     OP(0xD9); OP(0x40 + bankreg); OP((frm^1)<<2);  // FLD.S [bankreg + frm^1*4]
   187 }
   189 /**
   190  * Pop a 32-bit float from the FPU stack and store it back into the fp bank, 
   191  * with bankreg previously loaded with the location of the current fp bank.
   192  */
   193 static inline void pop_fr( int bankreg, int frm )
   194 {
       // D9 /3 = FSTP (store and pop), matching the "pop" name
   195     OP(0xD9); OP(0x58 + bankreg); OP((frm^1)<<2); // FST.S [bankreg + frm^1*4]
   196 }
   198 /**
   199  * Push a 64-bit double onto the FPU stack, with bankreg previously loaded
   200  * with the location of the current fp bank.
   201  */
   202 static inline void push_dr( int bankreg, int frm )
   203 {
       // DR registers are even/odd FR pairs, so the byte offset is frm*4
   204     OP(0xDD); OP(0x40 + bankreg); OP(frm<<2); // FLD.D [bankreg + frm*4]
   205 }
       /* Pop a 64-bit double from the FPU stack back into the fp bank (DD /3 = FSTP.D) */
   207 static inline void pop_dr( int bankreg, int frm )
   208 {
   209     OP(0xDD); OP(0x58 + bankreg); OP(frm<<2); // FST.D [bankreg + frm*4]
   210 }
   212 /**
   213  * Note: clobbers EAX to make the indirect call - this isn't usually
   214  * a problem since the callee will usually clobber it anyway.
   215  */
   216 static inline void call_func0( void *ptr )
   217 {
   218     load_imm32(R_EAX, (uint32_t)ptr);
   219     CALL_r32(R_EAX);
   220 }
       /* Call a 1-argument cdecl function; caller pops the argument.
        * NOTE: the callee may clobber EAX/ECX/EDX (caller-saved). */
   222 static inline void call_func1( void *ptr, int arg1 )
   223 {
   224     PUSH_r32(arg1);
   225     call_func0(ptr);
   226     ADD_imm8s_r32( 4, R_ESP );
   227 }
       /* Call a 2-argument cdecl function: args pushed right-to-left so arg1
        * ends up on top of the stack. */
   229 static inline void call_func2( void *ptr, int arg1, int arg2 )
   230 {
   231     PUSH_r32(arg2);
   232     PUSH_r32(arg1);
   233     call_func0(ptr);
   234     ADD_imm8s_r32( 8, R_ESP );
   235 }
   237 /**
   238  * Write a double (64-bit) value into memory, with the first word in arg2a, and
   239  * the second in arg2b
   240  * NB: 30 bytes
   241  */
   242 static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
   243 {
       /* Each call expects (addr, value) with addr on top of the stack - the
        * same convention as call_func2(). The original pushed addr before the
        * value, which handed sh4_write_long its arguments swapped. */
   244     ADD_imm8s_r32( 4, addr );
   245     PUSH_r32(arg2b);
   246     PUSH_r32(addr);
   247     ADD_imm8s_r32( -4, addr );
   248     PUSH_r32(arg2a);
   249     PUSH_r32(addr);
   250     call_func0(sh4_write_long);  // writes arg2a at addr
   251     ADD_imm8s_r32( 8, R_ESP );
   252     call_func0(sh4_write_long);  // writes arg2b at addr+4
   253     ADD_imm8s_r32( 8, R_ESP );
   254 }
   256 /**
   257  * Read a double (64-bit) value from memory, writing the first word into arg2a
   258  * and the second into arg2b. The addr must not be in EAX
   259  * NB: 27 bytes
   260  */
   261 static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
   262 {
   263     PUSH_r32(addr);
   264     call_func0(sh4_read_long);
       // POP the argument back into addr - restores it even if the callee
       // clobbered the register (cdecl leaves the argument on the stack)
   265     POP_r32(addr);
   266     PUSH_r32(R_EAX);           // save first word while making the second call
   267     ADD_imm8s_r32( 4, addr );
   268     PUSH_r32(addr);
   269     call_func0(sh4_read_long);
   270     ADD_imm8s_r32( 4, R_ESP ); // discard the argument
   271     MOV_r32_r32( R_EAX, arg2b );
   272     POP_r32(arg2a);            // recover the saved first word
   273 }
   275 /* Exception checks - Note that all exception checks will clobber EAX */
       /* Emit a privilege check (at most once per block): exit with an illegal
        * instruction exception if SR.MD is clear (user mode). */
   276 static void check_priv( )
   277 {
   278     if( !sh4_x86.priv_checked ) {
   279 	sh4_x86.priv_checked = TRUE;
   280 	load_spreg( R_EAX, R_SR );
   281 	AND_imm32_r32( SR_MD, R_EAX );
   282 	if( sh4_x86.in_delay_slot ) {
   283 	    JE_exit( EXIT_SLOT_ILLEGAL );
   284 	} else {
   285 	    JE_exit( EXIT_ILLEGAL );
   286 	}
   287     }
   288 }
       /* Emit an FPU-enabled check (at most once per block): exit with an
        * FPU-disabled exception if SR.FD is set. */
   290 static void check_fpuen( )
   291 {
   292     if( !sh4_x86.fpuen_checked ) {
   293 	sh4_x86.fpuen_checked = TRUE;
   294 	load_spreg( R_EAX, R_SR );
   295 	AND_imm32_r32( SR_FD, R_EAX );
   296 	if( sh4_x86.in_delay_slot ) {
   297 	    JNE_exit(EXIT_SLOT_FPU_DISABLED);
   298 	} else {
   299 	    JNE_exit(EXIT_FPU_DISABLED);
   300 	}
   301     }
   302 }
       /* Alignment checks: test the low address bit(s) and exit with a data
        * address error if the access would be misaligned. Clobbers flags. */
   304 static void check_ralign16( int x86reg )
   305 {
   306     TEST_imm32_r32( 0x00000001, x86reg );
   307     JNE_exit(EXIT_DATA_ADDR_READ);
   308 }
   310 static void check_walign16( int x86reg )
   311 {
   312     TEST_imm32_r32( 0x00000001, x86reg );
   313     JNE_exit(EXIT_DATA_ADDR_WRITE);
   314 }
   316 static void check_ralign32( int x86reg )
   317 {
   318     TEST_imm32_r32( 0x00000003, x86reg );
   319     JNE_exit(EXIT_DATA_ADDR_READ);
   320 }
   321 static void check_walign32( int x86reg )
   322 {
   323     TEST_imm32_r32( 0x00000003, x86reg );
   324     JNE_exit(EXIT_DATA_ADDR_WRITE);
   325 }
       /* NOTE(review): UNDEF() expands to nothing, so undefined opcodes are
        * translated as no-ops rather than raising an exception - confirm intended. */
   328 #define UNDEF()
       /* Move a memory-read result out of EAX if the caller wants it elsewhere.
        * NOTE(review): these multi-statement macros are not do{}while(0)-wrapped;
        * safe only at statement level (as used in the templates below). */
   329 #define MEM_RESULT(value_reg) if(value_reg != R_EAX) { MOV_r32_r32(R_EAX,value_reg); }
   330 #define MEM_READ_BYTE( addr_reg, value_reg ) call_func1(sh4_read_byte, addr_reg ); MEM_RESULT(value_reg)
   331 #define MEM_READ_WORD( addr_reg, value_reg ) call_func1(sh4_read_word, addr_reg ); MEM_RESULT(value_reg)
   332 #define MEM_READ_LONG( addr_reg, value_reg ) call_func1(sh4_read_long, addr_reg ); MEM_RESULT(value_reg)
   333 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_func2(sh4_write_byte, addr_reg, value_reg)
   334 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_func2(sh4_write_word, addr_reg, value_reg)
   335 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_func2(sh4_write_long, addr_reg, value_reg)
   337 #define RAISE_EXCEPTION( exc ) call_func1(sh4_raise_exception, exc);
       /* Raise a slot-illegal exception and end the block (return 1 = block end) */
   338 #define SLOTILLEGAL() RAISE_EXCEPTION(EXC_SLOT_ILLEGAL); return 1
   342 /**
   343  * Emit the 'start of block' assembly. Sets up the stack frame and save
   344  * SI/DI as required
   345  */
   346 void sh4_translate_begin_block() 
   347 {
   348     PUSH_r32(R_EBP);
   349     /* mov &sh4r, ebp */
   350     load_imm32( R_EBP, (uint32_t)&sh4r );
       // EDI/ESI are callee-saved; exit_block() stores EDI to sh4r.pc and
       // scales ESI by sh4_cpu_period - ESI appears to count executed
       // instructions within the block (zeroed here).
   351     PUSH_r32(R_EDI);
   352     PUSH_r32(R_ESI);
   353     XOR_r32_r32(R_ESI, R_ESI);
       /* Reset per-block translation state */
   355     sh4_x86.in_delay_slot = FALSE;
   356     sh4_x86.priv_checked = FALSE;
   357     sh4_x86.fpuen_checked = FALSE;
   358     sh4_x86.backpatch_posn = 0;
   359 }
   361 /**
   362  * Exit the block early (ie branch out), conditionally or otherwise
   363  */
   364 void exit_block( )
   365 {
       // sh4r.pc = EDI (the current PC is tracked in EDI)
   366     store_spreg( R_EDI, REG_OFFSET(pc) );
       // sh4r.slice_cycle += sh4_cpu_period * ESI (instruction count)
   367     MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
   368     load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   369     MUL_r32( R_ESI );
   370     ADD_r32_r32( R_EAX, R_ECX );
   371     store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
       // Block return value in EAX (1 = default; cf. changeset comment)
   372     load_imm32( R_EAX, 1 );
       // Restore callee-saved registers pushed in sh4_translate_begin_block()
   373     POP_r32(R_ESI);
   374     POP_r32(R_EDI);
   375     POP_r32(R_EBP);
   376     RET();
   377 }
   379 /**
   380  * Flush any open regs back to memory, restore SI/DI/, update PC, etc
   381  */
   382 void sh4_translate_end_block( sh4addr_t pc ) {
   383     assert( !sh4_x86.in_delay_slot ); // should never stop here
   384     // Normal termination - save PC, cycle count
   385     exit_block( );
       // end_ptr marks the start of the exception stub table; all JE_exit/
       // JNE_exit sites recorded via sh4_x86_add_backpatch are relocated
       // relative to this address (see the EXIT_* offsets, stride 7).
   387     uint8_t *end_ptr = xlat_output;
   388     // Exception termination. Jump block for various exception codes:
       // Each stub: PUSH imm32 (5 bytes) + JMP rel8 (2 bytes); the rel8
       // distances (33,26,19,12,5) skip the remaining stubs to the common tail.
   389     PUSH_imm32( EXC_DATA_ADDR_READ );
   390     JMP_rel8( 33, target1 );
   391     PUSH_imm32( EXC_DATA_ADDR_WRITE );
   392     JMP_rel8( 26, target2 );
   393     PUSH_imm32( EXC_ILLEGAL );
   394     JMP_rel8( 19, target3 );
   395     PUSH_imm32( EXC_SLOT_ILLEGAL ); 
   396     JMP_rel8( 12, target4 );
   397     PUSH_imm32( EXC_FPU_DISABLED ); 
   398     JMP_rel8( 5, target5 );
   399     PUSH_imm32( EXC_SLOT_FPU_DISABLED );
   400     // target
   401     JMP_TARGET(target1);
   402     JMP_TARGET(target2);
   403     JMP_TARGET(target3);
   404     JMP_TARGET(target4);
   405     JMP_TARGET(target5);
       // sh4r.pc += 2*ESI (2 bytes per translated instruction)
   406     load_spreg( R_ECX, REG_OFFSET(pc) );
   407     ADD_r32_r32( R_ESI, R_ECX );
   408     ADD_r32_r32( R_ESI, R_ECX );
   409     store_spreg( R_ECX, REG_OFFSET(pc) );
       // sh4r.slice_cycle += sh4_cpu_period * ESI, as in exit_block()
   410     MOV_moff32_EAX( (uint32_t)&sh4_cpu_period );
   411     load_spreg( R_ECX, REG_OFFSET(slice_cycle) );
   412     MUL_r32( R_ESI );
   413     ADD_r32_r32( R_EAX, R_ECX );
   414     store_spreg( R_ECX, REG_OFFSET(slice_cycle) );
       // Call sh4_raise_exception with the pushed exception code as argument.
       // NOTE(review): on this path the pushed argument is never popped and
       // ESI/EDI (pushed in begin_block) are not restored before the single
       // POP EBP + RET, and no return value is loaded into EAX - this looks
       // stack-imbalanced unless sh4_raise_exception never returns; confirm
       // against later revisions of this file.
   416     load_imm32( R_EAX, (uint32_t)sh4_raise_exception ); // 6
   417     CALL_r32( R_EAX ); // 2
   418     POP_r32(R_EBP);
   419     RET();
       // Relocate all recorded exit jumps against the stub table
   421     sh4_x86_do_backpatch( end_ptr );
   422 }
   424 /**
   425  * Translate a single instruction. Delayed branches are handled specially
   426  * by translating both branch and delayed instruction as a single unit (as
   427  * 
   428  *
   429  * @return true if the instruction marks the end of a basic block
   430  * (eg a branch or 
   431  */
   432 uint32_t sh4_x86_translate_instruction( uint32_t pc )
   433 {
   434     uint16_t ir = sh4_read_word( pc );
   436 %%
   437 /* ALU operations */
   438 ADD Rm, Rn {:
   439     load_reg( R_EAX, Rm );
   440     load_reg( R_ECX, Rn );
   441     ADD_r32_r32( R_EAX, R_ECX );
   442     store_reg( R_ECX, Rn );
   443 :}
   444 ADD #imm, Rn {:  
   445     load_reg( R_EAX, Rn );
   446     ADD_imm8s_r32( imm, R_EAX );
   447     store_reg( R_EAX, Rn );
   448 :}
   449 ADDC Rm, Rn {:
   450     load_reg( R_EAX, Rm );
   451     load_reg( R_ECX, Rn );
   452     LDC_t();
   453     ADC_r32_r32( R_EAX, R_ECX );
   454     store_reg( R_ECX, Rn );
       // store_reg emits MOV, which preserves flags, so CF is still ADC's carry
   455     SETC_t();
   456 :}
   457 ADDV Rm, Rn {:
   458     load_reg( R_EAX, Rm );
   459     load_reg( R_ECX, Rn );
   460     ADD_r32_r32( R_EAX, R_ECX );
   461     store_reg( R_ECX, Rn );
       // T = signed overflow of the add
   462     SETO_t();
   463 :}
   464 AND Rm, Rn {:
   465     load_reg( R_EAX, Rm );
   466     load_reg( R_ECX, Rn );
   467     AND_r32_r32( R_EAX, R_ECX );
   468     store_reg( R_ECX, Rn );
   469 :}
   470 AND #imm, R0 {:  
   471     load_reg( R_EAX, 0 );
   472     AND_imm32_r32(imm, R_EAX); 
   473     store_reg( R_EAX, 0 );
   474 :}
   475 AND.B #imm, @(R0, GBR) {: 
   476     load_reg( R_EAX, 0 );
   477     load_spreg( R_ECX, R_GBR );
   478     ADD_r32_r32( R_EAX, R_ECX );
       PUSH_r32(R_ECX); // sh4_read_byte may clobber ECX (caller-saved) - preserve the address
   479     MEM_READ_BYTE( R_ECX, R_EAX );
       POP_r32(R_ECX);
   480     AND_imm32_r32(imm, R_EAX ); // operate on the loaded value (was R_ECX, the address)
   481     MEM_WRITE_BYTE( R_ECX, R_EAX );
   482 :}
       /* Compare instructions: set T from the appropriate condition code.
        * CMP_r32_r32(a,b) compares b against a, so SETGE_t etc. read "Rn >= Rm". */
   483 CMP/EQ Rm, Rn {:  
   484     load_reg( R_EAX, Rm );
   485     load_reg( R_ECX, Rn );
   486     CMP_r32_r32( R_EAX, R_ECX );
   487     SETE_t();
   488 :}
   489 CMP/EQ #imm, R0 {:  
   490     load_reg( R_EAX, 0 );
   491     CMP_imm8s_r32(imm, R_EAX);
   492     SETE_t();
   493 :}
   494 CMP/GE Rm, Rn {:  
   495     load_reg( R_EAX, Rm );
   496     load_reg( R_ECX, Rn );
   497     CMP_r32_r32( R_EAX, R_ECX );
   498     SETGE_t();
   499 :}
   500 CMP/GT Rm, Rn {: 
   501     load_reg( R_EAX, Rm );
   502     load_reg( R_ECX, Rn );
   503     CMP_r32_r32( R_EAX, R_ECX );
   504     SETG_t();
   505 :}
   506 CMP/HI Rm, Rn {:  
   507     load_reg( R_EAX, Rm );
   508     load_reg( R_ECX, Rn );
   509     CMP_r32_r32( R_EAX, R_ECX );
   510     SETA_t();
   511 :}
   512 CMP/HS Rm, Rn {: 
   513     load_reg( R_EAX, Rm );
   514     load_reg( R_ECX, Rn );
   515     CMP_r32_r32( R_EAX, R_ECX );
   516     SETAE_t();
   517  :}
   518 CMP/PL Rn {: 
   519     load_reg( R_EAX, Rn );
   520     CMP_imm8s_r32( 0, R_EAX );
   521     SETG_t();
   522 :}
   523 CMP/PZ Rn {:  
   524     load_reg( R_EAX, Rn );
   525     CMP_imm8s_r32( 0, R_EAX );
   526     SETGE_t();
   527 :}
       /* T = 1 if any byte of Rm equals the corresponding byte of Rn.
        * XOR makes equal bytes zero; each JE skips straight to the SETE.
        * rel8 distances count emitted bytes: TEST r8 = 2, JE rel8 = 2, SHR imm8 = 3. */
   528 CMP/STR Rm, Rn {:  
   529     load_reg( R_EAX, Rm );
   530     load_reg( R_ECX, Rn );
   531     XOR_r32_r32( R_ECX, R_EAX );
   532     TEST_r8_r8( R_AL, R_AL );
   533     JE_rel8(13, target1);
   534     TEST_r8_r8( R_AH, R_AH ); // 2
   535     JE_rel8(9, target2);
   536     SHR_imm8_r32( 16, R_EAX ); // 3
   537     TEST_r8_r8( R_AL, R_AL ); // 2
   538     JE_rel8(2, target3);
   539     TEST_r8_r8( R_AH, R_AH ); // 2
   540     JMP_TARGET(target1);
   541     JMP_TARGET(target2);
   542     JMP_TARGET(target3);
   543     SETE_t();
   544 :}
   545 DIV0S Rm, Rn {:
   546     load_reg( R_EAX, Rm );
   547     load_reg( R_ECX, Rn ); // was Rm: Q comes from Rn, M from Rm
   548     SHR_imm8_r32( 31, R_EAX );
   549     SHR_imm8_r32( 31, R_ECX );
   550     store_spreg( R_EAX, R_M );
   551     store_spreg( R_ECX, R_Q );
   552     CMP_r32_r32( R_EAX, R_ECX );
   553     SETNE_t(); // T = M ^ Q, i.e. set when the signs differ (was SETE)
   554 :}
       /* Clear M, Q and T for an unsigned divide sequence */
   555 DIV0U {:  
   556     XOR_r32_r32( R_EAX, R_EAX );
   557     store_spreg( R_EAX, R_Q );
   558     store_spreg( R_EAX, R_M );
   559     store_spreg( R_EAX, R_T );
   560 :}
       /* Single-step divide. NOTE(review): marked TODO below - Q/T are not
        * written back, and the hand-counted rel8 distances (8 vs 3) imply
        * different encoding sizes for ADD_sh4r_r32/SUB_sh4r_r32; verify both
        * against x86op.h before trusting this path. */
   561 DIV1 Rm, Rn {:  
   562     load_reg( R_ECX, Rn );
   563     LDC_t();
   564     RCL1_r32( R_ECX ); // OP2
   565     SETC_r32( R_EDX ); // Q
   566     load_spreg( R_EAX, R_Q );
   567     CMP_sh4r_r32( R_M, R_EAX );
   568     JE_rel8(8,mqequal);
   569     ADD_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
   570     JMP_rel8(3, mqnotequal);
   571     JMP_TARGET(mqequal);
   572     SUB_sh4r_r32( REG_OFFSET(r[Rm]), R_ECX );
   573     JMP_TARGET(mqnotequal);
   574     // TODO
   575 :}
       /* 32x32 -> 64 signed multiply: MACH:MACL = Rm * Rn */
   576 DMULS.L Rm, Rn {:  
   577     load_reg( R_EAX, Rm );
   578     load_reg( R_ECX, Rn );
   579     IMUL_r32(R_ECX);
   580     store_spreg( R_EDX, R_MACH );
   581     store_spreg( R_EAX, R_MACL );
   582 :}
       /* 32x32 -> 64 unsigned multiply: MACH:MACL = Rm * Rn */
   583 DMULU.L Rm, Rn {:  
   584     load_reg( R_EAX, Rm );
   585     load_reg( R_ECX, Rn );
   586     MUL_r32(R_ECX);
   587     store_spreg( R_EDX, R_MACH );
   588     store_spreg( R_EAX, R_MACL );    
   589 :}
   590 DT Rn {:  
   591     load_reg( R_EAX, Rn );
   592     ADD_imm8s_r32( -1, Rn );
   593     store_reg( R_EAX, Rn );
   594     SETE_t();
   595 :}
       /* Sign/zero extension: MOVSX/MOVZX from the low 8/16 bits of Rm into Rn */
   596 EXTS.B Rm, Rn {:  
   597     load_reg( R_EAX, Rm );
   598     MOVSX_r8_r32( R_EAX, R_EAX );
   599     store_reg( R_EAX, Rn );
   600 :}
   601 EXTS.W Rm, Rn {:  
   602     load_reg( R_EAX, Rm );
   603     MOVSX_r16_r32( R_EAX, R_EAX );
   604     store_reg( R_EAX, Rn );
   605 :}
   606 EXTU.B Rm, Rn {:  
   607     load_reg( R_EAX, Rm );
   608     MOVZX_r8_r32( R_EAX, R_EAX );
   609     store_reg( R_EAX, Rn );
   610 :}
   611 EXTU.W Rm, Rn {:  
   612     load_reg( R_EAX, Rm );
   613     MOVZX_r16_r32( R_EAX, R_EAX );
   614     store_reg( R_EAX, Rn );
   615 :}
       /* NOTE(review): multiply-accumulate instructions are unimplemented
        * (empty templates) - they translate to nothing. */
   616 MAC.L @Rm+, @Rn+ {:  :}
   617 MAC.W @Rm+, @Rn+ {:  :}
   618 MOVT Rn {:  
   619     load_spreg( R_EAX, R_T );
   620     store_reg( R_EAX, Rn );
   621 :}
   622 MUL.L Rm, Rn {:  
   623     load_reg( R_EAX, Rm );
   624     load_reg( R_ECX, Rn );
   625     MUL_r32( R_ECX );
   626     store_spreg( R_EAX, R_MACL );
   627 :}
       /* 16x16 multiplies: the low 32 bits are identical for MUL/IMUL once the
        * operands are sign/zero-extended, so unsigned MUL is fine here */
   628 MULS.W Rm, Rn {:
   629     load_reg16s( R_EAX, Rm );
   630     load_reg16s( R_ECX, Rn );
   631     MUL_r32( R_ECX );
   632     store_spreg( R_EAX, R_MACL );
   633 :}
   634 MULU.W Rm, Rn {:  
   635     load_reg16u( R_EAX, Rm );
   636     load_reg16u( R_ECX, Rn );
   637     MUL_r32( R_ECX );
   638     store_spreg( R_EAX, R_MACL );
   639 :}
   640 NEG Rm, Rn {:
   641     load_reg( R_EAX, Rm );
   642     NEG_r32( R_EAX );
   643     store_reg( R_EAX, Rn );
   644 :}
       /* Rn = 0 - Rm - T: SBB computes ECX(0) - EAX(Rm) - CF(T) */
   645 NEGC Rm, Rn {:  
   646     load_reg( R_EAX, Rm );
   647     XOR_r32_r32( R_ECX, R_ECX );
   648     LDC_t();
   649     SBB_r32_r32( R_EAX, R_ECX );
   650     store_reg( R_ECX, Rn );
   651     SETC_t();
   652 :}
   653 NOT Rm, Rn {:  
   654     load_reg( R_EAX, Rm );
   655     NOT_r32( R_EAX );
   656     store_reg( R_EAX, Rn );
   657 :}
   658 OR Rm, Rn {:  
   659     load_reg( R_EAX, Rm );
   660     load_reg( R_ECX, Rn );
   661     OR_r32_r32( R_EAX, R_ECX );
   662     store_reg( R_ECX, Rn );
   663 :}
   664 OR #imm, R0 {:
   665     load_reg( R_EAX, 0 );
   666     OR_imm32_r32(imm, R_EAX);
   667     store_reg( R_EAX, 0 );
   668 :}
   669 OR.B #imm, @(R0, GBR) {:  
   670     load_reg( R_EAX, 0 );
   671     load_spreg( R_ECX, R_GBR );
   672     ADD_r32_r32( R_EAX, R_ECX );
       PUSH_r32(R_ECX); // sh4_read_byte may clobber ECX (caller-saved) - preserve the address
   673     MEM_READ_BYTE( R_ECX, R_EAX );
       POP_r32(R_ECX);
   674     OR_imm32_r32(imm, R_EAX ); // operate on the loaded value (was R_ECX, the address)
   675     MEM_WRITE_BYTE( R_ECX, R_EAX );
   676 :}
       /* Rotate-through-carry / rotate instructions: T is loaded into CF
        * where needed (LDC_t) and the rotated-out bit is captured (SETC_t). */
   677 ROTCL Rn {:
   678     load_reg( R_EAX, Rn );
   679     LDC_t();
   680     RCL1_r32( R_EAX );
   681     store_reg( R_EAX, Rn );
   682     SETC_t();
   683 :}
   684 ROTCR Rn {:  
   685     load_reg( R_EAX, Rn );
   686     LDC_t();
   687     RCR1_r32( R_EAX );
   688     store_reg( R_EAX, Rn );
   689     SETC_t();
   690 :}
   691 ROTL Rn {:  
   692     load_reg( R_EAX, Rn );
   693     ROL1_r32( R_EAX );
   694     store_reg( R_EAX, Rn );
   695     SETC_t();
   696 :}
   697 ROTR Rn {:  
   698     load_reg( R_EAX, Rn );
   699     ROR1_r32( R_EAX );
   700     store_reg( R_EAX, Rn );
   701     SETC_t();
   702 :}
   703 SHAD Rm, Rn {:
   704     /* Annoyingly enough, not directly convertible */
   705     load_reg( R_EAX, Rn );
   706     load_reg( R_ECX, Rm );
   707     CMP_imm32_r32( 0, R_ECX );
   708     JAE_rel8(9, doshl);
   710     NEG_r32( R_ECX );      // 2
   711     AND_imm8_r8( 0x1F, R_CL ); // 3
   712     SAR_r32_CL( R_EAX );       // 2
   713     JMP_rel8(5, end);          // 2
   714     JMP_TARGET(doshl);
   715     AND_imm8_r8( 0x1F, R_CL ); // 3
   716     SHL_r32_CL( R_EAX );       // 2
   717     JMP_TARGET(end);
   718     store_reg( R_EAX, Rn );
   719 :}
       /* Dynamic logical shift: intent is left by Rm if Rm >= 0, else right by
        * -Rm. Computes both results and selects one with CMOV.
        * NOTE(review): CMOVAE tests CF after CMP ECX,0 - CF is always clear
        * there, so the right-shift result is ALWAYS selected; the condition
        * should be sign-based (ECX = -Rm > 0 <=> Rm < 0). The >=32 count case
        * (result must be 0 on SH4, but x86 masks the count) is also
        * unhandled. Compare against a later revision before fixing. */
   720 SHLD Rm, Rn {:  
   721     load_reg( R_EAX, Rn );
   722     load_reg( R_ECX, Rm );
   724     MOV_r32_r32( R_EAX, R_EDX );
   725     SHL_r32_CL( R_EAX );
   726     NEG_r32( R_ECX );
   727     SHR_r32_CL( R_EDX );
   728     CMP_imm8s_r32( 0, R_ECX );
   729     CMOVAE_r32_r32( R_EDX,  R_EAX );
   730     store_reg( R_EAX, Rn );
   731 :}
       /* Fixed-count shifts; SHAL and SHLL are the same operation on x86 */
   732 SHAL Rn {: 
   733     load_reg( R_EAX, Rn );
   734     SHL1_r32( R_EAX );
   735     store_reg( R_EAX, Rn );
   736 :}
   737 SHAR Rn {:  
   738     load_reg( R_EAX, Rn );
   739     SAR1_r32( R_EAX );
   740     store_reg( R_EAX, Rn );
   741 :}
   742 SHLL Rn {:  
   743     load_reg( R_EAX, Rn );
   744     SHL1_r32( R_EAX );
   745     store_reg( R_EAX, Rn );
   746 :}
   747 SHLL2 Rn {:
   748     load_reg( R_EAX, Rn );
   749     SHL_imm8_r32( 2, R_EAX );
   750     store_reg( R_EAX, Rn );
   751 :}
   752 SHLL8 Rn {:  
   753     load_reg( R_EAX, Rn );
   754     SHL_imm8_r32( 8, R_EAX );
   755     store_reg( R_EAX, Rn );
   756 :}
   757 SHLL16 Rn {:  
   758     load_reg( R_EAX, Rn );
   759     SHL_imm8_r32( 16, R_EAX );
   760     store_reg( R_EAX, Rn );
   761 :}
   762 SHLR Rn {:  
   763     load_reg( R_EAX, Rn );
   764     SHR1_r32( R_EAX );
   765     store_reg( R_EAX, Rn );
   766 :}
   767 SHLR2 Rn {:  
   768     load_reg( R_EAX, Rn );
   769     SHR_imm8_r32( 2, R_EAX );
   770     store_reg( R_EAX, Rn );
   771 :}
   772 SHLR8 Rn {:  
   773     load_reg( R_EAX, Rn );
   774     SHR_imm8_r32( 8, R_EAX );
   775     store_reg( R_EAX, Rn );
   776 :}
   777 SHLR16 Rn {:  
   778     load_reg( R_EAX, Rn );
   779     SHR_imm8_r32( 16, R_EAX );
   780     store_reg( R_EAX, Rn );
   781 :}
   782 SUB Rm, Rn {:  
   783     load_reg( R_EAX, Rm );
   784     load_reg( R_ECX, Rn );
   785     SUB_r32_r32( R_EAX, R_ECX );
   786     store_reg( R_ECX, Rn );
   787 :}
   788 SUBC Rm, Rn {:  
   789     load_reg( R_EAX, Rm );
   790     load_reg( R_ECX, Rn );
   791     LDC_t();
   792     SBB_r32_r32( R_EAX, R_ECX );
   793     store_reg( R_ECX, Rn );
   794 :}
   795 SUBV Rm, Rn {:  
   796     load_reg( R_EAX, Rm );
   797     load_reg( R_ECX, Rn );
   798     SUB_r32_r32( R_EAX, R_ECX );
   799     store_reg( R_ECX, Rn );
       // T = signed underflow of the subtract (MOV preserves flags)
   800     SETO_t();
   801 :}
       /* Swap the two low bytes of Rm; upper 16 bits pass through unchanged */
   802 SWAP.B Rm, Rn {:  
   803     load_reg( R_EAX, Rm );
   804     XCHG_r8_r8( R_AL, R_AH );
   805     store_reg( R_EAX, Rn );
   806 :}
       /* Rotate Rm by 16: Rn = (Rm << 16) | (Rm >> 16) */
   807 SWAP.W Rm, Rn {:  
   808     load_reg( R_EAX, Rm );
   809     MOV_r32_r32( R_EAX, R_ECX );
   810     SHL_imm8_r32( 16, R_ECX );
   811     SHR_imm8_r32( 16, R_EAX );
   812     OR_r32_r32( R_EAX, R_ECX );
   813     store_reg( R_ECX, Rn );
   814 :}
   815 TAS.B @Rn {:  
   816     load_reg( R_ECX, Rn );
   817     MEM_READ_BYTE( R_ECX, R_EAX );
   818     TEST_r8_r8( R_AL, R_AL );
   819     SETE_t();
   820     OR_imm8_r8( 0x80, R_AL );
   821     MEM_WRITE_BYTE( R_ECX, R_EAX );
   822 :}
       /* T = ((Rm & Rn) == 0) */
   823 TST Rm, Rn {:  
   824     load_reg( R_EAX, Rm );
   825     load_reg( R_ECX, Rn );
   826     TEST_r32_r32( R_EAX, R_ECX );
   827     SETE_t();
   828 :}
   829 TST #imm, R0 {:  
   830     load_reg( R_EAX, 0 );
   831     TEST_imm32_r32( imm, R_EAX );
   832     SETE_t();
   833 :}
   834 TST.B #imm, @(R0, GBR) {:  
   835     load_reg( R_EAX, 0);
   836     load_reg( R_ECX, R_GBR);
   837     ADD_r32_r32( R_EAX, R_ECX );
   838     MEM_READ_BYTE( R_ECX, R_EAX );
   839     TEST_imm8_r8( imm, R_EAX );
   840     SETE_t();
   841 :}
   842 XOR Rm, Rn {:  
   843     load_reg( R_EAX, Rm );
   844     load_reg( R_ECX, Rn );
   845     XOR_r32_r32( R_EAX, R_ECX );
   846     store_reg( R_ECX, Rn );
   847 :}
   848 XOR #imm, R0 {:  
   849     load_reg( R_EAX, 0 );
   850     XOR_imm32_r32( imm, R_EAX );
   851     store_reg( R_EAX, 0 );
   852 :}
   853 XOR.B #imm, @(R0, GBR) {:  
   854     load_reg( R_EAX, 0 );
   855     load_spreg( R_ECX, R_GBR );
   856     ADD_r32_r32( R_EAX, R_ECX );
       PUSH_r32(R_ECX); // sh4_read_byte may clobber ECX (caller-saved) - preserve the address
   857     MEM_READ_BYTE( R_ECX, R_EAX );
       POP_r32(R_ECX);
   858     XOR_imm32_r32( imm, R_EAX );
   859     MEM_WRITE_BYTE( R_ECX, R_EAX );
   860 :}
   861 XTRCT Rm, Rn {:
   862     load_reg( R_EAX, Rm );
   863     MOV_r32_r32( R_EAX, R_ECX );
   864     SHR_imm8_r32( 16, R_EAX );
   865     SHL_imm8_r32( 16, R_ECX );
   866     OR_r32_r32( R_EAX, R_ECX );
   867     store_reg( R_ECX, Rn );
   868 :}
   870 /* Data move instructions */
   871 MOV Rm, Rn {:  
   872     load_reg( R_EAX, Rm );
   873     store_reg( R_EAX, Rn );
   874 :}
       /* imm is sign-extended by the decoder; materialize it directly */
   875 MOV #imm, Rn {:  
   876     load_imm32( R_EAX, imm );
   877     store_reg( R_EAX, Rn );
   878 :}
   879 MOV.B Rm, @Rn {:  
   880     load_reg( R_EAX, Rm );
   881     load_reg( R_ECX, Rn );
   882     MEM_WRITE_BYTE( R_ECX, R_EAX );
   883 :}
   884 MOV.B Rm, @-Rn {:  
   885     load_reg( R_EAX, Rm );
   886     load_reg( R_ECX, Rn );
   887     ADD_imm8s_r32( -1, Rn );
   888     store_reg( R_ECX, Rn );
   889     MEM_WRITE_BYTE( R_ECX, R_EAX );
   890 :}
   891 MOV.B Rm, @(R0, Rn) {:  
   892     load_reg( R_EAX, 0 );
   893     load_reg( R_ECX, Rn );
   894     ADD_r32_r32( R_EAX, R_ECX );
   895     load_reg( R_EAX, Rm );
   896     MEM_WRITE_BYTE( R_ECX, R_EAX );
   897 :}
   898 MOV.B R0, @(disp, GBR) {:  
   899     load_reg( R_EAX, 0 );
   900     load_spreg( R_ECX, R_GBR );
   901     ADD_imm32_r32( disp, R_ECX );
   902     MEM_WRITE_BYTE( R_ECX, R_EAX );
   903 :}
   904 MOV.B R0, @(disp, Rn) {:  
   905     load_reg( R_EAX, 0 );
   906     load_reg( R_ECX, Rn );
   907     ADD_imm32_r32( disp, R_ECX );
   908     MEM_WRITE_BYTE( R_ECX, R_EAX );
   909 :}
   910 MOV.B @Rm, Rn {:  
   911     load_reg( R_ECX, Rm );
   912     MEM_READ_BYTE( R_ECX, R_EAX );
   913     store_reg( R_EAX, Rn ); // was R_ECX: the loaded value is in EAX
   914 :}
       /* Post-increment byte load: read from the pre-increment address.
        * Rm is written back before the load so Rm==Rn still ends with the
        * loaded value in Rn. */
   915 MOV.B @Rm+, Rn {:  
   916     load_reg( R_ECX, Rm );
   917     MOV_r32_r32( R_ECX, R_EAX );
   918     ADD_imm8s_r32( 1, R_EAX );
   919     store_reg( R_EAX, Rm );
   920     MEM_READ_BYTE( R_ECX, R_EAX );
   921     store_reg( R_EAX, Rn );
   922 :}
   923 MOV.B @(R0, Rm), Rn {:  
   924     load_reg( R_EAX, 0 );
   925     load_reg( R_ECX, Rm );
   926     ADD_r32_r32( R_EAX, R_ECX );
   927     MEM_READ_BYTE( R_ECX, R_EAX );
   928     store_reg( R_EAX, Rn );
   929 :}
   930 MOV.B @(disp, GBR), R0 {:  
   931     load_spreg( R_ECX, R_GBR );
   932     ADD_imm32_r32( disp, R_ECX );
   933     MEM_READ_BYTE( R_ECX, R_EAX );
   934     store_reg( R_EAX, 0 );
   935 :}
   936 MOV.B @(disp, Rm), R0 {:  
   937     load_reg( R_ECX, Rm );
   938     ADD_imm32_r32( disp, R_ECX );
   939     MEM_READ_BYTE( R_ECX, R_EAX );
   940     store_reg( R_EAX, 0 );
   941 :}
       /* Long stores: address must be 4-byte aligned (check_walign32) */
   942 MOV.L Rm, @Rn {:
   943     load_reg( R_EAX, Rm );
   944     load_reg( R_ECX, Rn );
   945     check_walign32(R_ECX);
   946     MEM_WRITE_LONG( R_ECX, R_EAX );
   947 :}
       /* Pre-decrement: alignment of Rn and Rn-4 is identical mod 4, so the
        * check may run before the decrement */
   948 MOV.L Rm, @-Rn {:  
   949     load_reg( R_EAX, Rm );
   950     load_reg( R_ECX, Rn );
   951     check_walign32( R_ECX );
   952     ADD_imm8s_r32( -4, R_ECX );
   953     store_reg( R_ECX, Rn );
   954     MEM_WRITE_LONG( R_ECX, R_EAX );
   955 :}
   956 MOV.L Rm, @(R0, Rn) {:  
   957     load_reg( R_EAX, 0 );
   958     load_reg( R_ECX, Rn );
   959     ADD_r32_r32( R_EAX, R_ECX );
   960     check_walign32( R_ECX );
   961     load_reg( R_EAX, Rm );
   962     MEM_WRITE_LONG( R_ECX, R_EAX );
   963 :}
   964 MOV.L R0, @(disp, GBR) {:  
   965     load_spreg( R_ECX, R_GBR );
   966     load_reg( R_EAX, 0 );
   967     ADD_imm32_r32( disp, R_ECX );
   968     check_walign32( R_ECX );
   969     MEM_WRITE_LONG( R_ECX, R_EAX );
   970 :}
   971 MOV.L Rm, @(disp, Rn) {:  
   972     load_reg( R_ECX, Rn );
   973     load_reg( R_EAX, Rm );
   974     ADD_imm32_r32( disp, R_ECX );
   975     check_walign32( R_ECX );
   976     MEM_WRITE_LONG( R_ECX, R_EAX );
   977 :}
   978 MOV.L @Rm, Rn {:  
   979     load_reg( R_ECX, Rm );
   980     check_ralign32( R_ECX );
   981     MEM_READ_LONG( R_ECX, R_EAX );
   982     store_reg( R_EAX, Rn );
   983 :}
   984 MOV.L @Rm+, Rn {:  
   985     load_reg( R_EAX, Rm );
   986     check_ralign32( R_ECX );
   987     MOV_r32_r32( R_EAX, R_ECX );
   988     ADD_imm8s_r32( 4, R_EAX );
   989     store_reg( R_EAX, Rm );
   990     MEM_READ_LONG( R_ECX, R_EAX );
   991     store_reg( R_EAX, Rn );
   992 :}
   993 MOV.L @(R0, Rm), Rn {:  
   994     load_reg( R_EAX, 0 );
   995     load_reg( R_ECX, Rm );
   996     ADD_r32_r32( R_EAX, R_ECX );
   997     check_ralign32( R_ECX );
   998     MEM_READ_LONG( R_ECX, R_EAX );
   999     store_reg( R_EAX, Rn );
  1000 :}
  1001 MOV.L @(disp, GBR), R0 {:
  1002     load_spreg( R_ECX, R_GBR );
  1003     ADD_imm32_r32( disp, R_ECX );
  1004     check_ralign32( R_ECX );
  1005     MEM_READ_LONG( R_ECX, R_EAX );
  1006     store_reg( R_EAX, 0 );
  1007 :}
  1008 MOV.L @(disp, PC), Rn {:  
  1009     if( sh4_x86.in_delay_slot ) {
  1010 	SLOTILLEGAL();
  1011     } else {
  1012 	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
  1013 	MEM_READ_LONG( R_ECX, R_EAX );
  1014 	store_reg( R_EAX, 0 );
  1016 :}
MOV.L @(disp, Rm), Rn {:  
    // Rn := mem[Rm + disp] (32-bit load)
    load_reg( R_ECX, Rm );
    ADD_imm8s_r32( disp, R_ECX );
    check_ralign32( R_ECX );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
:}
  1024 MOV.W Rm, @Rn {:  
  1025     load_reg( R_ECX, Rn );
  1026     check_walign16( R_ECX );
  1027     MEM_READ_WORD( R_ECX, R_EAX );
  1028     store_reg( R_EAX, Rn );
  1029 :}
  1030 MOV.W Rm, @-Rn {:  
  1031     load_reg( R_ECX, Rn );
  1032     check_walign16( R_ECX );
  1033     load_reg( R_EAX, Rm );
  1034     ADD_imm8s_r32( -2, R_ECX );
  1035     MEM_WRITE_WORD( R_ECX, R_EAX );
  1036 :}
MOV.W Rm, @(R0, Rn) {:  
    // mem[R0 + Rn] := Rm (16-bit indexed store)
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    ADD_r32_r32( R_EAX, R_ECX );
    check_walign16( R_ECX );
    load_reg( R_EAX, Rm );
    MEM_WRITE_WORD( R_ECX, R_EAX );
:}
MOV.W R0, @(disp, GBR) {:  
    // mem[GBR + disp] := R0 (16-bit store)
    load_spreg( R_ECX, R_GBR );
    load_reg( R_EAX, 0 );
    ADD_imm32_r32( disp, R_ECX );
    check_walign16( R_ECX );
    MEM_WRITE_WORD( R_ECX, R_EAX );
:}
MOV.W R0, @(disp, Rn) {:  
    // mem[Rn + disp] := R0 (16-bit store)
    load_reg( R_ECX, Rn );
    load_reg( R_EAX, 0 );
    ADD_imm32_r32( disp, R_ECX );
    check_walign16( R_ECX );
    MEM_WRITE_WORD( R_ECX, R_EAX );
:}
MOV.W @Rm, Rn {:  
    // Rn := sign-extended mem[Rm] (16-bit load)
    load_reg( R_ECX, Rm );
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MOV.W @Rm+, Rn {:  
    // Rn := mem[Rm]; Rm += 2 (post-increment 16-bit load)
    load_reg( R_EAX, Rm );
    check_ralign16( R_EAX );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 2, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MOV.W @(R0, Rm), Rn {:  
    // Rn := mem[R0 + Rm] (16-bit indexed load)
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rm );
    ADD_r32_r32( R_EAX, R_ECX );
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, Rn );
:}
MOV.W @(disp, GBR), R0 {:  
    // R0 := mem[GBR + disp] (16-bit load)
    load_spreg( R_ECX, R_GBR );
    ADD_imm32_r32( disp, R_ECX );
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
:}
MOV.W @(disp, PC), Rn {:  
    // Rn := mem[PC + disp + 4] (PC-relative 16-bit load; illegal in delay slot).
    // disp is presumably pre-scaled by the decoder — TODO confirm.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_ECX, pc + disp + 4 );
	MEM_READ_WORD( R_ECX, R_EAX );
	store_reg( R_EAX, Rn );
:}
MOV.W @(disp, Rm), R0 {:  
    // R0 := mem[Rm + disp] (16-bit load)
    load_reg( R_ECX, Rm );
    ADD_imm32_r32( disp, R_ECX );
    check_ralign16( R_ECX );
    MEM_READ_WORD( R_ECX, R_EAX );
    store_reg( R_EAX, 0 );
:}
MOVA @(disp, PC), R0 {:  
    // R0 := (PC & ~3) + disp + 4 — effective-address calculation only, no memory access.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_ECX, (pc & 0xFFFFFFFC) + disp + 4 );
	store_reg( R_ECX, 0 );
:}
MOVCA.L R0, @Rn {:  
    // mem[Rn] := R0; cache-allocating store treated as a plain 32-bit write here.
    load_reg( R_EAX, 0 );
    load_reg( R_ECX, Rn );
    check_walign32( R_ECX );
    MEM_WRITE_LONG( R_ECX, R_EAX );
:}
/* Control transfer instructions */
BF disp {:
    // Branch to pc+4+disp if T == 0 (no delay slot).
    // R_EDI holds the new PC value returned to the block epilogue.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_EDI, pc + 2 );            // default: fall through
	CMP_imm8s_sh4r( 0, R_T );
	JNE_rel8( 5, nottaken );                // T != 0: skip the 5-byte load below
	load_imm32( R_EDI, disp + pc + 4 );     // taken: branch target
	JMP_TARGET(nottaken);
	INC_r32(R_ESI);    // NOTE(review): ESI appears to be the executed-instruction counter — confirm
	return 1;
:}
BF/S disp {:
    // Branch (delayed) to pc+4+disp if T == 0; the next instruction executes first.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_EDI, pc + 2 );
	CMP_imm8s_sh4r( 0, R_T );
	JNE_rel8( 5, nottaken );
	load_imm32( R_EDI, disp + pc + 4 );
	JMP_TARGET(nottaken);
	sh4_x86.in_delay_slot = TRUE;   // translate one more instruction as the delay slot
	INC_r32(R_ESI);
	return 0;
:}
BRA disp {:  
    // Unconditional delayed branch to pc+4+disp.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_EDI, disp + pc + 4 );
	sh4_x86.in_delay_slot = TRUE;
	INC_r32(R_ESI);
	return 0;
:}
BRAF Rn {:  
    // Delayed branch to PC-relative target held in Rn.
    // NOTE(review): SH4 BRAF targets Rn + pc + 4; this loads Rn alone — confirm
    // whether the offset is added elsewhere.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_reg( R_EDI, Rn );
	sh4_x86.in_delay_slot = TRUE;
	INC_r32(R_ESI);
	return 0;
:}
BSR disp {:  
    // Delayed subroutine call: PR := pc+4, branch to pc+4+disp.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_EAX, pc + 4 );
	store_spreg( R_EAX, R_PR );
	load_imm32( R_EDI, disp + pc + 4 );
	sh4_x86.in_delay_slot = TRUE;
	INC_r32(R_ESI);
	return 0;
:}
BSRF Rn {:  
    // Delayed subroutine call: PR := pc+4, branch to Rn + pc + 4.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_EAX, pc + 4 );
	store_spreg( R_EAX, R_PR );
	load_reg( R_EDI, Rn );
	ADD_r32_r32( R_EAX, R_EDI );   // target = Rn + (pc+4)
	sh4_x86.in_delay_slot = TRUE;
	INC_r32(R_ESI);
	return 0;
:}
BT disp {:
    // Branch to pc+4+disp if T == 1 (no delay slot).
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_EDI, pc + 2 );
	CMP_imm8s_sh4r( 0, R_T );
	JE_rel8( 5, nottaken );     // T == 0: skip the branch-target load
	load_imm32( R_EDI, disp + pc + 4 );
	JMP_TARGET(nottaken);
	INC_r32(R_ESI);
	return 1;
:}
BT/S disp {:
    // Branch (delayed) to pc+4+disp if T == 1.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_EDI, pc + 2 );
	CMP_imm8s_sh4r( 0, R_T );
	JE_rel8( 5, nottaken );
	load_imm32( R_EDI, disp + pc + 4 );
	JMP_TARGET(nottaken);
	sh4_x86.in_delay_slot = TRUE;
	INC_r32(R_ESI);
	return 0;
:}
JMP @Rn {:  
    // Delayed absolute jump to the address in Rn.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_reg( R_EDI, Rn );
	sh4_x86.in_delay_slot = TRUE;
	INC_r32(R_ESI);
	return 0;
:}
JSR @Rn {:  
    // Delayed subroutine call: PR := pc+4, jump to address in Rn.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_imm32( R_EAX, pc + 4 );
	store_spreg( R_EAX, R_PR );
	load_reg( R_EDI, Rn );
	sh4_x86.in_delay_slot = TRUE;
	INC_r32(R_ESI);
	return 0;
:}
RTE {:  
    // Return from exception: SR := SSR, delayed jump to SPC.
    // NOTE(review): target loaded from R_PR here rather than R_SPC — confirm
    // against the SH4 manual (RTE should branch to SPC).
    check_priv();
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_spreg( R_EDI, R_PR );
	load_spreg( R_EAX, R_SSR );
	call_func1( sh4_write_sr, R_EAX );
	sh4_x86.in_delay_slot = TRUE;
	// SR changed: any cached privilege/FPU-enable state is stale.
	sh4_x86.priv_checked = FALSE;
	sh4_x86.fpuen_checked = FALSE;
	INC_r32(R_ESI);
	return 0;
:}
RTS {:  
    // Return from subroutine: delayed jump to PR.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	load_spreg( R_EDI, R_PR );
	sh4_x86.in_delay_slot = TRUE;
	INC_r32(R_ESI);
	return 0;
:}
TRAPA #imm {:  
    // Software trap.
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	// TODO: Write TRA 
	RAISE_EXCEPTION(EXC_TRAP);
:}
UNDEF {:  
    // Undefined instruction: raise the appropriate illegal-instruction exception.
    if( sh4_x86.in_delay_slot ) {
	RAISE_EXCEPTION(EXC_SLOT_ILLEGAL);
    } else {
	RAISE_EXCEPTION(EXC_ILLEGAL);
    return 1;
:}
CLRMAC {:  
    // MACH := MACL := 0
    XOR_r32_r32(R_EAX, R_EAX);
    store_spreg( R_EAX, R_MACL );
    store_spreg( R_EAX, R_MACH );
:}
CLRS {:
    // S := 0 (via host carry flag)
    CLC();
    SETC_sh4r(R_S);
:}
CLRT {:  
    // T := 0
    CLC();
    SETC_t();
:}
SETS {:  
    // S := 1
    STC();
    SETC_sh4r(R_S);
:}
SETT {:  
    // T := 1
    STC();
    SETC_t();
:}
  1306 /* Floating point moves */
  1307 FMOV FRm, FRn {:  
  1308     /* As horrible as this looks, it's actually covering 5 separate cases:
  1309      * 1. 32-bit fr-to-fr (PR=0)
  1310      * 2. 64-bit dr-to-dr (PR=1, FRm&1 == 0, FRn&1 == 0 )
  1311      * 3. 64-bit dr-to-xd (PR=1, FRm&1 == 0, FRn&1 == 1 )
  1312      * 4. 64-bit xd-to-dr (PR=1, FRm&1 == 1, FRn&1 == 0 )
  1313      * 5. 64-bit xd-to-xd (PR=1, FRm&1 == 1, FRn&1 == 1 )
  1314      */
  1315     check_fpuen();
  1316     load_spreg( R_ECX, R_FPSCR );
  1317     load_fr_bank( R_EDX );
  1318     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1319     JNE_rel8(8, doublesize);
  1320     load_fr( R_EDX, R_EAX, FRm ); // PR=0 branch
  1321     store_fr( R_EDX, R_EAX, FRn );
  1322     if( FRm&1 ) {
  1323 	JMP_rel8(22, end);
  1324 	JMP_TARGET(doublesize);
  1325 	load_xf_bank( R_ECX ); 
  1326 	load_fr( R_ECX, R_EAX, FRm-1 );
  1327 	if( FRn&1 ) {
  1328 	    load_fr( R_ECX, R_EDX, FRm );
  1329 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1330 	    store_fr( R_ECX, R_EDX, FRn );
  1331 	} else /* FRn&1 == 0 */ {
  1332 	    load_fr( R_ECX, R_ECX, FRm );
  1333 	    store_fr( R_EDX, R_EAX, FRn-1 );
  1334 	    store_fr( R_EDX, R_ECX, FRn );
  1336 	JMP_TARGET(end);
  1337     } else /* FRm&1 == 0 */ {
  1338 	if( FRn&1 ) {
  1339 	    JMP_rel8(22, end);
  1340 	    load_xf_bank( R_ECX );
  1341 	    load_fr( R_EDX, R_EAX, FRm );
  1342 	    load_fr( R_EDX, R_EDX, FRm+1 );
  1343 	    store_fr( R_ECX, R_EAX, FRn-1 );
  1344 	    store_fr( R_ECX, R_EDX, FRn );
  1345 	    JMP_TARGET(end);
  1346 	} else /* FRn&1 == 0 */ {
  1347 	    JMP_rel8(12, end);
  1348 	    load_fr( R_EDX, R_EAX, FRm );
  1349 	    load_fr( R_EDX, R_ECX, FRm+1 );
  1350 	    store_fr( R_EDX, R_EAX, FRn );
  1351 	    store_fr( R_EDX, R_ECX, FRn+1 );
  1352 	    JMP_TARGET(end);
  1355 :}
FMOV FRm, @Rn {:  
    // mem[Rn] := FRm (32-bit) or DRm/XDm (64-bit when FPSCR.SZ=1).
    // rel8 offsets are hand-counted byte lengths of the emitted code.
    check_fpuen();
    load_reg( R_EDX, Rn );
    check_walign32( R_EDX );
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_ECX );
    JNE_rel8(20, doublesize);
    load_fr_bank( R_ECX );
    load_fr( R_ECX, R_EAX, FRm );
    MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
    if( FRm&1 ) {
	// odd FRm: 64-bit source is the XD bank
	JMP_rel8( 46, end );
	JMP_TARGET(doublesize);
	load_xf_bank( R_ECX );
	load_fr( R_ECX, R_EAX, FRm&0x0E );   // even half of the pair
	load_fr( R_ECX, R_ECX, FRm|0x01 );   // odd half
	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
	JMP_TARGET(end);
    } else {
	JMP_rel8( 39, end );
	JMP_TARGET(doublesize);
	load_fr_bank( R_ECX );
	load_fr( R_ECX, R_EAX, FRm&0x0E );
	load_fr( R_ECX, R_ECX, FRm|0x01 );
	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
	JMP_TARGET(end);
:}
FMOV @Rm, FRn {:  
    // FRn := mem[Rm] (32-bit) or DRn/XDn := mem[Rm] (64-bit when FPSCR.SZ=1).
    check_fpuen();
    load_reg( R_EDX, Rm );
    check_ralign32( R_EDX );
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_ECX );
    JNE_rel8(19, doublesize);
    MEM_READ_LONG( R_EDX, R_EAX );
    load_fr_bank( R_ECX );
    store_fr( R_ECX, R_EAX, FRn );
    if( FRn&1 ) {
	// odd FRn: 64-bit destination is the XD bank
	JMP_rel8(46, end);
	JMP_TARGET(doublesize);
	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
	load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
	load_xf_bank( R_ECX );
	store_fr( R_ECX, R_EAX, FRn&0x0E );
	store_fr( R_ECX, R_EDX, FRn|0x01 );
	JMP_TARGET(end);
    } else {
	JMP_rel8(36, end);
	JMP_TARGET(doublesize);
	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
	load_fr_bank( R_ECX );
	store_fr( R_ECX, R_EAX, FRn&0x0E );
	store_fr( R_ECX, R_EDX, FRn|0x01 );
	JMP_TARGET(end);
:}
FMOV FRm, @-Rn {:  
    // Rn -= 4 (or 8 in 64-bit mode); mem[Rn] := FRm / DRm / XDm.
    check_fpuen();
    load_reg( R_EDX, Rn );
    check_walign32( R_EDX );
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_ECX );
    JNE_rel8(20, doublesize);
    load_fr_bank( R_ECX );
    load_fr( R_ECX, R_EAX, FRm );
    ADD_imm8s_r32(-4,R_EDX);
    store_reg( R_EDX, Rn );   // write back decremented pointer
    MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
    if( FRm&1 ) {
	JMP_rel8( 46, end );
	JMP_TARGET(doublesize);
	load_xf_bank( R_ECX );
	load_fr( R_ECX, R_EAX, FRm&0x0E );
	load_fr( R_ECX, R_ECX, FRm|0x01 );
	ADD_imm8s_r32(-8,R_EDX);
	store_reg( R_EDX, Rn );
	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
	JMP_TARGET(end);
    } else {
	JMP_rel8( 39, end );
	JMP_TARGET(doublesize);
	load_fr_bank( R_ECX );
	load_fr( R_ECX, R_EAX, FRm&0x0E );
	load_fr( R_ECX, R_ECX, FRm|0x01 );
	ADD_imm8s_r32(-8,R_EDX);
	store_reg( R_EDX, Rn );
	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
	JMP_TARGET(end);
:}
  1447 FMOV @Rm+, FRn {:  
  1448     check_fpuen();
  1449     load_reg( R_EDX, Rm );
  1450     check_ralign32( R_EDX );
  1451     MOV_r32_r32( R_EDX, R_EAX );
  1452     load_spreg( R_ECX, R_FPSCR );
  1453     TEST_imm32_r32( FPSCR_SZ, R_ECX );
  1454     JNE_rel8(25, doublesize);
  1455     ADD_imm8s_r32( 4, R_EAX );
  1456     store_reg( R_EAX, Rm );
  1457     MEM_READ_LONG( R_EDX, R_EAX );
  1458     load_fr_bank( R_ECX );
  1459     store_fr( R_ECX, R_EAX, FRn );
  1460     if( FRn&1 ) {
  1461 	JMP_rel8(52, end);
  1462 	JMP_TARGET(doublesize);
  1463 	ADD_imm8s_r32( 8, R_EAX );
  1464 	store_reg(R_EAX, Rm);
  1465 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1466 	load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
  1467 	load_xf_bank( R_ECX );
  1468 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1469 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1470 	JMP_TARGET(end);
  1471     } else {
  1472 	JMP_rel8(42, end);
  1473 	ADD_imm8s_r32( 8, R_EAX );
  1474 	store_reg(R_EAX, Rm);
  1475 	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
  1476 	load_fr_bank( R_ECX );
  1477 	store_fr( R_ECX, R_EAX, FRn&0x0E );
  1478 	store_fr( R_ECX, R_EDX, FRn|0x01 );
  1479 	JMP_TARGET(end);
  1481 :}
FMOV FRm, @(R0, Rn) {:  
    // mem[R0 + Rn] := FRm (32-bit) or DRm/XDm (64-bit when FPSCR.SZ=1).
    check_fpuen();
    load_reg( R_EDX, Rn );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
    check_walign32( R_EDX );
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_ECX );
    JNE_rel8(20, doublesize);
    load_fr_bank( R_ECX );
    load_fr( R_ECX, R_EAX, FRm );
    MEM_WRITE_LONG( R_EDX, R_EAX ); // 12
    if( FRm&1 ) {
	JMP_rel8( 46, end );
	JMP_TARGET(doublesize);
	load_xf_bank( R_ECX );
	load_fr( R_ECX, R_EAX, FRm&0x0E );
	load_fr( R_ECX, R_ECX, FRm|0x01 );
	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
	JMP_TARGET(end);
    } else {
	JMP_rel8( 39, end );
	JMP_TARGET(doublesize);
	load_fr_bank( R_ECX );
	load_fr( R_ECX, R_EAX, FRm&0x0E );
	load_fr( R_ECX, R_ECX, FRm|0x01 );
	MEM_WRITE_DOUBLE( R_EDX, R_EAX, R_ECX );
	JMP_TARGET(end);
:}
FMOV @(R0, Rm), FRn {:  
    // FRn := mem[R0 + Rm] (32-bit) or DRn/XDn (64-bit when FPSCR.SZ=1).
    check_fpuen();
    load_reg( R_EDX, Rm );
    ADD_sh4r_r32( REG_OFFSET(r[0]), R_EDX );
    check_ralign32( R_EDX );
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_SZ, R_ECX );
    JNE_rel8(19, doublesize);
    MEM_READ_LONG( R_EDX, R_EAX );
    load_fr_bank( R_ECX );
    store_fr( R_ECX, R_EAX, FRn );
    if( FRn&1 ) {
	JMP_rel8(46, end);
	JMP_TARGET(doublesize);
	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
	load_spreg( R_ECX, R_FPSCR ); // assume read_long clobbered it
	load_xf_bank( R_ECX );
	store_fr( R_ECX, R_EAX, FRn&0x0E );
	store_fr( R_ECX, R_EDX, FRn|0x01 );
	JMP_TARGET(end);
    } else {
	JMP_rel8(36, end);
	JMP_TARGET(doublesize);
	MEM_READ_DOUBLE( R_EDX, R_EAX, R_EDX );
	load_fr_bank( R_ECX );
	store_fr( R_ECX, R_EAX, FRn&0x0E );
	store_fr( R_ECX, R_EDX, FRn|0x01 );
	JMP_TARGET(end);
:}
FLDI0 FRn {:  /* IFF PR=0 */
    // FRn := 0.0f; no-op when FPSCR.PR=1 (double precision).
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(8, end);
    XOR_r32_r32( R_EAX, R_EAX );      // 0.0f has an all-zero bit pattern
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
:}
FLDI1 FRn {:  /* IFF PR=0 */
    // FRn := 1.0f; no-op when FPSCR.PR=1.
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(11, end);
    load_imm32(R_EAX, 0x3F800000);    // IEEE-754 single 1.0f
    load_spreg( R_ECX, REG_OFFSET(fr_bank) );
    store_fr( R_ECX, R_EAX, FRn );
    JMP_TARGET(end);
:}
FLOAT FPUL, FRn {:  
    // FRn (or DRn when PR=1) := (float)(int)FPUL, via the x87 stack.
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg(R_EDX, REG_OFFSET(fr_bank));
    FILD_sh4r(R_FPUL);               // push integer FPUL onto x87 stack
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(5, doubleprec);
    pop_fr( R_EDX, FRn );            // single-precision result
    JMP_rel8(3, end);
    JMP_TARGET(doubleprec);
    pop_dr( R_EDX, FRn );            // double-precision result
    JMP_TARGET(end);
:}
FTRC FRm, FPUL {:  
    // FPUL := (int)FRm — conversion not yet implemented in this revision.
    check_fpuen();
    // TODO
:}
FLDS FRm, FPUL {:  
    // FPUL := raw bits of FRm (no conversion).
    check_fpuen();
    load_fr_bank( R_ECX );
    load_fr( R_ECX, R_EAX, FRm );
    store_spreg( R_EAX, R_FPUL );
:}
FSTS FPUL, FRn {:  
    // FRn := raw bits of FPUL (no conversion).
    check_fpuen();
    load_fr_bank( R_ECX );
    load_spreg( R_EAX, R_FPUL );
    store_fr( R_ECX, R_EAX, FRn );
:}
FCNVDS FRm, FPUL {:  
    // FPUL := (float)DRm — double-to-single conversion, valid only when PR=1.
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JE_rel8(9, end); // only when PR=1
    load_fr_bank( R_ECX );
    push_dr( R_ECX, FRm );   // x87 load narrows on the subsequent single-store
    pop_fpul();
    JMP_TARGET(end);
:}
  1601 FCNVSD FPUL, FRn {:  
  1602     check_fpuen();
  1603     check_fpuen();
  1604     load_spreg( R_ECX, R_FPSCR );
  1605     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1606     JE_rel8(9, end); // only when PR=1
  1607     load_fr_bank( R_ECX );
  1608     push_fpul();
  1609     pop_dr( R_ECX, FRn );
  1610     JMP_TARGET(end);
  1611 :}
/* Floating point instructions */
FABS FRn {:  
    // FRn := |FRn| (DRn when PR=1), via x87 FABS.
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_fr_bank( R_EDX );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn); // 3
    FABS_st0(); // 2
    pop_fr( R_EDX, FRn); //3
    JMP_rel8(8,end); // 2
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FABS_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
:}
FADD FRm, FRn {:  
    // FRn := FRn + FRm (DRn := DRn + DRm when PR=1).
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13,doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    FADDP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11,end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    FADDP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
:}
FDIV FRm, FRn {:  
    // FRn := FRn / FRm (DRn := DRn / DRm when PR=1).
    // FRn pushed first so FDIVP computes st(1)/st(0) = FRn/FRm.
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRn);
    push_fr(R_EDX, FRm);
    FDIVP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    push_dr(R_EDX, FRm);
    FDIVP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
:}
FMAC FR0, FRm, FRn {:  
    // FRn := FR0 * FRm + FRn (double forms when PR=1).
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    load_spreg( R_EDX, REG_OFFSET(fr_bank));
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    JNE_rel8(18, doubleprec);
    push_fr( R_EDX, 0 );
    push_fr( R_EDX, FRm );
    FMULP_st(1);
    push_fr( R_EDX, FRn );
    FADDP_st(1);
    pop_fr( R_EDX, FRn );
    JMP_rel8(16, end);
    JMP_TARGET(doubleprec);
    push_dr( R_EDX, 0 );
    push_dr( R_EDX, FRm );
    FMULP_st(1);
    push_dr( R_EDX, FRn );
    FADDP_st(1);
    pop_dr( R_EDX, FRn );
    JMP_TARGET(end);
:}
FMUL FRm, FRn {:  
    // FRn := FRn * FRm (DRn when PR=1).
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(13, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    FMULP_st(1);
    pop_fr(R_EDX, FRn);
    JMP_rel8(11, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    FMULP_st(1);
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
:}
FNEG FRn {:  
    // FRn := -FRn (DRn when PR=1), via x87 FCHS.
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn);
    FCHS_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FCHS_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
:}
FSRRA FRn {:  
    // FRn := 1.0 / sqrt(FRn) — single precision only (no-op when PR=1).
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(12, end); // PR=0 only
    FLD1_st0();
    push_fr(R_EDX, FRn);
    FSQRT_st0();
    FDIVP_st(1);       // st(1)=1.0 divided by st(0)=sqrt(FRn)
    pop_fr(R_EDX, FRn);
    JMP_TARGET(end);
:}
FSQRT FRn {:  
    // FRn := sqrt(FRn) (DRn when PR=1).
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(10, doubleprec);
    push_fr(R_EDX, FRn);
    FSQRT_st0();
    pop_fr(R_EDX, FRn);
    JMP_rel8(8, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRn);
    FSQRT_st0();
    pop_dr(R_EDX, FRn);
    JMP_TARGET(end);
:}
  1752 FSUB FRm, FRn {:  
  1753     check_fpuen();
  1754     load_spreg( R_ECX, R_FPSCR );
  1755     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1756     load_fr_bank( R_EDX );
  1757     JNE_rel8(13, doubleprec);
  1758     push_fr(R_EDX, FRn);
  1759     push_fr(R_EDX, FRm);
  1760     FMULP_st(1);
  1761     pop_fr(R_EDX, FRn);
  1762     JMP_rel8(11, end);
  1763     JMP_TARGET(doubleprec);
  1764     push_dr(R_EDX, FRn);
  1765     push_dr(R_EDX, FRm);
  1766     FMULP_st(1);
  1767     pop_dr(R_EDX, FRn);
  1768     JMP_TARGET(end);
  1769 :}
  1771 FCMP/EQ FRm, FRn {:  
  1772     check_fpuen();
  1773     load_spreg( R_ECX, R_FPSCR );
  1774     TEST_imm32_r32( FPSCR_PR, R_ECX );
  1775     load_fr_bank( R_EDX );
  1776     JNE_rel8(8, doubleprec);
  1777     push_fr(R_EDX, FRm);
  1778     push_fr(R_EDX, FRn);
  1779     JMP_rel8(6, end);
  1780     JMP_TARGET(doubleprec);
  1781     push_dr(R_EDX, FRm);
  1782     push_dr(R_EDX, FRn);
  1783     FCOMIP_st(1);
  1784     SETE_t();
  1785     FPOP_st();
  1786     JMP_TARGET(end);
  1787 :}
FCMP/GT FRm, FRn {:  
    // T := (FRn > FRm) (double comparison when PR=1).
    // Both precision paths converge at JMP_TARGET(end) before the shared
    // compare/pop sequence; FCOMIP compares st(0)=FRn against st(1)=FRm.
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    TEST_imm32_r32( FPSCR_PR, R_ECX );
    load_fr_bank( R_EDX );
    JNE_rel8(8, doubleprec);
    push_fr(R_EDX, FRm);
    push_fr(R_EDX, FRn);
    JMP_rel8(6, end);
    JMP_TARGET(doubleprec);
    push_dr(R_EDX, FRm);
    push_dr(R_EDX, FRn);
    JMP_TARGET(end);
    FCOMIP_st(1);
    SETA_t();
    FPOP_st();
:}
FSCA FPUL, FRn {:  
    // sin/cos approximation — not yet implemented (FPU-enable check only).
    check_fpuen();
:}
FIPR FVm, FVn {:  
    // vector inner product — not yet implemented.
    check_fpuen();
:}
FTRV XMTRX, FVn {:  
    // matrix-vector transform — not yet implemented.
    check_fpuen();
:}
FRCHG {:  
    // Toggle FPSCR.FR (swap FP register banks).
    // NOTE(review): the cached fr_bank pointer is not refreshed here — confirm
    // it is recomputed before the next FP op.
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_FR, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
:}
FSCHG {:  
    // Toggle FPSCR.SZ (single/double transfer size).
    check_fpuen();
    load_spreg( R_ECX, R_FPSCR );
    XOR_imm32_r32( FPSCR_SZ, R_ECX );
    store_spreg( R_ECX, R_FPSCR );
:}
/* Processor control instructions */
LDC Rm, SR {:
    // SR := Rm via sh4_write_sr (handles bank switching / mode changes),
    // so any cached privilege/FPU-enable state becomes stale.
    load_reg( R_EAX, Rm );
    call_func1( sh4_write_sr, R_EAX );
    sh4_x86.priv_checked = FALSE;
    sh4_x86.fpuen_checked = FALSE;
:}
LDC Rm, GBR {: 
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_GBR );
:}
LDC Rm, VBR {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_VBR );
:}
LDC Rm, SSR {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SSR );
:}
LDC Rm, SGR {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SGR );
:}
LDC Rm, SPC {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_SPC );
:}
LDC Rm, DBR {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_DBR );
:}
LDC Rm, Rn_BANK {:  
    // Alternate-bank register write.
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
:}
/* LDC.L @Rm+, reg: reg := mem[Rm]; Rm += 4 (post-increment control-register load).
 * NOTE(review): the privileged variants emit no check_priv() in this revision —
 * confirm whether the privilege check is handled elsewhere. */
LDC.L @Rm+, GBR {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_GBR );
:}
LDC.L @Rm+, SR {:
    // SR written via sh4_write_sr; cached mode checks invalidated.
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    call_func1( sh4_write_sr, R_EAX );
    sh4_x86.priv_checked = FALSE;
    sh4_x86.fpuen_checked = FALSE;
:}
LDC.L @Rm+, VBR {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_VBR );
:}
LDC.L @Rm+, SSR {:
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_SSR );
:}
LDC.L @Rm+, SGR {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_SGR );
:}
LDC.L @Rm+, SPC {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_SPC );
:}
LDC.L @Rm+, DBR {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_DBR );
:}
LDC.L @Rm+, Rn_BANK {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
:}
/* LDS / LDS.L: load FP/system registers from Rm or from mem[Rm] with post-increment. */
LDS Rm, FPSCR {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPSCR );
:}
LDS.L @Rm+, FPSCR {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_FPSCR );
:}
LDS Rm, FPUL {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_FPUL );
:}
LDS Rm, MACH {: 
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_MACH );
:}
LDS Rm, MACL {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_MACL );
:}
LDS Rm, PR {:  
    load_reg( R_EAX, Rm );
    store_spreg( R_EAX, R_PR );
:}
LDS.L @Rm+, PR {:  
    load_reg( R_EAX, Rm );
    MOV_r32_r32( R_EAX, R_ECX );
    ADD_imm8s_r32( 4, R_EAX );
    store_reg( R_EAX, Rm );
    MEM_READ_LONG( R_ECX, R_EAX );
    store_spreg( R_EAX, R_PR );
:}
LDTLB {:  :}  /* TLB load — no-op in this translator */
OCBI @Rn {:  :}   /* cache-block invalidate — no-op */
OCBP @Rn {:  :}   /* cache-block purge — no-op */
OCBWB @Rn {:  :}  /* cache-block writeback — no-op */
PREF @Rn {:
    // Prefetch: only significant for store-queue addresses (0xE0000000 region),
    // where it triggers the store-queue flush. Other addresses are a no-op.
    load_reg( R_EAX, Rn );
    PUSH_r32( R_EAX );    // presumably the address argument for sh4_flush_store_queue — confirm
    AND_imm32_r32( 0xFC000000, R_EAX );
    CMP_imm32_r32( 0xE0000000, R_EAX );
    JNE_rel8(7, end);
    call_func0( sh4_flush_store_queue );
    JMP_TARGET(end);
    ADD_imm8s_r32( 4, R_ESP );   // pop the pushed argument in both paths
:}
 SLEEP {: /* TODO */ :}
 STC SR, Rn {:
     // Rn := SR (assembled from parts by sh4_read_sr).
     call_func0(sh4_read_sr);
     store_reg( R_EAX, Rn );
:}
STC GBR, Rn {:  
    load_spreg( R_EAX, R_GBR );
    store_reg( R_EAX, Rn );
:}
STC VBR, Rn {:  
    load_spreg( R_EAX, R_VBR );
    store_reg( R_EAX, Rn );
:}
STC SSR, Rn {:  
    load_spreg( R_EAX, R_SSR );
    store_reg( R_EAX, Rn );
:}
STC SPC, Rn {:  
    load_spreg( R_EAX, R_SPC );
    store_reg( R_EAX, Rn );
:}
STC SGR, Rn {:  
    load_spreg( R_EAX, R_SGR );
    store_reg( R_EAX, Rn );
:}
STC DBR, Rn {:  
    load_spreg( R_EAX, R_DBR );
    store_reg( R_EAX, Rn );
:}
STC Rm_BANK, Rn {:
    // Alternate-bank register read.
    load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
    store_reg( R_EAX, Rn );
:}
  2038 STC.L SR, @-Rn {:
  2039     load_reg( R_ECX, Rn );
  2040     ADD_imm8s_r32( -4, Rn );
  2041     store_reg( R_ECX, Rn );
  2042     call_func0( sh4_read_sr );
  2043     MEM_WRITE_LONG( R_ECX, R_EAX );
  2044 :}
  2045 STC.L VBR, @-Rn {:  
  2046     load_reg( R_ECX, Rn );
  2047     ADD_imm8s_r32( -4, Rn );
  2048     store_reg( R_ECX, Rn );
  2049     load_spreg( R_EAX, R_VBR );
  2050     MEM_WRITE_LONG( R_ECX, R_EAX );
  2051 :}
  2052 STC.L SSR, @-Rn {:  
  2053     load_reg( R_ECX, Rn );
  2054     ADD_imm8s_r32( -4, Rn );
  2055     store_reg( R_ECX, Rn );
  2056     load_spreg( R_EAX, R_SSR );
  2057     MEM_WRITE_LONG( R_ECX, R_EAX );
  2058 :}
  2059 STC.L SPC, @-Rn {:  
  2060     load_reg( R_ECX, Rn );
  2061     ADD_imm8s_r32( -4, Rn );
  2062     store_reg( R_ECX, Rn );
  2063     load_spreg( R_EAX, R_SPC );
  2064     MEM_WRITE_LONG( R_ECX, R_EAX );
  2065 :}
  2066 STC.L SGR, @-Rn {:  
  2067     load_reg( R_ECX, Rn );
  2068     ADD_imm8s_r32( -4, Rn );
  2069     store_reg( R_ECX, Rn );
  2070     load_spreg( R_EAX, R_SGR );
  2071     MEM_WRITE_LONG( R_ECX, R_EAX );
  2072 :}
  2073 STC.L DBR, @-Rn {:  
  2074     load_reg( R_ECX, Rn );
  2075     ADD_imm8s_r32( -4, Rn );
  2076     store_reg( R_ECX, Rn );
  2077     load_spreg( R_EAX, R_DBR );
  2078     MEM_WRITE_LONG( R_ECX, R_EAX );
  2079 :}
  2080 STC.L Rm_BANK, @-Rn {:  
  2081     load_reg( R_ECX, Rn );
  2082     ADD_imm8s_r32( -4, Rn );
  2083     store_reg( R_ECX, Rn );
  2084     load_spreg( R_EAX, REG_OFFSET(r_bank[Rm_BANK]) );
  2085     MEM_WRITE_LONG( R_ECX, R_EAX );
  2086 :}
  2087 STC.L GBR, @-Rn {:  
  2088     load_reg( R_ECX, Rn );
  2089     ADD_imm8s_r32( -4, Rn );
  2090     store_reg( R_ECX, Rn );
  2091     load_spreg( R_EAX, R_GBR );
  2092     MEM_WRITE_LONG( R_ECX, R_EAX );
  2093 :}
  2094 STS FPSCR, Rn {:  
  2095     load_spreg( R_EAX, R_FPSCR );
  2096     store_reg( R_EAX, Rn );
  2097 :}
  2098 STS.L FPSCR, @-Rn {:  
  2099     load_reg( R_ECX, Rn );
  2100     ADD_imm8s_r32( -4, Rn );
  2101     store_reg( R_ECX, Rn );
  2102     load_spreg( R_EAX, R_FPSCR );
  2103     MEM_WRITE_LONG( R_ECX, R_EAX );
  2104 :}
  2105 STS FPUL, Rn {:  
  2106     load_spreg( R_EAX, R_FPUL );
  2107     store_reg( R_EAX, Rn );
  2108 :}
  2109 STS.L FPUL, @-Rn {:  
  2110     load_reg( R_ECX, Rn );
  2111     ADD_imm8s_r32( -4, Rn );
  2112     store_reg( R_ECX, Rn );
  2113     load_spreg( R_EAX, R_FPUL );
  2114     MEM_WRITE_LONG( R_ECX, R_EAX );
  2115 :}
  2116 STS MACH, Rn {:  
  2117     load_spreg( R_EAX, R_MACH );
  2118     store_reg( R_EAX, Rn );
  2119 :}
  2120 STS.L MACH, @-Rn {:  
  2121     load_reg( R_ECX, Rn );
  2122     ADD_imm8s_r32( -4, Rn );
  2123     store_reg( R_ECX, Rn );
  2124     load_spreg( R_EAX, R_MACH );
  2125     MEM_WRITE_LONG( R_ECX, R_EAX );
  2126 :}
  2127 STS MACL, Rn {:  
  2128     load_spreg( R_EAX, R_MACL );
  2129     store_reg( R_EAX, Rn );
  2130 :}
  2131 STS.L MACL, @-Rn {:  
  2132     load_reg( R_ECX, Rn );
  2133     ADD_imm8s_r32( -4, Rn );
  2134     store_reg( R_ECX, Rn );
  2135     load_spreg( R_EAX, R_MACL );
  2136     MEM_WRITE_LONG( R_ECX, R_EAX );
  2137 :}
  2138 STS PR, Rn {:  
  2139     load_spreg( R_EAX, R_PR );
  2140     store_reg( R_EAX, Rn );
  2141 :}
  2142 STS.L PR, @-Rn {:  
  2143     load_reg( R_ECX, Rn );
  2144     ADD_imm8s_r32( -4, Rn );
  2145     store_reg( R_ECX, Rn );
  2146     load_spreg( R_EAX, R_PR );
  2147     MEM_WRITE_LONG( R_ECX, R_EAX );
  2148 :}
NOP {: /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ :}
  2151 %%
  2152     INC_r32(R_ESI);
  2153     if( sh4_x86.in_delay_slot ) {
  2154 	sh4_x86.in_delay_slot = FALSE;
  2155 	return 1;
  2157     return 0;
.