lxdream.org :: lxdream/src/sh4/sh4x86.in
filename    src/sh4/sh4x86.in
changeset   1216:defbd44429d8
prev        1214:49152b3d8b75
next        1218:be02e87f9f87
author      nkeynes
date        Mon Feb 13 20:00:27 2012 +1000
permissions -rw-r--r--
last change Fix MMU on non-translated platforms
- reintroduce old VMA translation functions (slightly modified)
- modify shadow processing to work on post-translated memory ops
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "x86dasm/x86dasm.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096
/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
#define R_MACH   REG_OFFSET(mac)+4
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

#define SH4_MODE_UNKNOWN -1

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    uint8_t *code;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;
    struct mem_region_fn **user_address_space;

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback;
    xlat_block_end_callback_t end_callback;
    gboolean fastmem;
    gboolean profile_blocks;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc );
static void sh4_x86_translate_unlink_block( void *use_list );

static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },
    { "sh4_user_address_space", NULL },
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
    { "sh4_translate_get_code_and_backpatch", sh4_translate_get_code_and_backpatch },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_raise_exception", sh4_raise_exception },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};

static struct xlat_target_fns x86_target_fns = {
    sh4_x86_translate_unlink_block
};

gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
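/* For comparison, the same test via GCC/Clang's <cpuid.h> (a sketch, not
 * what this file uses): CPUID leaf 1 reports SSE3 in bit 0 of ECX. */
#include <cpuid.h>
static int is_sse3_supported_alt(void)
{
    unsigned int eax, ebx, ecx, edx;
    if( !__get_cpuid(1, &eax, &ebx, &ecx, &edx) )
        return 0;
    return (ecx & 1) != 0;
}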
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
{
    sh4_x86.priv_address_space = priv;
    sh4_x86.user_address_space = user;
    x86_symbol_table[2].ptr = priv;
    x86_symbol_table[3].ptr = user;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.begin_callback = NULL;
    sh4_x86.end_callback = NULL;
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
    sh4_x86.fastmem = TRUE;
    sh4_x86.profile_blocks = FALSE;
    sh4_x86.sse3_enabled = is_sse3_supported();
    x86_disasm_init();
    x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
    xlat_set_target_fns(&x86_target_fns);
}

void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.begin_callback = begin;
    sh4_x86.end_callback = end;
}

void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}

void sh4_translate_set_profile_blocks( gboolean flag )
{
    sh4_x86.profile_blocks = flag;
}

gboolean sh4_translate_get_profile_blocks()
{
    return sh4_x86.profile_blocks;
}
/**
 * Disassemble the given translated code block, and its source SH4 code block,
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end; ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#else
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#endif
        if( source_recov_table < source_recov_end &&
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
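/* Sketch of how a record is consumed later (see sh4_translate_end_block):
 * for exc_code >= 0 the rel32 left at fixup_offset is advanced so the
 * branch lands on a freshly emitted exception stub, while exc_code == -2
 * marks a pointer-sized immediate that is overwritten with the stub's
 * absolute address. Illustrative helper over a hypothetical buffer: */
#include <string.h>
static void demo_retarget_rel32( uint8_t *rel32_at, uint8_t *stub )
{
    uint32_t rel;
    memcpy( &rel, rel32_at, 4 );
    rel += (uint32_t)(stub - rel32_at - 4); /* rel32 counts from the next insn */
    memcpy( rel32_at, &rel, 4 );
}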
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
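/* How a forward jump resolves (sketch): JE_label() below emits a jcc with
 * rel8 = -1, MARK_JMP8 remembers the displacement byte, and JMP_TARGET adds
 * the now-known distance. Worked example over a hypothetical buffer: */
static void demo_rel8_patch(void)
{
    uint8_t buf[16] = { 0x74, 0xFF };  /* je -1, as JE_label leaves it */
    uint8_t *mark = &buf[1];           /* MARK_JMP8: xlat_output - 1   */
    uint8_t *out = buf + 7;            /* pretend 5 more bytes emitted */
    *mark += (uint8_t)(out - mark);    /* JMP_TARGET: -1 + 6 = +5      */
}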
/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
    CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
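/* What these checks enforce (a sketch): the SH4 requires natural alignment,
 * and a misaligned access raises a data address error before memory is
 * touched. Host-side statement of the same predicate: */
static inline int demo_sh4_aligned( uint32_t addr, unsigned size )
{
    return (addr & (size - 1)) == 0;   /* size = 2, 4 or 8 bytes */
}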
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif

#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
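/* Shape of the dispatch these macros emit (a sketch - the real struct
 * mem_region_fn is declared elsewhere in lxdream): decode_address()
 * resolves the region's function table into ECX, and the call goes through
 * the slot picked by MEM_REGION_PTR. The type, signature and page
 * granularity below are assumptions for illustration only: */
struct demo_region_fn { int32_t (*read_long)(uint32_t addr); /* ... */ };
static int32_t demo_read_long( struct demo_region_fn **space, uint32_t addr )
{
    return space[addr >> 12]->read_long(addr);
}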
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )

void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_x86.profile_blocks ) {
        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}

uint32_t sh4_translate_end_block_size()
{
    uint32_t epilogue_size = EPILOGUE_SIZE;
    if( sh4_x86.end_callback ) {
        epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
    }
    if( sh4_x86.backpatch_posn <= 3 ) {
        epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
    } else {
        epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
    }
    return epilogue_size;
}
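/* Worked example of the reservation above (EPILOGUE_SIZE and
 * CALL1_PTR_MIN_SIZE are backend constants; the values here are made up).
 * The 12- vs 15-byte split presumably reflects the jump back to the common
 * exit: the first three stubs are close enough for a 2-byte jmp rel8, while
 * later ones need the 5-byte jmp rel32. */
static unsigned demo_end_block_size( unsigned posn )
{
    const unsigned epilogue = 16, callsz = 12;   /* hypothetical sizes */
    return posn <= 3
        ? epilogue + posn * (12 + callsz)
        : epilogue + 3 * (12 + callsz) + (posn - 3) * (15 + callsz);
}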
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode);
}
/**
 * Look up (translating if necessary) the code block for the given PC, then
 * rewrite the calling site into a direct branch to it, threading the call
 * site onto the block's use-list so the branch can be unlinked later.
 */
static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
{
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
        target = XLAT_BLOCK_CHAIN(target);
    }
    if( target == NULL ) {
        target = sh4_translate_basic_block( pc );
    }
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
    *backpatch = 0xE9;
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;

    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
    *retptr = backpatch;
}
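/* Byte layout written over the call site above (sketch): 0xE9 + rel32
 * replaces the original call with a jmp past the target's prologue, and
 * the pointer-sized NOP padding reserved by emit_translate_and_backpatch()
 * below becomes the use-list link. Illustrative helper, hypothetical
 * addresses: */
#include <string.h>
static void demo_patch_call_site( uint8_t *site, uint8_t *target,
                                  int prologue_size, void *use_list_head )
{
    uint32_t rel = (uint32_t)(target - site) + prologue_size - 5;
    site[0] = 0xE9;                        /* jmp rel32 */
    memcpy( site + 1, &rel, 4 );
    memcpy( site + 5, &use_list_head, sizeof(void *) );
}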
static void emit_translate_and_backpatch()
{
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
    CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);

    /* When patched, the jmp instruction will be 5 bytes (either platform) -
     * we need to reserve sizeof(void*) bytes for the use-list
     * pointer
     */
    if( sizeof(void*) == 8 ) {
        NOP();
    } else {
        NOP2();
    }
}

/**
 * If we're jumping to a fixed address (or at least fixed relative to the
 * current PC), then we can do a direct branch. REG_ARG1 should contain
 * the PC at this point.
 */
static void jump_next_block_fixed_pc( sh4addr_t pc )
{
    if( IS_IN_ICACHE(pc) ) {
        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
             * fetch-and-backpatch routine, which will replace the call with a branch */
            emit_translate_and_backpatch();
            return;
        } else {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        }
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
}

static void sh4_x86_translate_unlink_block( void *use_list )
{
    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
    void *next = use_list;
    while( next != NULL ) {
        xlat_output = (uint8_t *)next;
        next = *(void **)(xlat_output+5);
        emit_translate_and_backpatch();
    }
    xlat_output = tmp;
}
static void exit_block()
{
    emit_epilogue();
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}

/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    jump_next_block_fixed_pc(pc);
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        jump_next_block_fixed_pc(pc);
        JMP_TARGET(exitloop2);
    }
    exit_block();
}
/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}

/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}

/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit.
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32(0, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
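/* Reference semantics of CMP/STR (a sketch per the SH4 manual): T is set
 * when any of the four corresponding bytes of Rm and Rn are equal, which
 * the emitted code detects by XORing the registers and testing each byte
 * of the result for zero. Illustrative helper: */
static int demo_cmp_str( uint32_t rm, uint32_t rn )
{
    uint32_t x = rm ^ rn;
    return ((x & 0x000000FF) == 0) || ((x & 0x0000FF00) == 0) ||
           ((x & 0x00FF0000) == 0) || ((x & 0xFF000000) == 0);
}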
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp(REG_EAX, 0);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );            // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
    JS_label( positive );        // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end2);           // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end3);            // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32( 0, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);     // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );  // 3
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
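/* Reference semantics of SHAD (a sketch per the SH4 manual), matching the
 * branches above: a non-negative Rm shifts left by Rm&0x1F; a negative Rm
 * shifts arithmetically right, with a count of 0 (mod 32) collapsing Rn to
 * its sign bit (the "emptysar" path). Illustrative helper: */
static int32_t demo_shad( int32_t rn, int32_t rm )
{
    if( rm >= 0 )
        return (int32_t)((uint32_t)rn << (rm & 0x1F));
    if( (rm & 0x1F) == 0 )
        return rn >> 31;                 /* all sign bits */
    return rn >> ((~rm & 0x1F) + 1);     /* assumes arithmetic >> on int */
}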
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr );
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
  1476 SUBV Rm, Rn {:  
  1477     COUNT_INST(I_SUBV);
  1478     load_reg( REG_EAX, Rm );
  1479     load_reg( REG_ECX, Rn );
  1480     SUBL_r32_r32( REG_EAX, REG_ECX );
  1481     store_reg( REG_ECX, Rn );
  1482     SETO_t();
  1483     sh4_x86.tstate = TSTATE_O;
  1484 :}
  1485 SWAP.B Rm, Rn {:  
  1486     COUNT_INST(I_SWAPB);
  1487     load_reg( REG_EAX, Rm );
  1488     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1489     store_reg( REG_EAX, Rn );
  1490 :}
  1491 SWAP.W Rm, Rn {:  
  1492     COUNT_INST(I_SWAPB);
  1493     load_reg( REG_EAX, Rm );
  1494     MOVL_r32_r32( REG_EAX, REG_ECX );
  1495     SHLL_imm_r32( 16, REG_ECX );
  1496     SHRL_imm_r32( 16, REG_EAX );
  1497     ORL_r32_r32( REG_EAX, REG_ECX );
  1498     store_reg( REG_ECX, Rn );
  1499     sh4_x86.tstate = TSTATE_NONE;
  1500 :}
  1501 TAS.B @Rn {:  
  1502     COUNT_INST(I_TASB);
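            // TAS.B: set T if the byte read was zero, then write it back with
            // bit 7 set. The address is parked on the host stack across the
            // read so the write-back targets the same location.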
  1503     load_reg( REG_EAX, Rn );
  1504     MOVL_r32_rspdisp( REG_EAX, 0 );
  1505     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1506     TESTB_r8_r8( REG_DL, REG_DL );
  1507     SETE_t();
  1508     ORB_imms_r8( 0x80, REG_DL );
  1509     MOVL_rspdisp_r32( 0, REG_EAX );
  1510     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1511     sh4_x86.tstate = TSTATE_NONE;
  1512 :}
  1513 TST Rm, Rn {:  
  1514     COUNT_INST(I_TST);
  1515     load_reg( REG_EAX, Rm );
  1516     load_reg( REG_ECX, Rn );
  1517     TESTL_r32_r32( REG_EAX, REG_ECX );
  1518     SETE_t();
  1519     sh4_x86.tstate = TSTATE_E;
  1520 :}
  1521 TST #imm, R0 {:  
  1522     COUNT_INST(I_TSTI);
  1523     load_reg( REG_EAX, 0 );
  1524     TESTL_imms_r32( imm, REG_EAX );
  1525     SETE_t();
  1526     sh4_x86.tstate = TSTATE_E;
  1527 :}
  1528 TST.B #imm, @(R0, GBR) {:  
  1529     COUNT_INST(I_TSTB);
  1530     load_reg( REG_EAX, 0);
  1531     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1532     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1533     TESTB_imms_r8( imm, REG_AL );
  1534     SETE_t();
  1535     sh4_x86.tstate = TSTATE_E;
  1536 :}
  1537 XOR Rm, Rn {:  
  1538     COUNT_INST(I_XOR);
  1539     load_reg( REG_EAX, Rm );
  1540     load_reg( REG_ECX, Rn );
  1541     XORL_r32_r32( REG_EAX, REG_ECX );
  1542     store_reg( REG_ECX, Rn );
  1543     sh4_x86.tstate = TSTATE_NONE;
  1544 :}
  1545 XOR #imm, R0 {:  
  1546     COUNT_INST(I_XORI);
  1547     load_reg( REG_EAX, 0 );
  1548     XORL_imms_r32( imm, REG_EAX );
  1549     store_reg( REG_EAX, 0 );
  1550     sh4_x86.tstate = TSTATE_NONE;
  1551 :}
  1552 XOR.B #imm, @(R0, GBR) {:  
  1553     COUNT_INST(I_XORB);
  1554     load_reg( REG_EAX, 0 );
  1555     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1556     MOVL_r32_rspdisp( REG_EAX, 0 );
  1557     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1558     MOVL_rspdisp_r32( 0, REG_EAX );
  1559     XORL_imms_r32( imm, REG_EDX );
  1560     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1561     sh4_x86.tstate = TSTATE_NONE;
  1562 :}
  1563 XTRCT Rm, Rn {:
  1564     COUNT_INST(I_XTRCT);
  1565     load_reg( REG_EAX, Rm );
  1566     load_reg( REG_ECX, Rn );
  1567     SHLL_imm_r32( 16, REG_EAX );
  1568     SHRL_imm_r32( 16, REG_ECX );
  1569     ORL_r32_r32( REG_EAX, REG_ECX );
  1570     store_reg( REG_ECX, Rn );
  1571     sh4_x86.tstate = TSTATE_NONE;
  1572 :}
  1574 /* Data move instructions */
  1575 MOV Rm, Rn {:  
  1576     COUNT_INST(I_MOV);
  1577     load_reg( REG_EAX, Rm );
  1578     store_reg( REG_EAX, Rn );
  1579 :}
  1580 MOV #imm, Rn {:  
  1581     COUNT_INST(I_MOVI);
  1582     MOVL_imm32_r32( imm, REG_EAX );
  1583     store_reg( REG_EAX, Rn );
  1584 :}
  1585 MOV.B Rm, @Rn {:  
  1586     COUNT_INST(I_MOVB);
  1587     load_reg( REG_EAX, Rn );
  1588     load_reg( REG_EDX, Rm );
  1589     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1590     sh4_x86.tstate = TSTATE_NONE;
  1591 :}
  1592 MOV.B Rm, @-Rn {:  
  1593     COUNT_INST(I_MOVB);
  1594     load_reg( REG_EAX, Rn );
  1595     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1596     load_reg( REG_EDX, Rm );
  1597     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1598     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1599     sh4_x86.tstate = TSTATE_NONE;
  1600 :}
  1601 MOV.B Rm, @(R0, Rn) {:  
  1602     COUNT_INST(I_MOVB);
  1603     load_reg( REG_EAX, 0 );
  1604     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1605     load_reg( REG_EDX, Rm );
  1606     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1607     sh4_x86.tstate = TSTATE_NONE;
  1608 :}
  1609 MOV.B R0, @(disp, GBR) {:  
  1610     COUNT_INST(I_MOVB);
  1611     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1612     ADDL_imms_r32( disp, REG_EAX );
  1613     load_reg( REG_EDX, 0 );
  1614     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1615     sh4_x86.tstate = TSTATE_NONE;
  1616 :}
  1617 MOV.B R0, @(disp, Rn) {:  
  1618     COUNT_INST(I_MOVB);
  1619     load_reg( REG_EAX, Rn );
  1620     ADDL_imms_r32( disp, REG_EAX );
  1621     load_reg( REG_EDX, 0 );
  1622     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1623     sh4_x86.tstate = TSTATE_NONE;
  1624 :}
  1625 MOV.B @Rm, Rn {:  
  1626     COUNT_INST(I_MOVB);
  1627     load_reg( REG_EAX, Rm );
  1628     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1629     store_reg( REG_EAX, Rn );
  1630     sh4_x86.tstate = TSTATE_NONE;
  1631 :}
  1632 MOV.B @Rm+, Rn {:  
  1633     COUNT_INST(I_MOVB);
  1634     load_reg( REG_EAX, Rm );
  1635     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1636     if( Rm != Rn ) {
  1637     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
   1638     }
  1639     store_reg( REG_EAX, Rn );
  1640     sh4_x86.tstate = TSTATE_NONE;
  1641 :}
  1642 MOV.B @(R0, Rm), Rn {:  
  1643     COUNT_INST(I_MOVB);
  1644     load_reg( REG_EAX, 0 );
  1645     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1646     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1647     store_reg( REG_EAX, Rn );
  1648     sh4_x86.tstate = TSTATE_NONE;
  1649 :}
  1650 MOV.B @(disp, GBR), R0 {:  
  1651     COUNT_INST(I_MOVB);
  1652     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1653     ADDL_imms_r32( disp, REG_EAX );
  1654     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1655     store_reg( REG_EAX, 0 );
  1656     sh4_x86.tstate = TSTATE_NONE;
  1657 :}
  1658 MOV.B @(disp, Rm), R0 {:  
  1659     COUNT_INST(I_MOVB);
  1660     load_reg( REG_EAX, Rm );
  1661     ADDL_imms_r32( disp, REG_EAX );
  1662     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1663     store_reg( REG_EAX, 0 );
  1664     sh4_x86.tstate = TSTATE_NONE;
  1665 :}
  1666 MOV.L Rm, @Rn {:
  1667     COUNT_INST(I_MOVL);
  1668     load_reg( REG_EAX, Rn );
  1669     check_walign32(REG_EAX);
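            // Fast path for the store queues: addresses 0xE0000000-0xE3FFFFFF
            // map to the SQ area, so write directly into sh4r.store_queue
            // rather than through the memory subsystem. (EAX & 0x3C) selects
            // the longword slot within the two 32-byte queues.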
  1670     MOVL_r32_r32( REG_EAX, REG_ECX );
  1671     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1672     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1673     JNE_label( notsq );
  1674     ANDL_imms_r32( 0x3C, REG_EAX );
  1675     load_reg( REG_EDX, Rm );
  1676     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1677     JMP_label(end);
  1678     JMP_TARGET(notsq);
  1679     load_reg( REG_EDX, Rm );
  1680     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1681     JMP_TARGET(end);
  1682     sh4_x86.tstate = TSTATE_NONE;
  1683 :}
  1684 MOV.L Rm, @-Rn {:  
  1685     COUNT_INST(I_MOVL);
  1686     load_reg( REG_EAX, Rn );
  1687     ADDL_imms_r32( -4, REG_EAX );
  1688     check_walign32( REG_EAX );
  1689     load_reg( REG_EDX, Rm );
  1690     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1691     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1692     sh4_x86.tstate = TSTATE_NONE;
  1693 :}
  1694 MOV.L Rm, @(R0, Rn) {:  
  1695     COUNT_INST(I_MOVL);
  1696     load_reg( REG_EAX, 0 );
  1697     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1698     check_walign32( REG_EAX );
  1699     load_reg( REG_EDX, Rm );
  1700     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1701     sh4_x86.tstate = TSTATE_NONE;
  1702 :}
  1703 MOV.L R0, @(disp, GBR) {:  
  1704     COUNT_INST(I_MOVL);
  1705     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1706     ADDL_imms_r32( disp, REG_EAX );
  1707     check_walign32( REG_EAX );
  1708     load_reg( REG_EDX, 0 );
  1709     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1710     sh4_x86.tstate = TSTATE_NONE;
  1711 :}
  1712 MOV.L Rm, @(disp, Rn) {:  
  1713     COUNT_INST(I_MOVL);
  1714     load_reg( REG_EAX, Rn );
  1715     ADDL_imms_r32( disp, REG_EAX );
  1716     check_walign32( REG_EAX );
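            // Same store-queue fast path as MOV.L Rm, @Rn above.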
  1717     MOVL_r32_r32( REG_EAX, REG_ECX );
  1718     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1719     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1720     JNE_label( notsq );
  1721     ANDL_imms_r32( 0x3C, REG_EAX );
  1722     load_reg( REG_EDX, Rm );
  1723     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1724     JMP_label(end);
  1725     JMP_TARGET(notsq);
  1726     load_reg( REG_EDX, Rm );
  1727     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1728     JMP_TARGET(end);
  1729     sh4_x86.tstate = TSTATE_NONE;
  1730 :}
  1731 MOV.L @Rm, Rn {:  
  1732     COUNT_INST(I_MOVL);
  1733     load_reg( REG_EAX, Rm );
  1734     check_ralign32( REG_EAX );
  1735     MEM_READ_LONG( REG_EAX, REG_EAX );
  1736     store_reg( REG_EAX, Rn );
  1737     sh4_x86.tstate = TSTATE_NONE;
  1738 :}
  1739 MOV.L @Rm+, Rn {:  
  1740     COUNT_INST(I_MOVL);
  1741     load_reg( REG_EAX, Rm );
  1742     check_ralign32( REG_EAX );
  1743     MEM_READ_LONG( REG_EAX, REG_EAX );
  1744     if( Rm != Rn ) {
  1745     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   1746     }
  1747     store_reg( REG_EAX, Rn );
  1748     sh4_x86.tstate = TSTATE_NONE;
  1749 :}
  1750 MOV.L @(R0, Rm), Rn {:  
  1751     COUNT_INST(I_MOVL);
  1752     load_reg( REG_EAX, 0 );
  1753     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1754     check_ralign32( REG_EAX );
  1755     MEM_READ_LONG( REG_EAX, REG_EAX );
  1756     store_reg( REG_EAX, Rn );
  1757     sh4_x86.tstate = TSTATE_NONE;
  1758 :}
  1759 MOV.L @(disp, GBR), R0 {:
  1760     COUNT_INST(I_MOVL);
  1761     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1762     ADDL_imms_r32( disp, REG_EAX );
  1763     check_ralign32( REG_EAX );
  1764     MEM_READ_LONG( REG_EAX, REG_EAX );
  1765     store_reg( REG_EAX, 0 );
  1766     sh4_x86.tstate = TSTATE_NONE;
  1767 :}
  1768 MOV.L @(disp, PC), Rn {:  
  1769     COUNT_INST(I_MOVLPC);
  1770     if( sh4_x86.in_delay_slot ) {
  1771 	SLOTILLEGAL();
  1772     } else {
  1773 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1774 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1775 	    // If the target address is in the same page as the code, it's
  1776 	    // pretty safe to just ref it directly and circumvent the whole
  1777 	    // memory subsystem. (this is a big performance win)
   1779 	    // FIXME: There's a corner case that's not handled here when
   1780 	    // the current code page is in the ITLB but not in the UTLB.
   1781 	    // (This should probably generate a TLB miss, although SH4
   1782 	    // behaviour needs testing to confirm.) It's unlikely that
   1783 	    // anyone depends on this behaviour, though.
  1784 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1785 	    MOVL_moffptr_eax( ptr );
  1786 	} else {
  1787 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1788 	    // different virtual address than the translation was done with,
  1789 	    // but we can safely assume that the low bits are the same.
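        	    // Effective address = (pc & ~3) + disp + 4, formed by adding
        	    // the longword-aligned block offset to the runtime sh4r.pc.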
  1790 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1791 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1792 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1793 	    sh4_x86.tstate = TSTATE_NONE;
   1794 	}
  1795 	store_reg( REG_EAX, Rn );
   1796     }
  1797 :}
  1798 MOV.L @(disp, Rm), Rn {:  
  1799     COUNT_INST(I_MOVL);
  1800     load_reg( REG_EAX, Rm );
  1801     ADDL_imms_r32( disp, REG_EAX );
  1802     check_ralign32( REG_EAX );
  1803     MEM_READ_LONG( REG_EAX, REG_EAX );
  1804     store_reg( REG_EAX, Rn );
  1805     sh4_x86.tstate = TSTATE_NONE;
  1806 :}
  1807 MOV.W Rm, @Rn {:  
  1808     COUNT_INST(I_MOVW);
  1809     load_reg( REG_EAX, Rn );
  1810     check_walign16( REG_EAX );
  1811     load_reg( REG_EDX, Rm );
  1812     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1813     sh4_x86.tstate = TSTATE_NONE;
  1814 :}
  1815 MOV.W Rm, @-Rn {:  
  1816     COUNT_INST(I_MOVW);
  1817     load_reg( REG_EAX, Rn );
  1818     check_walign16( REG_EAX );
  1819     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1820     load_reg( REG_EDX, Rm );
  1821     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1822     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1823     sh4_x86.tstate = TSTATE_NONE;
  1824 :}
  1825 MOV.W Rm, @(R0, Rn) {:  
  1826     COUNT_INST(I_MOVW);
  1827     load_reg( REG_EAX, 0 );
  1828     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1829     check_walign16( REG_EAX );
  1830     load_reg( REG_EDX, Rm );
  1831     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1832     sh4_x86.tstate = TSTATE_NONE;
  1833 :}
  1834 MOV.W R0, @(disp, GBR) {:  
  1835     COUNT_INST(I_MOVW);
  1836     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1837     ADDL_imms_r32( disp, REG_EAX );
  1838     check_walign16( REG_EAX );
  1839     load_reg( REG_EDX, 0 );
  1840     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1841     sh4_x86.tstate = TSTATE_NONE;
  1842 :}
  1843 MOV.W R0, @(disp, Rn) {:  
  1844     COUNT_INST(I_MOVW);
  1845     load_reg( REG_EAX, Rn );
  1846     ADDL_imms_r32( disp, REG_EAX );
  1847     check_walign16( REG_EAX );
  1848     load_reg( REG_EDX, 0 );
  1849     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1850     sh4_x86.tstate = TSTATE_NONE;
  1851 :}
  1852 MOV.W @Rm, Rn {:  
  1853     COUNT_INST(I_MOVW);
  1854     load_reg( REG_EAX, Rm );
  1855     check_ralign16( REG_EAX );
  1856     MEM_READ_WORD( REG_EAX, REG_EAX );
  1857     store_reg( REG_EAX, Rn );
  1858     sh4_x86.tstate = TSTATE_NONE;
  1859 :}
  1860 MOV.W @Rm+, Rn {:  
  1861     COUNT_INST(I_MOVW);
  1862     load_reg( REG_EAX, Rm );
  1863     check_ralign16( REG_EAX );
  1864     MEM_READ_WORD( REG_EAX, REG_EAX );
  1865     if( Rm != Rn ) {
  1866         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   1867     }
  1868     store_reg( REG_EAX, Rn );
  1869     sh4_x86.tstate = TSTATE_NONE;
  1870 :}
  1871 MOV.W @(R0, Rm), Rn {:  
  1872     COUNT_INST(I_MOVW);
  1873     load_reg( REG_EAX, 0 );
  1874     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1875     check_ralign16( REG_EAX );
  1876     MEM_READ_WORD( REG_EAX, REG_EAX );
  1877     store_reg( REG_EAX, Rn );
  1878     sh4_x86.tstate = TSTATE_NONE;
  1879 :}
  1880 MOV.W @(disp, GBR), R0 {:  
  1881     COUNT_INST(I_MOVW);
  1882     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1883     ADDL_imms_r32( disp, REG_EAX );
  1884     check_ralign16( REG_EAX );
  1885     MEM_READ_WORD( REG_EAX, REG_EAX );
  1886     store_reg( REG_EAX, 0 );
  1887     sh4_x86.tstate = TSTATE_NONE;
  1888 :}
  1889 MOV.W @(disp, PC), Rn {:  
  1890     COUNT_INST(I_MOVW);
  1891     if( sh4_x86.in_delay_slot ) {
  1892 	SLOTILLEGAL();
  1893     } else {
  1894 	// See comments for MOV.L @(disp, PC), Rn
  1895 	uint32_t target = pc + disp + 4;
  1896 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1897 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1898 	    MOVL_moffptr_eax( ptr );
  1899 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1900 	} else {
  1901 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1902 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1903 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1904 	    sh4_x86.tstate = TSTATE_NONE;
   1905 	}
  1906 	store_reg( REG_EAX, Rn );
   1907     }
  1908 :}
  1909 MOV.W @(disp, Rm), R0 {:  
  1910     COUNT_INST(I_MOVW);
  1911     load_reg( REG_EAX, Rm );
  1912     ADDL_imms_r32( disp, REG_EAX );
  1913     check_ralign16( REG_EAX );
  1914     MEM_READ_WORD( REG_EAX, REG_EAX );
  1915     store_reg( REG_EAX, 0 );
  1916     sh4_x86.tstate = TSTATE_NONE;
  1917 :}
  1918 MOVA @(disp, PC), R0 {:  
  1919     COUNT_INST(I_MOVA);
  1920     if( sh4_x86.in_delay_slot ) {
  1921 	SLOTILLEGAL();
  1922     } else {
  1923 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1924 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1925 	store_reg( REG_ECX, 0 );
  1926 	sh4_x86.tstate = TSTATE_NONE;
   1927     }
  1928 :}
  1929 MOVCA.L R0, @Rn {:  
  1930     COUNT_INST(I_MOVCA);
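            // Implemented as a plain long store; the cache-allocate hint
            // appears to have no effect in this memory model.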
  1931     load_reg( REG_EAX, Rn );
  1932     check_walign32( REG_EAX );
  1933     load_reg( REG_EDX, 0 );
  1934     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1935     sh4_x86.tstate = TSTATE_NONE;
  1936 :}
  1938 /* Control transfer instructions */
  1939 BF disp {:
  1940     COUNT_INST(I_BF);
  1941     if( sh4_x86.in_delay_slot ) {
  1942 	SLOTILLEGAL();
  1943     } else {
  1944 	sh4vma_t target = disp + pc + 4;
  1945 	JT_label( nottaken );
  1946 	exit_block_rel(target, pc+2 );
  1947 	JMP_TARGET(nottaken);
  1948 	return 2;
   1949     }
  1950 :}
  1951 BF/S disp {:
  1952     COUNT_INST(I_BFS);
  1953     if( sh4_x86.in_delay_slot ) {
  1954 	SLOTILLEGAL();
  1955     } else {
  1956 	sh4_x86.in_delay_slot = DELAY_PC;
  1957 	if( UNTRANSLATABLE(pc+2) ) {
  1958 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1959 	    JT_label(nottaken);
  1960 	    ADDL_imms_r32( disp, REG_EAX );
  1961 	    JMP_TARGET(nottaken);
  1962 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1963 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1964 	    exit_block_emu(pc+2);
  1965 	    sh4_x86.branch_taken = TRUE;
  1966 	    return 2;
  1967 	} else {
  1968 	    LOAD_t();
  1969 	    sh4vma_t target = disp + pc + 4;
  1970 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1971 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
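        	    // The JCC above was emitted with a zero displacement; once the
        	    // taken path (delay slot + block exit) has been generated, it is
        	    // back-patched to jump over that path to a second copy of the
        	    // delay-slot instruction.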
  1972 	    int save_tstate = sh4_x86.tstate;
  1973 	    sh4_translate_instruction(pc+2);
  1974             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1975 	    exit_block_rel( target, pc+4 );
  1977 	    // not taken
  1978 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1979 	    sh4_x86.tstate = save_tstate;
  1980 	    sh4_translate_instruction(pc+2);
  1981 	    return 4;
   1982 	}
   1983     }
  1984 :}
  1985 BRA disp {:  
  1986     COUNT_INST(I_BRA);
  1987     if( sh4_x86.in_delay_slot ) {
  1988 	SLOTILLEGAL();
  1989     } else {
  1990 	sh4_x86.in_delay_slot = DELAY_PC;
  1991 	sh4_x86.branch_taken = TRUE;
  1992 	if( UNTRANSLATABLE(pc+2) ) {
  1993 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1994 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1995 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1996 	    exit_block_emu(pc+2);
  1997 	    return 2;
  1998 	} else {
  1999 	    sh4_translate_instruction( pc + 2 );
  2000 	    exit_block_rel( disp + pc + 4, pc+4 );
  2001 	    return 4;
   2002 	}
   2003     }
  2004 :}
  2005 BRAF Rn {:  
  2006     COUNT_INST(I_BRAF);
  2007     if( sh4_x86.in_delay_slot ) {
  2008 	SLOTILLEGAL();
  2009     } else {
  2010 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2011 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2012 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2013 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2014 	sh4_x86.in_delay_slot = DELAY_PC;
  2015 	sh4_x86.tstate = TSTATE_NONE;
  2016 	sh4_x86.branch_taken = TRUE;
  2017 	if( UNTRANSLATABLE(pc+2) ) {
  2018 	    exit_block_emu(pc+2);
  2019 	    return 2;
  2020 	} else {
  2021 	    sh4_translate_instruction( pc + 2 );
  2022 	    exit_block_newpcset(pc+4);
  2023 	    return 4;
   2024 	}
   2025     }
  2026 :}
  2027 BSR disp {:  
  2028     COUNT_INST(I_BSR);
  2029     if( sh4_x86.in_delay_slot ) {
  2030 	SLOTILLEGAL();
  2031     } else {
  2032 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2033 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2034 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2035 	sh4_x86.in_delay_slot = DELAY_PC;
  2036 	sh4_x86.branch_taken = TRUE;
  2037 	sh4_x86.tstate = TSTATE_NONE;
  2038 	if( UNTRANSLATABLE(pc+2) ) {
  2039 	    ADDL_imms_r32( disp, REG_EAX );
  2040 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2041 	    exit_block_emu(pc+2);
  2042 	    return 2;
  2043 	} else {
  2044 	    sh4_translate_instruction( pc + 2 );
  2045 	    exit_block_rel( disp + pc + 4, pc+4 );
  2046 	    return 4;
   2047 	}
   2048     }
  2049 :}
  2050 BSRF Rn {:  
  2051     COUNT_INST(I_BSRF);
  2052     if( sh4_x86.in_delay_slot ) {
  2053 	SLOTILLEGAL();
  2054     } else {
  2055 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2056 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2057 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2058 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2059 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2061 	sh4_x86.in_delay_slot = DELAY_PC;
  2062 	sh4_x86.tstate = TSTATE_NONE;
  2063 	sh4_x86.branch_taken = TRUE;
  2064 	if( UNTRANSLATABLE(pc+2) ) {
  2065 	    exit_block_emu(pc+2);
  2066 	    return 2;
  2067 	} else {
  2068 	    sh4_translate_instruction( pc + 2 );
  2069 	    exit_block_newpcset(pc+4);
  2070 	    return 4;
   2071 	}
   2072     }
  2073 :}
  2074 BT disp {:
  2075     COUNT_INST(I_BT);
  2076     if( sh4_x86.in_delay_slot ) {
  2077 	SLOTILLEGAL();
  2078     } else {
  2079 	sh4vma_t target = disp + pc + 4;
  2080 	JF_label( nottaken );
  2081 	exit_block_rel(target, pc+2 );
  2082 	JMP_TARGET(nottaken);
  2083 	return 2;
   2084     }
  2085 :}
  2086 BT/S disp {:
  2087     COUNT_INST(I_BTS);
  2088     if( sh4_x86.in_delay_slot ) {
  2089 	SLOTILLEGAL();
  2090     } else {
  2091 	sh4_x86.in_delay_slot = DELAY_PC;
  2092 	if( UNTRANSLATABLE(pc+2) ) {
  2093 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2094 	    JF_label(nottaken);
  2095 	    ADDL_imms_r32( disp, REG_EAX );
  2096 	    JMP_TARGET(nottaken);
  2097 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2098 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2099 	    exit_block_emu(pc+2);
  2100 	    sh4_x86.branch_taken = TRUE;
  2101 	    return 2;
  2102 	} else {
   2103 	    LOAD_t();
  2104 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2105 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
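        	    // Same back-patch scheme as BF/S; the condition is inverted
        	    // (tstate^1) because here the branch is taken when T is set.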
  2107 	    int save_tstate = sh4_x86.tstate;
  2108 	    sh4_translate_instruction(pc+2);
  2109             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2110 	    exit_block_rel( disp + pc + 4, pc+4 );
  2111 	    // not taken
  2112 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2113 	    sh4_x86.tstate = save_tstate;
  2114 	    sh4_translate_instruction(pc+2);
  2115 	    return 4;
   2116 	}
   2117     }
  2118 :}
  2119 JMP @Rn {:  
  2120     COUNT_INST(I_JMP);
  2121     if( sh4_x86.in_delay_slot ) {
  2122 	SLOTILLEGAL();
  2123     } else {
  2124 	load_reg( REG_ECX, Rn );
  2125 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2126 	sh4_x86.in_delay_slot = DELAY_PC;
  2127 	sh4_x86.branch_taken = TRUE;
  2128 	if( UNTRANSLATABLE(pc+2) ) {
  2129 	    exit_block_emu(pc+2);
  2130 	    return 2;
  2131 	} else {
  2132 	    sh4_translate_instruction(pc+2);
  2133 	    exit_block_newpcset(pc+4);
  2134 	    return 4;
   2135 	}
   2136     }
  2137 :}
  2138 JSR @Rn {:  
  2139     COUNT_INST(I_JSR);
  2140     if( sh4_x86.in_delay_slot ) {
  2141 	SLOTILLEGAL();
  2142     } else {
  2143 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2144 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2145 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2146 	load_reg( REG_ECX, Rn );
  2147 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2148 	sh4_x86.in_delay_slot = DELAY_PC;
  2149 	sh4_x86.branch_taken = TRUE;
  2150 	sh4_x86.tstate = TSTATE_NONE;
  2151 	if( UNTRANSLATABLE(pc+2) ) {
  2152 	    exit_block_emu(pc+2);
  2153 	    return 2;
  2154 	} else {
  2155 	    sh4_translate_instruction(pc+2);
  2156 	    exit_block_newpcset(pc+4);
  2157 	    return 4;
   2158 	}
   2159     }
  2160 :}
  2161 RTE {:  
  2162     COUNT_INST(I_RTE);
  2163     if( sh4_x86.in_delay_slot ) {
  2164 	SLOTILLEGAL();
  2165     } else {
  2166 	check_priv();
  2167 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2168 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2169 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2170 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2171 	sh4_x86.in_delay_slot = DELAY_PC;
  2172 	sh4_x86.fpuen_checked = FALSE;
  2173 	sh4_x86.tstate = TSTATE_NONE;
  2174 	sh4_x86.branch_taken = TRUE;
  2175     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2176 	if( UNTRANSLATABLE(pc+2) ) {
  2177 	    exit_block_emu(pc+2);
  2178 	    return 2;
  2179 	} else {
  2180 	    sh4_translate_instruction(pc+2);
  2181 	    exit_block_newpcset(pc+4);
  2182 	    return 4;
   2183 	}
   2184     }
  2185 :}
  2186 RTS {:  
  2187     COUNT_INST(I_RTS);
  2188     if( sh4_x86.in_delay_slot ) {
  2189 	SLOTILLEGAL();
  2190     } else {
  2191 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2192 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2193 	sh4_x86.in_delay_slot = DELAY_PC;
  2194 	sh4_x86.branch_taken = TRUE;
  2195 	if( UNTRANSLATABLE(pc+2) ) {
  2196 	    exit_block_emu(pc+2);
  2197 	    return 2;
  2198 	} else {
  2199 	    sh4_translate_instruction(pc+2);
  2200 	    exit_block_newpcset(pc+4);
  2201 	    return 4;
   2202 	}
   2203     }
  2204 :}
  2205 TRAPA #imm {:  
  2206     COUNT_INST(I_TRAPA);
  2207     if( sh4_x86.in_delay_slot ) {
  2208 	SLOTILLEGAL();
  2209     } else {
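        	// Advance sh4r.pc past this instruction before the call, since
        	// sh4_raise_trap is expected to set up the exception state from it.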
  2210 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2211 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2212 	MOVL_imm32_r32( imm, REG_EAX );
  2213 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2214 	sh4_x86.tstate = TSTATE_NONE;
  2215 	exit_block_pcset(pc+2);
  2216 	sh4_x86.branch_taken = TRUE;
  2217 	return 2;
   2218     }
  2219 :}
  2220 UNDEF {:  
  2221     COUNT_INST(I_UNDEF);
  2222     if( sh4_x86.in_delay_slot ) {
  2223 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
  2224     } else {
  2225 	exit_block_exc(EXC_ILLEGAL, pc, 2);    
  2226 	return 2;
   2227     }
  2228 :}
  2230 CLRMAC {:  
  2231     COUNT_INST(I_CLRMAC);
  2232     XORL_r32_r32(REG_EAX, REG_EAX);
  2233     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2234     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2235     sh4_x86.tstate = TSTATE_NONE;
  2236 :}
  2237 CLRS {:
  2238     COUNT_INST(I_CLRS);
  2239     CLC();
  2240     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2241     sh4_x86.tstate = TSTATE_NONE;
  2242 :}
  2243 CLRT {:  
  2244     COUNT_INST(I_CLRT);
  2245     CLC();
  2246     SETC_t();
  2247     sh4_x86.tstate = TSTATE_C;
  2248 :}
  2249 SETS {:  
  2250     COUNT_INST(I_SETS);
  2251     STC();
  2252     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2253     sh4_x86.tstate = TSTATE_NONE;
  2254 :}
  2255 SETT {:  
  2256     COUNT_INST(I_SETT);
  2257     STC();
  2258     SETC_t();
  2259     sh4_x86.tstate = TSTATE_C;
  2260 :}
  2262 /* Floating point moves */
  2263 FMOV FRm, FRn {:  
  2264     COUNT_INST(I_FMOV1);
  2265     check_fpuen();
  2266     if( sh4_x86.double_size ) {
  2267         load_dr0( REG_EAX, FRm );
  2268         load_dr1( REG_ECX, FRm );
  2269         store_dr0( REG_EAX, FRn );
  2270         store_dr1( REG_ECX, FRn );
  2271     } else {
  2272         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2273         store_fr( REG_EAX, FRn );
   2274     }
  2275 :}
  2276 FMOV FRm, @Rn {: 
  2277     COUNT_INST(I_FMOV2);
  2278     check_fpuen();
  2279     load_reg( REG_EAX, Rn );
  2280     if( sh4_x86.double_size ) {
  2281         check_walign64( REG_EAX );
  2282         load_dr0( REG_EDX, FRm );
  2283         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2284         load_reg( REG_EAX, Rn );
  2285         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2286         load_dr1( REG_EDX, FRm );
  2287         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2288     } else {
  2289         check_walign32( REG_EAX );
  2290         load_fr( REG_EDX, FRm );
  2291         MEM_WRITE_LONG( REG_EAX, REG_EDX );
   2292     }
  2293     sh4_x86.tstate = TSTATE_NONE;
  2294 :}
  2295 FMOV @Rm, FRn {:  
  2296     COUNT_INST(I_FMOV5);
  2297     check_fpuen();
  2298     load_reg( REG_EAX, Rm );
  2299     if( sh4_x86.double_size ) {
  2300         check_ralign64( REG_EAX );
  2301         MEM_READ_LONG( REG_EAX, REG_EAX );
  2302         store_dr0( REG_EAX, FRn );
  2303         load_reg( REG_EAX, Rm );
  2304         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2305         MEM_READ_LONG( REG_EAX, REG_EAX );
  2306         store_dr1( REG_EAX, FRn );
  2307     } else {
  2308         check_ralign32( REG_EAX );
  2309         MEM_READ_LONG( REG_EAX, REG_EAX );
  2310         store_fr( REG_EAX, FRn );
   2311     }
  2312     sh4_x86.tstate = TSTATE_NONE;
  2313 :}
  2314 FMOV FRm, @-Rn {:  
  2315     COUNT_INST(I_FMOV3);
  2316     check_fpuen();
  2317     load_reg( REG_EAX, Rn );
  2318     if( sh4_x86.double_size ) {
  2319         check_walign64( REG_EAX );
  2320         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2321         load_dr0( REG_EDX, FRm );
  2322         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2323         load_reg( REG_EAX, Rn );
  2324         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2325         load_dr1( REG_EDX, FRm );
  2326         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2327         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2328     } else {
  2329         check_walign32( REG_EAX );
  2330         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2331         load_fr( REG_EDX, FRm );
  2332         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2333         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2335     sh4_x86.tstate = TSTATE_NONE;
  2336 :}
  2337 FMOV @Rm+, FRn {:
  2338     COUNT_INST(I_FMOV6);
  2339     check_fpuen();
  2340     load_reg( REG_EAX, Rm );
  2341     if( sh4_x86.double_size ) {
  2342         check_ralign64( REG_EAX );
  2343         MEM_READ_LONG( REG_EAX, REG_EAX );
  2344         store_dr0( REG_EAX, FRn );
  2345         load_reg( REG_EAX, Rm );
  2346         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2347         MEM_READ_LONG( REG_EAX, REG_EAX );
  2348         store_dr1( REG_EAX, FRn );
  2349         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2350     } else {
  2351         check_ralign32( REG_EAX );
  2352         MEM_READ_LONG( REG_EAX, REG_EAX );
  2353         store_fr( REG_EAX, FRn );
  2354         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   2355     }
  2356     sh4_x86.tstate = TSTATE_NONE;
  2357 :}
  2358 FMOV FRm, @(R0, Rn) {:  
  2359     COUNT_INST(I_FMOV4);
  2360     check_fpuen();
  2361     load_reg( REG_EAX, Rn );
  2362     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2363     if( sh4_x86.double_size ) {
  2364         check_walign64( REG_EAX );
  2365         load_dr0( REG_EDX, FRm );
  2366         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2367         load_reg( REG_EAX, Rn );
  2368         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2369         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2370         load_dr1( REG_EDX, FRm );
  2371         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2372     } else {
  2373         check_walign32( REG_EAX );
  2374         load_fr( REG_EDX, FRm );
  2375         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
   2376     }
  2377     sh4_x86.tstate = TSTATE_NONE;
  2378 :}
  2379 FMOV @(R0, Rm), FRn {:  
  2380     COUNT_INST(I_FMOV7);
  2381     check_fpuen();
  2382     load_reg( REG_EAX, Rm );
  2383     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2384     if( sh4_x86.double_size ) {
  2385         check_ralign64( REG_EAX );
  2386         MEM_READ_LONG( REG_EAX, REG_EAX );
  2387         store_dr0( REG_EAX, FRn );
  2388         load_reg( REG_EAX, Rm );
  2389         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2390         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2391         MEM_READ_LONG( REG_EAX, REG_EAX );
  2392         store_dr1( REG_EAX, FRn );
  2393     } else {
  2394         check_ralign32( REG_EAX );
  2395         MEM_READ_LONG( REG_EAX, REG_EAX );
  2396         store_fr( REG_EAX, FRn );
   2397     }
  2398     sh4_x86.tstate = TSTATE_NONE;
  2399 :}
  2400 FLDI0 FRn {:  /* IFF PR=0 */
  2401     COUNT_INST(I_FLDI0);
  2402     check_fpuen();
  2403     if( sh4_x86.double_prec == 0 ) {
  2404         XORL_r32_r32( REG_EAX, REG_EAX );
  2405         store_fr( REG_EAX, FRn );
   2406     }
  2407     sh4_x86.tstate = TSTATE_NONE;
  2408 :}
  2409 FLDI1 FRn {:  /* IFF PR=0 */
  2410     COUNT_INST(I_FLDI1);
  2411     check_fpuen();
  2412     if( sh4_x86.double_prec == 0 ) {
  2413         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2414         store_fr( REG_EAX, FRn );
   2415     }
  2416 :}
  2418 FLOAT FPUL, FRn {:  
  2419     COUNT_INST(I_FLOAT);
  2420     check_fpuen();
  2421     FILD_rbpdisp(R_FPUL);
  2422     if( sh4_x86.double_prec ) {
  2423         pop_dr( FRn );
  2424     } else {
  2425         pop_fr( FRn );
   2426     }
  2427 :}
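        /* FTRC saturates to min_int/max_int on overflow (and to min_int for
         * NaN), matching SH4 semantics, where a bare x87 FISTP would store the
         * 0x80000000 "indefinite" value. The non-saturating path swaps in a
         * round-to-zero control word (trunc_fcw) around the FISTP, since SH4
         * FTRC truncates regardless of the current rounding mode.
         */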
  2428 FTRC FRm, FPUL {:  
  2429     COUNT_INST(I_FTRC);
  2430     check_fpuen();
  2431     if( sh4_x86.double_prec ) {
  2432         push_dr( FRm );
  2433     } else {
  2434         push_fr( FRm );
   2435     }
  2436     MOVP_immptr_rptr( &min_int, REG_ECX );
  2437     FILD_r32disp( REG_ECX, 0 );
  2438     FCOMIP_st(1);              
  2439     JAE_label( sat );     
  2440     JP_label( sat2 );       
  2441     MOVP_immptr_rptr( &max_int, REG_ECX );
  2442     FILD_r32disp( REG_ECX, 0 );
  2443     FCOMIP_st(1);
  2444     JNA_label( sat3 );
  2445     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2446     FNSTCW_r32disp( REG_EAX, 0 );
  2447     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2448     FLDCW_r32disp( REG_EDX, 0 );
  2449     FISTP_rbpdisp(R_FPUL);             
  2450     FLDCW_r32disp( REG_EAX, 0 );
  2451     JMP_label(end);             
  2453     JMP_TARGET(sat);
  2454     JMP_TARGET(sat2);
  2455     JMP_TARGET(sat3);
  2456     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2457     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2458     FPOP_st();
  2459     JMP_TARGET(end);
  2460     sh4_x86.tstate = TSTATE_NONE;
  2461 :}
  2462 FLDS FRm, FPUL {:  
  2463     COUNT_INST(I_FLDS);
  2464     check_fpuen();
  2465     load_fr( REG_EAX, FRm );
  2466     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2467 :}
  2468 FSTS FPUL, FRn {:  
  2469     COUNT_INST(I_FSTS);
  2470     check_fpuen();
  2471     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2472     store_fr( REG_EAX, FRn );
  2473 :}
  2474 FCNVDS FRm, FPUL {:  
  2475     COUNT_INST(I_FCNVDS);
  2476     check_fpuen();
  2477     if( sh4_x86.double_prec ) {
  2478         push_dr( FRm );
  2479         pop_fpul();
   2480     }
  2481 :}
  2482 FCNVSD FPUL, FRn {:  
  2483     COUNT_INST(I_FCNVSD);
  2484     check_fpuen();
  2485     if( sh4_x86.double_prec ) {
  2486         push_fpul();
  2487         pop_dr( FRn );
   2488     }
  2489 :}
  2491 /* Floating point instructions */
  2492 FABS FRn {:  
  2493     COUNT_INST(I_FABS);
  2494     check_fpuen();
  2495     if( sh4_x86.double_prec ) {
  2496         push_dr(FRn);
  2497         FABS_st0();
  2498         pop_dr(FRn);
  2499     } else {
  2500         push_fr(FRn);
  2501         FABS_st0();
  2502         pop_fr(FRn);
   2503     }
  2504 :}
  2505 FADD FRm, FRn {:  
  2506     COUNT_INST(I_FADD);
  2507     check_fpuen();
  2508     if( sh4_x86.double_prec ) {
  2509         push_dr(FRm);
  2510         push_dr(FRn);
  2511         FADDP_st(1);
  2512         pop_dr(FRn);
  2513     } else {
  2514         push_fr(FRm);
  2515         push_fr(FRn);
  2516         FADDP_st(1);
  2517         pop_fr(FRn);
   2518     }
  2519 :}
  2520 FDIV FRm, FRn {:  
  2521     COUNT_INST(I_FDIV);
  2522     check_fpuen();
  2523     if( sh4_x86.double_prec ) {
  2524         push_dr(FRn);
  2525         push_dr(FRm);
  2526         FDIVP_st(1);
  2527         pop_dr(FRn);
  2528     } else {
  2529         push_fr(FRn);
  2530         push_fr(FRm);
  2531         FDIVP_st(1);
  2532         pop_fr(FRn);
   2533     }
  2534 :}
  2535 FMAC FR0, FRm, FRn {:  
  2536     COUNT_INST(I_FMAC);
  2537     check_fpuen();
  2538     if( sh4_x86.double_prec ) {
  2539         push_dr( 0 );
  2540         push_dr( FRm );
  2541         FMULP_st(1);
  2542         push_dr( FRn );
  2543         FADDP_st(1);
  2544         pop_dr( FRn );
  2545     } else {
  2546         push_fr( 0 );
  2547         push_fr( FRm );
  2548         FMULP_st(1);
  2549         push_fr( FRn );
  2550         FADDP_st(1);
  2551         pop_fr( FRn );
   2552     }
  2553 :}
  2555 FMUL FRm, FRn {:  
  2556     COUNT_INST(I_FMUL);
  2557     check_fpuen();
  2558     if( sh4_x86.double_prec ) {
  2559         push_dr(FRm);
  2560         push_dr(FRn);
  2561         FMULP_st(1);
  2562         pop_dr(FRn);
  2563     } else {
  2564         push_fr(FRm);
  2565         push_fr(FRn);
  2566         FMULP_st(1);
  2567         pop_fr(FRn);
   2568     }
  2569 :}
  2570 FNEG FRn {:  
  2571     COUNT_INST(I_FNEG);
  2572     check_fpuen();
  2573     if( sh4_x86.double_prec ) {
  2574         push_dr(FRn);
  2575         FCHS_st0();
  2576         pop_dr(FRn);
  2577     } else {
  2578         push_fr(FRn);
  2579         FCHS_st0();
  2580         pop_fr(FRn);
   2581     }
  2582 :}
  2583 FSRRA FRn {:  
  2584     COUNT_INST(I_FSRRA);
  2585     check_fpuen();
  2586     if( sh4_x86.double_prec == 0 ) {
  2587         FLD1_st0();
  2588         push_fr(FRn);
  2589         FSQRT_st0();
  2590         FDIVP_st(1);
  2591         pop_fr(FRn);
   2592     }
  2593 :}
  2594 FSQRT FRn {:  
  2595     COUNT_INST(I_FSQRT);
  2596     check_fpuen();
  2597     if( sh4_x86.double_prec ) {
  2598         push_dr(FRn);
  2599         FSQRT_st0();
  2600         pop_dr(FRn);
  2601     } else {
  2602         push_fr(FRn);
  2603         FSQRT_st0();
  2604         pop_fr(FRn);
   2605     }
  2606 :}
  2607 FSUB FRm, FRn {:  
  2608     COUNT_INST(I_FSUB);
  2609     check_fpuen();
  2610     if( sh4_x86.double_prec ) {
  2611         push_dr(FRn);
  2612         push_dr(FRm);
  2613         FSUBP_st(1);
  2614         pop_dr(FRn);
  2615     } else {
  2616         push_fr(FRn);
  2617         push_fr(FRm);
  2618         FSUBP_st(1);
  2619         pop_fr(FRn);
   2620     }
  2621 :}
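        /* FCOMIP sets ZF for equality but also raises PF for an unordered
         * compare, so T is computed below as (ZF && !PF) to make NaN-involved
         * comparisons report not-equal.
         */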
  2623 FCMP/EQ FRm, FRn {:  
  2624     COUNT_INST(I_FCMPEQ);
  2625     check_fpuen();
  2626     if( sh4_x86.double_prec ) {
  2627         push_dr(FRm);
  2628         push_dr(FRn);
  2629     } else {
  2630         push_fr(FRm);
  2631         push_fr(FRn);
   2632     }
  2633     XORL_r32_r32(REG_EAX, REG_EAX);
  2634     XORL_r32_r32(REG_EDX, REG_EDX);
  2635     FCOMIP_st(1);
  2636     SETCCB_cc_r8(X86_COND_NP, REG_DL);
  2637     CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
  2638     MOVL_r32_rbpdisp(REG_EAX, R_T);
  2639     FPOP_st();
  2640     sh4_x86.tstate = TSTATE_NONE;
  2641 :}
  2642 FCMP/GT FRm, FRn {:  
  2643     COUNT_INST(I_FCMPGT);
  2644     check_fpuen();
  2645     if( sh4_x86.double_prec ) {
  2646         push_dr(FRm);
  2647         push_dr(FRn);
  2648     } else {
  2649         push_fr(FRm);
  2650         push_fr(FRn);
   2651     }
  2652     FCOMIP_st(1);
  2653     SETA_t();
  2654     FPOP_st();
  2655     sh4_x86.tstate = TSTATE_A;
  2656 :}
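        /* FSCA: sine/cosine of the FPUL angle, computed by the sh4_fsca helper
         * directly into the FRn register pair.
         */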
  2658 FSCA FPUL, FRn {:  
  2659     COUNT_INST(I_FSCA);
  2660     check_fpuen();
  2661     if( sh4_x86.double_prec == 0 ) {
  2662         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2663         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2664         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
   2665     }
  2666     sh4_x86.tstate = TSTATE_NONE;
  2667 :}
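        /* FIPR: four-element inner product (FR[4n+3] = sum of FVm[i]*FVn[i]),
         * stored to the last element of FVn. The SSE3 path multiplies the
         * vectors and reduces with two HADDPS; the x87 fallback accumulates
         * the four products serially.
         */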
  2668 FIPR FVm, FVn {:  
  2669     COUNT_INST(I_FIPR);
  2670     check_fpuen();
  2671     if( sh4_x86.double_prec == 0 ) {
  2672         if( sh4_x86.sse3_enabled ) {
  2673             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2674             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2675             HADDPS_xmm_xmm( 4, 4 ); 
  2676             HADDPS_xmm_xmm( 4, 4 );
  2677             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2678         } else {
  2679             push_fr( FVm<<2 );
  2680             push_fr( FVn<<2 );
  2681             FMULP_st(1);
  2682             push_fr( (FVm<<2)+1);
  2683             push_fr( (FVn<<2)+1);
  2684             FMULP_st(1);
  2685             FADDP_st(1);
  2686             push_fr( (FVm<<2)+2);
  2687             push_fr( (FVn<<2)+2);
  2688             FMULP_st(1);
  2689             FADDP_st(1);
  2690             push_fr( (FVm<<2)+3);
  2691             push_fr( (FVn<<2)+3);
  2692             FMULP_st(1);
  2693             FADDP_st(1);
  2694             pop_fr( (FVn<<2)+3);
   2695         }
   2696     }
  2697 :}
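        /* FTRV: transform vector FVn by the 4x4 back-bank matrix XMTRX. The
         * raw fr[1][] rows read pair-swapped (M1 M0 M3 M2 ...) because each
         * pair of singles is stored swapped so that double accesses work
         * directly on a little-endian host.
         */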
  2698 FTRV XMTRX, FVn {:  
  2699     COUNT_INST(I_FTRV);
  2700     check_fpuen();
  2701     if( sh4_x86.double_prec == 0 ) {
  2702         if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
  2703         	/* FIXME: For now, disable this inlining when we're running in shadow mode -
  2704         	 * it gives slightly different results from the emu core. Need to
  2705         	 * fix the precision so both give the right results.
  2706         	 */
  2707             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2708             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2709             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2710             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2712             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2713             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2714             MOV_xmm_xmm( 4, 6 );
  2715             MOV_xmm_xmm( 5, 7 );
  2716             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2717             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2718             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2719             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2720             MULPS_xmm_xmm( 0, 4 );
  2721             MULPS_xmm_xmm( 1, 5 );
  2722             MULPS_xmm_xmm( 2, 6 );
  2723             MULPS_xmm_xmm( 3, 7 );
  2724             ADDPS_xmm_xmm( 5, 4 );
  2725             ADDPS_xmm_xmm( 7, 6 );
  2726             ADDPS_xmm_xmm( 6, 4 );
  2727             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2728         } else {
  2729             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2730             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
   2731         }
   2732     }
  2733     sh4_x86.tstate = TSTATE_NONE;
  2734 :}
  2736 FRCHG {:  
  2737     COUNT_INST(I_FRCHG);
  2738     check_fpuen();
  2739     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2740     CALL_ptr( sh4_switch_fr_banks );
  2741     sh4_x86.tstate = TSTATE_NONE;
  2742 :}
  2743 FSCHG {:  
  2744     COUNT_INST(I_FSCHG);
  2745     check_fpuen();
  2746     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2747     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2748     sh4_x86.tstate = TSTATE_NONE;
  2749     sh4_x86.double_size = !sh4_x86.double_size;
  2750     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2751 :}
  2753 /* Processor control instructions */
  2754 LDC Rm, SR {:
  2755     COUNT_INST(I_LDCSR);
  2756     if( sh4_x86.in_delay_slot ) {
  2757 	SLOTILLEGAL();
  2758     } else {
  2759 	check_priv();
  2760 	load_reg( REG_EAX, Rm );
  2761 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2762 	sh4_x86.fpuen_checked = FALSE;
  2763 	sh4_x86.tstate = TSTATE_NONE;
  2764     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2765 	return 2;
   2766     }
  2767 :}
  2768 LDC Rm, GBR {: 
  2769     COUNT_INST(I_LDC);
  2770     load_reg( REG_EAX, Rm );
  2771     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2772 :}
  2773 LDC Rm, VBR {:  
  2774     COUNT_INST(I_LDC);
  2775     check_priv();
  2776     load_reg( REG_EAX, Rm );
  2777     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2778     sh4_x86.tstate = TSTATE_NONE;
  2779 :}
  2780 LDC Rm, SSR {:  
  2781     COUNT_INST(I_LDC);
  2782     check_priv();
  2783     load_reg( REG_EAX, Rm );
  2784     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2785     sh4_x86.tstate = TSTATE_NONE;
  2786 :}
  2787 LDC Rm, SGR {:  
  2788     COUNT_INST(I_LDC);
  2789     check_priv();
  2790     load_reg( REG_EAX, Rm );
  2791     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2792     sh4_x86.tstate = TSTATE_NONE;
  2793 :}
  2794 LDC Rm, SPC {:  
  2795     COUNT_INST(I_LDC);
  2796     check_priv();
  2797     load_reg( REG_EAX, Rm );
  2798     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2799     sh4_x86.tstate = TSTATE_NONE;
  2800 :}
  2801 LDC Rm, DBR {:  
  2802     COUNT_INST(I_LDC);
  2803     check_priv();
  2804     load_reg( REG_EAX, Rm );
  2805     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2806     sh4_x86.tstate = TSTATE_NONE;
  2807 :}
  2808 LDC Rm, Rn_BANK {:  
  2809     COUNT_INST(I_LDC);
  2810     check_priv();
  2811     load_reg( REG_EAX, Rm );
  2812     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2813     sh4_x86.tstate = TSTATE_NONE;
  2814 :}
  2815 LDC.L @Rm+, GBR {:  
  2816     COUNT_INST(I_LDCM);
  2817     load_reg( REG_EAX, Rm );
  2818     check_ralign32( REG_EAX );
  2819     MEM_READ_LONG( REG_EAX, REG_EAX );
  2820     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2821     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2822     sh4_x86.tstate = TSTATE_NONE;
  2823 :}
  2824 LDC.L @Rm+, SR {:
  2825     COUNT_INST(I_LDCSRM);
  2826     if( sh4_x86.in_delay_slot ) {
  2827 	SLOTILLEGAL();
  2828     } else {
  2829 	check_priv();
  2830 	load_reg( REG_EAX, Rm );
  2831 	check_ralign32( REG_EAX );
  2832 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2833 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2834 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2835 	sh4_x86.fpuen_checked = FALSE;
  2836 	sh4_x86.tstate = TSTATE_NONE;
  2837     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2838 	return 2;
   2839     }
  2840 :}
  2841 LDC.L @Rm+, VBR {:  
  2842     COUNT_INST(I_LDCM);
  2843     check_priv();
  2844     load_reg( REG_EAX, Rm );
  2845     check_ralign32( REG_EAX );
  2846     MEM_READ_LONG( REG_EAX, REG_EAX );
  2847     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2848     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2849     sh4_x86.tstate = TSTATE_NONE;
  2850 :}
  2851 LDC.L @Rm+, SSR {:
  2852     COUNT_INST(I_LDCM);
  2853     check_priv();
  2854     load_reg( REG_EAX, Rm );
  2855     check_ralign32( REG_EAX );
  2856     MEM_READ_LONG( REG_EAX, REG_EAX );
  2857     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2858     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2859     sh4_x86.tstate = TSTATE_NONE;
  2860 :}
  2861 LDC.L @Rm+, SGR {:  
  2862     COUNT_INST(I_LDCM);
  2863     check_priv();
  2864     load_reg( REG_EAX, Rm );
  2865     check_ralign32( REG_EAX );
  2866     MEM_READ_LONG( REG_EAX, REG_EAX );
  2867     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2868     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2869     sh4_x86.tstate = TSTATE_NONE;
  2870 :}
  2871 LDC.L @Rm+, SPC {:  
  2872     COUNT_INST(I_LDCM);
  2873     check_priv();
  2874     load_reg( REG_EAX, Rm );
  2875     check_ralign32( REG_EAX );
  2876     MEM_READ_LONG( REG_EAX, REG_EAX );
  2877     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2878     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2879     sh4_x86.tstate = TSTATE_NONE;
  2880 :}
  2881 LDC.L @Rm+, DBR {:  
  2882     COUNT_INST(I_LDCM);
  2883     check_priv();
  2884     load_reg( REG_EAX, Rm );
  2885     check_ralign32( REG_EAX );
  2886     MEM_READ_LONG( REG_EAX, REG_EAX );
  2887     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2888     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2889     sh4_x86.tstate = TSTATE_NONE;
  2890 :}
  2891 LDC.L @Rm+, Rn_BANK {:  
  2892     COUNT_INST(I_LDCM);
  2893     check_priv();
  2894     load_reg( REG_EAX, Rm );
  2895     check_ralign32( REG_EAX );
  2896     MEM_READ_LONG( REG_EAX, REG_EAX );
  2897     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2898     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2899     sh4_x86.tstate = TSTATE_NONE;
  2900 :}
  2901 LDS Rm, FPSCR {:
  2902     COUNT_INST(I_LDSFPSCR);
  2903     check_fpuen();
  2904     load_reg( REG_EAX, Rm );
  2905     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2906     sh4_x86.tstate = TSTATE_NONE;
  2907     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2908     return 2;
  2909 :}
  2910 LDS.L @Rm+, FPSCR {:  
  2911     COUNT_INST(I_LDSFPSCRM);
  2912     check_fpuen();
  2913     load_reg( REG_EAX, Rm );
  2914     check_ralign32( REG_EAX );
  2915     MEM_READ_LONG( REG_EAX, REG_EAX );
  2916     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2917     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2918     sh4_x86.tstate = TSTATE_NONE;
  2919     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2920     return 2;
  2921 :}
  2922 LDS Rm, FPUL {:  
  2923     COUNT_INST(I_LDS);
  2924     check_fpuen();
  2925     load_reg( REG_EAX, Rm );
  2926     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2927 :}
  2928 LDS.L @Rm+, FPUL {:  
  2929     COUNT_INST(I_LDSM);
  2930     check_fpuen();
  2931     load_reg( REG_EAX, Rm );
  2932     check_ralign32( REG_EAX );
  2933     MEM_READ_LONG( REG_EAX, REG_EAX );
  2934     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2935     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2936     sh4_x86.tstate = TSTATE_NONE;
  2937 :}
  2938 LDS Rm, MACH {: 
  2939     COUNT_INST(I_LDS);
  2940     load_reg( REG_EAX, Rm );
  2941     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2942 :}
  2943 LDS.L @Rm+, MACH {:  
  2944     COUNT_INST(I_LDSM);
  2945     load_reg( REG_EAX, Rm );
  2946     check_ralign32( REG_EAX );
  2947     MEM_READ_LONG( REG_EAX, REG_EAX );
  2948     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2949     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2950     sh4_x86.tstate = TSTATE_NONE;
  2951 :}
  2952 LDS Rm, MACL {:  
  2953     COUNT_INST(I_LDS);
  2954     load_reg( REG_EAX, Rm );
  2955     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2956 :}
  2957 LDS.L @Rm+, MACL {:  
  2958     COUNT_INST(I_LDSM);
  2959     load_reg( REG_EAX, Rm );
  2960     check_ralign32( REG_EAX );
  2961     MEM_READ_LONG( REG_EAX, REG_EAX );
  2962     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2963     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2964     sh4_x86.tstate = TSTATE_NONE;
  2965 :}
  2966 LDS Rm, PR {:  
  2967     COUNT_INST(I_LDS);
  2968     load_reg( REG_EAX, Rm );
  2969     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2970 :}
  2971 LDS.L @Rm+, PR {:  
  2972     COUNT_INST(I_LDSM);
  2973     load_reg( REG_EAX, Rm );
  2974     check_ralign32( REG_EAX );
  2975     MEM_READ_LONG( REG_EAX, REG_EAX );
  2976     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2977     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2978     sh4_x86.tstate = TSTATE_NONE;
  2979 :}
  2980 LDTLB {:  
  2981     COUNT_INST(I_LDTLB);
  2982     CALL_ptr( MMU_ldtlb );
  2983     sh4_x86.tstate = TSTATE_NONE;
  2984 :}
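        /* The cache-control instructions below are no-ops: the operand cache
         * isn't modelled, so there is nothing to invalidate, purge or write
         * back.
         */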
  2985 OCBI @Rn {:
  2986     COUNT_INST(I_OCBI);
  2987 :}
  2988 OCBP @Rn {:
  2989     COUNT_INST(I_OCBP);
  2990 :}
  2991 OCBWB @Rn {:
  2992     COUNT_INST(I_OCBWB);
  2993 :}
  2994 PREF @Rn {:
  2995     COUNT_INST(I_PREF);
  2996     load_reg( REG_EAX, Rn );
  2997     MEM_PREFETCH( REG_EAX );
  2998     sh4_x86.tstate = TSTATE_NONE;
  2999 :}
  3000 SLEEP {: 
  3001     COUNT_INST(I_SLEEP);
  3002     check_priv();
  3003     CALL_ptr( sh4_sleep );
  3004     sh4_x86.tstate = TSTATE_NONE;
  3005     sh4_x86.in_delay_slot = DELAY_NONE;
  3006     return 2;
  3007 :}
  3008 STC SR, Rn {:
  3009     COUNT_INST(I_STCSR);
  3010     check_priv();
  3011     CALL_ptr(sh4_read_sr);
  3012     store_reg( REG_EAX, Rn );
  3013     sh4_x86.tstate = TSTATE_NONE;
  3014 :}
  3015 STC GBR, Rn {:  
  3016     COUNT_INST(I_STC);
  3017     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  3018     store_reg( REG_EAX, Rn );
  3019 :}
  3020 STC VBR, Rn {:  
  3021     COUNT_INST(I_STC);
  3022     check_priv();
  3023     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  3024     store_reg( REG_EAX, Rn );
  3025     sh4_x86.tstate = TSTATE_NONE;
  3026 :}
  3027 STC SSR, Rn {:  
  3028     COUNT_INST(I_STC);
  3029     check_priv();
  3030     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  3031     store_reg( REG_EAX, Rn );
  3032     sh4_x86.tstate = TSTATE_NONE;
  3033 :}
  3034 STC SPC, Rn {:  
  3035     COUNT_INST(I_STC);
  3036     check_priv();
  3037     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  3038     store_reg( REG_EAX, Rn );
  3039     sh4_x86.tstate = TSTATE_NONE;
  3040 :}
  3041 STC SGR, Rn {:  
  3042     COUNT_INST(I_STC);
  3043     check_priv();
  3044     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  3045     store_reg( REG_EAX, Rn );
  3046     sh4_x86.tstate = TSTATE_NONE;
  3047 :}
  3048 STC DBR, Rn {:  
  3049     COUNT_INST(I_STC);
  3050     check_priv();
  3051     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  3052     store_reg( REG_EAX, Rn );
  3053     sh4_x86.tstate = TSTATE_NONE;
  3054 :}
  3055 STC Rm_BANK, Rn {:
  3056     COUNT_INST(I_STC);
  3057     check_priv();
  3058     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  3059     store_reg( REG_EAX, Rn );
  3060     sh4_x86.tstate = TSTATE_NONE;
  3061 :}
  3062 STC.L SR, @-Rn {:
  3063     COUNT_INST(I_STCSRM);
  3064     check_priv();
  3065     CALL_ptr( sh4_read_sr );
  3066     MOVL_r32_r32( REG_EAX, REG_EDX );
  3067     load_reg( REG_EAX, Rn );
  3068     check_walign32( REG_EAX );
  3069     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  3070     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3071     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3072     sh4_x86.tstate = TSTATE_NONE;
  3073 :}
  3074 STC.L VBR, @-Rn {:  
  3075     COUNT_INST(I_STCM);
  3076     check_priv();
  3077     load_reg( REG_EAX, Rn );
  3078     check_walign32( REG_EAX );
  3079     ADDL_imms_r32( -4, REG_EAX );
  3080     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  3081     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3082     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3083     sh4_x86.tstate = TSTATE_NONE;
  3084 :}
  3085 STC.L SSR, @-Rn {:  
  3086     COUNT_INST(I_STCM);
  3087     check_priv();
  3088     load_reg( REG_EAX, Rn );
  3089     check_walign32( REG_EAX );
  3090     ADDL_imms_r32( -4, REG_EAX );
  3091     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  3092     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3093     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3094     sh4_x86.tstate = TSTATE_NONE;
  3095 :}
  3096 STC.L SPC, @-Rn {:
  3097     COUNT_INST(I_STCM);
  3098     check_priv();
  3099     load_reg( REG_EAX, Rn );
  3100     check_walign32( REG_EAX );
  3101     ADDL_imms_r32( -4, REG_EAX );
  3102     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3103     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3104     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3105     sh4_x86.tstate = TSTATE_NONE;
  3106 :}
  3107 STC.L SGR, @-Rn {:  
  3108     COUNT_INST(I_STCM);
  3109     check_priv();
  3110     load_reg( REG_EAX, Rn );
  3111     check_walign32( REG_EAX );
  3112     ADDL_imms_r32( -4, REG_EAX );
  3113     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3114     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3115     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3116     sh4_x86.tstate = TSTATE_NONE;
  3117 :}
  3118 STC.L DBR, @-Rn {:  
  3119     COUNT_INST(I_STCM);
  3120     check_priv();
  3121     load_reg( REG_EAX, Rn );
  3122     check_walign32( REG_EAX );
  3123     ADDL_imms_r32( -4, REG_EAX );
  3124     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3125     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3126     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3127     sh4_x86.tstate = TSTATE_NONE;
  3128 :}
  3129 STC.L Rm_BANK, @-Rn {:  
  3130     COUNT_INST(I_STCM);
  3131     check_priv();
  3132     load_reg( REG_EAX, Rn );
  3133     check_walign32( REG_EAX );
  3134     ADDL_imms_r32( -4, REG_EAX );
  3135     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3136     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3137     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3138     sh4_x86.tstate = TSTATE_NONE;
  3139 :}
  3140 STC.L GBR, @-Rn {:  
  3141     COUNT_INST(I_STCM);
  3142     load_reg( REG_EAX, Rn );
  3143     check_walign32( REG_EAX );
  3144     ADDL_imms_r32( -4, REG_EAX );
  3145     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3146     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3147     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3148     sh4_x86.tstate = TSTATE_NONE;
  3149 :}
  3150 STS FPSCR, Rn {:  
  3151     COUNT_INST(I_STSFPSCR);
  3152     check_fpuen();
  3153     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3154     store_reg( REG_EAX, Rn );
  3155 :}
  3156 STS.L FPSCR, @-Rn {:  
  3157     COUNT_INST(I_STSFPSCRM);
  3158     check_fpuen();
  3159     load_reg( REG_EAX, Rn );
  3160     check_walign32( REG_EAX );
  3161     ADDL_imms_r32( -4, REG_EAX );
  3162     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3163     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3164     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3165     sh4_x86.tstate = TSTATE_NONE;
  3166 :}
  3167 STS FPUL, Rn {:  
  3168     COUNT_INST(I_STS);
  3169     check_fpuen();
  3170     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3171     store_reg( REG_EAX, Rn );
  3172 :}
  3173 STS.L FPUL, @-Rn {:  
  3174     COUNT_INST(I_STSM);
  3175     check_fpuen();
  3176     load_reg( REG_EAX, Rn );
  3177     check_walign32( REG_EAX );
  3178     ADDL_imms_r32( -4, REG_EAX );
  3179     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3180     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3181     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3182     sh4_x86.tstate = TSTATE_NONE;
  3183 :}
  3184 STS MACH, Rn {:  
  3185     COUNT_INST(I_STS);
  3186     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3187     store_reg( REG_EAX, Rn );
  3188 :}
  3189 STS.L MACH, @-Rn {:  
  3190     COUNT_INST(I_STSM);
  3191     load_reg( REG_EAX, Rn );
  3192     check_walign32( REG_EAX );
  3193     ADDL_imms_r32( -4, REG_EAX );
  3194     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3195     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3196     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3197     sh4_x86.tstate = TSTATE_NONE;
  3198 :}
  3199 STS MACL, Rn {:  
  3200     COUNT_INST(I_STS);
  3201     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3202     store_reg( REG_EAX, Rn );
  3203 :}
  3204 STS.L MACL, @-Rn {:  
  3205     COUNT_INST(I_STSM);
  3206     load_reg( REG_EAX, Rn );
  3207     check_walign32( REG_EAX );
  3208     ADDL_imms_r32( -4, REG_EAX );
  3209     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3210     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3211     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3212     sh4_x86.tstate = TSTATE_NONE;
  3213 :}
  3214 STS PR, Rn {:  
  3215     COUNT_INST(I_STS);
  3216     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3217     store_reg( REG_EAX, Rn );
  3218 :}
  3219 STS.L PR, @-Rn {:  
  3220     COUNT_INST(I_STSM);
  3221     load_reg( REG_EAX, Rn );
  3222     check_walign32( REG_EAX );
  3223     ADDL_imms_r32( -4, REG_EAX );
  3224     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3225     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3226     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3227     sh4_x86.tstate = TSTATE_NONE;
  3228 :}
  3230 NOP {: 
  3231     COUNT_INST(I_NOP);
   3232     /* Do nothing. Well, we could emit a 0x90, but what would really be the point? */
  3233 :}
  3234 %%
  3235     sh4_x86.in_delay_slot = DELAY_NONE;
  3236     return 0;
   3237 }
  3240 /**
   3241  * The unwind methods only work if we were compiled with DWARF2 frame information
   3242  * (i.e. -fexceptions); otherwise we have to fall back to the direct frame scan.
  3243  */
  3244 #ifdef HAVE_EXCEPTIONS
  3245 #include <unwind.h>
  3247 struct UnwindInfo {
  3248     uintptr_t block_start;
  3249     uintptr_t block_end;
  3250     void *pc;
  3251 };
  3253 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
   3254 {
  3255     struct UnwindInfo *info = arg;
  3256     void *pc = (void *)_Unwind_GetIP(context);
  3257     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3258         info->pc = pc;
  3259         return _URC_NORMAL_STOP;
   3260     }
  3261     return _URC_NO_REASON;
   3262 }
  3264 void *xlat_get_native_pc( void *code, uint32_t code_size )
   3265 {
  3266     struct _Unwind_Exception exc;
  3267     struct UnwindInfo info;
  3269     info.pc = NULL;
  3270     info.block_start = (uintptr_t)code;
  3271     info.block_end = info.block_start + code_size;
  3272     void *result = NULL;
  3273     _Unwind_Backtrace( xlat_check_frame, &info );
  3274     return info.pc;
   3275 }
  3276 #else
   3277 /* Assume this is an ia32 build - amd64 should always have DWARF information */
  3278 void *xlat_get_native_pc( void *code, uint32_t code_size )
   3279 {
  3280     void *result = NULL;
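            /* Walk up to 8 saved frame pointers looking for the frame that
             * pushed the translator's fixed EBP value (&sh4r + 128); the word
             * immediately above that saved EBP (at offset 4) is the return
             * address within the translated block.
             */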
  3281     __asm__(
  3282         "mov %%ebp, %%eax\n\t"
  3283         "mov $0x8, %%ecx\n\t"
  3284         "mov %1, %%edx\n"
  3285         "frame_loop: test %%eax, %%eax\n\t"
  3286         "je frame_not_found\n\t"
  3287         "cmp (%%eax), %%edx\n\t"
  3288         "je frame_found\n\t"
  3289         "sub $0x1, %%ecx\n\t"
  3290         "je frame_not_found\n\t"
  3291         "movl (%%eax), %%eax\n\t"
  3292         "jmp frame_loop\n"
  3293         "frame_found: movl 0x4(%%eax), %0\n"
  3294         "frame_not_found:"
  3295         : "=r" (result)
  3296         : "r" (((uint8_t *)&sh4r) + 128 )
  3297         : "eax", "ecx", "edx" );
  3298     return result;
   3299 }
  3300 #endif
.