Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 1218:be02e87f9f87
prev 1216:defbd44429d8
next 1263:b3de98d19faf
author nkeynes
date Mon Feb 13 21:02:42 2012 +1000 (10 years ago)
permissions -rw-r--r--
last change Move profile_block setting out of sh4x86 and back into sh4.c. Fix last bits
preventing non-translation build
view annotate diff log raw
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4dasm.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/mmu.h"
    35 #include "xlat/xltcache.h"
    36 #include "xlat/x86/x86op.h"
    37 #include "x86dasm/x86dasm.h"
    38 #include "clock.h"
    40 #define DEFAULT_BACKPATCH_SIZE 4096
    42 /* Offset of a reg relative to the sh4r structure */
    43 #define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
    45 #define R_T      REG_OFFSET(t)
    46 #define R_Q      REG_OFFSET(q)
    47 #define R_S      REG_OFFSET(s)
    48 #define R_M      REG_OFFSET(m)
    49 #define R_SR     REG_OFFSET(sr)
    50 #define R_GBR    REG_OFFSET(gbr)
    51 #define R_SSR    REG_OFFSET(ssr)
    52 #define R_SPC    REG_OFFSET(spc)
    53 #define R_VBR    REG_OFFSET(vbr)
    54 #define R_MACH   REG_OFFSET(mac)+4
    55 #define R_MACL   REG_OFFSET(mac)
    56 #define R_PC     REG_OFFSET(pc)
    57 #define R_NEW_PC REG_OFFSET(new_pc)
    58 #define R_PR     REG_OFFSET(pr)
    59 #define R_SGR    REG_OFFSET(sgr)
    60 #define R_FPUL   REG_OFFSET(fpul)
    61 #define R_FPSCR  REG_OFFSET(fpscr)
    62 #define R_DBR    REG_OFFSET(dbr)
    63 #define R_R(rn)  REG_OFFSET(r[rn])
    64 #define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
    65 #define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
    66 #define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
    67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
    68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
    70 #define DELAY_NONE 0
    71 #define DELAY_PC 1
    72 #define DELAY_PC_PR 2
    74 #define SH4_MODE_UNKNOWN -1
/**
 * One entry in the per-block back-patch list: a location in the generated
 * code that must be fixed up after the block body has been emitted
 * (see sh4_x86_add_backpatch / sh4_translate_get_code_and_backpatch).
 */
struct backpatch_record {
    uint32_t fixup_offset;  /* Byte offset of the relocation field within the block's code */
    uint32_t fixup_icount;  /* SH4 instruction count from block start ((pc - block_start_pc)>>1) */
    int32_t exc_code;       /* Exception code to raise, or -2 for a pointer-sized relocation */
};
/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;      /* DELAY_NONE/DELAY_PC/DELAY_PC_PR - nonzero while in a delay slot */
    uint8_t *code;          /* Start of the code emitted for the current block */
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc; /* SH4 PC of the first instruction in the block */
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;            /* x86 condition currently caching SH4 T, or TSTATE_NONE */

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space; /* Memory map used when SR.MD is set */
    struct mem_region_fn **user_address_space; /* Memory map used when SR.MD is clear */

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback; /* Invoked at block entry (may be NULL) */
    xlat_block_end_callback_t end_callback;     /* Invoked at block exit (may be NULL) */
    gboolean fastmem;                           /* Enable direct memory access in generated code */

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn; /* Number of records currently in use */
    uint32_t backpatch_size; /* Allocated capacity, in records */
};
/* Singleton translator state - only valid during translation of one block */
static struct sh4_x86_state sh4_x86;

/* NOTE(review): presumably saturation limits and x87 control words for the
 * FP->int conversion instructions; their uses are outside this chunk - confirm. */
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   123 static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc );
   124 static void sh4_x86_translate_unlink_block( void *use_list );
/*
 * Symbol table handed to the x86 disassembler so that generated code can be
 * annotated with meaningful names. Entries [2] and [3] are placeholders,
 * filled in by sh4_translate_set_address_space().
 */
static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },
    { "sh4_user_address_space", NULL },
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
    { "sh4_translate_get_code_and_backpatch", sh4_translate_get_code_and_backpatch },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_raise_exception", sh4_raise_exception },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};
/* Target-specific callbacks registered with the xlat cache (see sh4_translate_init) */
static struct xlat_target_fns x86_target_fns = {
	sh4_x86_translate_unlink_block
};
/**
 * Probe the host CPU for SSE3 support via CPUID leaf 1.
 * Bit 0 of the ECX result is the SSE3 feature flag.
 * @return TRUE if SSE3 is available, otherwise FALSE.
 */
gboolean is_sse3_supported()
{
    uint32_t features;

    /* CPUID leaf 1: feature flags come back in ECX (captured) and EDX;
     * EAX/EBX are also overwritten, hence the clobber list. */
    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
   162 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
   163 {
   164     sh4_x86.priv_address_space = priv;
   165     sh4_x86.user_address_space = user;
   166     x86_symbol_table[2].ptr = priv;
   167     x86_symbol_table[3].ptr = user;
   168 }
   170 void sh4_translate_init(void)
   171 {
   172     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   173     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   174     sh4_x86.begin_callback = NULL;
   175     sh4_x86.end_callback = NULL;
   176     sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
   177     sh4_x86.fastmem = TRUE;
   178     sh4_x86.sse3_enabled = is_sse3_supported();
   179     x86_disasm_init();
   180     x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
   181     xlat_set_target_fns(&x86_target_fns);
   182 }
   184 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
   185 {
   186     sh4_x86.begin_callback = begin;
   187     sh4_x86.end_callback = end;
   188 }
   190 void sh4_translate_set_fastmem( gboolean flag )
   191 {
   192     sh4_x86.fastmem = flag;
   193 }
/**
 * Disassemble the given translated code block, and its source SH4 code block
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;
    /* The recovery table maps x86 offsets back to SH4 instruction counts;
     * use it to pace the SH4 listing against the x86 listing. */
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end;  ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#else
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#endif        
        /* Extend the SH4 window when we cross into the next recovery record */
        if( source_recov_table < source_recov_end && 
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        /* Print one SH4 instruction beside each x86 line while any remain */
        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    /* Flush any SH4 instructions left over after the x86 listing ends */
    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}
   245 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   246 {
   247     int reloc_size = 4;
   249     if( exc_code == -2 ) {
   250         reloc_size = sizeof(void *);
   251     }
   253     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   254 	sh4_x86.backpatch_size <<= 1;
   255 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   256 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   257 	assert( sh4_x86.backpatch_list != NULL );
   258     }
   259     if( sh4_x86.in_delay_slot ) {
   260 	fixup_pc -= 2;
   261     }
   263     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   264 	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
   265     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   266     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   267     sh4_x86.backpatch_posn++;
   268 }
/* Cached location of the SH4 T flag: either TSTATE_NONE (T only valid in
 * sh4r.t) or the x86 condition code that currently reflects it. */
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

/* Remember the address of the rel8 byte of the branch just emitted (the
 * -1 placeholder), so JMP_TARGET can patch it later. */
#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
/* Placeholder was -1, so adding (xlat_output - mark) yields
 * xlat_output - (mark+1): the correct rel8 displacement to here. */
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)

/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
/* Forward branches: emit with a -1 placeholder, resolved by JMP_TARGET(label) */
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
/* Conditional branch to the exception epilogue; resolved by the backpatch pass */
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/* Materialize T into the cflags if it is not already cached there */
#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }     

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
/* Move a general-purpose SH4 register to/from an x86 register */
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
/* NOTE(review): the (frm)^1 low-bit flip appears throughout the FR accessors;
 * presumably it compensates for the host byte-order layout of the fr[][]
 * pairs - confirm against the sh4r definition in sh4core.h. */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register 
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

/* x87 stack push/pop of FPUL and the single/double FP registers */
#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

#ifdef ENABLE_SH4STATS
/* Per-opcode statistics hook; clobbers EAX and invalidates the cached T state */
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/* Exception checks - Note that all exception checks will clobber EAX */

/* Translation-time privilege check: if translating in user mode, emit an
 * illegal-instruction (or slot-illegal) exit and terminate the block. */
#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

/* Emit a runtime test of SR.FD before the first FP instruction in the block;
 * raises FPU-disabled if set. Only emitted once per block. */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
	sh4_x86.fpuen_checked = TRUE;\
	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
	ANDL_imms_r32( SR_FD, REG_EAX );\
	if( sh4_x86.in_delay_slot ) {\
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
	} else {\
	    JNE_exc(EXC_FPU_DISABLED);\
	}\
	sh4_x86.tstate = TSTATE_NONE; \
    }

/* Runtime alignment checks: raise a data address error if the low bit(s)
 * of the address register are set (16/32/64-bit accesses respectively). */
#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

/* Select the privileged or user memory map based on the translation-time mode */
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
/* Emit a call through the address-space function table (offset selects the
 * read_byte/read_word/... slot) to read memory into value_reg. When memory
 * exceptions are possible, a placeholder exception-return pointer is passed
 * as a second argument and registered for back-patching. */
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
        /* Privileged mode, MMU off: no memory exceptions possible */
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        /* Placeholder pointer, fixed up later (exc_code -2 = pointer reloc) */
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) { 
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

/* Memory-write counterpart of call_read_func: value to store in value_reg */
static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }        
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2        
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        /* No third register argument in this ABI: pass the placeholder on the stack */
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
/* Without frame-address support there is no exception return pointer to
 * pass, so emit plain calls through the function table. */
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}     

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif
/* Offset of a named access function within struct mem_region_fn */
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
/* Typed memory-access emitters; all implicitly use the local 'pc' for backpatching */
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

/* Terminate translation with a slot-illegal exception (instruction in a delay slot) */
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offset of xlat_sh4_mode field relative to the code pointer */ 
#define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
   496 void sh4_translate_begin_block( sh4addr_t pc ) 
   497 {
   498 	sh4_x86.code = xlat_output;
   499     sh4_x86.in_delay_slot = FALSE;
   500     sh4_x86.fpuen_checked = FALSE;
   501     sh4_x86.branch_taken = FALSE;
   502     sh4_x86.backpatch_posn = 0;
   503     sh4_x86.block_start_pc = pc;
   504     sh4_x86.tlb_on = IS_TLB_ENABLED();
   505     sh4_x86.tstate = TSTATE_NONE;
   506     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   507     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   508     sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
   509     emit_prologue();
   510     if( sh4_x86.begin_callback ) {
   511         CALL_ptr( sh4_x86.begin_callback );
   512     }
   513     if( sh4_profile_blocks ) {
   514     	MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
   515     	ADDL_imms_r32disp( 1, REG_EAX, 0 );
   516     }  
   517 }
   520 uint32_t sh4_translate_end_block_size()
   521 {
   522 	uint32_t epilogue_size = EPILOGUE_SIZE;
   523 	if( sh4_x86.end_callback ) {
   524 	    epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
   525 	}
   526     if( sh4_x86.backpatch_posn <= 3 ) {
   527         epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
   528     } else {
   529         epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
   530     }
   531     return epilogue_size;
   532 }
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    /* The call clobbers cflags, so the cached T state is no longer valid */
    sh4_x86.tstate = TSTATE_NONE;
}

/* A PC is untranslatable when it falls outside the current icache window */
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output; /* Loop target for re-testing after following the chain */
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    /* Mode matches: skip the target block's prologue and jump into its body */
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    /* Mode mismatch: follow the block's chain pointer and loop back to re-test */
    JMP_TARGET(wrongmode);
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode); 
}
/**
 * Out-of-line helper invoked from generated code: find (or translate) the
 * block for pc matching the current SH4 mode, rewrite the calling site into
 * a direct jump to it, and arrange to return into the patched instruction.
 */
static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
{
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
    /* Walk the chain looking for a block built for the current xlat_sh4_mode */
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
        target = XLAT_BLOCK_CHAIN(target);
	}
    if( target == NULL ) {
        target = sh4_translate_basic_block( pc );
    }
    /* Overwrite the original call with a direct JMP rel32 (0xE9) to the
     * target body, then link this site onto the target's use-list so it can
     * be un-patched if the block is later invalidated. */
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
    *backpatch = 0xE9;
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch; 

    /* Rewrite our own return address so we return into the freshly patched
     * JMP. NOTE(review): assumes the return address sits one word above the
     * frame pointer - gcc/x86-specific layout, guarded by the assert. */
    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
	*retptr = backpatch;
}
/* Emit a call to sh4_translate_get_code_and_backpatch, padded with NOPs so
 * the site is large enough to be rewritten later as a 5-byte JMP followed
 * by a pointer-sized use-list link. */
static void emit_translate_and_backpatch()
{
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
    CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);

    /* When patched, the jmp instruction will be 5 bytes (either platform) -
     * we need to reserve sizeof(void*) bytes for the use-list
     * pointer
     */ 
    if( sizeof(void*) == 8 ) {
        NOP();
    } else {
        NOP2();
    }
}
/**
 * If we're jumping to a fixed address (or at least fixed relative to the
 * current PC, then we can do a direct branch. REG_ARG1 should contain
 * the PC at this point.
 */
static void jump_next_block_fixed_pc( sh4addr_t pc )
{
    if( IS_IN_ICACHE(pc) ) {
        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
             * fetch-and-backpatch routine, which will replace the call with a branch */
            emit_translate_and_backpatch();	         
            return;
        } else {
            /* Load the LUT entry for the physical address and mask the low
             * two bits (presumably entry flag bits - confirm in xltcache) */
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        }
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    /* Code pointer (possibly NULL) is now in %eax - test and chain into it */
    jump_next_block();
}
/* Unlink every call-site on a block's use-list by re-emitting the original
 * translate-and-backpatch call over each patched JMP. Temporarily points
 * xlat_output at each site to reuse the normal emitter. */
static void sh4_x86_translate_unlink_block( void *use_list )
{
    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
    void *next = use_list;
    while( next != NULL ) {
        xlat_output = (uint8_t *)next;
        /* Use-list link is stored immediately after the 5-byte JMP */
        next = *(void **)(xlat_output+5);
        emit_translate_and_backpatch();
    }
    xlat_output = tmp;
}
   663 static void exit_block()
   664 {
   665 	emit_epilogue();
   666 	if( sh4_x86.end_callback ) {
   667 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   668 	    JMP_rptr(REG_ECX);
   669 	} else {
   670 	    RET();
   671 	}
   672 }
/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    /* Charge the cycles consumed by the block to slice_cycle */
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    /* Leave translated code entirely if the event deadline has been reached */
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    /* Otherwise look up the code for the already-set PC and chain into it */
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    /* Charge the cycles consumed by the block to slice_cycle */
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    /* Commit new_pc into pc, keeping the value in REG_ARG1 for the lookup */
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    /* Leave translated code if the event deadline has been reached */
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    /* Charge the cycles consumed by the block to slice_cycle */
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    /* Store the (compile-time constant) target PC */
    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    /* Leave translated code if the event deadline has been reached */
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    jump_next_block_fixed_pc(pc);    
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    /* Charge the cycles consumed by the block to slice_cycle */
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        /* Backward displacement to this block's own body (just past the prologue) */
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        /* Compute and store the target PC relative to the stored pc */
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        jump_next_block_fixed_pc(pc);
        JMP_TARGET(exitloop2);
    }
    exit_block();
}
/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
{
    /* Advance PC to the faulting instruction */
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    /* Charge cycles up to (and adjusted past) the faulting instruction */
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    /* Raise the exception and take the standard exit */
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}    
    782 /**
    783  * Embed a call to sh4_execute_instruction for situations that we
    784  * can't translate (just page-crossing delay slots at the moment).
    785  * Caller is responsible for setting new_pc before calling this function.
    786  *
    787  * Performs:
    788  *   Set PC = endpc
    789  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
    790  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
    791  *   Call sh4_execute_instruction
    792  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
    793  */
    794 void exit_block_emu( sh4vma_t endpc )
    795 {
    // sh4r.pc += (endpc - block_start)
    796     MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    797     ADDL_r32_rbpdisp( REG_ECX, R_PC );
    // Charge cycles up to and including the instruction at endpc (the +1)
    799     MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    800     ADDL_imms_rbpdisp was not used here; ADDL_r32_rbpdisp below adds into slice_cycle
    800     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    // Propagate the translator's delay-slot flag into the runtime state
    801     MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    802     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
    804     CALL_ptr( sh4_execute_instruction );
    805     exit_block();
    806 } 
    808 /**
    809  * Write the block trailer (exception handling block)
 * If the block did not already exit unconditionally, emit the normal
 * relative-PC exit. Then, if any instruction in the block registered a
 * backpatch (a jump taken on an exception/fault), emit one shared recovery
 * stub and patch every registered jump to land on its per-site fixup code.
 * Convention in the stub: REG_EDX holds the instruction count at the fault
 * site (fixup_icount), loaded by the per-site code before jumping here.
    810  */
    811 void sh4_translate_end_block( sh4addr_t pc ) {
    812     if( sh4_x86.branch_taken == FALSE ) {
    813         // Didn't exit unconditionally already, so write the termination here
    814         exit_block_rel( pc, pc );
    815     }
    816     if( sh4_x86.backpatch_posn != 0 ) {
    817         unsigned int i;
    818         // Exception raised - cleanup and exit
    819         uint8_t *end_ptr = xlat_output;
    // SPC += icount*2 (EDX = #instructions executed before the fault)
    820         MOVL_r32_r32( REG_EDX, REG_ECX );
    821         ADDL_r32_r32( REG_EDX, REG_ECX );
    822         ADDL_r32_rbpdisp( REG_ECX, R_SPC );
    // slice_cycle += (icount+1) * sh4_cpu_period
    823         MOVL_moffptr_eax( &sh4_cpu_period );
    824         INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */ 
    825         MULL_r32( REG_EDX );
    826         ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
    827         exit_block();
    // Now emit the per-site fixup code and patch each recorded jump to it
    829         for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
    830             uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
    831             if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
    832                 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
    // -2: site wants an absolute pointer stored, not a relative jump offset
    833                     *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
    834                 } else {
    // Patch the rel32 of the original jump to point at the code emitted below
    835                     *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
    836                 }
    // Exception already raised by the callee: just load icount and jump to the stub
    837                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
    838                 int rel = end_ptr - xlat_output;
    839                 JMP_prerel(rel);
    840             } else {
    841                 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
    // Raise the recorded exception, then load icount and jump to the stub
    842                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
    843                 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    844                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
    845                 int rel = end_ptr - xlat_output;
    846                 JMP_prerel(rel);
    847             }
    848         }
    849     }
    850 }
   852 /**
   853  * Translate a single instruction. Delayed branches are handled specially
   854  * by translating both branch and delayed instruction as a single unit (as
   855  * 
   856  * The instruction MUST be in the icache (assert check)
   857  *
   858  * @return true if the instruction marks the end of a basic block
   859  * (eg a branch or 
   860  */
   861 uint32_t sh4_translate_instruction( sh4vma_t pc )
   862 {
   863     uint32_t ir;
   864     /* Read instruction from icache */
   865     assert( IS_IN_ICACHE(pc) );
   866     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   868     if( !sh4_x86.in_delay_slot ) {
   869 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   870     }
   872     /* check for breakpoints at this pc */
   873     for( int i=0; i<sh4_breakpoint_count; i++ ) {
   874         if( sh4_breakpoints[i].address == pc ) {
   875             sh4_translate_emit_breakpoint(pc);
   876             break;
   877         }
   878     }
   879 %%
   880 /* ALU operations */
/* Each rule below emits x86 code for one SH4 ALU instruction. sh4_x86.tstate
 * caches which x86 condition the SH4 T flag currently mirrors (TSTATE_NONE
 * when the x86 flags no longer correspond to T). */
    881 ADD Rm, Rn {:
    882     COUNT_INST(I_ADD);
    883     load_reg( REG_EAX, Rm );
    884     load_reg( REG_ECX, Rn );
    885     ADDL_r32_r32( REG_EAX, REG_ECX );
    886     store_reg( REG_ECX, Rn );
    887     sh4_x86.tstate = TSTATE_NONE;
    888 :}
    889 ADD #imm, Rn {:  
    890     COUNT_INST(I_ADDI);
    891     ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    892     sh4_x86.tstate = TSTATE_NONE;
    893 :}
    894 ADDC Rm, Rn {:
    895     COUNT_INST(I_ADDC);
    // Load T into the x86 carry flag unless it is already there
    896     if( sh4_x86.tstate != TSTATE_C ) {
    897         LDC_t();
    898     }
    899     load_reg( REG_EAX, Rm );
    900     load_reg( REG_ECX, Rn );
    901     ADCL_r32_r32( REG_EAX, REG_ECX );
    902     store_reg( REG_ECX, Rn );
    903     SETC_t();
    904     sh4_x86.tstate = TSTATE_C;
    905 :}
    906 ADDV Rm, Rn {:
    907     COUNT_INST(I_ADDV);
    908     load_reg( REG_EAX, Rm );
    909     load_reg( REG_ECX, Rn );
    910     ADDL_r32_r32( REG_EAX, REG_ECX );
    911     store_reg( REG_ECX, Rn );
    912     SETO_t();
    913     sh4_x86.tstate = TSTATE_O;
    914 :}
    915 AND Rm, Rn {:
    916     COUNT_INST(I_AND);
    917     load_reg( REG_EAX, Rm );
    918     load_reg( REG_ECX, Rn );
    919     ANDL_r32_r32( REG_EAX, REG_ECX );
    920     store_reg( REG_ECX, Rn );
    921     sh4_x86.tstate = TSTATE_NONE;
    922 :}
    923 AND #imm, R0 {:  
    924     COUNT_INST(I_ANDI);
    925     load_reg( REG_EAX, 0 );
    926     ANDL_imms_r32(imm, REG_EAX); 
    927     store_reg( REG_EAX, 0 );
    928     sh4_x86.tstate = TSTATE_NONE;
    929 :}
    930 AND.B #imm, @(R0, GBR) {: 
    931     COUNT_INST(I_ANDB);
    932     load_reg( REG_EAX, 0 );
    933     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    // Save the effective address on the stack across the read, so the
    // write-back below targets the same address.
    934     MOVL_r32_rspdisp(REG_EAX, 0);
    935     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    936     MOVL_rspdisp_r32(0, REG_EAX);
    937     ANDL_imms_r32(imm, REG_EDX );
    938     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    939     sh4_x86.tstate = TSTATE_NONE;
    940 :}
    941 CMP/EQ Rm, Rn {:  
    942     COUNT_INST(I_CMPEQ);
    943     load_reg( REG_EAX, Rm );
    944     load_reg( REG_ECX, Rn );
    945     CMPL_r32_r32( REG_EAX, REG_ECX );
    946     SETE_t();
    947     sh4_x86.tstate = TSTATE_E;
    948 :}
    949 CMP/EQ #imm, R0 {:  
    950     COUNT_INST(I_CMPEQI);
    951     load_reg( REG_EAX, 0 );
    952     CMPL_imms_r32(imm, REG_EAX);
    953     SETE_t();
    954     sh4_x86.tstate = TSTATE_E;
    955 :}
    956 CMP/GE Rm, Rn {:  
    957     COUNT_INST(I_CMPGE);
    958     load_reg( REG_EAX, Rm );
    959     load_reg( REG_ECX, Rn );
    960     CMPL_r32_r32( REG_EAX, REG_ECX );
    961     SETGE_t();
    962     sh4_x86.tstate = TSTATE_GE;
    963 :}
    964 CMP/GT Rm, Rn {: 
    965     COUNT_INST(I_CMPGT);
    966     load_reg( REG_EAX, Rm );
    967     load_reg( REG_ECX, Rn );
    968     CMPL_r32_r32( REG_EAX, REG_ECX );
    969     SETG_t();
    970     sh4_x86.tstate = TSTATE_G;
    971 :}
    972 CMP/HI Rm, Rn {:  
    973     COUNT_INST(I_CMPHI);
    974     load_reg( REG_EAX, Rm );
    975     load_reg( REG_ECX, Rn );
    976     CMPL_r32_r32( REG_EAX, REG_ECX );
    977     SETA_t();
    978     sh4_x86.tstate = TSTATE_A;
    979 :}
    980 CMP/HS Rm, Rn {: 
    981     COUNT_INST(I_CMPHS);
    982     load_reg( REG_EAX, Rm );
    983     load_reg( REG_ECX, Rn );
    984     CMPL_r32_r32( REG_EAX, REG_ECX );
    985     SETAE_t();
    986     sh4_x86.tstate = TSTATE_AE;
    987  :}
    988 CMP/PL Rn {: 
    989     COUNT_INST(I_CMPPL);
    990     load_reg( REG_EAX, Rn );
    991     CMPL_imms_r32( 0, REG_EAX );
    992     SETG_t();
    993     sh4_x86.tstate = TSTATE_G;
    994 :}
    995 CMP/PZ Rn {:  
    996     COUNT_INST(I_CMPPZ);
    997     load_reg( REG_EAX, Rn );
    998     CMPL_imms_r32( 0, REG_EAX );
    999     SETGE_t();
   1000     sh4_x86.tstate = TSTATE_GE;
   1001 :}
   1002 CMP/STR Rm, Rn {:  
   1003     COUNT_INST(I_CMPSTR);
   1004     load_reg( REG_EAX, Rm );
   1005     load_reg( REG_ECX, Rn );
    // T = 1 if any byte of Rm equals the corresponding byte of Rn:
    // XOR the registers, then test each byte of the result for zero,
    // short-circuiting to the final SETE on the first zero byte found.
   1006     XORL_r32_r32( REG_ECX, REG_EAX );
   1007     TESTB_r8_r8( REG_AL, REG_AL );
   1008     JE_label(target1);
   1009     TESTB_r8_r8( REG_AH, REG_AH );
   1010     JE_label(target2);
   1011     SHRL_imm_r32( 16, REG_EAX );
   1012     TESTB_r8_r8( REG_AL, REG_AL );
   1013     JE_label(target3);
   1014     TESTB_r8_r8( REG_AH, REG_AH );
   1015     JMP_TARGET(target1);
   1016     JMP_TARGET(target2);
   1017     JMP_TARGET(target3);
   1018     SETE_t();
   1019     sh4_x86.tstate = TSTATE_E;
   1020 :}
   1021 DIV0S Rm, Rn {:
   1022     COUNT_INST(I_DIV0S);
    // M = sign(Rm), Q = sign(Rn), T = M ^ Q
   1023     load_reg( REG_EAX, Rm );
   1024     load_reg( REG_ECX, Rn );
   1025     SHRL_imm_r32( 31, REG_EAX );
   1026     SHRL_imm_r32( 31, REG_ECX );
   1027     MOVL_r32_rbpdisp( REG_EAX, R_M );
   1028     MOVL_r32_rbpdisp( REG_ECX, R_Q );
   1029     CMPL_r32_r32( REG_EAX, REG_ECX );
   1030     SETNE_t();
   1031     sh4_x86.tstate = TSTATE_NE;
   1032 :}
   1033 DIV0U {:  
   1034     COUNT_INST(I_DIV0U);
    // Clear Q, M and T for an unsigned division sequence
   1035     XORL_r32_r32( REG_EAX, REG_EAX );
   1036     MOVL_r32_rbpdisp( REG_EAX, R_Q );
   1037     MOVL_r32_rbpdisp( REG_EAX, R_M );
   1038     MOVL_r32_rbpdisp( REG_EAX, R_T );
   1039     sh4_x86.tstate = TSTATE_C; // works for DIV1
   1040 :}
  1041 DIV1 Rm, Rn {:
  1042     COUNT_INST(I_DIV1);
  1043     MOVL_rbpdisp_r32( R_M, REG_ECX );
  1044     load_reg( REG_EAX, Rn );
  1045     if( sh4_x86.tstate != TSTATE_C ) {
  1046 	LDC_t();
  1048     RCLL_imm_r32( 1, REG_EAX );
  1049     SETC_r8( REG_DL ); // Q'
  1050     CMPL_rbpdisp_r32( R_Q, REG_ECX );
  1051     JE_label(mqequal);
  1052     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1053     JMP_label(end);
  1054     JMP_TARGET(mqequal);
  1055     SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1056     JMP_TARGET(end);
  1057     store_reg( REG_EAX, Rn ); // Done with Rn now
  1058     SETC_r8(REG_AL); // tmp1
  1059     XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
  1060     XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
  1061     MOVL_r32_rbpdisp( REG_ECX, R_Q );
  1062     XORL_imms_r32( 1, REG_AL );   // T = !Q'
  1063     MOVZXL_r8_r32( REG_AL, REG_EAX );
  1064     MOVL_r32_rbpdisp( REG_EAX, R_T );
  1065     sh4_x86.tstate = TSTATE_NONE;
  1066 :}
   1067 DMULS.L Rm, Rn {:
   1068     COUNT_INST(I_DMULS);
    // 32x32 -> 64 signed multiply; EDX:EAX -> MACH:MACL
   1069     load_reg( REG_EAX, Rm );
   1070     load_reg( REG_ECX, Rn );
   1071     IMULL_r32(REG_ECX);
   1072     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
   1073     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
   1074     sh4_x86.tstate = TSTATE_NONE;
   1075 :}
   1076 DMULU.L Rm, Rn {:  
   1077     COUNT_INST(I_DMULU);
    // 32x32 -> 64 unsigned multiply; EDX:EAX -> MACH:MACL
   1078     load_reg( REG_EAX, Rm );
   1079     load_reg( REG_ECX, Rn );
   1080     MULL_r32(REG_ECX);
   1081     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
   1082     MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
   1083     sh4_x86.tstate = TSTATE_NONE;
   1084 :}
   1085 DT Rn {:  
   1086     COUNT_INST(I_DT);
    // Rn -= 1; T = (Rn == 0)
   1087     load_reg( REG_EAX, Rn );
   1088     ADDL_imms_r32( -1, REG_EAX );
   1089     store_reg( REG_EAX, Rn );
   1090     SETE_t();
   1091     sh4_x86.tstate = TSTATE_E;
   1092 :}
   1093 EXTS.B Rm, Rn {:  
   1094     COUNT_INST(I_EXTSB);
   1095     load_reg( REG_EAX, Rm );
   1096     MOVSXL_r8_r32( REG_EAX, REG_EAX );
   1097     store_reg( REG_EAX, Rn );
   1098 :}
   1099 EXTS.W Rm, Rn {:  
   1100     COUNT_INST(I_EXTSW);
   1101     load_reg( REG_EAX, Rm );
   1102     MOVSXL_r16_r32( REG_EAX, REG_EAX );
   1103     store_reg( REG_EAX, Rn );
   1104 :}
   1105 EXTU.B Rm, Rn {:  
   1106     COUNT_INST(I_EXTUB);
   1107     load_reg( REG_EAX, Rm );
   1108     MOVZXL_r8_r32( REG_EAX, REG_EAX );
   1109     store_reg( REG_EAX, Rn );
   1110 :}
   1111 EXTU.W Rm, Rn {:  
   1112     COUNT_INST(I_EXTUW);
   1113     load_reg( REG_EAX, Rm );
   1114     MOVZXL_r16_r32( REG_EAX, REG_EAX );
   1115     store_reg( REG_EAX, Rn );
   1116 :}
  1117 MAC.L @Rm+, @Rn+ {:
  1118     COUNT_INST(I_MACL);
  1119     if( Rm == Rn ) {
  1120 	load_reg( REG_EAX, Rm );
  1121 	check_ralign32( REG_EAX );
  1122 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1123 	MOVL_r32_rspdisp(REG_EAX, 0);
  1124 	load_reg( REG_EAX, Rm );
  1125 	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1126 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1127         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
  1128     } else {
  1129 	load_reg( REG_EAX, Rm );
  1130 	check_ralign32( REG_EAX );
  1131 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1132 	MOVL_r32_rspdisp( REG_EAX, 0 );
  1133 	load_reg( REG_EAX, Rn );
  1134 	check_ralign32( REG_EAX );
  1135 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1136 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
  1137 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1140     IMULL_rspdisp( 0 );
  1141     ADDL_r32_rbpdisp( REG_EAX, R_MACL );
  1142     ADCL_r32_rbpdisp( REG_EDX, R_MACH );
  1144     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1145     TESTL_r32_r32(REG_ECX, REG_ECX);
  1146     JE_label( nosat );
  1147     CALL_ptr( signsat48 );
  1148     JMP_TARGET( nosat );
  1149     sh4_x86.tstate = TSTATE_NONE;
  1150 :}
  1151 MAC.W @Rm+, @Rn+ {:  
  1152     COUNT_INST(I_MACW);
  1153     if( Rm == Rn ) {
  1154 	load_reg( REG_EAX, Rm );
  1155 	check_ralign16( REG_EAX );
  1156 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1157         MOVL_r32_rspdisp( REG_EAX, 0 );
  1158 	load_reg( REG_EAX, Rm );
  1159 	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
  1160 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1161 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
  1162 	// Note translate twice in case of page boundaries. Maybe worth
  1163 	// adding a page-boundary check to skip the second translation
  1164     } else {
  1165 	load_reg( REG_EAX, Rn );
  1166 	check_ralign16( REG_EAX );
  1167 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1168         MOVL_r32_rspdisp( REG_EAX, 0 );
  1169 	load_reg( REG_EAX, Rm );
  1170 	check_ralign16( REG_EAX );
  1171 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1172 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
  1173 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1175     IMULL_rspdisp( 0 );
  1176     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1177     TESTL_r32_r32( REG_ECX, REG_ECX );
  1178     JE_label( nosat );
  1180     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1181     JNO_label( end );            // 2
  1182     MOVL_imm32_r32( 1, REG_EDX );         // 5
  1183     MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
  1184     JS_label( positive );        // 2
  1185     MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
  1186     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1187     JMP_label(end2);           // 2
  1189     JMP_TARGET(positive);
  1190     MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
  1191     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1192     JMP_label(end3);            // 2
  1194     JMP_TARGET(nosat);
  1195     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1196     ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
  1197     JMP_TARGET(end);
  1198     JMP_TARGET(end2);
  1199     JMP_TARGET(end3);
  1200     sh4_x86.tstate = TSTATE_NONE;
  1201 :}
   1202 MOVT Rn {:  
   1203     COUNT_INST(I_MOVT);
   1204     MOVL_rbpdisp_r32( R_T, REG_EAX );
   1205     store_reg( REG_EAX, Rn );
   1206 :}
   1207 MUL.L Rm, Rn {:  
   1208     COUNT_INST(I_MULL);
    // 32x32 multiply, low 32 bits only -> MACL (MACH untouched)
   1209     load_reg( REG_EAX, Rm );
   1210     load_reg( REG_ECX, Rn );
   1211     MULL_r32( REG_ECX );
   1212     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
   1213     sh4_x86.tstate = TSTATE_NONE;
   1214 :}
   1215 MULS.W Rm, Rn {:
   1216     COUNT_INST(I_MULSW);
    // Sign-extended 16x16 -> 32 multiply into MACL
   1217     MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
   1218     MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
   1219     MULL_r32( REG_ECX );
   1220     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
   1221     sh4_x86.tstate = TSTATE_NONE;
   1222 :}
   1223 MULU.W Rm, Rn {:  
   1224     COUNT_INST(I_MULUW);
    // Zero-extended 16x16 -> 32 multiply into MACL
   1225     MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
   1226     MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
   1227     MULL_r32( REG_ECX );
   1228     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
   1229     sh4_x86.tstate = TSTATE_NONE;
   1230 :}
   1231 NEG Rm, Rn {:
   1232     COUNT_INST(I_NEG);
   1233     load_reg( REG_EAX, Rm );
   1234     NEGL_r32( REG_EAX );
   1235     store_reg( REG_EAX, Rn );
   1236     sh4_x86.tstate = TSTATE_NONE;
   1237 :}
   1238 NEGC Rm, Rn {:  
   1239     COUNT_INST(I_NEGC);
    // Rn = 0 - Rm - T, T = borrow
   1240     load_reg( REG_EAX, Rm );
   1241     XORL_r32_r32( REG_ECX, REG_ECX );
   1242     LDC_t();
   1243     SBBL_r32_r32( REG_EAX, REG_ECX );
   1244     store_reg( REG_ECX, Rn );
   1245     SETC_t();
   1246     sh4_x86.tstate = TSTATE_C;
   1247 :}
   1248 NOT Rm, Rn {:  
   1249     COUNT_INST(I_NOT);
   1250     load_reg( REG_EAX, Rm );
   1251     NOTL_r32( REG_EAX );
   1252     store_reg( REG_EAX, Rn );
   1253     sh4_x86.tstate = TSTATE_NONE;
   1254 :}
   1255 OR Rm, Rn {:  
   1256     COUNT_INST(I_OR);
   1257     load_reg( REG_EAX, Rm );
   1258     load_reg( REG_ECX, Rn );
   1259     ORL_r32_r32( REG_EAX, REG_ECX );
   1260     store_reg( REG_ECX, Rn );
   1261     sh4_x86.tstate = TSTATE_NONE;
   1262 :}
   1263 OR #imm, R0 {:
   1264     COUNT_INST(I_ORI);
   1265     load_reg( REG_EAX, 0 );
   1266     ORL_imms_r32(imm, REG_EAX);
   1267     store_reg( REG_EAX, 0 );
   1268     sh4_x86.tstate = TSTATE_NONE;
   1269 :}
   1270 OR.B #imm, @(R0, GBR) {:  
   1271     COUNT_INST(I_ORB);
    // Read-modify-write byte at R0+GBR; address parked on the stack across the read
   1272     load_reg( REG_EAX, 0 );
   1273     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   1274     MOVL_r32_rspdisp( REG_EAX, 0 );
   1275     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   1276     MOVL_rspdisp_r32( 0, REG_EAX );
   1277     ORL_imms_r32(imm, REG_EDX );
   1278     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   1279     sh4_x86.tstate = TSTATE_NONE;
   1280 :}
  1281 ROTCL Rn {:
  1282     COUNT_INST(I_ROTCL);
  1283     load_reg( REG_EAX, Rn );
  1284     if( sh4_x86.tstate != TSTATE_C ) {
  1285 	LDC_t();
  1287     RCLL_imm_r32( 1, REG_EAX );
  1288     store_reg( REG_EAX, Rn );
  1289     SETC_t();
  1290     sh4_x86.tstate = TSTATE_C;
  1291 :}
  1292 ROTCR Rn {:  
  1293     COUNT_INST(I_ROTCR);
  1294     load_reg( REG_EAX, Rn );
  1295     if( sh4_x86.tstate != TSTATE_C ) {
  1296 	LDC_t();
  1298     RCRL_imm_r32( 1, REG_EAX );
  1299     store_reg( REG_EAX, Rn );
  1300     SETC_t();
  1301     sh4_x86.tstate = TSTATE_C;
  1302 :}
   1303 ROTL Rn {:  
   1304     COUNT_INST(I_ROTL);
   1305     load_reg( REG_EAX, Rn );
   1306     ROLL_imm_r32( 1, REG_EAX );
   1307     store_reg( REG_EAX, Rn );
   1308     SETC_t();
   1309     sh4_x86.tstate = TSTATE_C;
   1310 :}
   1311 ROTR Rn {:  
   1312     COUNT_INST(I_ROTR);
   1313     load_reg( REG_EAX, Rn );
   1314     RORL_imm_r32( 1, REG_EAX );
   1315     store_reg( REG_EAX, Rn );
   1316     SETC_t();
   1317     sh4_x86.tstate = TSTATE_C;
   1318 :}
   1319 SHAD Rm, Rn {:
   1320     COUNT_INST(I_SHAD);
   1321     /* Annoyingly enough, not directly convertible */
    // Rm >= 0: arithmetic shift left by Rm&0x1F.
    // Rm < 0:  arithmetic shift right by (-Rm)&0x1F, with shift-by-0
    //          meaning shift-by-32 (emulated as SAR 31).
   1322     load_reg( REG_EAX, Rn );
   1323     load_reg( REG_ECX, Rm );
   1324     CMPL_imms_r32( 0, REG_ECX );
   1325     JGE_label(doshl);
   1327     NEGL_r32( REG_ECX );      // 2
   1328     ANDB_imms_r8( 0x1F, REG_CL ); // 3
   1329     JE_label(emptysar);     // 2
   1330     SARL_cl_r32( REG_EAX );       // 2
   1331     JMP_label(end);          // 2
   1333     JMP_TARGET(emptysar);
   1334     SARL_imm_r32(31, REG_EAX );  // 3
   1335     JMP_label(end2);
   1337     JMP_TARGET(doshl);
   1338     ANDB_imms_r8( 0x1F, REG_CL ); // 3
   1339     SHLL_cl_r32( REG_EAX );       // 2
   1340     JMP_TARGET(end);
   1341     JMP_TARGET(end2);
   1342     store_reg( REG_EAX, Rn );
   1343     sh4_x86.tstate = TSTATE_NONE;
   1344 :}
   1345 SHLD Rm, Rn {:  
   1346     COUNT_INST(I_SHLD);
    // Logical variant of SHAD: right shift-by-32 yields 0 instead of sign fill
   1347     load_reg( REG_EAX, Rn );
   1348     load_reg( REG_ECX, Rm );
   1349     CMPL_imms_r32( 0, REG_ECX );
   1350     JGE_label(doshl);
   1352     NEGL_r32( REG_ECX );      // 2
   1353     ANDB_imms_r8( 0x1F, REG_CL ); // 3
   1354     JE_label(emptyshr );
   1355     SHRL_cl_r32( REG_EAX );       // 2
   1356     JMP_label(end);          // 2
   1358     JMP_TARGET(emptyshr);
   1359     XORL_r32_r32( REG_EAX, REG_EAX );
   1360     JMP_label(end2);
   1362     JMP_TARGET(doshl);
   1363     ANDB_imms_r8( 0x1F, REG_CL ); // 3
   1364     SHLL_cl_r32( REG_EAX );       // 2
   1365     JMP_TARGET(end);
   1366     JMP_TARGET(end2);
   1367     store_reg( REG_EAX, Rn );
   1368     sh4_x86.tstate = TSTATE_NONE;
   1369 :}
   1370 SHAL Rn {: 
   1371     COUNT_INST(I_SHAL);
   1372     load_reg( REG_EAX, Rn );
   1373     SHLL_imm_r32( 1, REG_EAX );
   1374     SETC_t();
   1375     store_reg( REG_EAX, Rn );
   1376     sh4_x86.tstate = TSTATE_C;
   1377 :}
   1378 SHAR Rn {:  
   1379     COUNT_INST(I_SHAR);
   1380     load_reg( REG_EAX, Rn );
   1381     SARL_imm_r32( 1, REG_EAX );
   1382     SETC_t();
   1383     store_reg( REG_EAX, Rn );
   1384     sh4_x86.tstate = TSTATE_C;
   1385 :}
   1386 SHLL Rn {:  
   1387     COUNT_INST(I_SHLL);
   1388     load_reg( REG_EAX, Rn );
   1389     SHLL_imm_r32( 1, REG_EAX );
   1390     SETC_t();
   1391     store_reg( REG_EAX, Rn );
   1392     sh4_x86.tstate = TSTATE_C;
   1393 :}
   1394 SHLL2 Rn {:
   1395     COUNT_INST(I_SHLL);
   1396     load_reg( REG_EAX, Rn );
   1397     SHLL_imm_r32( 2, REG_EAX );
   1398     store_reg( REG_EAX, Rn );
   1399     sh4_x86.tstate = TSTATE_NONE;
   1400 :}
   1401 SHLL8 Rn {:  
   1402     COUNT_INST(I_SHLL);
   1403     load_reg( REG_EAX, Rn );
   1404     SHLL_imm_r32( 8, REG_EAX );
   1405     store_reg( REG_EAX, Rn );
   1406     sh4_x86.tstate = TSTATE_NONE;
   1407 :}
   1408 SHLL16 Rn {:  
   1409     COUNT_INST(I_SHLL);
   1410     load_reg( REG_EAX, Rn );
   1411     SHLL_imm_r32( 16, REG_EAX );
   1412     store_reg( REG_EAX, Rn );
   1413     sh4_x86.tstate = TSTATE_NONE;
   1414 :}
   1415 SHLR Rn {:  
   1416     COUNT_INST(I_SHLR);
   1417     load_reg( REG_EAX, Rn );
   1418     SHRL_imm_r32( 1, REG_EAX );
   1419     SETC_t();
   1420     store_reg( REG_EAX, Rn );
   1421     sh4_x86.tstate = TSTATE_C;
   1422 :}
   1423 SHLR2 Rn {:  
   1424     COUNT_INST(I_SHLR);
   1425     load_reg( REG_EAX, Rn );
   1426     SHRL_imm_r32( 2, REG_EAX );
   1427     store_reg( REG_EAX, Rn );
   1428     sh4_x86.tstate = TSTATE_NONE;
   1429 :}
   1430 SHLR8 Rn {:  
   1431     COUNT_INST(I_SHLR);
   1432     load_reg( REG_EAX, Rn );
   1433     SHRL_imm_r32( 8, REG_EAX );
   1434     store_reg( REG_EAX, Rn );
   1435     sh4_x86.tstate = TSTATE_NONE;
   1436 :}
   1437 SHLR16 Rn {:  
   1438     COUNT_INST(I_SHLR);
   1439     load_reg( REG_EAX, Rn );
   1440     SHRL_imm_r32( 16, REG_EAX );
   1441     store_reg( REG_EAX, Rn );
   1442     sh4_x86.tstate = TSTATE_NONE;
   1443 :}
   1444 SUB Rm, Rn {:  
   1445     COUNT_INST(I_SUB);
   1446     load_reg( REG_EAX, Rm );
   1447     load_reg( REG_ECX, Rn );
   1448     SUBL_r32_r32( REG_EAX, REG_ECX );
   1449     store_reg( REG_ECX, Rn );
   1450     sh4_x86.tstate = TSTATE_NONE;
   1451 :}
  1452 SUBC Rm, Rn {:  
  1453     COUNT_INST(I_SUBC);
  1454     load_reg( REG_EAX, Rm );
  1455     load_reg( REG_ECX, Rn );
  1456     if( sh4_x86.tstate != TSTATE_C ) {
  1457 	LDC_t();
  1459     SBBL_r32_r32( REG_EAX, REG_ECX );
  1460     store_reg( REG_ECX, Rn );
  1461     SETC_t();
  1462     sh4_x86.tstate = TSTATE_C;
  1463 :}
   1464 SUBV Rm, Rn {:  
   1465     COUNT_INST(I_SUBV);
    // Rn = Rn - Rm; T = signed overflow
   1466     load_reg( REG_EAX, Rm );
   1467     load_reg( REG_ECX, Rn );
   1468     SUBL_r32_r32( REG_EAX, REG_ECX );
   1469     store_reg( REG_ECX, Rn );
   1470     SETO_t();
   1471     sh4_x86.tstate = TSTATE_O;
   1472 :}
   1473 SWAP.B Rm, Rn {:  
   1474     COUNT_INST(I_SWAPB);
    // Swap the low two bytes of Rm into Rn
   1475     load_reg( REG_EAX, Rm );
   1476     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
   1477     store_reg( REG_EAX, Rn );
   1478 :}
   1479 SWAP.W Rm, Rn {:  
   1480     COUNT_INST(I_SWAPB);
    // Swap the two 16-bit halves of Rm into Rn
   1481     load_reg( REG_EAX, Rm );
   1482     MOVL_r32_r32( REG_EAX, REG_ECX );
   1483     SHLL_imm_r32( 16, REG_ECX );
   1484     SHRL_imm_r32( 16, REG_EAX );
   1485     ORL_r32_r32( REG_EAX, REG_ECX );
   1486     store_reg( REG_ECX, Rn );
   1487     sh4_x86.tstate = TSTATE_NONE;
   1488 :}
   1489 TAS.B @Rn {:  
   1490     COUNT_INST(I_TASB);
    // T = (byte at @Rn == 0); then set bit 7 of the byte and write it back
   1491     load_reg( REG_EAX, Rn );
   1492     MOVL_r32_rspdisp( REG_EAX, 0 );
   1493     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   1494     TESTB_r8_r8( REG_DL, REG_DL );
   1495     SETE_t();
   1496     ORB_imms_r8( 0x80, REG_DL );
   1497     MOVL_rspdisp_r32( 0, REG_EAX );
   1498     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   1499     sh4_x86.tstate = TSTATE_NONE;
   1500 :}
   1501 TST Rm, Rn {:  
   1502     COUNT_INST(I_TST);
   1503     load_reg( REG_EAX, Rm );
   1504     load_reg( REG_ECX, Rn );
   1505     TESTL_r32_r32( REG_EAX, REG_ECX );
   1506     SETE_t();
   1507     sh4_x86.tstate = TSTATE_E;
   1508 :}
   1509 TST #imm, R0 {:  
   1510     COUNT_INST(I_TSTI);
   1511     load_reg( REG_EAX, 0 );
   1512     TESTL_imms_r32( imm, REG_EAX );
   1513     SETE_t();
   1514     sh4_x86.tstate = TSTATE_E;
   1515 :}
   1516 TST.B #imm, @(R0, GBR) {:  
   1517     COUNT_INST(I_TSTB);
   1518     load_reg( REG_EAX, 0);
   1519     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   1520     MEM_READ_BYTE( REG_EAX, REG_EAX );
   1521     TESTB_imms_r8( imm, REG_AL );
   1522     SETE_t();
   1523     sh4_x86.tstate = TSTATE_E;
   1524 :}
   1525 XOR Rm, Rn {:  
   1526     COUNT_INST(I_XOR);
   1527     load_reg( REG_EAX, Rm );
   1528     load_reg( REG_ECX, Rn );
   1529     XORL_r32_r32( REG_EAX, REG_ECX );
   1530     store_reg( REG_ECX, Rn );
   1531     sh4_x86.tstate = TSTATE_NONE;
   1532 :}
   1533 XOR #imm, R0 {:  
   1534     COUNT_INST(I_XORI);
   1535     load_reg( REG_EAX, 0 );
   1536     XORL_imms_r32( imm, REG_EAX );
   1537     store_reg( REG_EAX, 0 );
   1538     sh4_x86.tstate = TSTATE_NONE;
   1539 :}
   1540 XOR.B #imm, @(R0, GBR) {:  
   1541     COUNT_INST(I_XORB);
    // Read-modify-write byte at R0+GBR; address parked on the stack across the read
   1542     load_reg( REG_EAX, 0 );
   1543     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
   1544     MOVL_r32_rspdisp( REG_EAX, 0 );
   1545     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
   1546     MOVL_rspdisp_r32( 0, REG_EAX );
   1547     XORL_imms_r32( imm, REG_EDX );
   1548     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   1549     sh4_x86.tstate = TSTATE_NONE;
   1550 :}
   1551 XTRCT Rm, Rn {:
   1552     COUNT_INST(I_XTRCT);
    // Rn = (Rm << 16) | (Rn >> 16) - middle 32 bits of the Rm:Rn pair
   1553     load_reg( REG_EAX, Rm );
   1554     load_reg( REG_ECX, Rn );
   1555     SHLL_imm_r32( 16, REG_EAX );
   1556     SHRL_imm_r32( 16, REG_ECX );
   1557     ORL_r32_r32( REG_EAX, REG_ECX );
   1558     store_reg( REG_ECX, Rn );
   1559     sh4_x86.tstate = TSTATE_NONE;
   1560 :}
  1562 /* Data move instructions */
   1563 MOV Rm, Rn {:  
   1564     COUNT_INST(I_MOV);
   1565     load_reg( REG_EAX, Rm );
   1566     store_reg( REG_EAX, Rn );
   1567 :}
   1568 MOV #imm, Rn {:  
   1569     COUNT_INST(I_MOVI);
   1570     MOVL_imm32_r32( imm, REG_EAX );
   1571     store_reg( REG_EAX, Rn );
   1572 :}
   1573 MOV.B Rm, @Rn {:  
   1574     COUNT_INST(I_MOVB);
   1575     load_reg( REG_EAX, Rn );
   1576     load_reg( REG_EDX, Rm );
   1577     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   1578     sh4_x86.tstate = TSTATE_NONE;
   1579 :}
   1580 MOV.B Rm, @-Rn {:  
   1581     COUNT_INST(I_MOVB);
    // Write first, then commit the pre-decrement (so a faulting write
    // leaves Rn unchanged for exception restart)
   1582     load_reg( REG_EAX, Rn );
   1583     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
   1584     load_reg( REG_EDX, Rm );
   1585     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   1586     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
   1587     sh4_x86.tstate = TSTATE_NONE;
   1588 :}
   1589 MOV.B Rm, @(R0, Rn) {:  
   1590     COUNT_INST(I_MOVB);
   1591     load_reg( REG_EAX, 0 );
   1592     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
   1593     load_reg( REG_EDX, Rm );
   1594     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   1595     sh4_x86.tstate = TSTATE_NONE;
   1596 :}
   1597 MOV.B R0, @(disp, GBR) {:  
   1598     COUNT_INST(I_MOVB);
   1599     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
   1600     ADDL_imms_r32( disp, REG_EAX );
   1601     load_reg( REG_EDX, 0 );
   1602     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   1603     sh4_x86.tstate = TSTATE_NONE;
   1604 :}
   1605 MOV.B R0, @(disp, Rn) {:  
   1606     COUNT_INST(I_MOVB);
   1607     load_reg( REG_EAX, Rn );
   1608     ADDL_imms_r32( disp, REG_EAX );
   1609     load_reg( REG_EDX, 0 );
   1610     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   1611     sh4_x86.tstate = TSTATE_NONE;
   1612 :}
   1613 MOV.B @Rm, Rn {:  
   1614     COUNT_INST(I_MOVB);
   1615     load_reg( REG_EAX, Rm );
   1616     MEM_READ_BYTE( REG_EAX, REG_EAX );
   1617     store_reg( REG_EAX, Rn );
   1618     sh4_x86.tstate = TSTATE_NONE;
   1619 :}
  1620 MOV.B @Rm+, Rn {:  
  1621     COUNT_INST(I_MOVB);
  1622     load_reg( REG_EAX, Rm );
  1623     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1624     if( Rm != Rn ) {
  1625     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1627     store_reg( REG_EAX, Rn );
  1628     sh4_x86.tstate = TSTATE_NONE;
  1629 :}
   1630 MOV.B @(R0, Rm), Rn {:  
   1631     COUNT_INST(I_MOVB);
   1632     load_reg( REG_EAX, 0 );
   1633     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
   1634     MEM_READ_BYTE( REG_EAX, REG_EAX );
   1635     store_reg( REG_EAX, Rn );
   1636     sh4_x86.tstate = TSTATE_NONE;
   1637 :}
   1638 MOV.B @(disp, GBR), R0 {:  
   1639     COUNT_INST(I_MOVB);
   1640     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
   1641     ADDL_imms_r32( disp, REG_EAX );
   1642     MEM_READ_BYTE( REG_EAX, REG_EAX );
   1643     store_reg( REG_EAX, 0 );
   1644     sh4_x86.tstate = TSTATE_NONE;
   1645 :}
   1646 MOV.B @(disp, Rm), R0 {:  
   1647     COUNT_INST(I_MOVB);
   1648     load_reg( REG_EAX, Rm );
   1649     ADDL_imms_r32( disp, REG_EAX );
   1650     MEM_READ_BYTE( REG_EAX, REG_EAX );
   1651     store_reg( REG_EAX, 0 );
   1652     sh4_x86.tstate = TSTATE_NONE;
   1653 :}
   1654 MOV.L Rm, @Rn {:
   1655     COUNT_INST(I_MOVL);
   1656     load_reg( REG_EAX, Rn );
   1657     check_walign32(REG_EAX);
    // Detect store-queue addresses (top bits == 0xE0000000) and write those
    // directly into sh4r.store_queue instead of going through memory.
   1658     MOVL_r32_r32( REG_EAX, REG_ECX );
   1659     ANDL_imms_r32( 0xFC000000, REG_ECX );
   1660     CMPL_imms_r32( 0xE0000000, REG_ECX );
   1661     JNE_label( notsq );
   1662     ANDL_imms_r32( 0x3C, REG_EAX );
   1663     load_reg( REG_EDX, Rm );
   1664     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
   1665     JMP_label(end);
   1666     JMP_TARGET(notsq);
   1667     load_reg( REG_EDX, Rm );
   1668     MEM_WRITE_LONG( REG_EAX, REG_EDX );
   1669     JMP_TARGET(end);
   1670     sh4_x86.tstate = TSTATE_NONE;
   1671 :}
   1672 MOV.L Rm, @-Rn {:  
   1673     COUNT_INST(I_MOVL);
    // Write first, then commit the pre-decrement (faulting write leaves Rn intact)
   1674     load_reg( REG_EAX, Rn );
   1675     ADDL_imms_r32( -4, REG_EAX );
   1676     check_walign32( REG_EAX );
   1677     load_reg( REG_EDX, Rm );
   1678     MEM_WRITE_LONG( REG_EAX, REG_EDX );
   1679     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
   1680     sh4_x86.tstate = TSTATE_NONE;
   1681 :}
   1682 MOV.L Rm, @(R0, Rn) {:  
   1683     COUNT_INST(I_MOVL);
   1684     load_reg( REG_EAX, 0 );
   1685     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
   1686     check_walign32( REG_EAX );
   1687     load_reg( REG_EDX, Rm );
   1688     MEM_WRITE_LONG( REG_EAX, REG_EDX );
   1689     sh4_x86.tstate = TSTATE_NONE;
   1690 :}
   1691 MOV.L R0, @(disp, GBR) {:  
   1692     COUNT_INST(I_MOVL);
   1693     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
   1694     ADDL_imms_r32( disp, REG_EAX );
   1695     check_walign32( REG_EAX );
   1696     load_reg( REG_EDX, 0 );
   1697     MEM_WRITE_LONG( REG_EAX, REG_EDX );
   1698     sh4_x86.tstate = TSTATE_NONE;
   1699 :}
   1700 MOV.L Rm, @(disp, Rn) {:  
   1701     COUNT_INST(I_MOVL);
   1702     load_reg( REG_EAX, Rn );
   1703     ADDL_imms_r32( disp, REG_EAX );
   1704     check_walign32( REG_EAX );
    // Same store-queue special case as MOV.L Rm, @Rn above
   1705     MOVL_r32_r32( REG_EAX, REG_ECX );
   1706     ANDL_imms_r32( 0xFC000000, REG_ECX );
   1707     CMPL_imms_r32( 0xE0000000, REG_ECX );
   1708     JNE_label( notsq );
   1709     ANDL_imms_r32( 0x3C, REG_EAX );
   1710     load_reg( REG_EDX, Rm );
   1711     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
   1712     JMP_label(end);
   1713     JMP_TARGET(notsq);
   1714     load_reg( REG_EDX, Rm );
   1715     MEM_WRITE_LONG( REG_EAX, REG_EDX );
   1716     JMP_TARGET(end);
   1717     sh4_x86.tstate = TSTATE_NONE;
   1718 :}
   1719 MOV.L @Rm, Rn {:  
   1720     COUNT_INST(I_MOVL);
   1721     load_reg( REG_EAX, Rm );
   1722     check_ralign32( REG_EAX );
   1723     MEM_READ_LONG( REG_EAX, REG_EAX );
   1724     store_reg( REG_EAX, Rn );
   1725     sh4_x86.tstate = TSTATE_NONE;
   1726 :}
  1727 MOV.L @Rm+, Rn {:  
  1728     COUNT_INST(I_MOVL);
  1729     load_reg( REG_EAX, Rm );
  1730     check_ralign32( REG_EAX );
  1731     MEM_READ_LONG( REG_EAX, REG_EAX );
  1732     if( Rm != Rn ) {
  1733     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1735     store_reg( REG_EAX, Rn );
  1736     sh4_x86.tstate = TSTATE_NONE;
  1737 :}
/* MOV.L @(R0, Rm), Rn: indexed long load from R0+Rm. */
MOV.L @(R0, Rm), Rn {:  
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MOV.L @(disp, GBR), R0: long load from GBR+disp into R0. */
MOV.L @(disp, GBR), R0 {:
    COUNT_INST(I_MOVL);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1756 MOV.L @(disp, PC), Rn {:  
  1757     COUNT_INST(I_MOVLPC);
  1758     if( sh4_x86.in_delay_slot ) {
  1759 	SLOTILLEGAL();
  1760     } else {
  1761 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1762 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1763 	    // If the target address is in the same page as the code, it's
  1764 	    // pretty safe to just ref it directly and circumvent the whole
  1765 	    // memory subsystem. (this is a big performance win)
  1767 	    // FIXME: There's a corner-case that's not handled here when
  1768 	    // the current code-page is in the ITLB but not in the UTLB.
  1769 	    // (should generate a TLB miss although need to test SH4 
  1770 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1771 	    // behaviour though.
  1772 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1773 	    MOVL_moffptr_eax( ptr );
  1774 	} else {
  1775 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1776 	    // different virtual address than the translation was done with,
  1777 	    // but we can safely assume that the low bits are the same.
  1778 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1779 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1780 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1781 	    sh4_x86.tstate = TSTATE_NONE;
  1783 	store_reg( REG_EAX, Rn );
  1785 :}
/* MOV.L @(disp, Rm), Rn: long load from Rm+disp. */
MOV.L @(disp, Rm), Rn {:  
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MOV.W Rm, @Rn: word store to @Rn. */
MOV.W Rm, @Rn {:  
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MOV.W Rm, @-Rn: pre-decrement word store; Rn updated only after the
 * write succeeds. */
MOV.W Rm, @-Rn {:  
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    check_walign16( REG_EAX );
    LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MOV.W Rm, @(R0, Rn): indexed word store to R0+Rn. */
MOV.W Rm, @(R0, Rn) {:  
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MOV.W R0, @(disp, GBR): word store to GBR+disp. */
MOV.W R0, @(disp, GBR) {:  
    COUNT_INST(I_MOVW);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MOV.W R0, @(disp, Rn): word store to Rn+disp. */
MOV.W R0, @(disp, Rn) {:  
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MOV.W @Rm, Rn: sign-extended word load from @Rm. */
MOV.W @Rm, Rn {:  
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rm );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1848 MOV.W @Rm+, Rn {:  
  1849     COUNT_INST(I_MOVW);
  1850     load_reg( REG_EAX, Rm );
  1851     check_ralign16( REG_EAX );
  1852     MEM_READ_WORD( REG_EAX, REG_EAX );
  1853     if( Rm != Rn ) {
  1854         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1856     store_reg( REG_EAX, Rn );
  1857     sh4_x86.tstate = TSTATE_NONE;
  1858 :}
/* MOV.W @(R0, Rm), Rn: indexed sign-extended word load from R0+Rm. */
MOV.W @(R0, Rm), Rn {:  
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* MOV.W @(disp, GBR), R0: sign-extended word load from GBR+disp into R0. */
MOV.W @(disp, GBR), R0 {:  
    COUNT_INST(I_MOVW);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1877 MOV.W @(disp, PC), Rn {:  
  1878     COUNT_INST(I_MOVW);
  1879     if( sh4_x86.in_delay_slot ) {
  1880 	SLOTILLEGAL();
  1881     } else {
  1882 	// See comments for MOV.L @(disp, PC), Rn
  1883 	uint32_t target = pc + disp + 4;
  1884 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1885 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1886 	    MOVL_moffptr_eax( ptr );
  1887 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1888 	} else {
  1889 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1890 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1891 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1892 	    sh4_x86.tstate = TSTATE_NONE;
  1894 	store_reg( REG_EAX, Rn );
  1896 :}
/* MOV.W @(disp, Rm), R0: sign-extended word load from Rm+disp into R0. */
MOV.W @(disp, Rm), R0 {:  
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1906 MOVA @(disp, PC), R0 {:  
  1907     COUNT_INST(I_MOVA);
  1908     if( sh4_x86.in_delay_slot ) {
  1909 	SLOTILLEGAL();
  1910     } else {
  1911 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1912 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1913 	store_reg( REG_ECX, 0 );
  1914 	sh4_x86.tstate = TSTATE_NONE;
  1916 :}
/* MOVCA.L R0, @Rn: store-with-cache-allocation; translated as a plain long
 * store (cache behaviour is not modelled here). */
MOVCA.L R0, @Rn {:  
    COUNT_INST(I_MOVCA);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1926 /* Control transfer instructions */
  1927 BF disp {:
  1928     COUNT_INST(I_BF);
  1929     if( sh4_x86.in_delay_slot ) {
  1930 	SLOTILLEGAL();
  1931     } else {
  1932 	sh4vma_t target = disp + pc + 4;
  1933 	JT_label( nottaken );
  1934 	exit_block_rel(target, pc+2 );
  1935 	JMP_TARGET(nottaken);
  1936 	return 2;
  1938 :}
  1939 BF/S disp {:
  1940     COUNT_INST(I_BFS);
  1941     if( sh4_x86.in_delay_slot ) {
  1942 	SLOTILLEGAL();
  1943     } else {
  1944 	sh4_x86.in_delay_slot = DELAY_PC;
  1945 	if( UNTRANSLATABLE(pc+2) ) {
  1946 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1947 	    JT_label(nottaken);
  1948 	    ADDL_imms_r32( disp, REG_EAX );
  1949 	    JMP_TARGET(nottaken);
  1950 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1951 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1952 	    exit_block_emu(pc+2);
  1953 	    sh4_x86.branch_taken = TRUE;
  1954 	    return 2;
  1955 	} else {
  1956 	    LOAD_t();
  1957 	    sh4vma_t target = disp + pc + 4;
  1958 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1959 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1960 	    int save_tstate = sh4_x86.tstate;
  1961 	    sh4_translate_instruction(pc+2);
  1962             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1963 	    exit_block_rel( target, pc+4 );
  1965 	    // not taken
  1966 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1967 	    sh4_x86.tstate = save_tstate;
  1968 	    sh4_translate_instruction(pc+2);
  1969 	    return 4;
  1972 :}
  1973 BRA disp {:  
  1974     COUNT_INST(I_BRA);
  1975     if( sh4_x86.in_delay_slot ) {
  1976 	SLOTILLEGAL();
  1977     } else {
  1978 	sh4_x86.in_delay_slot = DELAY_PC;
  1979 	sh4_x86.branch_taken = TRUE;
  1980 	if( UNTRANSLATABLE(pc+2) ) {
  1981 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1982 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1983 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1984 	    exit_block_emu(pc+2);
  1985 	    return 2;
  1986 	} else {
  1987 	    sh4_translate_instruction( pc + 2 );
  1988 	    exit_block_rel( disp + pc + 4, pc+4 );
  1989 	    return 4;
  1992 :}
  1993 BRAF Rn {:  
  1994     COUNT_INST(I_BRAF);
  1995     if( sh4_x86.in_delay_slot ) {
  1996 	SLOTILLEGAL();
  1997     } else {
  1998 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1999 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2000 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2001 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2002 	sh4_x86.in_delay_slot = DELAY_PC;
  2003 	sh4_x86.tstate = TSTATE_NONE;
  2004 	sh4_x86.branch_taken = TRUE;
  2005 	if( UNTRANSLATABLE(pc+2) ) {
  2006 	    exit_block_emu(pc+2);
  2007 	    return 2;
  2008 	} else {
  2009 	    sh4_translate_instruction( pc + 2 );
  2010 	    exit_block_newpcset(pc+4);
  2011 	    return 4;
  2014 :}
  2015 BSR disp {:  
  2016     COUNT_INST(I_BSR);
  2017     if( sh4_x86.in_delay_slot ) {
  2018 	SLOTILLEGAL();
  2019     } else {
  2020 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2021 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2022 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2023 	sh4_x86.in_delay_slot = DELAY_PC;
  2024 	sh4_x86.branch_taken = TRUE;
  2025 	sh4_x86.tstate = TSTATE_NONE;
  2026 	if( UNTRANSLATABLE(pc+2) ) {
  2027 	    ADDL_imms_r32( disp, REG_EAX );
  2028 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2029 	    exit_block_emu(pc+2);
  2030 	    return 2;
  2031 	} else {
  2032 	    sh4_translate_instruction( pc + 2 );
  2033 	    exit_block_rel( disp + pc + 4, pc+4 );
  2034 	    return 4;
  2037 :}
  2038 BSRF Rn {:  
  2039     COUNT_INST(I_BSRF);
  2040     if( sh4_x86.in_delay_slot ) {
  2041 	SLOTILLEGAL();
  2042     } else {
  2043 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2044 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2045 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2046 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  2047 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2049 	sh4_x86.in_delay_slot = DELAY_PC;
  2050 	sh4_x86.tstate = TSTATE_NONE;
  2051 	sh4_x86.branch_taken = TRUE;
  2052 	if( UNTRANSLATABLE(pc+2) ) {
  2053 	    exit_block_emu(pc+2);
  2054 	    return 2;
  2055 	} else {
  2056 	    sh4_translate_instruction( pc + 2 );
  2057 	    exit_block_newpcset(pc+4);
  2058 	    return 4;
  2061 :}
  2062 BT disp {:
  2063     COUNT_INST(I_BT);
  2064     if( sh4_x86.in_delay_slot ) {
  2065 	SLOTILLEGAL();
  2066     } else {
  2067 	sh4vma_t target = disp + pc + 4;
  2068 	JF_label( nottaken );
  2069 	exit_block_rel(target, pc+2 );
  2070 	JMP_TARGET(nottaken);
  2071 	return 2;
  2073 :}
  2074 BT/S disp {:
  2075     COUNT_INST(I_BTS);
  2076     if( sh4_x86.in_delay_slot ) {
  2077 	SLOTILLEGAL();
  2078     } else {
  2079 	sh4_x86.in_delay_slot = DELAY_PC;
  2080 	if( UNTRANSLATABLE(pc+2) ) {
  2081 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2082 	    JF_label(nottaken);
  2083 	    ADDL_imms_r32( disp, REG_EAX );
  2084 	    JMP_TARGET(nottaken);
  2085 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2086 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2087 	    exit_block_emu(pc+2);
  2088 	    sh4_x86.branch_taken = TRUE;
  2089 	    return 2;
  2090 	} else {
  2091 		LOAD_t();
  2092 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2093 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  2095 	    int save_tstate = sh4_x86.tstate;
  2096 	    sh4_translate_instruction(pc+2);
  2097             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2098 	    exit_block_rel( disp + pc + 4, pc+4 );
  2099 	    // not taken
  2100 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2101 	    sh4_x86.tstate = save_tstate;
  2102 	    sh4_translate_instruction(pc+2);
  2103 	    return 4;
  2106 :}
  2107 JMP @Rn {:  
  2108     COUNT_INST(I_JMP);
  2109     if( sh4_x86.in_delay_slot ) {
  2110 	SLOTILLEGAL();
  2111     } else {
  2112 	load_reg( REG_ECX, Rn );
  2113 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2114 	sh4_x86.in_delay_slot = DELAY_PC;
  2115 	sh4_x86.branch_taken = TRUE;
  2116 	if( UNTRANSLATABLE(pc+2) ) {
  2117 	    exit_block_emu(pc+2);
  2118 	    return 2;
  2119 	} else {
  2120 	    sh4_translate_instruction(pc+2);
  2121 	    exit_block_newpcset(pc+4);
  2122 	    return 4;
  2125 :}
  2126 JSR @Rn {:  
  2127     COUNT_INST(I_JSR);
  2128     if( sh4_x86.in_delay_slot ) {
  2129 	SLOTILLEGAL();
  2130     } else {
  2131 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2132 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2133 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2134 	load_reg( REG_ECX, Rn );
  2135 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2136 	sh4_x86.in_delay_slot = DELAY_PC;
  2137 	sh4_x86.branch_taken = TRUE;
  2138 	sh4_x86.tstate = TSTATE_NONE;
  2139 	if( UNTRANSLATABLE(pc+2) ) {
  2140 	    exit_block_emu(pc+2);
  2141 	    return 2;
  2142 	} else {
  2143 	    sh4_translate_instruction(pc+2);
  2144 	    exit_block_newpcset(pc+4);
  2145 	    return 4;
  2148 :}
  2149 RTE {:  
  2150     COUNT_INST(I_RTE);
  2151     if( sh4_x86.in_delay_slot ) {
  2152 	SLOTILLEGAL();
  2153     } else {
  2154 	check_priv();
  2155 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2156 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2157 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2158 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2159 	sh4_x86.in_delay_slot = DELAY_PC;
  2160 	sh4_x86.fpuen_checked = FALSE;
  2161 	sh4_x86.tstate = TSTATE_NONE;
  2162 	sh4_x86.branch_taken = TRUE;
  2163     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2164 	if( UNTRANSLATABLE(pc+2) ) {
  2165 	    exit_block_emu(pc+2);
  2166 	    return 2;
  2167 	} else {
  2168 	    sh4_translate_instruction(pc+2);
  2169 	    exit_block_newpcset(pc+4);
  2170 	    return 4;
  2173 :}
  2174 RTS {:  
  2175     COUNT_INST(I_RTS);
  2176     if( sh4_x86.in_delay_slot ) {
  2177 	SLOTILLEGAL();
  2178     } else {
  2179 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2180 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2181 	sh4_x86.in_delay_slot = DELAY_PC;
  2182 	sh4_x86.branch_taken = TRUE;
  2183 	if( UNTRANSLATABLE(pc+2) ) {
  2184 	    exit_block_emu(pc+2);
  2185 	    return 2;
  2186 	} else {
  2187 	    sh4_translate_instruction(pc+2);
  2188 	    exit_block_newpcset(pc+4);
  2189 	    return 4;
  2192 :}
  2193 TRAPA #imm {:  
  2194     COUNT_INST(I_TRAPA);
  2195     if( sh4_x86.in_delay_slot ) {
  2196 	SLOTILLEGAL();
  2197     } else {
  2198 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2199 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2200 	MOVL_imm32_r32( imm, REG_EAX );
  2201 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2202 	sh4_x86.tstate = TSTATE_NONE;
  2203 	exit_block_pcset(pc+2);
  2204 	sh4_x86.branch_taken = TRUE;
  2205 	return 2;
  2207 :}
  2208 UNDEF {:  
  2209     COUNT_INST(I_UNDEF);
  2210     if( sh4_x86.in_delay_slot ) {
  2211 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
  2212     } else {
  2213 	exit_block_exc(EXC_ILLEGAL, pc, 2);    
  2214 	return 2;
  2216 :}
/* CLRMAC: clear both halves of the MAC register. */
CLRMAC {:  
    COUNT_INST(I_CLRMAC);
    XORL_r32_r32(REG_EAX, REG_EAX);
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* CLRS: clear the S flag (stored via the x86 carry + SETcc trick). */
CLRS {:
    COUNT_INST(I_CLRS);
    CLC();
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* CLRT: clear the T flag; x86 carry now mirrors T. */
CLRT {:  
    COUNT_INST(I_CLRT);
    CLC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
/* SETS: set the S flag. */
SETS {:  
    COUNT_INST(I_SETS);
    STC();
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
    sh4_x86.tstate = TSTATE_NONE;
:}
/* SETT: set the T flag; x86 carry now mirrors T. */
SETT {:  
    COUNT_INST(I_SETT);
    STC();
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
  2250 /* Floating point moves */
  2251 FMOV FRm, FRn {:  
  2252     COUNT_INST(I_FMOV1);
  2253     check_fpuen();
  2254     if( sh4_x86.double_size ) {
  2255         load_dr0( REG_EAX, FRm );
  2256         load_dr1( REG_ECX, FRm );
  2257         store_dr0( REG_EAX, FRn );
  2258         store_dr1( REG_ECX, FRn );
  2259     } else {
  2260         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2261         store_fr( REG_EAX, FRn );
  2263 :}
  2264 FMOV FRm, @Rn {: 
  2265     COUNT_INST(I_FMOV2);
  2266     check_fpuen();
  2267     load_reg( REG_EAX, Rn );
  2268     if( sh4_x86.double_size ) {
  2269         check_walign64( REG_EAX );
  2270         load_dr0( REG_EDX, FRm );
  2271         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2272         load_reg( REG_EAX, Rn );
  2273         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2274         load_dr1( REG_EDX, FRm );
  2275         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2276     } else {
  2277         check_walign32( REG_EAX );
  2278         load_fr( REG_EDX, FRm );
  2279         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2281     sh4_x86.tstate = TSTATE_NONE;
  2282 :}
  2283 FMOV @Rm, FRn {:  
  2284     COUNT_INST(I_FMOV5);
  2285     check_fpuen();
  2286     load_reg( REG_EAX, Rm );
  2287     if( sh4_x86.double_size ) {
  2288         check_ralign64( REG_EAX );
  2289         MEM_READ_LONG( REG_EAX, REG_EAX );
  2290         store_dr0( REG_EAX, FRn );
  2291         load_reg( REG_EAX, Rm );
  2292         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2293         MEM_READ_LONG( REG_EAX, REG_EAX );
  2294         store_dr1( REG_EAX, FRn );
  2295     } else {
  2296         check_ralign32( REG_EAX );
  2297         MEM_READ_LONG( REG_EAX, REG_EAX );
  2298         store_fr( REG_EAX, FRn );
  2300     sh4_x86.tstate = TSTATE_NONE;
  2301 :}
  2302 FMOV FRm, @-Rn {:  
  2303     COUNT_INST(I_FMOV3);
  2304     check_fpuen();
  2305     load_reg( REG_EAX, Rn );
  2306     if( sh4_x86.double_size ) {
  2307         check_walign64( REG_EAX );
  2308         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2309         load_dr0( REG_EDX, FRm );
  2310         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2311         load_reg( REG_EAX, Rn );
  2312         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2313         load_dr1( REG_EDX, FRm );
  2314         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2315         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2316     } else {
  2317         check_walign32( REG_EAX );
  2318         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2319         load_fr( REG_EDX, FRm );
  2320         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2321         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2323     sh4_x86.tstate = TSTATE_NONE;
  2324 :}
  2325 FMOV @Rm+, FRn {:
  2326     COUNT_INST(I_FMOV6);
  2327     check_fpuen();
  2328     load_reg( REG_EAX, Rm );
  2329     if( sh4_x86.double_size ) {
  2330         check_ralign64( REG_EAX );
  2331         MEM_READ_LONG( REG_EAX, REG_EAX );
  2332         store_dr0( REG_EAX, FRn );
  2333         load_reg( REG_EAX, Rm );
  2334         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2335         MEM_READ_LONG( REG_EAX, REG_EAX );
  2336         store_dr1( REG_EAX, FRn );
  2337         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2338     } else {
  2339         check_ralign32( REG_EAX );
  2340         MEM_READ_LONG( REG_EAX, REG_EAX );
  2341         store_fr( REG_EAX, FRn );
  2342         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2344     sh4_x86.tstate = TSTATE_NONE;
  2345 :}
  2346 FMOV FRm, @(R0, Rn) {:  
  2347     COUNT_INST(I_FMOV4);
  2348     check_fpuen();
  2349     load_reg( REG_EAX, Rn );
  2350     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2351     if( sh4_x86.double_size ) {
  2352         check_walign64( REG_EAX );
  2353         load_dr0( REG_EDX, FRm );
  2354         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2355         load_reg( REG_EAX, Rn );
  2356         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2357         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2358         load_dr1( REG_EDX, FRm );
  2359         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2360     } else {
  2361         check_walign32( REG_EAX );
  2362         load_fr( REG_EDX, FRm );
  2363         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2365     sh4_x86.tstate = TSTATE_NONE;
  2366 :}
  2367 FMOV @(R0, Rm), FRn {:  
  2368     COUNT_INST(I_FMOV7);
  2369     check_fpuen();
  2370     load_reg( REG_EAX, Rm );
  2371     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2372     if( sh4_x86.double_size ) {
  2373         check_ralign64( REG_EAX );
  2374         MEM_READ_LONG( REG_EAX, REG_EAX );
  2375         store_dr0( REG_EAX, FRn );
  2376         load_reg( REG_EAX, Rm );
  2377         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2378         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2379         MEM_READ_LONG( REG_EAX, REG_EAX );
  2380         store_dr1( REG_EAX, FRn );
  2381     } else {
  2382         check_ralign32( REG_EAX );
  2383         MEM_READ_LONG( REG_EAX, REG_EAX );
  2384         store_fr( REG_EAX, FRn );
  2386     sh4_x86.tstate = TSTATE_NONE;
  2387 :}
  2388 FLDI0 FRn {:  /* IFF PR=0 */
  2389     COUNT_INST(I_FLDI0);
  2390     check_fpuen();
  2391     if( sh4_x86.double_prec == 0 ) {
  2392         XORL_r32_r32( REG_EAX, REG_EAX );
  2393         store_fr( REG_EAX, FRn );
  2395     sh4_x86.tstate = TSTATE_NONE;
  2396 :}
  2397 FLDI1 FRn {:  /* IFF PR=0 */
  2398     COUNT_INST(I_FLDI1);
  2399     check_fpuen();
  2400     if( sh4_x86.double_prec == 0 ) {
  2401         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2402         store_fr( REG_EAX, FRn );
  2404 :}
  2406 FLOAT FPUL, FRn {:  
  2407     COUNT_INST(I_FLOAT);
  2408     check_fpuen();
  2409     FILD_rbpdisp(R_FPUL);
  2410     if( sh4_x86.double_prec ) {
  2411         pop_dr( FRn );
  2412     } else {
  2413         pop_fr( FRn );
  2415 :}
  2416 FTRC FRm, FPUL {:  
  2417     COUNT_INST(I_FTRC);
  2418     check_fpuen();
  2419     if( sh4_x86.double_prec ) {
  2420         push_dr( FRm );
  2421     } else {
  2422         push_fr( FRm );
  2424     MOVP_immptr_rptr( &min_int, REG_ECX );
  2425     FILD_r32disp( REG_ECX, 0 );
  2426     FCOMIP_st(1);              
  2427     JAE_label( sat );     
  2428     JP_label( sat2 );       
  2429     MOVP_immptr_rptr( &max_int, REG_ECX );
  2430     FILD_r32disp( REG_ECX, 0 );
  2431     FCOMIP_st(1);
  2432     JNA_label( sat3 );
  2433     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2434     FNSTCW_r32disp( REG_EAX, 0 );
  2435     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2436     FLDCW_r32disp( REG_EDX, 0 );
  2437     FISTP_rbpdisp(R_FPUL);             
  2438     FLDCW_r32disp( REG_EAX, 0 );
  2439     JMP_label(end);             
  2441     JMP_TARGET(sat);
  2442     JMP_TARGET(sat2);
  2443     JMP_TARGET(sat3);
  2444     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2445     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2446     FPOP_st();
  2447     JMP_TARGET(end);
  2448     sh4_x86.tstate = TSTATE_NONE;
  2449 :}
/* FLDS FRm, FPUL: copy FRm's raw bits into FPUL. */
FLDS FRm, FPUL {:  
    COUNT_INST(I_FLDS);
    check_fpuen();
    load_fr( REG_EAX, FRm );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
:}
/* FSTS FPUL, FRn: copy FPUL's raw bits into FRn. */
FSTS FPUL, FRn {:  
    COUNT_INST(I_FSTS);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
    store_fr( REG_EAX, FRn );
:}
  2462 FCNVDS FRm, FPUL {:  
  2463     COUNT_INST(I_FCNVDS);
  2464     check_fpuen();
  2465     if( sh4_x86.double_prec ) {
  2466         push_dr( FRm );
  2467         pop_fpul();
  2469 :}
  2470 FCNVSD FPUL, FRn {:  
  2471     COUNT_INST(I_FCNVSD);
  2472     check_fpuen();
  2473     if( sh4_x86.double_prec ) {
  2474         push_fpul();
  2475         pop_dr( FRn );
  2477 :}
  2479 /* Floating point instructions */
  2480 FABS FRn {:  
  2481     COUNT_INST(I_FABS);
  2482     check_fpuen();
  2483     if( sh4_x86.double_prec ) {
  2484         push_dr(FRn);
  2485         FABS_st0();
  2486         pop_dr(FRn);
  2487     } else {
  2488         push_fr(FRn);
  2489         FABS_st0();
  2490         pop_fr(FRn);
  2492 :}
  2493 FADD FRm, FRn {:  
  2494     COUNT_INST(I_FADD);
  2495     check_fpuen();
  2496     if( sh4_x86.double_prec ) {
  2497         push_dr(FRm);
  2498         push_dr(FRn);
  2499         FADDP_st(1);
  2500         pop_dr(FRn);
  2501     } else {
  2502         push_fr(FRm);
  2503         push_fr(FRn);
  2504         FADDP_st(1);
  2505         pop_fr(FRn);
  2507 :}
  2508 FDIV FRm, FRn {:  
  2509     COUNT_INST(I_FDIV);
  2510     check_fpuen();
  2511     if( sh4_x86.double_prec ) {
  2512         push_dr(FRn);
  2513         push_dr(FRm);
  2514         FDIVP_st(1);
  2515         pop_dr(FRn);
  2516     } else {
  2517         push_fr(FRn);
  2518         push_fr(FRm);
  2519         FDIVP_st(1);
  2520         pop_fr(FRn);
  2522 :}
  2523 FMAC FR0, FRm, FRn {:  
  2524     COUNT_INST(I_FMAC);
  2525     check_fpuen();
  2526     if( sh4_x86.double_prec ) {
  2527         push_dr( 0 );
  2528         push_dr( FRm );
  2529         FMULP_st(1);
  2530         push_dr( FRn );
  2531         FADDP_st(1);
  2532         pop_dr( FRn );
  2533     } else {
  2534         push_fr( 0 );
  2535         push_fr( FRm );
  2536         FMULP_st(1);
  2537         push_fr( FRn );
  2538         FADDP_st(1);
  2539         pop_fr( FRn );
  2541 :}
  2543 FMUL FRm, FRn {:  
  2544     COUNT_INST(I_FMUL);
  2545     check_fpuen();
  2546     if( sh4_x86.double_prec ) {
  2547         push_dr(FRm);
  2548         push_dr(FRn);
  2549         FMULP_st(1);
  2550         pop_dr(FRn);
  2551     } else {
  2552         push_fr(FRm);
  2553         push_fr(FRn);
  2554         FMULP_st(1);
  2555         pop_fr(FRn);
  2557 :}
  2558 FNEG FRn {:  
  2559     COUNT_INST(I_FNEG);
  2560     check_fpuen();
  2561     if( sh4_x86.double_prec ) {
  2562         push_dr(FRn);
  2563         FCHS_st0();
  2564         pop_dr(FRn);
  2565     } else {
  2566         push_fr(FRn);
  2567         FCHS_st0();
  2568         pop_fr(FRn);
  2570 :}
  2571 FSRRA FRn {:  
  2572     COUNT_INST(I_FSRRA);
  2573     check_fpuen();
  2574     if( sh4_x86.double_prec == 0 ) {
  2575         FLD1_st0();
  2576         push_fr(FRn);
  2577         FSQRT_st0();
  2578         FDIVP_st(1);
  2579         pop_fr(FRn);
  2581 :}
  2582 FSQRT FRn {:  
  2583     COUNT_INST(I_FSQRT);
  2584     check_fpuen();
  2585     if( sh4_x86.double_prec ) {
  2586         push_dr(FRn);
  2587         FSQRT_st0();
  2588         pop_dr(FRn);
  2589     } else {
  2590         push_fr(FRn);
  2591         FSQRT_st0();
  2592         pop_fr(FRn);
  2594 :}
  2595 FSUB FRm, FRn {:  
  2596     COUNT_INST(I_FSUB);
  2597     check_fpuen();
  2598     if( sh4_x86.double_prec ) {
  2599         push_dr(FRn);
  2600         push_dr(FRm);
  2601         FSUBP_st(1);
  2602         pop_dr(FRn);
  2603     } else {
  2604         push_fr(FRn);
  2605         push_fr(FRm);
  2606         FSUBP_st(1);
  2607         pop_fr(FRn);
  2609 :}
  2611 FCMP/EQ FRm, FRn {:  
  2612     COUNT_INST(I_FCMPEQ);
  2613     check_fpuen();
  2614     if( sh4_x86.double_prec ) {
  2615         push_dr(FRm);
  2616         push_dr(FRn);
  2617     } else {
  2618         push_fr(FRm);
  2619         push_fr(FRn);
  2621     XORL_r32_r32(REG_EAX, REG_EAX);
  2622     XORL_r32_r32(REG_EDX, REG_EDX);
  2623     FCOMIP_st(1);
  2624     SETCCB_cc_r8(X86_COND_NP, REG_DL);
  2625     CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
  2626     MOVL_r32_rbpdisp(REG_EAX, R_T);
  2627     FPOP_st();
  2628     sh4_x86.tstate = TSTATE_NONE;
  2629 :}
  2630 FCMP/GT FRm, FRn {:  
  2631     COUNT_INST(I_FCMPGT);
  2632     check_fpuen();
  2633     if( sh4_x86.double_prec ) {
  2634         push_dr(FRm);
  2635         push_dr(FRn);
  2636     } else {
  2637         push_fr(FRm);
  2638         push_fr(FRn);
  2640     FCOMIP_st(1);
  2641     SETA_t();
  2642     FPOP_st();
  2643     sh4_x86.tstate = TSTATE_A;
  2644 :}
  2646 FSCA FPUL, FRn {:  
  2647     COUNT_INST(I_FSCA);
  2648     check_fpuen();
  2649     if( sh4_x86.double_prec == 0 ) {
  2650         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2651         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2652         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2654     sh4_x86.tstate = TSTATE_NONE;
  2655 :}
  2656 FIPR FVm, FVn {:  
  2657     COUNT_INST(I_FIPR);
  2658     check_fpuen();
  2659     if( sh4_x86.double_prec == 0 ) {
  2660         if( sh4_x86.sse3_enabled ) {
  2661             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2662             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2663             HADDPS_xmm_xmm( 4, 4 ); 
  2664             HADDPS_xmm_xmm( 4, 4 );
  2665             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2666         } else {
  2667             push_fr( FVm<<2 );
  2668             push_fr( FVn<<2 );
  2669             FMULP_st(1);
  2670             push_fr( (FVm<<2)+1);
  2671             push_fr( (FVn<<2)+1);
  2672             FMULP_st(1);
  2673             FADDP_st(1);
  2674             push_fr( (FVm<<2)+2);
  2675             push_fr( (FVn<<2)+2);
  2676             FMULP_st(1);
  2677             FADDP_st(1);
  2678             push_fr( (FVm<<2)+3);
  2679             push_fr( (FVn<<2)+3);
  2680             FMULP_st(1);
  2681             FADDP_st(1);
  2682             pop_fr( (FVn<<2)+3);
  2685 :}
FTRV XMTRX, FVn {:  
    /* Transform vector FVn by the back-bank matrix XMTRX (fr[1]):
     * FVn = XMTRX * FVn. Single-precision only. The SSE3 path is inlined;
     * otherwise (or in shadow mode) it falls back to the sh4_ftrv helper. */
    COUNT_INST(I_FTRV);
    check_fpuen();
    if( sh4_x86.double_prec == 0 ) {
        if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
        	/* FIXME: For now, disable this inlining when we're running in shadow mode -
        	 * it gives slightly different results from the emu core. Need to
        	 * fix the precision so both give the right results.
        	 */
            /* Load the four matrix rows; fr[] stores pairs word-swapped,
             * hence the M1 M0 M3 M2 ordering noted per row. */
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14

            /* Broadcast each vector element across a register ... */
            MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
            MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
            MOV_xmm_xmm( 4, 6 );
            MOV_xmm_xmm( 5, 7 );
            MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
            MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
            MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
            MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
            /* ... multiply by the matching rows and sum the partial products. */
            MULPS_xmm_xmm( 0, 4 );
            MULPS_xmm_xmm( 1, 5 );
            MULPS_xmm_xmm( 2, 6 );
            MULPS_xmm_xmm( 3, 7 );
            ADDPS_xmm_xmm( 5, 4 );
            ADDPS_xmm_xmm( 7, 6 );
            ADDPS_xmm_xmm( 6, 4 );
            MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
        } else {
            /* Fallback: pass the vector's address to the C helper. */
            LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
            CALL1_ptr_r32( sh4_ftrv, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
FRCHG {:  
    /* Toggle FPSCR.FR (floating-point register bank select) and swap the
     * front/back fr banks in sh4r via the sh4_switch_fr_banks helper. */
    COUNT_INST(I_FRCHG);
    check_fpuen();
    XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
    CALL_ptr( sh4_switch_fr_banks );
    sh4_x86.tstate = TSTATE_NONE;
:}
FSCHG {:  
    /* Toggle FPSCR.SZ (single/double transfer size). Unlike most FPSCR
     * changes this is tracked statically: both the runtime copies and the
     * translator's own double_size/sh4_mode state are flipped in step, so
     * translation can continue without ending the block. */
    COUNT_INST(I_FSCHG);
    check_fpuen();
    XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
    XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_size = !sh4_x86.double_size;
    sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
:}
  2741 /* Processor control instructions */
LDC Rm, SR {:
    /* Load SR from Rm. Privileged, and illegal in a delay slot. Writing SR
     * goes through sh4_write_sr (it can switch register banks / interrupt
     * masks), after which the translator's cached mode is unknown, so the
     * block is terminated here (return 2 = instruction size consumed). */
    COUNT_INST(I_LDCSR);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	check_priv();
	load_reg( REG_EAX, Rm );
	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
	/* SR.FD may have changed — force the FPU-enable check to re-run. */
	sh4_x86.fpuen_checked = FALSE;
	sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
	return 2;
:}
LDC Rm, GBR {: 
    /* GBR <- Rm. Unprivileged (no check_priv), and the plain reg-to-reg
     * move leaves the x86 flags — and hence the cached tstate — intact. */
    COUNT_INST(I_LDC);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
:}
/* LDC Rm, <ctrl-reg> family: privileged moves from Rm into VBR/SSR/SGR/
 * SPC/DBR/Rn_BANK. check_priv() guards supervisor-only access (and may
 * branch), so the cached tstate is invalidated in every case. */
LDC Rm, VBR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SSR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SGR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, SPC {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, DBR {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC Rm, Rn_BANK {:  
    COUNT_INST(I_LDC);
    check_priv();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, GBR {:  
    /* GBR <- mem[Rm], Rm += 4. Unprivileged. The increment is applied to
     * the in-memory r[Rm] only after the (possibly faulting) read, so Rm
     * is unmodified if the load raises an exception. */
    COUNT_INST(I_LDCM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SR {:
    /* SR <- mem[Rm], Rm += 4. Privileged and illegal in a delay slot.
     * As with LDC Rm,SR the write goes through sh4_write_sr and ends the
     * translation block, since SR changes invalidate the cached mode. */
    COUNT_INST(I_LDCSRM);
    if( sh4_x86.in_delay_slot ) {
	SLOTILLEGAL();
    } else {
	check_priv();
	load_reg( REG_EAX, Rm );
	check_ralign32( REG_EAX );
	MEM_READ_LONG( REG_EAX, REG_EAX );
	/* Post-increment only after the read has succeeded. */
	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
	/* SR.FD may have changed — force the FPU-enable check to re-run. */
	sh4_x86.fpuen_checked = FALSE;
	sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
	return 2;
:}
/* LDC.L @Rm+, <ctrl-reg> family: privileged post-increment loads into
 * VBR/SSR/SGR/SPC/DBR/Rn_BANK. Pattern: check privilege, alignment-check
 * the address, read the long, then bump r[Rm] by 4 (only after a
 * successful read) and store the value to the control register. */
LDC.L @Rm+, VBR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SSR {:
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SSR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SGR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SGR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, SPC {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_SPC );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, DBR {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_DBR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDC.L @Rm+, Rn_BANK {:  
    COUNT_INST(I_LDCM);
    check_priv();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, FPSCR {:
    /* FPSCR <- Rm via sh4_write_fpscr (which applies bank/size/precision
     * side effects). The translator's cached FP mode is now stale, so the
     * block is terminated (return 2 = instruction size consumed). */
    COUNT_INST(I_LDSFPSCR);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
LDS.L @Rm+, FPSCR {:  
    /* FPSCR <- mem[Rm], Rm += 4, via sh4_write_fpscr. Same block-ending
     * behaviour as LDS Rm, FPSCR: the cached FP mode is unknown afterwards.
     * r[Rm] is only incremented after the (possibly faulting) read. */
    COUNT_INST(I_LDSFPSCRM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
    return 2;
:}
/* LDS / LDS.L family for FPUL, MACH, MACL and PR. Register form is a plain
 * move (flags — and hence tstate — untouched); the @Rm+ form reads a long
 * with alignment check and post-increments r[Rm] after a successful read.
 * Only the FPUL variants require the FPU-enabled check. */
LDS Rm, FPUL {:  
    COUNT_INST(I_LDS);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
:}
LDS.L @Rm+, FPUL {:  
    COUNT_INST(I_LDSM);
    check_fpuen();
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACH {: 
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
:}
LDS.L @Rm+, MACH {:  
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, MACL {:  
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
:}
LDS.L @Rm+, MACL {:  
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDS Rm, PR {:  
    COUNT_INST(I_LDS);
    load_reg( REG_EAX, Rm );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
:}
LDS.L @Rm+, PR {:  
    COUNT_INST(I_LDSM);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    MOVL_r32_rbpdisp( REG_EAX, R_PR );
    sh4_x86.tstate = TSTATE_NONE;
:}
LDTLB {:  
    /* Load a UTLB entry from PTEH/PTEL/PTEA — delegated entirely to the
     * MMU_ldtlb helper in the MMU module. */
    COUNT_INST(I_LDTLB);
    CALL_ptr( MMU_ldtlb );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Cache-block operations: no code is emitted for these beyond instruction
 * counting — the translated core has no operand cache to invalidate,
 * purge, or write back. */
OCBI @Rn {:
    COUNT_INST(I_OCBI);
:}
OCBP @Rn {:
    COUNT_INST(I_OCBP);
:}
OCBWB @Rn {:
    COUNT_INST(I_OCBWB);
:}
PREF @Rn {:
    /* Prefetch hint for the address in Rn. MEM_PREFETCH handles the store
     * queue / memory-region specific behaviour. */
    COUNT_INST(I_PREF);
    load_reg( REG_EAX, Rn );
    MEM_PREFETCH( REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
:}
SLEEP {: 
    /* Privileged: put the CPU into standby via the sh4_sleep helper and
     * terminate the translation block (return 2). Any pending delay-slot
     * state is cleared since execution stops here. */
    COUNT_INST(I_SLEEP);
    check_priv();
    CALL_ptr( sh4_sleep );
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.in_delay_slot = DELAY_NONE;
    return 2;
:}
STC SR, Rn {:
    /* Rn <- SR. Privileged. SR is assembled from its component fields by
     * the sh4_read_sr helper rather than read from a single slot. */
    COUNT_INST(I_STCSR);
    check_priv();
    CALL_ptr(sh4_read_sr);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC <ctrl-reg>, Rn family: copy a control register into Rn. GBR is the
 * only unprivileged source (no check_priv, and the plain move preserves
 * flags so tstate stays valid there). */
STC GBR, Rn {:  
    COUNT_INST(I_STC);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STC VBR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_VBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SSR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SSR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SPC, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SPC, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC SGR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_SGR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC DBR, Rn {:  
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( R_DBR, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC Rm_BANK, Rn {:
    COUNT_INST(I_STC);
    check_priv();
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SR, @-Rn {:
    /* mem[Rn-4] <- SR, Rn -= 4. Privileged. SR is composed via sh4_read_sr
     * first (into EDX) because the helper call would clobber a previously
     * loaded address; r[Rn] is only decremented after the write succeeds. */
    COUNT_INST(I_STCSRM);
    check_priv();
    CALL_ptr( sh4_read_sr );
    MOVL_r32_r32( REG_EAX, REG_EDX );
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    /* LEA computes Rn-4 without disturbing flags set so far. */
    LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STC.L <ctrl-reg>, @-Rn family: pre-decrement stores of control registers.
 * Pattern: alignment-check Rn, compute Rn-4 in EAX, load the source into
 * EDX, write, then decrement the in-memory r[Rn] — the decrement happens
 * only after a successful write, so Rn is preserved across a fault.
 * GBR is the only unprivileged variant (no check_priv). */
STC.L VBR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_VBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SSR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SSR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SPC, @-Rn {:
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SPC, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L SGR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_SGR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L DBR, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_DBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L Rm_BANK, @-Rn {:  
    COUNT_INST(I_STCM);
    check_priv();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STC.L GBR, @-Rn {:  
    COUNT_INST(I_STCM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_GBR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* STS / STS.L family for FPSCR, FPUL, MACH, MACL and PR. Register form is
 * a plain copy into Rn; the @-Rn form is a pre-decrement store following
 * the same fault-safe pattern as STC.L (r[Rn] decremented only after the
 * write). Only the FP system registers require check_fpuen(). */
STS FPSCR, Rn {:  
    COUNT_INST(I_STSFPSCR);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPSCR, @-Rn {:  
    COUNT_INST(I_STSFPSCRM);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS FPUL, Rn {:  
    COUNT_INST(I_STS);
    check_fpuen();
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L FPUL, @-Rn {:  
    COUNT_INST(I_STSM);
    check_fpuen();
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACH, Rn {:  
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACH, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACH, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACH, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS MACL, Rn {:  
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_MACL, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L MACL, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_MACL, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
STS PR, Rn {:  
    COUNT_INST(I_STS);
    MOVL_rbpdisp_r32( R_PR, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
STS.L PR, @-Rn {:  
    COUNT_INST(I_STSM);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    ADDL_imms_r32( -4, REG_EAX );
    MOVL_rbpdisp_r32( R_PR, REG_EDX );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
NOP {: 
    COUNT_INST(I_NOP);
    /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
:}
  3222 %%
  3223     sh4_x86.in_delay_slot = DELAY_NONE;
  3224     return 0;
  3228 /**
  3229  * The unwind methods only work if we compiled with DWARF2 frame information
  3230  * (ie -fexceptions), otherwise we have to use the direct frame scan.
  3231  */
  3232 #ifdef HAVE_EXCEPTIONS
  3233 #include <unwind.h>
/* Argument record for xlat_check_frame: the [block_start, block_end)
 * native-code range being searched, and the PC found within it (output,
 * left unchanged if no frame falls inside the range). */
struct UnwindInfo {
    uintptr_t block_start;   /* first native address of the xlat block */
    uintptr_t block_end;     /* one past the last native address */
    void *pc;                /* out: PC inside the block, if found */
};
/* _Unwind_Backtrace callback: stop the walk (and record the PC) at the
 * first frame whose instruction pointer lies inside the translated block
 * described by arg (a struct UnwindInfo *). */
static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
    struct UnwindInfo *info = arg;
    void *pc = (void *)_Unwind_GetIP(context);
    if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
        info->pc = pc;
        return _URC_NORMAL_STOP;
    return _URC_NO_REASON;
/* Find the current native PC within the given translated code block by
 * walking the call stack with the DWARF unwinder. Returns NULL if no
 * frame inside [code, code+code_size) is found. */
void *xlat_get_native_pc( void *code, uint32_t code_size )
    struct _Unwind_Exception exc;   /* NOTE(review): unused — candidate for removal */
    struct UnwindInfo info;

    info.pc = NULL;
    info.block_start = (uintptr_t)code;
    info.block_end = info.block_start + code_size;
    void *result = NULL;            /* NOTE(review): unused — info.pc is returned instead */
    _Unwind_Backtrace( xlat_check_frame, &info );
    return info.pc;
  3264 #else
  3265 /* Assume this is an ia32 build - amd64 should always have dwarf information */
/* Fallback for builds without unwind info: scan up to 8 stack frames by
 * following the saved-EBP chain, looking for the frame whose saved frame
 * pointer equals &sh4r + 128 (the fixed EBP the translator runs with —
 * see REG_OFFSET at the top of this file). If found, return that frame's
 * return address (the native PC inside the translated block); otherwise
 * return NULL. code/code_size are unused on this path. */
void *xlat_get_native_pc( void *code, uint32_t code_size )
    void *result = NULL;
    __asm__(
        "mov %%ebp, %%eax\n\t"          /* eax = current frame pointer */
        "mov $0x8, %%ecx\n\t"           /* limit the walk to 8 frames */
        "mov %1, %%edx\n"               /* edx = &sh4r + 128 (translator's EBP) */
        "frame_loop: test %%eax, %%eax\n\t"
        "je frame_not_found\n\t"        /* end of chain */
        "cmp (%%eax), %%edx\n\t"        /* saved EBP == translator EBP? */
        "je frame_found\n\t"
        "sub $0x1, %%ecx\n\t"
        "je frame_not_found\n\t"        /* frame budget exhausted */
        "movl (%%eax), %%eax\n\t"       /* step to caller's frame */
        "jmp frame_loop\n"
        "frame_found: movl 0x4(%%eax), %0\n" /* result = return address */
        "frame_not_found:"
        : "=r" (result)
        : "r" (((uint8_t *)&sh4r) + 128 )
        : "eax", "ecx", "edx" );
    return result;
.