lxdream.org :: lxdream/src/sh4/sh4x86.in
filename     src/sh4/sh4x86.in
changeset    1263:b3de98d19faf
prev         1218:be02e87f9f87
next         1292:799fdd4f704a
author       nkeynes
date         Tue Mar 06 12:42:33 2012 +1000
permissions  -rw-r--r--
last change  Merge ARM disassembler from binutils 2.22
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "xlat/xlatdasm.h"
#include "clock.h"
#define DEFAULT_BACKPATCH_SIZE 4096

/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
#define R_MACH   REG_OFFSET(mac)+4
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
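/* Illustrative sketch (added for exposition, not part of the original file):
 * REG_OFFSET is biased by -128 on the assumption that the generated code
 * keeps the host rbp register pointing 128 bytes into sh4r, so the hottest
 * fields land within a signed 8-bit displacement. The standalone program
 * below mirrors the arithmetic with a hypothetical struct layout; the field
 * names here are demo assumptions only. */
#include <assert.h>

struct demo_sh4r { int r[16]; int pc; int new_pc; int sr; int t; };
static struct demo_sh4r demo_sh4r_inst;
#define DEMO_REG_OFFSET(f) \
    (int)(((char *)&demo_sh4r_inst.f) - ((char *)&demo_sh4r_inst) - 128)

static void demo_reg_offset(void)
{
    /* r[0] sits at the start of the struct, so its biased offset is -128,
     * the most negative value still encodable as an 8-bit displacement. */
    assert( DEMO_REG_OFFSET(r[0]) == -128 );
    assert( DEMO_REG_OFFSET(r[15]) == -128 + 15*4 );
}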
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

#define SH4_MODE_UNKNOWN -1

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    uint8_t *code;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;
    struct mem_region_fn **user_address_space;

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback;
    xlat_block_end_callback_t end_callback;
    gboolean fastmem;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

static void sh4_x86_translate_unlink_block( void *use_list );

static struct xlat_target_fns x86_target_fns = {
    sh4_x86_translate_unlink_block
};
gboolean is_sse3_supported()
{
    uint32_t features;

    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
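/* For exposition only: the inline-asm CPUID test above checks ECX bit 0 of
 * leaf 1, which is the SSE3 feature flag. On GCC-style compilers the same
 * query can be written with the <cpuid.h> helpers - shown here as an
 * assumed-equivalent sketch, not as code the file actually uses. */
#include <cpuid.h>

static int demo_is_sse3_supported(void)
{
    unsigned int eax, ebx, ecx, edx;
    if( !__get_cpuid(1, &eax, &ebx, &ecx, &edx) )
        return 0;
    return (ecx & bit_SSE3) != 0;   /* bit_SSE3 == (1 << 0) */
}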
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
{
    sh4_x86.priv_address_space = priv;
    sh4_x86.user_address_space = user;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.begin_callback = NULL;
    sh4_x86.end_callback = NULL;
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
    sh4_x86.fastmem = TRUE;
    sh4_x86.sse3_enabled = is_sse3_supported();
    xlat_set_target_fns(&x86_target_fns);
}

void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.begin_callback = begin;
    sh4_x86.end_callback = end;
}

void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}
#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
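/* For exposition: MARK_JMP8/JMP_TARGET emit a jump with a placeholder rel8
 * byte (-1) and patch it once the target address is known. A minimal model
 * of that fix-up over a plain byte buffer (assumed encoding: EB = JMP rel8,
 * 90 = NOP):
 */
#include <assert.h>
#include <stdint.h>

static void demo_jmp8_backpatch(void)
{
    uint8_t buf[8];
    uint8_t *out = buf;
    *out++ = 0xEB;                       /* JMP rel8 */
    *out++ = (uint8_t)-1;                /* placeholder displacement */
    uint8_t *mark = out - 1;             /* MARK_JMP8(label) */
    *out++ = 0x90;                       /* code to be skipped */
    *out++ = 0x90;
    *mark += (uint8_t)(out - mark);      /* JMP_TARGET(label) */
    /* rel8 is measured from the end of the JMP: -1 + 3 == 2 bytes skipped */
    assert( buf[1] == 2 );
}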
/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
    CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) LOAD_t() \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
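/* For exposition: sh4_x86.tstate caches which host condition code currently
 * mirrors SH4's T bit, so a conditional branch emitted right after a compare
 * can reuse the live EFLAGS instead of re-testing sh4r.t. A plain-C model of
 * that caching, with the emitter calls replaced by logging stubs:
 */
#include <stdio.h>

static int demo_tstate = -1;                 /* TSTATE_NONE */

static void demo_emit_cmp_t(void) { puts("CMPL $1, sh4r.t"); }
static void demo_emit_jcc(int cc) { printf("Jcc %d\n", cc); }

static void demo_JT(void)
{
    if( demo_tstate == -1 ) {                /* LOAD_t() */
        demo_emit_cmp_t();
        demo_tstate = 4;                     /* pretend X86_COND_E == 4 */
    }
    demo_emit_jcc(demo_tstate);
}

static void demo_tstate_usage(void)
{
    demo_JT();   /* emits CMP + Jcc */
    demo_JT();   /* flags still valid: emits only the Jcc */
}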
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);
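/* For exposition: each check tests the low bits of the effective address -
 * an N-byte access is aligned iff (addr & (N-1)) == 0, otherwise the
 * backpatched branch raises the address-error exception. Plain-C equivalent
 * of the test, runnable stand-alone:
 */
#include <assert.h>
#include <stdint.h>

static int demo_aligned(uint32_t addr, uint32_t size)
{
    return (addr & (size - 1)) == 0;   /* size must be a power of two */
}

static void demo_alignment_checks(void)
{
    assert(  demo_aligned(0x8C001000, 4) );   /* ralign32 passes */
    assert( !demo_aligned(0x8C001002, 4) );   /* would raise EXC_DATA_ADDR_READ */
    assert(  demo_aligned(0x8C001002, 2) );   /* ralign16 passes */
}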
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
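/* For exposition: translated blocks are referenced by a pointer to their
 * code field, and these offsets reach back from that pointer into the
 * owning block header. A self-contained model with a hypothetical header
 * layout (the real xlat_cache_block has more fields):
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct demo_block_header {
    uint32_t active;
    uint32_t xlat_sh4_mode;
    void *chain;
    unsigned char code[4];      /* generated code follows the header */
};

static void demo_code_offsets(void)
{
    struct demo_block_header hdr = { 1, 0x700000F0u, 0, { 0 } };
    unsigned char *code = hdr.code;
    int32_t mode_off = (int32_t)(offsetof(struct demo_block_header, xlat_sh4_mode)
                                 - offsetof(struct demo_block_header, code));
    /* A negative displacement from the code pointer recovers the field. */
    assert( *(uint32_t *)(code + mode_off) == 0x700000F0u );
}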
void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_profile_blocks ) {
        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}
uint32_t sh4_translate_end_block_size()
{
    uint32_t epilogue_size = EPILOGUE_SIZE;
    if( sh4_x86.end_callback ) {
        epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
    }
    if( sh4_x86.backpatch_posn <= 3 ) {
        epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
    } else {
        epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
    }
    return epilogue_size;
}
/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updating xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this leaves the stack out of alignment, but that doesn't
         * matter for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode);
}
/**
 * Look up (translating if necessary) the target block for the given PC, then
 * rewrite the caller's call site into a direct branch to it and rewind the
 * return address so the freshly patched branch is executed immediately.
 */
void FASTCALL sh4_translate_link_block( uint32_t pc )
{
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
        target = XLAT_BLOCK_CHAIN(target);
    }
    if( target == NULL ) {
        target = sh4_translate_basic_block( pc );
    }
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
    *backpatch = 0xE9;
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch;

    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
    *retptr = backpatch;
}
static void emit_translate_and_backpatch()
{
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
    CALL1_ptr_r32(sh4_translate_link_block, REG_ARG1);

    /* When patched, the jmp instruction will be 5 bytes (either platform) -
     * we need to reserve sizeof(void*) bytes for the use-list pointer
     */
    if( sizeof(void*) == 8 ) {
        NOP();
    } else {
        NOP2();
    }
}
/**
 * If we're jumping to a fixed address (or at least one fixed relative to the
 * current PC), then we can do a direct branch. REG_ARG1 should contain
 * the PC at this point.
 */
static void jump_next_block_fixed_pc( sh4addr_t pc )
{
    if( IS_IN_ICACHE(pc) ) {
        if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
            /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
             * fetch-and-backpatch routine, which will replace the call with a branch */
            emit_translate_and_backpatch();
            return;
        } else {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        }
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
}
static void sh4_x86_translate_unlink_block( void *use_list )
{
    uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
    void *next = use_list;
    while( next != NULL ) {
        xlat_output = (uint8_t *)next;
        next = *(void **)(xlat_output+5);
        emit_translate_and_backpatch();
    }
    xlat_output = tmp;
}
static void exit_block()
{
    emit_epilogue();
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}
/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    jump_next_block_fixed_pc(pc);
    JMP_TARGET(exitloop);
    exit_block();
}
/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        jump_next_block_fixed_pc(pc);
        JMP_TARGET(exitloop2);
    }
    exit_block();
}
/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}
/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}
/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * if they were one instruction).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an untranslatable instruction).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
%%
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32(0, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
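/* For exposition: the CMP/STR translation above sets T when any of the four
 * corresponding bytes of Rm and Rn are equal (each JE skips to the common
 * SETE with ZF set). A plain-C reference model of that predicate:
 */
#include <stdint.h>

static int demo_cmpstr(uint32_t rm, uint32_t rn)
{
    uint32_t x = rm ^ rn;   /* a zero byte here means the bytes matched */
    return (x & 0x000000FFu) == 0 || (x & 0x0000FF00u) == 0 ||
           (x & 0x00FF0000u) == 0 || (x & 0xFF000000u) == 0;
}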
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp(REG_EAX, 0);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );            // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
    JS_label( positive );        // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end2);           // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end3);            // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32( 0, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);     // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );  // 3
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
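/* For exposition: a plain-C reference model of the SHAD translation above.
 * Positive Rm shifts left by its low five bits; negative Rm shifts right
 * arithmetically by (-Rm & 0x1F), with a zero count meaning a full 31-bit
 * sign-propagating shift (the emptysar path). Assumes the usual arithmetic
 * behaviour of >> on signed ints.
 */
#include <stdint.h>

static int32_t demo_shad(int32_t rn, int32_t rm)
{
    if( rm >= 0 )
        return rn << (rm & 0x1F);
    else if( ((-rm) & 0x1F) == 0 )
        return rn >> 31;               /* all sign bits */
    else
        return rn >> ((-rm) & 0x1F);
}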
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr);
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:
    COUNT_INST(I_SUBV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
    store_reg( REG_EAX, Rn );
:}
SWAP.W Rm, Rn {:
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    SHLL_imm_r32( 16, REG_ECX );
    SHRL_imm_r32( 16, REG_EAX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:
    COUNT_INST(I_TASB);
    load_reg( REG_EAX, Rn );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    TESTB_r8_r8( REG_DL, REG_DL );
    SETE_t();
    ORB_imms_r8( 0x80, REG_DL );
    MOVL_rspdisp_r32( 0, REG_EAX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:
    COUNT_INST(I_TST);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    TESTL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:
    COUNT_INST(I_TSTI);
    load_reg( REG_EAX, 0 );
    TESTL_imms_r32( imm, REG_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:
    COUNT_INST(I_TSTB);
    load_reg( REG_EAX, 0);
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    TESTB_imms_r8( imm, REG_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:
    COUNT_INST(I_XOR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:
    COUNT_INST(I_XORI);
    load_reg( REG_EAX, 0 );
    XORL_imms_r32( imm, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_XORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
    MOVL_rspdisp_r32( 0, REG_EAX );
    XORL_imms_r32( imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    COUNT_INST(I_XTRCT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    SHRL_imm_r32( 16, REG_ECX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
/* Data move instructions */
MOV Rm, Rn {:
    COUNT_INST(I_MOV);
    load_reg( REG_EAX, Rm );
    store_reg( REG_EAX, Rn );
:}
MOV #imm, Rn {:
    COUNT_INST(I_MOVI);
    MOVL_imm32_r32( imm, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MOV.B Rm, @Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
  1544 MOV.B @Rm+, Rn {:  
  1545     COUNT_INST(I_MOVB);
  1546     load_reg( REG_EAX, Rm );
  1547     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1548     if( Rm != Rn ) {
  1549     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1550     }
  1551     store_reg( REG_EAX, Rn );
  1552     sh4_x86.tstate = TSTATE_NONE;
  1553 :}
  1554 MOV.B @(R0, Rm), Rn {:  
  1555     COUNT_INST(I_MOVB);
  1556     load_reg( REG_EAX, 0 );
  1557     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1558     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1559     store_reg( REG_EAX, Rn );
  1560     sh4_x86.tstate = TSTATE_NONE;
  1561 :}
  1562 MOV.B @(disp, GBR), R0 {:  
  1563     COUNT_INST(I_MOVB);
  1564     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1565     ADDL_imms_r32( disp, REG_EAX );
  1566     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1567     store_reg( REG_EAX, 0 );
  1568     sh4_x86.tstate = TSTATE_NONE;
  1569 :}
  1570 MOV.B @(disp, Rm), R0 {:  
  1571     COUNT_INST(I_MOVB);
  1572     load_reg( REG_EAX, Rm );
  1573     ADDL_imms_r32( disp, REG_EAX );
  1574     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1575     store_reg( REG_EAX, 0 );
  1576     sh4_x86.tstate = TSTATE_NONE;
  1577 :}
  1578 MOV.L Rm, @Rn {:
  1579     COUNT_INST(I_MOVL);
  1580     load_reg( REG_EAX, Rn );
  1581     check_walign32(REG_EAX);
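           // Store-queue fast path: addresses in 0xE0000000-0xE3FFFFFF hit the
           // SQ buffers, so write directly into sh4r.store_queue instead of
           // going through the memory subsystem.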
  1582     MOVL_r32_r32( REG_EAX, REG_ECX );
  1583     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1584     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1585     JNE_label( notsq );
  1586     ANDL_imms_r32( 0x3C, REG_EAX );
  1587     load_reg( REG_EDX, Rm );
  1588     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1589     JMP_label(end);
  1590     JMP_TARGET(notsq);
  1591     load_reg( REG_EDX, Rm );
  1592     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1593     JMP_TARGET(end);
  1594     sh4_x86.tstate = TSTATE_NONE;
  1595 :}
  1596 MOV.L Rm, @-Rn {:  
  1597     COUNT_INST(I_MOVL);
  1598     load_reg( REG_EAX, Rn );
  1599     ADDL_imms_r32( -4, REG_EAX );
  1600     check_walign32( REG_EAX );
  1601     load_reg( REG_EDX, Rm );
  1602     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1603     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1604     sh4_x86.tstate = TSTATE_NONE;
  1605 :}
  1606 MOV.L Rm, @(R0, Rn) {:  
  1607     COUNT_INST(I_MOVL);
  1608     load_reg( REG_EAX, 0 );
  1609     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1610     check_walign32( REG_EAX );
  1611     load_reg( REG_EDX, Rm );
  1612     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1613     sh4_x86.tstate = TSTATE_NONE;
  1614 :}
  1615 MOV.L R0, @(disp, GBR) {:  
  1616     COUNT_INST(I_MOVL);
  1617     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1618     ADDL_imms_r32( disp, REG_EAX );
  1619     check_walign32( REG_EAX );
  1620     load_reg( REG_EDX, 0 );
  1621     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1622     sh4_x86.tstate = TSTATE_NONE;
  1623 :}
  1624 MOV.L Rm, @(disp, Rn) {:  
  1625     COUNT_INST(I_MOVL);
  1626     load_reg( REG_EAX, Rn );
  1627     ADDL_imms_r32( disp, REG_EAX );
  1628     check_walign32( REG_EAX );
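           // Same store-queue fast path as MOV.L Rm, @Rn above.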
  1629     MOVL_r32_r32( REG_EAX, REG_ECX );
  1630     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1631     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1632     JNE_label( notsq );
  1633     ANDL_imms_r32( 0x3C, REG_EAX );
  1634     load_reg( REG_EDX, Rm );
  1635     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1636     JMP_label(end);
  1637     JMP_TARGET(notsq);
  1638     load_reg( REG_EDX, Rm );
  1639     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1640     JMP_TARGET(end);
  1641     sh4_x86.tstate = TSTATE_NONE;
  1642 :}
  1643 MOV.L @Rm, Rn {:  
  1644     COUNT_INST(I_MOVL);
  1645     load_reg( REG_EAX, Rm );
  1646     check_ralign32( REG_EAX );
  1647     MEM_READ_LONG( REG_EAX, REG_EAX );
  1648     store_reg( REG_EAX, Rn );
  1649     sh4_x86.tstate = TSTATE_NONE;
  1650 :}
  1651 MOV.L @Rm+, Rn {:  
  1652     COUNT_INST(I_MOVL);
  1653     load_reg( REG_EAX, Rm );
  1654     check_ralign32( REG_EAX );
  1655     MEM_READ_LONG( REG_EAX, REG_EAX );
  1656     if( Rm != Rn ) {
  1657     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1658     }
  1659     store_reg( REG_EAX, Rn );
  1660     sh4_x86.tstate = TSTATE_NONE;
  1661 :}
  1662 MOV.L @(R0, Rm), Rn {:  
  1663     COUNT_INST(I_MOVL);
  1664     load_reg( REG_EAX, 0 );
  1665     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1666     check_ralign32( REG_EAX );
  1667     MEM_READ_LONG( REG_EAX, REG_EAX );
  1668     store_reg( REG_EAX, Rn );
  1669     sh4_x86.tstate = TSTATE_NONE;
  1670 :}
  1671 MOV.L @(disp, GBR), R0 {:
  1672     COUNT_INST(I_MOVL);
  1673     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1674     ADDL_imms_r32( disp, REG_EAX );
  1675     check_ralign32( REG_EAX );
  1676     MEM_READ_LONG( REG_EAX, REG_EAX );
  1677     store_reg( REG_EAX, 0 );
  1678     sh4_x86.tstate = TSTATE_NONE;
  1679 :}
  1680 MOV.L @(disp, PC), Rn {:  
  1681     COUNT_INST(I_MOVLPC);
  1682     if( sh4_x86.in_delay_slot ) {
  1683 	SLOTILLEGAL();
  1684     } else {
  1685 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1686 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1687 	    // If the target address is in the same page as the code, it's
  1688 	    // pretty safe to just ref it directly and circumvent the whole
  1689 	    // memory subsystem. (this is a big performance win)
  1691 	    // FIXME: There's a corner-case that's not handled here when
  1692 	    // the current code-page is in the ITLB but not in the UTLB.
  1693 	    // (this should generate a TLB miss, although SH4 behaviour needs
  1694 	    // testing to confirm). It's unlikely that anyone depends on this
  1695 	    // behaviour though.
  1696 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1697 	    MOVL_moffptr_eax( ptr );
  1698 	} else {
  1699 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1700 	    // different virtual address than the translation was done with,
  1701 	    // but we can safely assume that the low bits are the same.
  1702 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1703 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1704 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1705 	    sh4_x86.tstate = TSTATE_NONE;
  1706 	}
  1707 	store_reg( REG_EAX, Rn );
  1708     }
  1709 :}
  1710 MOV.L @(disp, Rm), Rn {:  
  1711     COUNT_INST(I_MOVL);
  1712     load_reg( REG_EAX, Rm );
  1713     ADDL_imms_r32( disp, REG_EAX );
  1714     check_ralign32( REG_EAX );
  1715     MEM_READ_LONG( REG_EAX, REG_EAX );
  1716     store_reg( REG_EAX, Rn );
  1717     sh4_x86.tstate = TSTATE_NONE;
  1718 :}
  1719 MOV.W Rm, @Rn {:  
  1720     COUNT_INST(I_MOVW);
  1721     load_reg( REG_EAX, Rn );
  1722     check_walign16( REG_EAX );
  1723     load_reg( REG_EDX, Rm );
  1724     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1725     sh4_x86.tstate = TSTATE_NONE;
  1726 :}
  1727 MOV.W Rm, @-Rn {:  
  1728     COUNT_INST(I_MOVW);
  1729     load_reg( REG_EAX, Rn );
  1730     check_walign16( REG_EAX );
  1731     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1732     load_reg( REG_EDX, Rm );
  1733     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1734     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1735     sh4_x86.tstate = TSTATE_NONE;
  1736 :}
  1737 MOV.W Rm, @(R0, Rn) {:  
  1738     COUNT_INST(I_MOVW);
  1739     load_reg( REG_EAX, 0 );
  1740     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1741     check_walign16( REG_EAX );
  1742     load_reg( REG_EDX, Rm );
  1743     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1744     sh4_x86.tstate = TSTATE_NONE;
  1745 :}
  1746 MOV.W R0, @(disp, GBR) {:  
  1747     COUNT_INST(I_MOVW);
  1748     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1749     ADDL_imms_r32( disp, REG_EAX );
  1750     check_walign16( REG_EAX );
  1751     load_reg( REG_EDX, 0 );
  1752     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1753     sh4_x86.tstate = TSTATE_NONE;
  1754 :}
  1755 MOV.W R0, @(disp, Rn) {:  
  1756     COUNT_INST(I_MOVW);
  1757     load_reg( REG_EAX, Rn );
  1758     ADDL_imms_r32( disp, REG_EAX );
  1759     check_walign16( REG_EAX );
  1760     load_reg( REG_EDX, 0 );
  1761     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1762     sh4_x86.tstate = TSTATE_NONE;
  1763 :}
  1764 MOV.W @Rm, Rn {:  
  1765     COUNT_INST(I_MOVW);
  1766     load_reg( REG_EAX, Rm );
  1767     check_ralign16( REG_EAX );
  1768     MEM_READ_WORD( REG_EAX, REG_EAX );
  1769     store_reg( REG_EAX, Rn );
  1770     sh4_x86.tstate = TSTATE_NONE;
  1771 :}
  1772 MOV.W @Rm+, Rn {:  
  1773     COUNT_INST(I_MOVW);
  1774     load_reg( REG_EAX, Rm );
  1775     check_ralign16( REG_EAX );
  1776     MEM_READ_WORD( REG_EAX, REG_EAX );
  1777     if( Rm != Rn ) {
  1778         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1779     }
  1780     store_reg( REG_EAX, Rn );
  1781     sh4_x86.tstate = TSTATE_NONE;
  1782 :}
  1783 MOV.W @(R0, Rm), Rn {:  
  1784     COUNT_INST(I_MOVW);
  1785     load_reg( REG_EAX, 0 );
  1786     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1787     check_ralign16( REG_EAX );
  1788     MEM_READ_WORD( REG_EAX, REG_EAX );
  1789     store_reg( REG_EAX, Rn );
  1790     sh4_x86.tstate = TSTATE_NONE;
  1791 :}
  1792 MOV.W @(disp, GBR), R0 {:  
  1793     COUNT_INST(I_MOVW);
  1794     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1795     ADDL_imms_r32( disp, REG_EAX );
  1796     check_ralign16( REG_EAX );
  1797     MEM_READ_WORD( REG_EAX, REG_EAX );
  1798     store_reg( REG_EAX, 0 );
  1799     sh4_x86.tstate = TSTATE_NONE;
  1800 :}
  1801 MOV.W @(disp, PC), Rn {:  
  1802     COUNT_INST(I_MOVW);
  1803     if( sh4_x86.in_delay_slot ) {
  1804 	SLOTILLEGAL();
  1805     } else {
  1806 	// See comments for MOV.L @(disp, PC), Rn
  1807 	uint32_t target = pc + disp + 4;
  1808 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1809 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1810 	    MOVL_moffptr_eax( ptr );
  1811 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1812 	} else {
  1813 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1814 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1815 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1816 	    sh4_x86.tstate = TSTATE_NONE;
  1817 	}
  1818 	store_reg( REG_EAX, Rn );
  1819     }
  1820 :}
  1821 MOV.W @(disp, Rm), R0 {:  
  1822     COUNT_INST(I_MOVW);
  1823     load_reg( REG_EAX, Rm );
  1824     ADDL_imms_r32( disp, REG_EAX );
  1825     check_ralign16( REG_EAX );
  1826     MEM_READ_WORD( REG_EAX, REG_EAX );
  1827     store_reg( REG_EAX, 0 );
  1828     sh4_x86.tstate = TSTATE_NONE;
  1829 :}
  1830 MOVA @(disp, PC), R0 {:  
  1831     COUNT_INST(I_MOVA);
  1832     if( sh4_x86.in_delay_slot ) {
  1833 	SLOTILLEGAL();
  1834     } else {
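       	// Compute the address from the runtime sh4r.pc, as the block may run
       	// at a different virtual address than it was translated at (see the
       	// note in MOV.L @(disp, PC), Rn).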
  1835 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1836 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1837 	store_reg( REG_ECX, 0 );
  1838 	sh4_x86.tstate = TSTATE_NONE;
  1839     }
  1840 :}
  1841 MOVCA.L R0, @Rn {:  
  1842     COUNT_INST(I_MOVCA);
  1843     load_reg( REG_EAX, Rn );
  1844     check_walign32( REG_EAX );
  1845     load_reg( REG_EDX, 0 );
  1846     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1847     sh4_x86.tstate = TSTATE_NONE;
  1848 :}
  1850 /* Control transfer instructions */
  1851 BF disp {:
  1852     COUNT_INST(I_BF);
  1853     if( sh4_x86.in_delay_slot ) {
  1854 	SLOTILLEGAL();
  1855     } else {
  1856 	sh4vma_t target = disp + pc + 4;
  1857 	JT_label( nottaken );
  1858 	exit_block_rel(target, pc+2 );
  1859 	JMP_TARGET(nottaken);
  1860 	return 2;
  1861     }
  1862 :}
  1863 BF/S disp {:
  1864     COUNT_INST(I_BFS);
  1865     if( sh4_x86.in_delay_slot ) {
  1866 	SLOTILLEGAL();
  1867     } else {
  1868 	sh4_x86.in_delay_slot = DELAY_PC;
  1869 	if( UNTRANSLATABLE(pc+2) ) {
  1870 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1871 	    JT_label(nottaken);
  1872 	    ADDL_imms_r32( disp, REG_EAX );
  1873 	    JMP_TARGET(nottaken);
  1874 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1875 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1876 	    exit_block_emu(pc+2);
  1877 	    sh4_x86.branch_taken = TRUE;
  1878 	    return 2;
  1879 	} else {
  1880 	    LOAD_t();
  1881 	    sh4vma_t target = disp + pc + 4;
  1882 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1883 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
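       	    // The JCC above was emitted with a zero displacement; once the
       	    // branch-taken path is emitted, patch it so the not-taken case
       	    // jumps straight past it.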
  1884 	    int save_tstate = sh4_x86.tstate;
  1885 	    sh4_translate_instruction(pc+2);
  1886             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1887 	    exit_block_rel( target, pc+4 );
  1889 	    // not taken
  1890 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1891 	    sh4_x86.tstate = save_tstate;
  1892 	    sh4_translate_instruction(pc+2);
  1893 	    return 4;
  1894 	}
  1895     }
  1896 :}
  1897 BRA disp {:  
  1898     COUNT_INST(I_BRA);
  1899     if( sh4_x86.in_delay_slot ) {
  1900 	SLOTILLEGAL();
  1901     } else {
  1902 	sh4_x86.in_delay_slot = DELAY_PC;
  1903 	sh4_x86.branch_taken = TRUE;
  1904 	if( UNTRANSLATABLE(pc+2) ) {
  1905 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1906 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1907 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1908 	    exit_block_emu(pc+2);
  1909 	    return 2;
  1910 	} else {
  1911 	    sh4_translate_instruction( pc + 2 );
  1912 	    exit_block_rel( disp + pc + 4, pc+4 );
  1913 	    return 4;
  1914 	}
  1915     }
  1916 :}
  1917 BRAF Rn {:  
  1918     COUNT_INST(I_BRAF);
  1919     if( sh4_x86.in_delay_slot ) {
  1920 	SLOTILLEGAL();
  1921     } else {
  1922 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1923 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1924 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1925 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1926 	sh4_x86.in_delay_slot = DELAY_PC;
  1927 	sh4_x86.tstate = TSTATE_NONE;
  1928 	sh4_x86.branch_taken = TRUE;
  1929 	if( UNTRANSLATABLE(pc+2) ) {
  1930 	    exit_block_emu(pc+2);
  1931 	    return 2;
  1932 	} else {
  1933 	    sh4_translate_instruction( pc + 2 );
  1934 	    exit_block_newpcset(pc+4);
  1935 	    return 4;
  1936 	}
  1937     }
  1938 :}
  1939 BSR disp {:  
  1940     COUNT_INST(I_BSR);
  1941     if( sh4_x86.in_delay_slot ) {
  1942 	SLOTILLEGAL();
  1943     } else {
  1944 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1945 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1946 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1947 	sh4_x86.in_delay_slot = DELAY_PC;
  1948 	sh4_x86.branch_taken = TRUE;
  1949 	sh4_x86.tstate = TSTATE_NONE;
  1950 	if( UNTRANSLATABLE(pc+2) ) {
  1951 	    ADDL_imms_r32( disp, REG_EAX );
  1952 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1953 	    exit_block_emu(pc+2);
  1954 	    return 2;
  1955 	} else {
  1956 	    sh4_translate_instruction( pc + 2 );
  1957 	    exit_block_rel( disp + pc + 4, pc+4 );
  1958 	    return 4;
  1959 	}
  1960     }
  1961 :}
  1962 BSRF Rn {:  
  1963     COUNT_INST(I_BSRF);
  1964     if( sh4_x86.in_delay_slot ) {
  1965 	SLOTILLEGAL();
  1966     } else {
  1967 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1968 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1969 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1970 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1971 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1973 	sh4_x86.in_delay_slot = DELAY_PC;
  1974 	sh4_x86.tstate = TSTATE_NONE;
  1975 	sh4_x86.branch_taken = TRUE;
  1976 	if( UNTRANSLATABLE(pc+2) ) {
  1977 	    exit_block_emu(pc+2);
  1978 	    return 2;
  1979 	} else {
  1980 	    sh4_translate_instruction( pc + 2 );
  1981 	    exit_block_newpcset(pc+4);
  1982 	    return 4;
  1983 	}
  1984     }
  1985 :}
  1986 BT disp {:
  1987     COUNT_INST(I_BT);
  1988     if( sh4_x86.in_delay_slot ) {
  1989 	SLOTILLEGAL();
  1990     } else {
  1991 	sh4vma_t target = disp + pc + 4;
  1992 	JF_label( nottaken );
  1993 	exit_block_rel(target, pc+2 );
  1994 	JMP_TARGET(nottaken);
  1995 	return 2;
  1996     }
  1997 :}
  1998 BT/S disp {:
  1999     COUNT_INST(I_BTS);
  2000     if( sh4_x86.in_delay_slot ) {
  2001 	SLOTILLEGAL();
  2002     } else {
  2003 	sh4_x86.in_delay_slot = DELAY_PC;
  2004 	if( UNTRANSLATABLE(pc+2) ) {
  2005 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2006 	    JF_label(nottaken);
  2007 	    ADDL_imms_r32( disp, REG_EAX );
  2008 	    JMP_TARGET(nottaken);
  2009 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2010 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2011 	    exit_block_emu(pc+2);
  2012 	    sh4_x86.branch_taken = TRUE;
  2013 	    return 2;
  2014 	} else {
  2015 	    LOAD_t();
  2016 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2017 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
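       	    // Zero-displacement JCC, backpatched below once the taken path is
       	    // emitted -- same trick as BF/S.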
  2019 	    int save_tstate = sh4_x86.tstate;
  2020 	    sh4_translate_instruction(pc+2);
  2021             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2022 	    exit_block_rel( disp + pc + 4, pc+4 );
  2023 	    // not taken
  2024 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2025 	    sh4_x86.tstate = save_tstate;
  2026 	    sh4_translate_instruction(pc+2);
  2027 	    return 4;
  2028 	}
  2029     }
  2030 :}
  2031 JMP @Rn {:  
  2032     COUNT_INST(I_JMP);
  2033     if( sh4_x86.in_delay_slot ) {
  2034 	SLOTILLEGAL();
  2035     } else {
  2036 	load_reg( REG_ECX, Rn );
  2037 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2038 	sh4_x86.in_delay_slot = DELAY_PC;
  2039 	sh4_x86.branch_taken = TRUE;
  2040 	if( UNTRANSLATABLE(pc+2) ) {
  2041 	    exit_block_emu(pc+2);
  2042 	    return 2;
  2043 	} else {
  2044 	    sh4_translate_instruction(pc+2);
  2045 	    exit_block_newpcset(pc+4);
  2046 	    return 4;
  2047 	}
  2048     }
  2049 :}
  2050 JSR @Rn {:  
  2051     COUNT_INST(I_JSR);
  2052     if( sh4_x86.in_delay_slot ) {
  2053 	SLOTILLEGAL();
  2054     } else {
  2055 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2056 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2057 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2058 	load_reg( REG_ECX, Rn );
  2059 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2060 	sh4_x86.in_delay_slot = DELAY_PC;
  2061 	sh4_x86.branch_taken = TRUE;
  2062 	sh4_x86.tstate = TSTATE_NONE;
  2063 	if( UNTRANSLATABLE(pc+2) ) {
  2064 	    exit_block_emu(pc+2);
  2065 	    return 2;
  2066 	} else {
  2067 	    sh4_translate_instruction(pc+2);
  2068 	    exit_block_newpcset(pc+4);
  2069 	    return 4;
  2070 	}
  2071     }
  2072 :}
  2073 RTE {:  
  2074     COUNT_INST(I_RTE);
  2075     if( sh4_x86.in_delay_slot ) {
  2076 	SLOTILLEGAL();
  2077     } else {
  2078 	check_priv();
  2079 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2080 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2081 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2082 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
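       	// sh4_write_sr may flip register banks and privilege level, so the
       	// cached FPU-enable check and SH4 mode are invalidated below.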
  2083 	sh4_x86.in_delay_slot = DELAY_PC;
  2084 	sh4_x86.fpuen_checked = FALSE;
  2085 	sh4_x86.tstate = TSTATE_NONE;
  2086 	sh4_x86.branch_taken = TRUE;
  2087 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2088 	if( UNTRANSLATABLE(pc+2) ) {
  2089 	    exit_block_emu(pc+2);
  2090 	    return 2;
  2091 	} else {
  2092 	    sh4_translate_instruction(pc+2);
  2093 	    exit_block_newpcset(pc+4);
  2094 	    return 4;
  2095 	}
  2096     }
  2097 :}
  2098 RTS {:  
  2099     COUNT_INST(I_RTS);
  2100     if( sh4_x86.in_delay_slot ) {
  2101 	SLOTILLEGAL();
  2102     } else {
  2103 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2104 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2105 	sh4_x86.in_delay_slot = DELAY_PC;
  2106 	sh4_x86.branch_taken = TRUE;
  2107 	if( UNTRANSLATABLE(pc+2) ) {
  2108 	    exit_block_emu(pc+2);
  2109 	    return 2;
  2110 	} else {
  2111 	    sh4_translate_instruction(pc+2);
  2112 	    exit_block_newpcset(pc+4);
  2113 	    return 4;
  2114 	}
  2115     }
  2116 :}
  2117 TRAPA #imm {:  
  2118     COUNT_INST(I_TRAPA);
  2119     if( sh4_x86.in_delay_slot ) {
  2120 	SLOTILLEGAL();
  2121     } else {
  2122 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2123 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
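       	// sh4r.pc now holds the address after the TRAPA; the trap handler
       	// saves it to SPC.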
  2124 	MOVL_imm32_r32( imm, REG_EAX );
  2125 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2126 	sh4_x86.tstate = TSTATE_NONE;
  2127 	exit_block_pcset(pc+2);
  2128 	sh4_x86.branch_taken = TRUE;
  2129 	return 2;
  2130     }
  2131 :}
  2132 UNDEF {:  
  2133     COUNT_INST(I_UNDEF);
  2134     if( sh4_x86.in_delay_slot ) {
  2135 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
  2136     } else {
  2137 	exit_block_exc(EXC_ILLEGAL, pc, 2);    
  2138 	return 2;
  2139     }
  2140 :}
  2142 CLRMAC {:  
  2143     COUNT_INST(I_CLRMAC);
  2144     XORL_r32_r32(REG_EAX, REG_EAX);
  2145     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2146     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2147     sh4_x86.tstate = TSTATE_NONE;
  2148 :}
  2149 CLRS {:
  2150     COUNT_INST(I_CLRS);
  2151     CLC();
  2152     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2153     sh4_x86.tstate = TSTATE_NONE;
  2154 :}
  2155 CLRT {:  
  2156     COUNT_INST(I_CLRT);
  2157     CLC();
  2158     SETC_t();
  2159     sh4_x86.tstate = TSTATE_C;
  2160 :}
  2161 SETS {:  
  2162     COUNT_INST(I_SETS);
  2163     STC();
  2164     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2165     sh4_x86.tstate = TSTATE_NONE;
  2166 :}
  2167 SETT {:  
  2168     COUNT_INST(I_SETT);
  2169     STC();
  2170     SETC_t();
  2171     sh4_x86.tstate = TSTATE_C;
  2172 :}
  2174 /* Floating point moves */
  2175 FMOV FRm, FRn {:  
  2176     COUNT_INST(I_FMOV1);
  2177     check_fpuen();
  2178     if( sh4_x86.double_size ) {
  2179         load_dr0( REG_EAX, FRm );
  2180         load_dr1( REG_ECX, FRm );
  2181         store_dr0( REG_EAX, FRn );
  2182         store_dr1( REG_ECX, FRn );
  2183     } else {
  2184         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2185         store_fr( REG_EAX, FRn );
  2186     }
  2187 :}
  2188 FMOV FRm, @Rn {: 
  2189     COUNT_INST(I_FMOV2);
  2190     check_fpuen();
  2191     load_reg( REG_EAX, Rn );
  2192     if( sh4_x86.double_size ) {
  2193         check_walign64( REG_EAX );
  2194         load_dr0( REG_EDX, FRm );
  2195         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2196         load_reg( REG_EAX, Rn );
  2197         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2198         load_dr1( REG_EDX, FRm );
  2199         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2200     } else {
  2201         check_walign32( REG_EAX );
  2202         load_fr( REG_EDX, FRm );
  2203         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2204     }
  2205     sh4_x86.tstate = TSTATE_NONE;
  2206 :}
  2207 FMOV @Rm, FRn {:  
  2208     COUNT_INST(I_FMOV5);
  2209     check_fpuen();
  2210     load_reg( REG_EAX, Rm );
  2211     if( sh4_x86.double_size ) {
  2212         check_ralign64( REG_EAX );
  2213         MEM_READ_LONG( REG_EAX, REG_EAX );
  2214         store_dr0( REG_EAX, FRn );
  2215         load_reg( REG_EAX, Rm );
  2216         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2217         MEM_READ_LONG( REG_EAX, REG_EAX );
  2218         store_dr1( REG_EAX, FRn );
  2219     } else {
  2220         check_ralign32( REG_EAX );
  2221         MEM_READ_LONG( REG_EAX, REG_EAX );
  2222         store_fr( REG_EAX, FRn );
  2223     }
  2224     sh4_x86.tstate = TSTATE_NONE;
  2225 :}
  2226 FMOV FRm, @-Rn {:  
  2227     COUNT_INST(I_FMOV3);
  2228     check_fpuen();
  2229     load_reg( REG_EAX, Rn );
  2230     if( sh4_x86.double_size ) {
  2231         check_walign64( REG_EAX );
  2232         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2233         load_dr0( REG_EDX, FRm );
  2234         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2235         load_reg( REG_EAX, Rn );
  2236         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2237         load_dr1( REG_EDX, FRm );
  2238         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2239         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2240     } else {
  2241         check_walign32( REG_EAX );
  2242         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2243         load_fr( REG_EDX, FRm );
  2244         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2245         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2246     }
  2247     sh4_x86.tstate = TSTATE_NONE;
  2248 :}
  2249 FMOV @Rm+, FRn {:
  2250     COUNT_INST(I_FMOV6);
  2251     check_fpuen();
  2252     load_reg( REG_EAX, Rm );
  2253     if( sh4_x86.double_size ) {
  2254         check_ralign64( REG_EAX );
  2255         MEM_READ_LONG( REG_EAX, REG_EAX );
  2256         store_dr0( REG_EAX, FRn );
  2257         load_reg( REG_EAX, Rm );
  2258         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2259         MEM_READ_LONG( REG_EAX, REG_EAX );
  2260         store_dr1( REG_EAX, FRn );
  2261         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2262     } else {
  2263         check_ralign32( REG_EAX );
  2264         MEM_READ_LONG( REG_EAX, REG_EAX );
  2265         store_fr( REG_EAX, FRn );
  2266         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2267     }
  2268     sh4_x86.tstate = TSTATE_NONE;
  2269 :}
  2270 FMOV FRm, @(R0, Rn) {:  
  2271     COUNT_INST(I_FMOV4);
  2272     check_fpuen();
  2273     load_reg( REG_EAX, Rn );
  2274     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2275     if( sh4_x86.double_size ) {
  2276         check_walign64( REG_EAX );
  2277         load_dr0( REG_EDX, FRm );
  2278         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2279         load_reg( REG_EAX, Rn );
  2280         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2281         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2282         load_dr1( REG_EDX, FRm );
  2283         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2284     } else {
  2285         check_walign32( REG_EAX );
  2286         load_fr( REG_EDX, FRm );
  2287         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2288     }
  2289     sh4_x86.tstate = TSTATE_NONE;
  2290 :}
  2291 FMOV @(R0, Rm), FRn {:  
  2292     COUNT_INST(I_FMOV7);
  2293     check_fpuen();
  2294     load_reg( REG_EAX, Rm );
  2295     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2296     if( sh4_x86.double_size ) {
  2297         check_ralign64( REG_EAX );
  2298         MEM_READ_LONG( REG_EAX, REG_EAX );
  2299         store_dr0( REG_EAX, FRn );
  2300         load_reg( REG_EAX, Rm );
  2301         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2302         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2303         MEM_READ_LONG( REG_EAX, REG_EAX );
  2304         store_dr1( REG_EAX, FRn );
  2305     } else {
  2306         check_ralign32( REG_EAX );
  2307         MEM_READ_LONG( REG_EAX, REG_EAX );
  2308         store_fr( REG_EAX, FRn );
  2309     }
  2310     sh4_x86.tstate = TSTATE_NONE;
  2311 :}
  2312 FLDI0 FRn {:  /* IFF PR=0 */
  2313     COUNT_INST(I_FLDI0);
  2314     check_fpuen();
  2315     if( sh4_x86.double_prec == 0 ) {
  2316         XORL_r32_r32( REG_EAX, REG_EAX );
  2317         store_fr( REG_EAX, FRn );
  2318     }
  2319     sh4_x86.tstate = TSTATE_NONE;
  2320 :}
  2321 FLDI1 FRn {:  /* IFF PR=0 */
  2322     COUNT_INST(I_FLDI1);
  2323     check_fpuen();
  2324     if( sh4_x86.double_prec == 0 ) {
  2325         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2326         store_fr( REG_EAX, FRn );
  2327     }
  2328 :}
  2330 FLOAT FPUL, FRn {:  
  2331     COUNT_INST(I_FLOAT);
  2332     check_fpuen();
  2333     FILD_rbpdisp(R_FPUL);
  2334     if( sh4_x86.double_prec ) {
  2335         pop_dr( FRn );
  2336     } else {
  2337         pop_fr( FRn );
  2338     }
  2339 :}
  2340 FTRC FRm, FPUL {:  
  2341     COUNT_INST(I_FTRC);
  2342     check_fpuen();
  2343     if( sh4_x86.double_prec ) {
  2344         push_dr( FRm );
  2345     } else {
  2346         push_fr( FRm );
  2347     }
  2348     MOVP_immptr_rptr( &min_int, REG_ECX );
  2349     FILD_r32disp( REG_ECX, 0 );
  2350     FCOMIP_st(1);              
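       // ST0 = min_int vs ST1 = value: JAE => value <= min_int (saturate),
       // JP => unordered (NaN).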
  2351     JAE_label( sat );     
  2352     JP_label( sat2 );       
  2353     MOVP_immptr_rptr( &max_int, REG_ECX );
  2354     FILD_r32disp( REG_ECX, 0 );
  2355     FCOMIP_st(1);
  2356     JNA_label( sat3 );
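       // In range: temporarily install a round-toward-zero control word so
       // FISTP truncates as FTRC requires, then restore the saved FCW.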
  2357     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2358     FNSTCW_r32disp( REG_EAX, 0 );
  2359     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2360     FLDCW_r32disp( REG_EDX, 0 );
  2361     FISTP_rbpdisp(R_FPUL);             
  2362     FLDCW_r32disp( REG_EAX, 0 );
  2363     JMP_label(end);             
  2365     JMP_TARGET(sat);
  2366     JMP_TARGET(sat2);
  2367     JMP_TARGET(sat3);
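       // Saturated or NaN: ECX still points at min_int/max_int, so store that
       // constant straight to FPUL and discard the x87 value.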
  2368     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2369     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2370     FPOP_st();
  2371     JMP_TARGET(end);
  2372     sh4_x86.tstate = TSTATE_NONE;
  2373 :}
  2374 FLDS FRm, FPUL {:  
  2375     COUNT_INST(I_FLDS);
  2376     check_fpuen();
  2377     load_fr( REG_EAX, FRm );
  2378     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2379 :}
  2380 FSTS FPUL, FRn {:  
  2381     COUNT_INST(I_FSTS);
  2382     check_fpuen();
  2383     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2384     store_fr( REG_EAX, FRn );
  2385 :}
  2386 FCNVDS FRm, FPUL {:  
  2387     COUNT_INST(I_FCNVDS);
  2388     check_fpuen();
  2389     if( sh4_x86.double_prec ) {
  2390         push_dr( FRm );
  2391         pop_fpul();
  2392     }
  2393 :}
  2394 FCNVSD FPUL, FRn {:  
  2395     COUNT_INST(I_FCNVSD);
  2396     check_fpuen();
  2397     if( sh4_x86.double_prec ) {
  2398         push_fpul();
  2399         pop_dr( FRn );
  2400     }
  2401 :}
  2403 /* Floating point instructions */
  2404 FABS FRn {:  
  2405     COUNT_INST(I_FABS);
  2406     check_fpuen();
  2407     if( sh4_x86.double_prec ) {
  2408         push_dr(FRn);
  2409         FABS_st0();
  2410         pop_dr(FRn);
  2411     } else {
  2412         push_fr(FRn);
  2413         FABS_st0();
  2414         pop_fr(FRn);
  2415     }
  2416 :}
  2417 FADD FRm, FRn {:  
  2418     COUNT_INST(I_FADD);
  2419     check_fpuen();
  2420     if( sh4_x86.double_prec ) {
  2421         push_dr(FRm);
  2422         push_dr(FRn);
  2423         FADDP_st(1);
  2424         pop_dr(FRn);
  2425     } else {
  2426         push_fr(FRm);
  2427         push_fr(FRn);
  2428         FADDP_st(1);
  2429         pop_fr(FRn);
  2430     }
  2431 :}
  2432 FDIV FRm, FRn {:  
  2433     COUNT_INST(I_FDIV);
  2434     check_fpuen();
  2435     if( sh4_x86.double_prec ) {
  2436         push_dr(FRn);
  2437         push_dr(FRm);
  2438         FDIVP_st(1);
  2439         pop_dr(FRn);
  2440     } else {
  2441         push_fr(FRn);
  2442         push_fr(FRm);
  2443         FDIVP_st(1);
  2444         pop_fr(FRn);
  2445     }
  2446 :}
  2447 FMAC FR0, FRm, FRn {:  
  2448     COUNT_INST(I_FMAC);
  2449     check_fpuen();
  2450     if( sh4_x86.double_prec ) {
  2451         push_dr( 0 );
  2452         push_dr( FRm );
  2453         FMULP_st(1);
  2454         push_dr( FRn );
  2455         FADDP_st(1);
  2456         pop_dr( FRn );
  2457     } else {
  2458         push_fr( 0 );
  2459         push_fr( FRm );
  2460         FMULP_st(1);
  2461         push_fr( FRn );
  2462         FADDP_st(1);
  2463         pop_fr( FRn );
  2464     }
  2465 :}
  2467 FMUL FRm, FRn {:  
  2468     COUNT_INST(I_FMUL);
  2469     check_fpuen();
  2470     if( sh4_x86.double_prec ) {
  2471         push_dr(FRm);
  2472         push_dr(FRn);
  2473         FMULP_st(1);
  2474         pop_dr(FRn);
  2475     } else {
  2476         push_fr(FRm);
  2477         push_fr(FRn);
  2478         FMULP_st(1);
  2479         pop_fr(FRn);
  2480     }
  2481 :}
  2482 FNEG FRn {:  
  2483     COUNT_INST(I_FNEG);
  2484     check_fpuen();
  2485     if( sh4_x86.double_prec ) {
  2486         push_dr(FRn);
  2487         FCHS_st0();
  2488         pop_dr(FRn);
  2489     } else {
  2490         push_fr(FRn);
  2491         FCHS_st0();
  2492         pop_fr(FRn);
  2493     }
  2494 :}
  2495 FSRRA FRn {:  
  2496     COUNT_INST(I_FSRRA);
  2497     check_fpuen();
  2498     if( sh4_x86.double_prec == 0 ) {
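           // FSRRA approximates 1/sqrt(FRn); here it's computed exactly as
           // 1.0 / sqrt(FRn) via x87 FSQRT/FDIV.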
  2499         FLD1_st0();
  2500         push_fr(FRn);
  2501         FSQRT_st0();
  2502         FDIVP_st(1);
  2503         pop_fr(FRn);
  2504     }
  2505 :}
  2506 FSQRT FRn {:  
  2507     COUNT_INST(I_FSQRT);
  2508     check_fpuen();
  2509     if( sh4_x86.double_prec ) {
  2510         push_dr(FRn);
  2511         FSQRT_st0();
  2512         pop_dr(FRn);
  2513     } else {
  2514         push_fr(FRn);
  2515         FSQRT_st0();
  2516         pop_fr(FRn);
  2517     }
  2518 :}
  2519 FSUB FRm, FRn {:  
  2520     COUNT_INST(I_FSUB);
  2521     check_fpuen();
  2522     if( sh4_x86.double_prec ) {
  2523         push_dr(FRn);
  2524         push_dr(FRm);
  2525         FSUBP_st(1);
  2526         pop_dr(FRn);
  2527     } else {
  2528         push_fr(FRn);
  2529         push_fr(FRm);
  2530         FSUBP_st(1);
  2531         pop_fr(FRn);
  2532     }
  2533 :}
  2535 FCMP/EQ FRm, FRn {:  
  2536     COUNT_INST(I_FCMPEQ);
  2537     check_fpuen();
  2538     if( sh4_x86.double_prec ) {
  2539         push_dr(FRm);
  2540         push_dr(FRn);
  2541     } else {
  2542         push_fr(FRm);
  2543         push_fr(FRn);
  2544     }
  2545     XORL_r32_r32(REG_EAX, REG_EAX);
  2546     XORL_r32_r32(REG_EDX, REG_EDX);
  2547     FCOMIP_st(1);
  2548     SETCCB_cc_r8(X86_COND_NP, REG_DL);
  2549     CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
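       // EAX = (ZF && !PF): ordered-and-equal. NaN operands set PF and must
       // leave T clear.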
  2550     MOVL_r32_rbpdisp(REG_EAX, R_T);
  2551     FPOP_st();
  2552     sh4_x86.tstate = TSTATE_NONE;
  2553 :}
  2554 FCMP/GT FRm, FRn {:  
  2555     COUNT_INST(I_FCMPGT);
  2556     check_fpuen();
  2557     if( sh4_x86.double_prec ) {
  2558         push_dr(FRm);
  2559         push_dr(FRn);
  2560     } else {
  2561         push_fr(FRm);
  2562         push_fr(FRn);
  2563     }
  2564     FCOMIP_st(1);
  2565     SETA_t();
  2566     FPOP_st();
  2567     sh4_x86.tstate = TSTATE_A;
  2568 :}
  2570 FSCA FPUL, FRn {:  
  2571     COUNT_INST(I_FSCA);
  2572     check_fpuen();
  2573     if( sh4_x86.double_prec == 0 ) {
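           // Delegated to C: sh4_fsca() takes the FPUL angle and a pointer to
           // the FRn pair, and writes the sin/cos results into FRn/FRn+1.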
  2574         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2575         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2576         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2577     }
  2578     sh4_x86.tstate = TSTATE_NONE;
  2579 :}
  2580 FIPR FVm, FVn {:  
  2581     COUNT_INST(I_FIPR);
  2582     check_fpuen();
  2583     if( sh4_x86.double_prec == 0 ) {
  2584         if( sh4_x86.sse3_enabled ) {
  2585             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2586             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2587             HADDPS_xmm_xmm( 4, 4 ); 
  2588             HADDPS_xmm_xmm( 4, 4 );
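               // MULPS + two HADDPS collapse the four products into the dot
               // product, replicated across all lanes.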
  2589             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2590         } else {
  2591             push_fr( FVm<<2 );
  2592             push_fr( FVn<<2 );
  2593             FMULP_st(1);
  2594             push_fr( (FVm<<2)+1);
  2595             push_fr( (FVn<<2)+1);
  2596             FMULP_st(1);
  2597             FADDP_st(1);
  2598             push_fr( (FVm<<2)+2);
  2599             push_fr( (FVn<<2)+2);
  2600             FMULP_st(1);
  2601             FADDP_st(1);
  2602             push_fr( (FVm<<2)+3);
  2603             push_fr( (FVn<<2)+3);
  2604             FMULP_st(1);
  2605             FADDP_st(1);
  2606             pop_fr( (FVn<<2)+3);
  2609 :}
  2610 FTRV XMTRX, FVn {:  
  2611     COUNT_INST(I_FTRV);
  2612     check_fpuen();
  2613     if( sh4_x86.double_prec == 0 ) {
  2614         if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
  2615         	/* FIXME: For now, disable this inlining when we're running in shadow mode -
  2616         	 * it gives slightly different results from the emu core. Need to
  2617         	 * fix the precision so both give the right results.
  2618         	 */
  2619             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2620             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2621             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2622             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2624             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2625             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2626             MOV_xmm_xmm( 4, 6 );
  2627             MOV_xmm_xmm( 5, 7 );
  2628             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2629             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2630             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2631             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2632             MULPS_xmm_xmm( 0, 4 );
  2633             MULPS_xmm_xmm( 1, 5 );
  2634             MULPS_xmm_xmm( 2, 6 );
  2635             MULPS_xmm_xmm( 3, 7 );
  2636             ADDPS_xmm_xmm( 5, 4 );
  2637             ADDPS_xmm_xmm( 7, 6 );
  2638             ADDPS_xmm_xmm( 6, 4 );
  2639             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2640         } else {
  2641             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2642             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2643         }
  2644     }
  2645     sh4_x86.tstate = TSTATE_NONE;
  2646 :}
  2648 FRCHG {:  
  2649     COUNT_INST(I_FRCHG);
  2650     check_fpuen();
  2651     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2652     CALL_ptr( sh4_switch_fr_banks );
  2653     sh4_x86.tstate = TSTATE_NONE;
  2654 :}
  2655 FSCHG {:  
  2656     COUNT_INST(I_FSCHG);
  2657     check_fpuen();
  2658     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2659     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2660     sh4_x86.tstate = TSTATE_NONE;
  2661     sh4_x86.double_size = !sh4_x86.double_size;
  2662     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2663 :}
  2665 /* Processor control instructions */
  2666 LDC Rm, SR {:
  2667     COUNT_INST(I_LDCSR);
  2668     if( sh4_x86.in_delay_slot ) {
  2669 	SLOTILLEGAL();
  2670     } else {
  2671 	check_priv();
  2672 	load_reg( REG_EAX, Rm );
  2673 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2674 	sh4_x86.fpuen_checked = FALSE;
  2675 	sh4_x86.tstate = TSTATE_NONE;
  2676 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2677 	return 2;
  2678     }
  2679 :}
  2680 LDC Rm, GBR {: 
  2681     COUNT_INST(I_LDC);
  2682     load_reg( REG_EAX, Rm );
  2683     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2684 :}
  2685 LDC Rm, VBR {:  
  2686     COUNT_INST(I_LDC);
  2687     check_priv();
  2688     load_reg( REG_EAX, Rm );
  2689     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2690     sh4_x86.tstate = TSTATE_NONE;
  2691 :}
  2692 LDC Rm, SSR {:  
  2693     COUNT_INST(I_LDC);
  2694     check_priv();
  2695     load_reg( REG_EAX, Rm );
  2696     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2697     sh4_x86.tstate = TSTATE_NONE;
  2698 :}
  2699 LDC Rm, SGR {:  
  2700     COUNT_INST(I_LDC);
  2701     check_priv();
  2702     load_reg( REG_EAX, Rm );
  2703     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2704     sh4_x86.tstate = TSTATE_NONE;
  2705 :}
  2706 LDC Rm, SPC {:  
  2707     COUNT_INST(I_LDC);
  2708     check_priv();
  2709     load_reg( REG_EAX, Rm );
  2710     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2711     sh4_x86.tstate = TSTATE_NONE;
  2712 :}
  2713 LDC Rm, DBR {:  
  2714     COUNT_INST(I_LDC);
  2715     check_priv();
  2716     load_reg( REG_EAX, Rm );
  2717     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2718     sh4_x86.tstate = TSTATE_NONE;
  2719 :}
  2720 LDC Rm, Rn_BANK {:  
  2721     COUNT_INST(I_LDC);
  2722     check_priv();
  2723     load_reg( REG_EAX, Rm );
  2724     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2725     sh4_x86.tstate = TSTATE_NONE;
  2726 :}
  2727 LDC.L @Rm+, GBR {:  
  2728     COUNT_INST(I_LDCM);
  2729     load_reg( REG_EAX, Rm );
  2730     check_ralign32( REG_EAX );
  2731     MEM_READ_LONG( REG_EAX, REG_EAX );
  2732     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2733     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2734     sh4_x86.tstate = TSTATE_NONE;
  2735 :}
  2736 LDC.L @Rm+, SR {:
  2737     COUNT_INST(I_LDCSRM);
  2738     if( sh4_x86.in_delay_slot ) {
  2739 	SLOTILLEGAL();
  2740     } else {
  2741 	check_priv();
  2742 	load_reg( REG_EAX, Rm );
  2743 	check_ralign32( REG_EAX );
  2744 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2745 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2746 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2747 	sh4_x86.fpuen_checked = FALSE;
  2748 	sh4_x86.tstate = TSTATE_NONE;
  2749 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2750 	return 2;
  2751     }
  2752 :}
  2753 LDC.L @Rm+, VBR {:  
  2754     COUNT_INST(I_LDCM);
  2755     check_priv();
  2756     load_reg( REG_EAX, Rm );
  2757     check_ralign32( REG_EAX );
  2758     MEM_READ_LONG( REG_EAX, REG_EAX );
  2759     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2760     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2761     sh4_x86.tstate = TSTATE_NONE;
  2762 :}
  2763 LDC.L @Rm+, SSR {:
  2764     COUNT_INST(I_LDCM);
  2765     check_priv();
  2766     load_reg( REG_EAX, Rm );
  2767     check_ralign32( REG_EAX );
  2768     MEM_READ_LONG( REG_EAX, REG_EAX );
  2769     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2770     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2771     sh4_x86.tstate = TSTATE_NONE;
  2772 :}
  2773 LDC.L @Rm+, SGR {:  
  2774     COUNT_INST(I_LDCM);
  2775     check_priv();
  2776     load_reg( REG_EAX, Rm );
  2777     check_ralign32( REG_EAX );
  2778     MEM_READ_LONG( REG_EAX, REG_EAX );
  2779     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2780     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2781     sh4_x86.tstate = TSTATE_NONE;
  2782 :}
  2783 LDC.L @Rm+, SPC {:  
  2784     COUNT_INST(I_LDCM);
  2785     check_priv();
  2786     load_reg( REG_EAX, Rm );
  2787     check_ralign32( REG_EAX );
  2788     MEM_READ_LONG( REG_EAX, REG_EAX );
  2789     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2790     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2791     sh4_x86.tstate = TSTATE_NONE;
  2792 :}
  2793 LDC.L @Rm+, DBR {:  
  2794     COUNT_INST(I_LDCM);
  2795     check_priv();
  2796     load_reg( REG_EAX, Rm );
  2797     check_ralign32( REG_EAX );
  2798     MEM_READ_LONG( REG_EAX, REG_EAX );
  2799     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2800     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2801     sh4_x86.tstate = TSTATE_NONE;
  2802 :}
  2803 LDC.L @Rm+, Rn_BANK {:  
  2804     COUNT_INST(I_LDCM);
  2805     check_priv();
  2806     load_reg( REG_EAX, Rm );
  2807     check_ralign32( REG_EAX );
  2808     MEM_READ_LONG( REG_EAX, REG_EAX );
  2809     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2810     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2811     sh4_x86.tstate = TSTATE_NONE;
  2812 :}
  2813 LDS Rm, FPSCR {:
  2814     COUNT_INST(I_LDSFPSCR);
  2815     check_fpuen();
  2816     load_reg( REG_EAX, Rm );
  2817     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2818     sh4_x86.tstate = TSTATE_NONE;
  2819     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2820     return 2;
  2821 :}
  2822 LDS.L @Rm+, FPSCR {:  
  2823     COUNT_INST(I_LDSFPSCRM);
  2824     check_fpuen();
  2825     load_reg( REG_EAX, Rm );
  2826     check_ralign32( REG_EAX );
  2827     MEM_READ_LONG( REG_EAX, REG_EAX );
  2828     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2829     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2830     sh4_x86.tstate = TSTATE_NONE;
  2831     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2832     return 2;
  2833 :}
  2834 LDS Rm, FPUL {:  
  2835     COUNT_INST(I_LDS);
  2836     check_fpuen();
  2837     load_reg( REG_EAX, Rm );
  2838     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2839 :}
  2840 LDS.L @Rm+, FPUL {:  
  2841     COUNT_INST(I_LDSM);
  2842     check_fpuen();
  2843     load_reg( REG_EAX, Rm );
  2844     check_ralign32( REG_EAX );
  2845     MEM_READ_LONG( REG_EAX, REG_EAX );
  2846     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2847     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2848     sh4_x86.tstate = TSTATE_NONE;
  2849 :}
  2850 LDS Rm, MACH {: 
  2851     COUNT_INST(I_LDS);
  2852     load_reg( REG_EAX, Rm );
  2853     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2854 :}
  2855 LDS.L @Rm+, MACH {:  
  2856     COUNT_INST(I_LDSM);
  2857     load_reg( REG_EAX, Rm );
  2858     check_ralign32( REG_EAX );
  2859     MEM_READ_LONG( REG_EAX, REG_EAX );
  2860     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2861     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2862     sh4_x86.tstate = TSTATE_NONE;
  2863 :}
  2864 LDS Rm, MACL {:  
  2865     COUNT_INST(I_LDS);
  2866     load_reg( REG_EAX, Rm );
  2867     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2868 :}
  2869 LDS.L @Rm+, MACL {:  
  2870     COUNT_INST(I_LDSM);
  2871     load_reg( REG_EAX, Rm );
  2872     check_ralign32( REG_EAX );
  2873     MEM_READ_LONG( REG_EAX, REG_EAX );
  2874     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2875     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2876     sh4_x86.tstate = TSTATE_NONE;
  2877 :}
  2878 LDS Rm, PR {:  
  2879     COUNT_INST(I_LDS);
  2880     load_reg( REG_EAX, Rm );
  2881     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2882 :}
  2883 LDS.L @Rm+, PR {:  
  2884     COUNT_INST(I_LDSM);
  2885     load_reg( REG_EAX, Rm );
  2886     check_ralign32( REG_EAX );
  2887     MEM_READ_LONG( REG_EAX, REG_EAX );
  2888     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2889     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2890     sh4_x86.tstate = TSTATE_NONE;
  2891 :}
  2892 LDTLB {:  
  2893     COUNT_INST(I_LDTLB);
  2894     CALL_ptr( MMU_ldtlb );
  2895     sh4_x86.tstate = TSTATE_NONE;
  2896 :}
  2897 OCBI @Rn {:
  2898     COUNT_INST(I_OCBI);
  2899 :}
  2900 OCBP @Rn {:
  2901     COUNT_INST(I_OCBP);
  2902 :}
  2903 OCBWB @Rn {:
  2904     COUNT_INST(I_OCBWB);
  2905 :}
  2906 PREF @Rn {:
  2907     COUNT_INST(I_PREF);
  2908     load_reg( REG_EAX, Rn );
  2909     MEM_PREFETCH( REG_EAX );
  2910     sh4_x86.tstate = TSTATE_NONE;
  2911 :}
  2912 SLEEP {: 
  2913     COUNT_INST(I_SLEEP);
  2914     check_priv();
  2915     CALL_ptr( sh4_sleep );
  2916     sh4_x86.tstate = TSTATE_NONE;
  2917     sh4_x86.in_delay_slot = DELAY_NONE;
  2918     return 2;
  2919 :}
  2920 STC SR, Rn {:
  2921     COUNT_INST(I_STCSR);
  2922     check_priv();
  2923     CALL_ptr(sh4_read_sr);
  2924     store_reg( REG_EAX, Rn );
  2925     sh4_x86.tstate = TSTATE_NONE;
  2926 :}
  2927 STC GBR, Rn {:  
  2928     COUNT_INST(I_STC);
  2929     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2930     store_reg( REG_EAX, Rn );
  2931 :}
  2932 STC VBR, Rn {:  
  2933     COUNT_INST(I_STC);
  2934     check_priv();
  2935     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2936     store_reg( REG_EAX, Rn );
  2937     sh4_x86.tstate = TSTATE_NONE;
  2938 :}
  2939 STC SSR, Rn {:  
  2940     COUNT_INST(I_STC);
  2941     check_priv();
  2942     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2943     store_reg( REG_EAX, Rn );
  2944     sh4_x86.tstate = TSTATE_NONE;
  2945 :}
  2946 STC SPC, Rn {:  
  2947     COUNT_INST(I_STC);
  2948     check_priv();
  2949     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2950     store_reg( REG_EAX, Rn );
  2951     sh4_x86.tstate = TSTATE_NONE;
  2952 :}
  2953 STC SGR, Rn {:  
  2954     COUNT_INST(I_STC);
  2955     check_priv();
  2956     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2957     store_reg( REG_EAX, Rn );
  2958     sh4_x86.tstate = TSTATE_NONE;
  2959 :}
  2960 STC DBR, Rn {:  
  2961     COUNT_INST(I_STC);
  2962     check_priv();
  2963     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2964     store_reg( REG_EAX, Rn );
  2965     sh4_x86.tstate = TSTATE_NONE;
  2966 :}
  2967 STC Rm_BANK, Rn {:
  2968     COUNT_INST(I_STC);
  2969     check_priv();
  2970     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  2971     store_reg( REG_EAX, Rn );
  2972     sh4_x86.tstate = TSTATE_NONE;
  2973 :}
  2974 STC.L SR, @-Rn {:
  2975     COUNT_INST(I_STCSRM);
  2976     check_priv();
  2977     CALL_ptr( sh4_read_sr );
  2978     MOVL_r32_r32( REG_EAX, REG_EDX );
  2979     load_reg( REG_EAX, Rn );
  2980     check_walign32( REG_EAX );
  2981     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2982     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2983     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2984     sh4_x86.tstate = TSTATE_NONE;
  2985 :}
  2986 STC.L VBR, @-Rn {:  
  2987     COUNT_INST(I_STCM);
  2988     check_priv();
  2989     load_reg( REG_EAX, Rn );
  2990     check_walign32( REG_EAX );
  2991     ADDL_imms_r32( -4, REG_EAX );
  2992     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  2993     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2994     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2995     sh4_x86.tstate = TSTATE_NONE;
  2996 :}
  2997 STC.L SSR, @-Rn {:  
  2998     COUNT_INST(I_STCM);
  2999     check_priv();
  3000     load_reg( REG_EAX, Rn );
  3001     check_walign32( REG_EAX );
  3002     ADDL_imms_r32( -4, REG_EAX );
  3003     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  3004     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3005     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3006     sh4_x86.tstate = TSTATE_NONE;
  3007 :}
  3008 STC.L SPC, @-Rn {:
  3009     COUNT_INST(I_STCM);
  3010     check_priv();
  3011     load_reg( REG_EAX, Rn );
  3012     check_walign32( REG_EAX );
  3013     ADDL_imms_r32( -4, REG_EAX );
  3014     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3015     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3016     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3017     sh4_x86.tstate = TSTATE_NONE;
  3018 :}
  3019 STC.L SGR, @-Rn {:  
  3020     COUNT_INST(I_STCM);
  3021     check_priv();
  3022     load_reg( REG_EAX, Rn );
  3023     check_walign32( REG_EAX );
  3024     ADDL_imms_r32( -4, REG_EAX );
  3025     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3026     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3027     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3028     sh4_x86.tstate = TSTATE_NONE;
  3029 :}
  3030 STC.L DBR, @-Rn {:  
  3031     COUNT_INST(I_STCM);
  3032     check_priv();
  3033     load_reg( REG_EAX, Rn );
  3034     check_walign32( REG_EAX );
  3035     ADDL_imms_r32( -4, REG_EAX );
  3036     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3037     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3038     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3039     sh4_x86.tstate = TSTATE_NONE;
  3040 :}
  3041 STC.L Rm_BANK, @-Rn {:  
  3042     COUNT_INST(I_STCM);
  3043     check_priv();
  3044     load_reg( REG_EAX, Rn );
  3045     check_walign32( REG_EAX );
  3046     ADDL_imms_r32( -4, REG_EAX );
  3047     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3048     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3049     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3050     sh4_x86.tstate = TSTATE_NONE;
  3051 :}
  3052 STC.L GBR, @-Rn {:  
  3053     COUNT_INST(I_STCM);
  3054     load_reg( REG_EAX, Rn );
  3055     check_walign32( REG_EAX );
  3056     ADDL_imms_r32( -4, REG_EAX );
  3057     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3058     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3059     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3060     sh4_x86.tstate = TSTATE_NONE;
  3061 :}
  3062 STS FPSCR, Rn {:  
  3063     COUNT_INST(I_STSFPSCR);
  3064     check_fpuen();
  3065     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3066     store_reg( REG_EAX, Rn );
  3067 :}
  3068 STS.L FPSCR, @-Rn {:  
  3069     COUNT_INST(I_STSFPSCRM);
  3070     check_fpuen();
  3071     load_reg( REG_EAX, Rn );
  3072     check_walign32( REG_EAX );
  3073     ADDL_imms_r32( -4, REG_EAX );
  3074     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3075     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3076     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3077     sh4_x86.tstate = TSTATE_NONE;
  3078 :}
  3079 STS FPUL, Rn {:  
  3080     COUNT_INST(I_STS);
  3081     check_fpuen();
  3082     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3083     store_reg( REG_EAX, Rn );
  3084 :}
  3085 STS.L FPUL, @-Rn {:  
  3086     COUNT_INST(I_STSM);
  3087     check_fpuen();
  3088     load_reg( REG_EAX, Rn );
  3089     check_walign32( REG_EAX );
  3090     ADDL_imms_r32( -4, REG_EAX );
  3091     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3092     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3093     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3094     sh4_x86.tstate = TSTATE_NONE;
  3095 :}
  3096 STS MACH, Rn {:  
  3097     COUNT_INST(I_STS);
  3098     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3099     store_reg( REG_EAX, Rn );
  3100 :}
  3101 STS.L MACH, @-Rn {:  
  3102     COUNT_INST(I_STSM);
  3103     load_reg( REG_EAX, Rn );
  3104     check_walign32( REG_EAX );
  3105     ADDL_imms_r32( -4, REG_EAX );
  3106     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3107     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3108     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3109     sh4_x86.tstate = TSTATE_NONE;
  3110 :}
  3111 STS MACL, Rn {:  
  3112     COUNT_INST(I_STS);
  3113     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3114     store_reg( REG_EAX, Rn );
  3115 :}
  3116 STS.L MACL, @-Rn {:  
  3117     COUNT_INST(I_STSM);
  3118     load_reg( REG_EAX, Rn );
  3119     check_walign32( REG_EAX );
  3120     ADDL_imms_r32( -4, REG_EAX );
  3121     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3122     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3123     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3124     sh4_x86.tstate = TSTATE_NONE;
  3125 :}
  3126 STS PR, Rn {:  
  3127     COUNT_INST(I_STS);
  3128     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3129     store_reg( REG_EAX, Rn );
  3130 :}
  3131 STS.L PR, @-Rn {:  
  3132     COUNT_INST(I_STSM);
  3133     load_reg( REG_EAX, Rn );
  3134     check_walign32( REG_EAX );
  3135     ADDL_imms_r32( -4, REG_EAX );
  3136     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3137     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3138     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3139     sh4_x86.tstate = TSTATE_NONE;
  3140 :}
  3142 NOP {: 
  3143     COUNT_INST(I_NOP);
  3144     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  3145 :}
  3146 %%
  3147     sh4_x86.in_delay_slot = DELAY_NONE;
  3148     return 0;
  3149 }
  3152 /**
  3153  * The unwind methods only work if we compiled with DWARF2 frame information
  3154  * (ie -fexceptions), otherwise we have to use the direct frame scan.
  3155  */
  3156 #ifdef HAVE_EXCEPTIONS
  3157 #include <unwind.h>
  3159 struct UnwindInfo {
  3160     uintptr_t block_start;
  3161     uintptr_t block_end;
  3162     void *pc;
  3163 };
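       /* Called by _Unwind_Backtrace for each stack frame: record the PC and
        * stop the walk once it falls inside the translated block. */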
  3165 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3166 {
  3167     struct UnwindInfo *info = arg;
  3168     void *pc = (void *)_Unwind_GetIP(context);
  3169     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3170         info->pc = pc;
  3171         return _URC_NORMAL_STOP;
  3172     }
  3173     return _URC_NO_REASON;
  3174 }
  3176 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3177 {
  3178     struct _Unwind_Exception exc;
  3179     struct UnwindInfo info;
  3181     info.pc = NULL;
  3182     info.block_start = (uintptr_t)code;
  3183     info.block_end = info.block_start + code_size;
  3184     void *result = NULL;
  3185     _Unwind_Backtrace( xlat_check_frame, &info );
  3186     return info.pc;
  3187 }
  3188 #else
  3189 /* Assume this is an ia32 build - amd64 should always have dwarf information */
  3190 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3191 {
  3192     void *result = NULL;
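           /* Walk the saved-EBP chain (at most 8 frames) looking for the frame
            * whose saved EBP equals the translated-code frame pointer
            * (&sh4r + 128); the word above it is the return address inside the
            * translated block. */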
  3193     __asm__(
  3194         "mov %%ebp, %%eax\n\t"
  3195         "mov $0x8, %%ecx\n\t"
  3196         "mov %1, %%edx\n"
  3197         "frame_loop: test %%eax, %%eax\n\t"
  3198         "je frame_not_found\n\t"
  3199         "cmp (%%eax), %%edx\n\t"
  3200         "je frame_found\n\t"
  3201         "sub $0x1, %%ecx\n\t"
  3202         "je frame_not_found\n\t"
  3203         "movl (%%eax), %%eax\n\t"
  3204         "jmp frame_loop\n"
  3205         "frame_found: movl 0x4(%%eax), %0\n"
  3206         "frame_not_found:"
  3207         : "=r" (result)
  3208         : "r" (((uint8_t *)&sh4r) + 128 )
  3209         : "eax", "ecx", "edx" );
  3210     return result;
  3211 }
  3212 #endif