lxdream.org :: lxdream/src/sh4/sh4x86.in
filename:    src/sh4/sh4x86.in
changeset:   1301:b76840ccf94b
prev:        1298:d0eb2307b847
author:      nkeynes
date:        Fri May 29 18:47:05 2015 +1000
permissions: -rw-r--r--
last change: Fix test case
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4dasm.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/mmu.h"
    35 #include "xlat/xltcache.h"
    36 #include "xlat/x86/x86op.h"
    37 #include "xlat/xlatdasm.h"
    38 #include "clock.h"
    40 #define DEFAULT_BACKPATCH_SIZE 4096
    42 /* Offset of a reg relative to the sh4r structure */
    43 #define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
    45 #define R_T      REG_OFFSET(t)
    46 #define R_Q      REG_OFFSET(q)
    47 #define R_S      REG_OFFSET(s)
    48 #define R_M      REG_OFFSET(m)
    49 #define R_SR     REG_OFFSET(sr)
    50 #define R_GBR    REG_OFFSET(gbr)
    51 #define R_SSR    REG_OFFSET(ssr)
    52 #define R_SPC    REG_OFFSET(spc)
    53 #define R_VBR    REG_OFFSET(vbr)
    54 #define R_MACH   REG_OFFSET(mac)+4
    55 #define R_MACL   REG_OFFSET(mac)
    56 #define R_PC     REG_OFFSET(pc)
    57 #define R_NEW_PC REG_OFFSET(new_pc)
    58 #define R_PR     REG_OFFSET(pr)
    59 #define R_SGR    REG_OFFSET(sgr)
    60 #define R_FPUL   REG_OFFSET(fpul)
    61 #define R_FPSCR  REG_OFFSET(fpscr)
    62 #define R_DBR    REG_OFFSET(dbr)
    63 #define R_R(rn)  REG_OFFSET(r[rn])
    64 #define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
    65 #define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
    66 #define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
    67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
    68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
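
/* Aside: a standalone sketch of why REG_OFFSET subtracts 128. The entry
 * stub below loads REG_EBP with &sh4r + 128, so a signed 8-bit
 * displacement (-128..+127) covers the first 256 bytes of sh4r and the
 * hot registers encode with a one-byte displacement instead of four.
 * The struct layout here is hypothetical, not lxdream's real sh4r. (A
 * guess at the intent of the ^1 in R_FR/R_XF: it pair-swaps the
 * single-precision slots within a bank so each DR double sits in host
 * byte order on little-endian machines.) */
#include <stdio.h>

struct fake_sh4 { unsigned r[16]; unsigned t, q, s, m, sr; };

static void demo_reg_offset(void) {
    struct fake_sh4 sh4r;
    /* same arithmetic as REG_OFFSET(reg) above */
    long r0 = ((char *)&sh4r.r[0]) - ((char *)&sh4r) - 128;  /* -128 */
    long sr = ((char *)&sh4r.sr)   - ((char *)&sh4r) - 128;  /*  -48 */
    printf("disp8 offsets: r[0]=%ld sr=%ld\n", r0, sr);
}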
    70 #define DELAY_NONE 0
    71 #define DELAY_PC 1
    72 #define DELAY_PC_PR 2
    74 #define SH4_MODE_UNKNOWN -1
    76 struct backpatch_record {
    77     uint32_t fixup_offset;
    78     uint32_t fixup_icount;
    79     int32_t exc_code;
    80 };
    82 /** 
    83  * Struct to manage internal translation state. This state is not saved -
    84  * it is only valid between calls to sh4_translate_begin_block() and
    85  * sh4_translate_end_block()
    86  */
    87 struct sh4_x86_state {
    88     int in_delay_slot;
    89     uint8_t *code;
    90     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    91     gboolean branch_taken; /* true if we branched unconditionally */
    92     gboolean double_prec; /* true if FPU is in double-precision mode */
    93     gboolean double_size; /* true if FPU is in double-size mode */
    94     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    95     uint32_t block_start_pc;
    96     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    97     uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    98     int tstate;
   100     /* mode settings */
   101     gboolean tlb_on; /* True if tlb translation is active */
   102     struct mem_region_fn **priv_address_space;
   103     struct mem_region_fn **user_address_space;
   105     /* Instrumentation */
   106     xlat_block_begin_callback_t begin_callback;
   107     xlat_block_end_callback_t end_callback;
   108     gboolean fastmem;
   110     /* Allocated memory for the (block-wide) back-patch list */
   111     struct backpatch_record *backpatch_list;
   112     uint32_t backpatch_posn;
   113     uint32_t backpatch_size;
   114 };
   116 static struct sh4_x86_state sh4_x86;
   118 static uint8_t sh4_entry_stub[128];
   119 typedef FASTCALL void (*entry_point_t)(void *);
   120 entry_point_t sh4_translate_enter;
   122 static uint32_t max_int = 0x7FFFFFFF;
   123 static uint32_t min_int = 0x80000000;
   124 static uint32_t save_fcw; /* save value for fpu control word */
   125 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   127 static void sh4_x86_translate_unlink_block( void *use_list );
   129 static struct xlat_target_fns x86_target_fns = {
   130 	sh4_x86_translate_unlink_block
   131 };	
   134 gboolean is_sse3_supported()
   135 {
   136     uint32_t features;
   138     __asm__ __volatile__(
   139         "mov $0x01, %%eax\n\t"
   140         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   141     return (features & 1) ? TRUE : FALSE;
   142 }
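
/* For reference, the same test via GCC's <cpuid.h> helper (a sketch of
 * an alternative, not what lxdream uses): CPUID leaf 1 reports SSE3 in
 * ECX bit 0, which is the bit the inline asm above returns. */
#include <cpuid.h>

static int sse3_supported_portable(void) {
    unsigned int eax, ebx, ecx, edx;
    if( !__get_cpuid(1, &eax, &ebx, &ecx, &edx) )
        return 0;              /* CPUID leaf 1 unavailable */
    return (ecx & 1) != 0;     /* ECX bit 0 = SSE3 ("PNI") */
}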
   144 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
   145 {
   146     sh4_x86.priv_address_space = priv;
   147     sh4_x86.user_address_space = user;
   148 }
   150 void sh4_translate_write_entry_stub(void)
   151 {
   152 	mem_unprotect(sh4_entry_stub, sizeof(sh4_entry_stub));
   153 	xlat_output = sh4_entry_stub;
   154 	PUSH_r32(REG_EBP);
   155 	MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
   156 	PUSH_r32(REG_EBX);
   157 	PUSH_r32(REG_SAVE1);
   158 	PUSH_r32(REG_SAVE2);
   159 #if SIZEOF_VOID_P == 8
   160     PUSH_r32(REG_SAVE3);
   161     PUSH_r32(REG_SAVE4);
   162     CALL_r32( REG_ARG1 );
   163     POP_r32(REG_SAVE4);
   164     POP_r32(REG_SAVE3);
   165 #else
   166     SUBL_imms_r32( 8, REG_ESP ); 
   167 	CALL_r32( REG_ARG1 );
   168 	ADDL_imms_r32( 8, REG_ESP );
   169 #endif
   170 	POP_r32(REG_SAVE2);	
   171 	POP_r32(REG_SAVE1);
   172 	POP_r32(REG_EBX);
   173 	POP_r32(REG_EBP);
   174 	RET();
   175 	sh4_translate_enter = (entry_point_t)sh4_entry_stub;
   176 }
   178 void sh4_translate_init(void)
   179 {
   180     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   181     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   182     sh4_x86.begin_callback = NULL;
   183     sh4_x86.end_callback = NULL;
   184     sh4_x86.fastmem = TRUE;
   185     sh4_x86.sse3_enabled = is_sse3_supported();
   186     xlat_set_target_fns(&x86_target_fns);
   187     sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
   188     sh4_translate_write_entry_stub();
   189 }
   191 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
   192 {
   193     sh4_x86.begin_callback = begin;
   194     sh4_x86.end_callback = end;
   195 }
   197 void sh4_translate_set_fastmem( gboolean flag )
   198 {
   199     sh4_x86.fastmem = flag;
   200 }
   202 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   203 {
   204     int reloc_size = 4;
   206     if( exc_code == -2 ) {
   207         reloc_size = sizeof(void *);
   208     }
   210     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   211 	sh4_x86.backpatch_size <<= 1;
   212 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   213 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   214 	assert( sh4_x86.backpatch_list != NULL );
   215     }
   216     if( sh4_x86.in_delay_slot ) {
   217 	fixup_pc -= 2;
   218     }
   220     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   221 	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
   222     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   223     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   224     sh4_x86.backpatch_posn++;
   225 }
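
/* A standalone model of the bookkeeping above (names are mine; the
 * record layout mirrors struct backpatch_record). A branch to an
 * exception handler is emitted with a placeholder displacement; this
 * records where that 32-bit field sits relative to the block start
 * (pointer-sized for the exc_code == -2 case) plus the instruction
 * index, so the epilogue written later can aim the branch and rewind
 * PC/cycles correctly. */
#include <stdint.h>

struct fixup { uint32_t offset_in_block; uint32_t icount; int32_t exc_code; };

static uint32_t record_fixup(struct fixup *list, uint32_t posn,
                             uint8_t *field_end, uint8_t *block_start,
                             uint32_t pc, uint32_t start_pc, int32_t exc)
{
    list[posn].offset_in_block = (uint32_t)(field_end - block_start) - 4;
    list[posn].icount   = (pc - start_pc) >> 1;   /* 2 bytes per SH4 insn */
    list[posn].exc_code = exc;
    return posn + 1;
}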
   227 #define TSTATE_NONE -1
   228 #define TSTATE_O    X86_COND_O
   229 #define TSTATE_C    X86_COND_C
   230 #define TSTATE_E    X86_COND_E
   231 #define TSTATE_NE   X86_COND_NE
   232 #define TSTATE_G    X86_COND_G
   233 #define TSTATE_GE   X86_COND_GE
   234 #define TSTATE_A    X86_COND_A
   235 #define TSTATE_AE   X86_COND_AE
   237 #define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
   238 #define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
   240 /* Convenience instructions */
   241 #define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
   242 #define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
   243 #define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
   244 #define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
   245 #define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
   246 #define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
   247 #define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
   248 #define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
   249 #define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
   250 #define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
   251 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
   252 #define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
   253 #define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
   254 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
   255 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
   256 #define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
   257 #define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
   258 #define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
   259 #define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
   260 #define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
   261 #define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
   263 #define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
   264 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }     
   266 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
   267 #define JT_label(label) LOAD_t() \
   268     JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
   270 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
   271 #define JF_label(label) LOAD_t() \
   272     JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
   275 #define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
   276 #define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
   278 /**
   279  * Load an FR register (single-precision floating point) into an integer x86
   280  * register (eg for register-to-register moves)
   281  */
   282 #define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
   283 #define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
   285 /**
   286  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   287  */
   288 #define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
   289 #define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
   291 /**
    292  * Store an FR register (single-precision floating point) from an integer x86
   293  * register (eg for register-to-register moves)
   294  */
   295 #define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
   296 #define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
   298 #define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   299 #define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   302 #define push_fpul()  FLDF_rbpdisp(R_FPUL)
   303 #define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
   304 #define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   305 #define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   306 #define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   307 #define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   308 #define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   309 #define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   310 #define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   311 #define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   313 #ifdef ENABLE_SH4STATS
   314 #define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
   315 #else
   316 #define COUNT_INST(id)
   317 #endif
   320 /* Exception checks - Note that all exception checks will clobber EAX */
   322 #define check_priv( ) \
   323     if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
   324         if( sh4_x86.in_delay_slot ) { \
   325             exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
   326         } else { \
   327             exit_block_exc(EXC_ILLEGAL, pc, 2); \
   328         } \
   329         sh4_x86.branch_taken = TRUE; \
   330         sh4_x86.in_delay_slot = DELAY_NONE; \
   331         return 2; \
   332     }
   334 #define check_fpuen( ) \
   335     if( !sh4_x86.fpuen_checked ) {\
   336 	sh4_x86.fpuen_checked = TRUE;\
   337 	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
   338 	ANDL_imms_r32( SR_FD, REG_EAX );\
   339 	if( sh4_x86.in_delay_slot ) {\
   340 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   341 	} else {\
   342 	    JNE_exc(EXC_FPU_DISABLED);\
   343 	}\
   344 	sh4_x86.tstate = TSTATE_NONE; \
   345     }
   347 #define check_ralign16( x86reg ) \
   348     TESTL_imms_r32( 0x00000001, x86reg ); \
   349     JNE_exc(EXC_DATA_ADDR_READ)
   351 #define check_walign16( x86reg ) \
   352     TESTL_imms_r32( 0x00000001, x86reg ); \
   353     JNE_exc(EXC_DATA_ADDR_WRITE);
   355 #define check_ralign32( x86reg ) \
   356     TESTL_imms_r32( 0x00000003, x86reg ); \
   357     JNE_exc(EXC_DATA_ADDR_READ)
   359 #define check_walign32( x86reg ) \
   360     TESTL_imms_r32( 0x00000003, x86reg ); \
   361     JNE_exc(EXC_DATA_ADDR_WRITE);
   363 #define check_ralign64( x86reg ) \
   364     TESTL_imms_r32( 0x00000007, x86reg ); \
   365     JNE_exc(EXC_DATA_ADDR_READ)
   367 #define check_walign64( x86reg ) \
   368     TESTL_imms_r32( 0x00000007, x86reg ); \
   369     JNE_exc(EXC_DATA_ADDR_WRITE);
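
/* The alignment rules above in plain C (a sketch): an N-byte access
 * needs the low log2(N) address bits clear, i.e. addr & (N-1) == 0;
 * otherwise the emitted JNE_exc raises a data address error. */
#include <assert.h>
#include <stdint.h>

static int is_aligned(uint32_t addr, unsigned size) {
    return (addr & (size - 1)) == 0;       /* size = 2, 4 or 8 */
}

static void demo_alignment(void) {
    assert(  is_aligned(0x8C001004, 4) );
    assert( !is_aligned(0x8C001002, 4) );  /* -> EXC_DATA_ADDR_READ/WRITE */
    assert(  is_aligned(0x8C001002, 2) );
}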
   371 #define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)
   373 #define UNDEF(ir)
   374 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
   375  * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
   376  */
   377 #ifdef HAVE_FRAME_ADDRESS
   378 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   379 {
   380     decode_address(address_space(), addr_reg, REG_CALLPTR);
   381     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   382         CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
   383     } else {
   384         if( addr_reg != REG_ARG1 ) {
   385             MOVL_r32_r32( addr_reg, REG_ARG1 );
   386         }
   387         MOVP_immptr_rptr( 0, REG_ARG2 );
   388         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   389         CALL2_r32disp_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2);
   390     }
   391     if( value_reg != REG_RESULT1 ) { 
   392         MOVL_r32_r32( REG_RESULT1, value_reg );
   393     }
   394 }
   396 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   397 {
   398     decode_address(address_space(), addr_reg, REG_CALLPTR);
   399     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   400         CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
   401     } else {
   402         if( value_reg != REG_ARG2 ) {
   403             MOVL_r32_r32( value_reg, REG_ARG2 );
   404 	}        
   405         if( addr_reg != REG_ARG1 ) {
   406             MOVL_r32_r32( addr_reg, REG_ARG1 );
   407         }
   408 #if MAX_REG_ARG > 2        
   409         MOVP_immptr_rptr( 0, REG_ARG3 );
   410         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   411         CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, REG_ARG3);
   412 #else
   413         MOVL_imm32_rspdisp( 0, 0 );
   414         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   415         CALL3_r32disp_r32_r32_r32(REG_CALLPTR, offset, REG_ARG1, REG_ARG2, 0);
   416 #endif
   417     }
   418 }
   419 #else
   420 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   421 {
   422     decode_address(address_space(), addr_reg, REG_CALLPTR);
   423     CALL1_r32disp_r32(REG_CALLPTR, offset, addr_reg);
   424     if( value_reg != REG_RESULT1 ) {
   425         MOVL_r32_r32( REG_RESULT1, value_reg );
   426     }
   427 }     
   429 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   430 {
   431     decode_address(address_space(), addr_reg, REG_CALLPTR);
   432     CALL2_r32disp_r32_r32(REG_CALLPTR, offset, addr_reg, value_reg);
   433 }
   434 #endif
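
/* The shape of the dispatch these helpers call into (a sketch; the
 * table granularity and names here are assumptions -- only the
 * mem_region_fn idea comes from the code above). decode_address()
 * resolves an address to a struct of accessor pointers, and the
 * CALL*_r32disp_* macros then invoke one member, e.g. read_long, by its
 * offset in that struct. */
#include <stdint.h>

struct mem_region_fn_model {
    int32_t (*read_long)(uint32_t addr);
    void    (*write_long)(uint32_t addr, uint32_t val);
    /* ... read_word/write_word, read_byte/write_byte, prefetch ... */
};

static struct mem_region_fn_model *space_model[0x10000]; /* illustrative: 64KB pages */

static int32_t model_read_long(uint32_t addr) {
    struct mem_region_fn_model *fn = space_model[addr >> 16]; /* decode_address */
    return fn->read_long(addr);                               /* CALL1_r32disp_r32 */
}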
   436 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   437 #define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
   438 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
   439 #define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
   440 #define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
   441 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
   442 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
   443 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
   444 #define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
   446 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   448 /** Offset of xlat_sh4_mode field relative to the code pointer */ 
   449 #define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
   450 #define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
   451 #define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
   453 void sh4_translate_begin_block( sh4addr_t pc ) 
   454 {
   455 	sh4_x86.code = xlat_output;
   456     sh4_x86.in_delay_slot = FALSE;
   457     sh4_x86.fpuen_checked = FALSE;
   458     sh4_x86.branch_taken = FALSE;
   459     sh4_x86.backpatch_posn = 0;
   460     sh4_x86.block_start_pc = pc;
   461     sh4_x86.tlb_on = IS_TLB_ENABLED();
   462     sh4_x86.tstate = TSTATE_NONE;
   463     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   464     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   465     sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
   466     if( sh4_x86.begin_callback ) {
   467         CALL_ptr( sh4_x86.begin_callback );
   468     }
   469     if( sh4_profile_blocks ) {
   470     	MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
   471     	ADDL_imms_r32disp( 1, REG_EAX, 0 );
   472     }  
   473 }
   476 uint32_t sh4_translate_end_block_size()
   477 {
   478 	uint32_t epilogue_size = EPILOGUE_SIZE;
   479 	if( sh4_x86.end_callback ) {
   480 	    epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
   481 	}
   482     if( sh4_x86.backpatch_posn <= 3 ) {
   483         epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
   484     } else {
   485         epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
   486     }
   487     return epilogue_size;
   488 }
   491 /**
   492  * Embed a breakpoint into the generated code
   493  */
   494 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   495 {
   496     MOVL_imm32_r32( pc, REG_EAX );
   497     CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
   498     sh4_x86.tstate = TSTATE_NONE;
   499 }
   502 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   504 /**
   505  * Test if the loaded target code pointer in %eax is valid, and if so jump
   506  * directly into it, bypassing the normal exit.
   507  */
   508 static void jump_next_block()
   509 {
   510 	uint8_t *ptr = xlat_output;
   511 	TESTP_rptr_rptr(REG_EAX, REG_EAX);
   512 	JE_label(nocode);
   513 	if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
   514 	    /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
   515 	    MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
   516 	    CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   517 	} else {
   518 	    CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   519 	}
   520 	JNE_label(wrongmode);
   521 	if( sh4_x86.end_callback ) {
   522 	    PUSH_r32(REG_EAX);
   523 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   524 	    JMP_rptr(REG_ECX);
   525 	} else {
   526 	    JMP_rptr(REG_EAX);
   527 	}
   528 	JMP_TARGET(wrongmode);
   529 	MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
   530 	int rel = ptr - xlat_output;
   531     JMP_prerel(rel);
   532 	JMP_TARGET(nocode); 
   533 }
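
/* The chain walk that jump_next_block() emits, in plain C (a sketch;
 * field names follow the XLAT_*_CODE_OFFSET macros above). Blocks
 * translated for the same PC under different xlat_sh4_mode values are
 * linked through their chain pointers; dispatch follows the chain until
 * a block matches the current mode. */
struct xlat_block_model { int xlat_sh4_mode; struct xlat_block_model *chain; };

static struct xlat_block_model *
find_block_for_mode(struct xlat_block_model *b, int mode)
{
    while( b != NULL && b->xlat_sh4_mode != mode )
        b = b->chain;           /* "wrongmode" path */
    return b;                   /* NULL: no block yet ("nocode" path) */
}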
    535 /**
    536  * Find or translate the block for pc, then patch the call site into a direct jump to it.
    537  */
   538 void FASTCALL sh4_translate_link_block( uint32_t pc )
   539 {
   540     uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
   541     while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
   542         target = XLAT_BLOCK_CHAIN(target);
   543 	}
   544     if( target == NULL ) {
   545         target = sh4_translate_basic_block( pc );
   546     }
   547     uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
   548     *backpatch = 0xE9;
   549     *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)-5;
   550     *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
   551     XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch; 
   553     uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
   554     assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
   555 	*retptr = backpatch;
   556 }
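
/* The patch applied above, isolated (a sketch; the buffer is plain data
 * here, not executable code). The 5-byte CALL that reached
 * sh4_translate_link_block is overwritten with an E9 rel32 JMP; rel32
 * is measured from the end of the JMP, hence the -5. */
#include <stdint.h>
#include <string.h>

static void patch_call_to_jmp(uint8_t *site, uint8_t *target) {
    int32_t rel = (int32_t)(target - (site + 5));  /* from next instruction */
    site[0] = 0xE9;                                /* JMP rel32 opcode */
    memcpy(site + 1, &rel, 4);
}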
   558 static void emit_translate_and_backpatch()
   559 {
   560     /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
   561     CALL1_ptr_r32(sh4_translate_link_block, REG_ARG1);
   563     /* When patched, the jmp instruction will be 5 bytes (either platform) -
   564      * we need to reserve sizeof(void*) bytes for the use-list
   565 	 * pointer
   566 	 */ 
   567     if( sizeof(void*) == 8 ) {
   568         NOP();
   569     } else {
   570         NOP2();
   571     }
   572 }
   574 /**
    575  * If we're jumping to a fixed address (or at least fixed relative to the
    576  * current PC), we can do a direct branch. REG_ARG1 should contain
   577  * the PC at this point.
   578  */
   579 static void jump_next_block_fixed_pc( sh4addr_t pc )
   580 {
   581 	if( IS_IN_ICACHE(pc) ) {
   582 	    if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
   583 	        /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
   584 	         * fetch-and-backpatch routine, which will replace the call with a branch */
   585            emit_translate_and_backpatch();	         
   586            return;
   587 		} else {
   588             MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
   589             ANDP_imms_rptr( -4, REG_EAX );
   590         }
   591 	} else if( sh4_x86.tlb_on ) {
   592         CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
   593     } else {
   594         CALL1_ptr_r32(xlat_get_code, REG_ARG1);
   595     }
   596     jump_next_block();
   599 }
   601 static void sh4_x86_translate_unlink_block( void *use_list )
   602 {
   603 	uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
   604 	void *next = use_list;
   605 	while( next != NULL ) {
   606     	xlat_output = (uint8_t *)next;
   607  	    next = *(void **)(xlat_output+5);
   608  		emit_translate_and_backpatch();
   609  	}
   610  	xlat_output = tmp;
   611 }
   615 static void exit_block()
   616 {
   617 	if( sh4_x86.end_callback ) {
   618 	    CALL_ptr(sh4_x86.end_callback);
   619 	}
   620 	RET();
   621 }
   623 /**
   624  * Exit the block with sh4r.pc already written
   625  */
   626 void exit_block_pcset( sh4addr_t pc )
   627 {
   628     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   629     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   630     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   631     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   632     JBE_label(exitloop);
   633     MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
   634     if( sh4_x86.tlb_on ) {
   635         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   636     } else {
   637         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   638     }
   640     jump_next_block();
   641     JMP_TARGET(exitloop);
   642     exit_block();
   643 }
   645 /**
   646  * Exit the block with sh4r.new_pc written with the target pc
   647  */
   648 void exit_block_newpcset( sh4addr_t pc )
   649 {
   650     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   651     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   652     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   653     MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
   654     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   655     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   656     JBE_label(exitloop);
   657     if( sh4_x86.tlb_on ) {
   658         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   659     } else {
   660         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   661     }
   663 	jump_next_block();
   664     JMP_TARGET(exitloop);
   665     exit_block();
   666 }
   669 /**
   670  * Exit the block to an absolute PC
   671  */
   672 void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
   673 {
   674     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   675     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   676     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   678     MOVL_imm32_r32( pc, REG_ARG1 );
   679     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   680     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   681     JBE_label(exitloop);
   682     jump_next_block_fixed_pc(pc);    
   683     JMP_TARGET(exitloop);
   684     exit_block();
   685 }
   687 /**
   688  * Exit the block to a relative PC
   689  */
   690 void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
   691 {
   692     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   693     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   694     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   696 	if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
   697 	    /* Special case for tight loops - the PC doesn't change, and
   698 	     * we already know the target address. Just check events pending before
   699 	     * looping.
   700 	     */
   701         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   702         uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output));
   703         JCC_cc_prerel(X86_COND_A, backdisp);
   704 	} else {
   705         MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
   706         ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
   707         MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   708         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   709         JBE_label(exitloop2);
   711         jump_next_block_fixed_pc(pc);
   712         JMP_TARGET(exitloop2);
   713     }
   714     exit_block();
   715 }
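
/* The cycle accounting shared by the exit_block_* routines, as plain C
 * (a sketch; the period value below is illustrative). SH4 instructions
 * are 2 bytes each, so elapsed cycles are the byte distance from the
 * block start, halved, times sh4_cpu_period. */
#include <stdint.h>

static uint32_t cycles_for_block(uint32_t endpc, uint32_t block_start_pc,
                                 uint32_t period)
{
    return ((endpc - block_start_pc) >> 1) * period;
}
/* e.g. 8 instructions (16 bytes) at period 5 -> slice_cycle += 40 */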
   717 /**
   718  * Exit unconditionally with a general exception
   719  */
   720 void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
   721 {
   722     MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
   723     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   724     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
   725     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   726     MOVL_imm32_r32( code, REG_ARG1 );
   727     CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   728     exit_block();
   729 }    
   731 /**
   732  * Embed a call to sh4_execute_instruction for situations that we
   733  * can't translate (just page-crossing delay slots at the moment).
   734  * Caller is responsible for setting new_pc before calling this function.
   735  *
   736  * Performs:
   737  *   Set PC = endpc
   738  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   739  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   740  *   Call sh4_execute_instruction
   741  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   742  */
   743 void exit_block_emu( sh4vma_t endpc )
   744 {
   745     MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
   746     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   748     MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
   749     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
   750     MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
   751     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
   753     CALL_ptr( sh4_execute_instruction );
   754     exit_block();
   755 } 
   757 /**
   758  * Write the block trailer (exception handling block)
   759  */
   760 void sh4_translate_end_block( sh4addr_t pc ) {
   761     if( sh4_x86.branch_taken == FALSE ) {
   762         // Didn't exit unconditionally already, so write the termination here
   763         exit_block_rel( pc, pc );
   764     }
   765     if( sh4_x86.backpatch_posn != 0 ) {
   766         unsigned int i;
   767         // Exception raised - cleanup and exit
   768         uint8_t *end_ptr = xlat_output;
   769         MOVL_r32_r32( REG_EDX, REG_ECX );
   770         ADDL_r32_r32( REG_EDX, REG_ECX );
   771         ADDL_r32_rbpdisp( REG_ECX, R_SPC );
   772         MOVL_moffptr_eax( &sh4_cpu_period );
   773         INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */ 
   774         MULL_r32( REG_EDX );
   775         ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
   776         exit_block();
   778         for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
   779             uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
   780             if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
   781                 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
   782                     *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
   783                 } else {
   784                     *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   785                 }
   786                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   787                 int rel = end_ptr - xlat_output;
   788                 JMP_prerel(rel);
   789             } else {
   790                 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   791                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
   792                 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   793                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   794                 int rel = end_ptr - xlat_output;
   795                 JMP_prerel(rel);
   796             }
   797         }
   798     }
   799 }
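
/* What the shared exception epilogue above computes, in C (a sketch).
 * Each backpatched branch arrives with the failing instruction's index
 * in EDX: SPC advances by icount*2 bytes, and slice_cycle by
 * (icount+1) periods -- the INC charges the aborting instruction too. */
#include <stdint.h>

static void exception_epilogue_model(uint32_t icount, uint32_t period,
                                     uint32_t *spc, uint32_t *slice_cycle)
{
    *spc         += icount * 2;
    *slice_cycle += (icount + 1) * period;
}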
   801 /**
   802  * Translate a single instruction. Delayed branches are handled specially
    803  * by translating both branch and delayed instruction as a single unit
    804  * (as the delay slot must execute before the branch takes effect).
   805  * The instruction MUST be in the icache (assert check)
   806  *
   807  * @return true if the instruction marks the end of a basic block
    808  * (eg a branch or other block-terminating instruction).
   809  */
   810 uint32_t sh4_translate_instruction( sh4vma_t pc )
   811 {
   812     uint32_t ir;
   813     /* Read instruction from icache */
   814     assert( IS_IN_ICACHE(pc) );
   815     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   817     if( !sh4_x86.in_delay_slot ) {
   818 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   819     }
   821     /* check for breakpoints at this pc */
   822     for( int i=0; i<sh4_breakpoint_count; i++ ) {
   823         if( sh4_breakpoints[i].address == pc ) {
   824             sh4_translate_emit_breakpoint(pc);
   825             break;
   826         }
   827     }
   828 %%
   829 /* ALU operations */
   830 ADD Rm, Rn {:
   831     COUNT_INST(I_ADD);
   832     load_reg( REG_EAX, Rm );
   833     load_reg( REG_ECX, Rn );
   834     ADDL_r32_r32( REG_EAX, REG_ECX );
   835     store_reg( REG_ECX, Rn );
   836     sh4_x86.tstate = TSTATE_NONE;
   837 :}
   838 ADD #imm, Rn {:  
   839     COUNT_INST(I_ADDI);
   840     ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
   841     sh4_x86.tstate = TSTATE_NONE;
   842 :}
   843 ADDC Rm, Rn {:
   844     COUNT_INST(I_ADDC);
   845     if( sh4_x86.tstate != TSTATE_C ) {
   846         LDC_t();
   847     }
   848     load_reg( REG_EAX, Rm );
   849     load_reg( REG_ECX, Rn );
   850     ADCL_r32_r32( REG_EAX, REG_ECX );
   851     store_reg( REG_ECX, Rn );
   852     SETC_t();
   853     sh4_x86.tstate = TSTATE_C;
   854 :}
   855 ADDV Rm, Rn {:
   856     COUNT_INST(I_ADDV);
   857     load_reg( REG_EAX, Rm );
   858     load_reg( REG_ECX, Rn );
   859     ADDL_r32_r32( REG_EAX, REG_ECX );
   860     store_reg( REG_ECX, Rn );
   861     SETO_t();
   862     sh4_x86.tstate = TSTATE_O;
   863 :}
   864 AND Rm, Rn {:
   865     COUNT_INST(I_AND);
   866     load_reg( REG_EAX, Rm );
   867     load_reg( REG_ECX, Rn );
   868     ANDL_r32_r32( REG_EAX, REG_ECX );
   869     store_reg( REG_ECX, Rn );
   870     sh4_x86.tstate = TSTATE_NONE;
   871 :}
   872 AND #imm, R0 {:  
   873     COUNT_INST(I_ANDI);
   874     load_reg( REG_EAX, 0 );
   875     ANDL_imms_r32(imm, REG_EAX); 
   876     store_reg( REG_EAX, 0 );
   877     sh4_x86.tstate = TSTATE_NONE;
   878 :}
   879 AND.B #imm, @(R0, GBR) {: 
   880     COUNT_INST(I_ANDB);
   881     load_reg( REG_EAX, 0 );
   882     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   883     MOVL_r32_r32(REG_EAX, REG_SAVE1);
   884     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   885     MOVL_r32_r32(REG_SAVE1, REG_EAX);
   886     ANDL_imms_r32(imm, REG_EDX );
   887     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   888     sh4_x86.tstate = TSTATE_NONE;
   889 :}
   890 CMP/EQ Rm, Rn {:  
   891     COUNT_INST(I_CMPEQ);
   892     load_reg( REG_EAX, Rm );
   893     load_reg( REG_ECX, Rn );
   894     CMPL_r32_r32( REG_EAX, REG_ECX );
   895     SETE_t();
   896     sh4_x86.tstate = TSTATE_E;
   897 :}
   898 CMP/EQ #imm, R0 {:  
   899     COUNT_INST(I_CMPEQI);
   900     load_reg( REG_EAX, 0 );
   901     CMPL_imms_r32(imm, REG_EAX);
   902     SETE_t();
   903     sh4_x86.tstate = TSTATE_E;
   904 :}
   905 CMP/GE Rm, Rn {:  
   906     COUNT_INST(I_CMPGE);
   907     load_reg( REG_EAX, Rm );
   908     load_reg( REG_ECX, Rn );
   909     CMPL_r32_r32( REG_EAX, REG_ECX );
   910     SETGE_t();
   911     sh4_x86.tstate = TSTATE_GE;
   912 :}
   913 CMP/GT Rm, Rn {: 
   914     COUNT_INST(I_CMPGT);
   915     load_reg( REG_EAX, Rm );
   916     load_reg( REG_ECX, Rn );
   917     CMPL_r32_r32( REG_EAX, REG_ECX );
   918     SETG_t();
   919     sh4_x86.tstate = TSTATE_G;
   920 :}
   921 CMP/HI Rm, Rn {:  
   922     COUNT_INST(I_CMPHI);
   923     load_reg( REG_EAX, Rm );
   924     load_reg( REG_ECX, Rn );
   925     CMPL_r32_r32( REG_EAX, REG_ECX );
   926     SETA_t();
   927     sh4_x86.tstate = TSTATE_A;
   928 :}
   929 CMP/HS Rm, Rn {: 
   930     COUNT_INST(I_CMPHS);
   931     load_reg( REG_EAX, Rm );
   932     load_reg( REG_ECX, Rn );
   933     CMPL_r32_r32( REG_EAX, REG_ECX );
   934     SETAE_t();
   935     sh4_x86.tstate = TSTATE_AE;
   936  :}
   937 CMP/PL Rn {: 
   938     COUNT_INST(I_CMPPL);
   939     load_reg( REG_EAX, Rn );
   940     CMPL_imms_r32( 0, REG_EAX );
   941     SETG_t();
   942     sh4_x86.tstate = TSTATE_G;
   943 :}
   944 CMP/PZ Rn {:  
   945     COUNT_INST(I_CMPPZ);
   946     load_reg( REG_EAX, Rn );
   947     CMPL_imms_r32( 0, REG_EAX );
   948     SETGE_t();
   949     sh4_x86.tstate = TSTATE_GE;
   950 :}
   951 CMP/STR Rm, Rn {:  
   952     COUNT_INST(I_CMPSTR);
   953     load_reg( REG_EAX, Rm );
   954     load_reg( REG_ECX, Rn );
   955     XORL_r32_r32( REG_ECX, REG_EAX );
   956     TESTB_r8_r8( REG_AL, REG_AL );
   957     JE_label(target1);
   958     TESTB_r8_r8( REG_AH, REG_AH );
   959     JE_label(target2);
   960     SHRL_imm_r32( 16, REG_EAX );
   961     TESTB_r8_r8( REG_AL, REG_AL );
   962     JE_label(target3);
   963     TESTB_r8_r8( REG_AH, REG_AH );
   964     JMP_TARGET(target1);
   965     JMP_TARGET(target2);
   966     JMP_TARGET(target3);
   967     SETE_t();
   968     sh4_x86.tstate = TSTATE_E;
   969 :}
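
/* CMP/STR in plain C (a sketch): T is set when any corresponding byte
 * of Rm and Rn is equal. XOR turns an equal byte into 0x00, which the
 * TESTB/JE cascade above then hunts for. */
#include <stdint.h>

static int cmp_str_model(uint32_t rm, uint32_t rn) {
    uint32_t x = rm ^ rn;               /* equal bytes -> 0x00 */
    for( int i = 0; i < 32; i += 8 )
        if( ((x >> i) & 0xFF) == 0 )
            return 1;                   /* T = 1 */
    return 0;                           /* T = 0 */
}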
   970 DIV0S Rm, Rn {:
   971     COUNT_INST(I_DIV0S);
   972     load_reg( REG_EAX, Rm );
   973     load_reg( REG_ECX, Rn );
   974     SHRL_imm_r32( 31, REG_EAX );
   975     SHRL_imm_r32( 31, REG_ECX );
   976     MOVL_r32_rbpdisp( REG_EAX, R_M );
   977     MOVL_r32_rbpdisp( REG_ECX, R_Q );
   978     CMPL_r32_r32( REG_EAX, REG_ECX );
   979     SETNE_t();
   980     sh4_x86.tstate = TSTATE_NE;
   981 :}
   982 DIV0U {:  
   983     COUNT_INST(I_DIV0U);
   984     XORL_r32_r32( REG_EAX, REG_EAX );
   985     MOVL_r32_rbpdisp( REG_EAX, R_Q );
   986     MOVL_r32_rbpdisp( REG_EAX, R_M );
   987     MOVL_r32_rbpdisp( REG_EAX, R_T );
   988     sh4_x86.tstate = TSTATE_C; // works for DIV1
   989 :}
   990 DIV1 Rm, Rn {:
   991     COUNT_INST(I_DIV1);
   992     MOVL_rbpdisp_r32( R_M, REG_ECX );
   993     load_reg( REG_EAX, Rn );
   994     if( sh4_x86.tstate != TSTATE_C ) {
   995 	LDC_t();
   996     }
   997     RCLL_imm_r32( 1, REG_EAX );
   998     SETC_r8( REG_DL ); // Q'
   999     CMPL_rbpdisp_r32( R_Q, REG_ECX );
  1000     JE_label(mqequal);
  1001     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1002     JMP_label(end);
  1003     JMP_TARGET(mqequal);
  1004     SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1005     JMP_TARGET(end);
  1006     store_reg( REG_EAX, Rn ); // Done with Rn now
  1007     SETC_r8(REG_AL); // tmp1
  1008     XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
  1009     XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
  1010     MOVL_r32_rbpdisp( REG_ECX, R_Q );
  1011     XORL_imms_r32( 1, REG_AL );   // T = !Q'
  1012     MOVZXL_r8_r32( REG_AL, REG_EAX );
  1013     MOVL_r32_rbpdisp( REG_EAX, R_T );
  1014     sh4_x86.tstate = TSTATE_NONE;
  1015 :}
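
/* One DIV1 step in plain C (a sketch following the SH-4 manual's
 * pseudocode, with its four sign cases folded into one XOR; Q, M and T
 * stand for the sh4r fields of the same names). The generated code
 * above computes the same result with RCL/SETC and byte XORs. */
#include <stdint.h>

static unsigned Q, M, T;

static uint32_t div1_model(uint32_t rm, uint32_t rn)
{
    unsigned old_q = Q;
    Q = rn >> 31;                 /* bit shifted out of the dividend */
    rn = (rn << 1) | T;           /* rotate T in */
    uint32_t before = rn;
    unsigned flag;                /* borrow (sub) or carry (add) */
    if( old_q == M ) { rn -= rm; flag = (rn > before); }
    else             { rn += rm; flag = (rn < before); }
    Q = Q ^ flag ^ M;
    T = (Q == M);
    return rn;                    /* new Rn */
}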
  1016 DMULS.L Rm, Rn {:  
  1017     COUNT_INST(I_DMULS);
  1018     load_reg( REG_EAX, Rm );
  1019     load_reg( REG_ECX, Rn );
  1020     IMULL_r32(REG_ECX);
  1021     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1022     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1023     sh4_x86.tstate = TSTATE_NONE;
  1024 :}
  1025 DMULU.L Rm, Rn {:  
  1026     COUNT_INST(I_DMULU);
  1027     load_reg( REG_EAX, Rm );
  1028     load_reg( REG_ECX, Rn );
  1029     MULL_r32(REG_ECX);
  1030     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
  1031     MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
  1032     sh4_x86.tstate = TSTATE_NONE;
  1033 :}
  1034 DT Rn {:  
  1035     COUNT_INST(I_DT);
  1036     load_reg( REG_EAX, Rn );
  1037     ADDL_imms_r32( -1, REG_EAX );
  1038     store_reg( REG_EAX, Rn );
  1039     SETE_t();
  1040     sh4_x86.tstate = TSTATE_E;
  1041 :}
  1042 EXTS.B Rm, Rn {:  
  1043     COUNT_INST(I_EXTSB);
  1044     load_reg( REG_EAX, Rm );
  1045     MOVSXL_r8_r32( REG_EAX, REG_EAX );
  1046     store_reg( REG_EAX, Rn );
  1047 :}
  1048 EXTS.W Rm, Rn {:  
  1049     COUNT_INST(I_EXTSW);
  1050     load_reg( REG_EAX, Rm );
  1051     MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1052     store_reg( REG_EAX, Rn );
  1053 :}
  1054 EXTU.B Rm, Rn {:  
  1055     COUNT_INST(I_EXTUB);
  1056     load_reg( REG_EAX, Rm );
  1057     MOVZXL_r8_r32( REG_EAX, REG_EAX );
  1058     store_reg( REG_EAX, Rn );
  1059 :}
  1060 EXTU.W Rm, Rn {:  
  1061     COUNT_INST(I_EXTUW);
  1062     load_reg( REG_EAX, Rm );
  1063     MOVZXL_r16_r32( REG_EAX, REG_EAX );
  1064     store_reg( REG_EAX, Rn );
  1065 :}
  1066 MAC.L @Rm+, @Rn+ {:
  1067     COUNT_INST(I_MACL);
  1068     if( Rm == Rn ) {
  1069 	load_reg( REG_EAX, Rm );
  1070 	check_ralign32( REG_EAX );
  1071 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1072 	MOVL_r32_r32(REG_EAX, REG_SAVE1);
  1073 	load_reg( REG_EAX, Rm );
  1074 	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1075 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1076         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
  1077     } else {
  1078 	load_reg( REG_EAX, Rm );
  1079 	check_ralign32( REG_EAX );
  1080 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1081 	MOVL_r32_r32(REG_EAX, REG_SAVE1);
  1082 	load_reg( REG_EAX, Rn );
  1083 	check_ralign32( REG_EAX );
  1084 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1085 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   1086 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   1087     }
  1089     IMULL_r32( REG_SAVE1 );
  1090     ADDL_r32_rbpdisp( REG_EAX, R_MACL );
  1091     ADCL_r32_rbpdisp( REG_EDX, R_MACH );
  1093     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1094     TESTL_r32_r32(REG_ECX, REG_ECX);
  1095     JE_label( nosat );
  1096     CALL_ptr( signsat48 );
  1097     JMP_TARGET( nosat );
  1098     sh4_x86.tstate = TSTATE_NONE;
  1099 :}
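
/* What the signsat48 call amounts to (a sketch; the helper itself is
 * defined elsewhere in lxdream): with S set, MAC.L keeps the MACH:MACL
 * accumulator within 48 signed bits, clamping to +/- 2^47. */
#include <stdint.h>

static int64_t signsat48_model(int64_t acc)
{
    const int64_t max48 = ((int64_t)1 << 47) - 1;  /* 0x00007FFFFFFFFFFF */
    const int64_t min48 = -((int64_t)1 << 47);     /* 0xFFFF800000000000 */
    return acc > max48 ? max48 : acc < min48 ? min48 : acc;
}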
  1100 MAC.W @Rm+, @Rn+ {:  
  1101     COUNT_INST(I_MACW);
  1102     if( Rm == Rn ) {
  1103 	load_reg( REG_EAX, Rm );
  1104 	check_ralign16( REG_EAX );
  1105 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1106         MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1107 	load_reg( REG_EAX, Rm );
  1108 	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
  1109 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1110 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
  1111 	// Note translate twice in case of page boundaries. Maybe worth
  1112 	// adding a page-boundary check to skip the second translation
  1113     } else {
  1114 	load_reg( REG_EAX, Rn );
  1115 	check_ralign16( REG_EAX );
  1116 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1117         MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1118 	load_reg( REG_EAX, Rm );
  1119 	check_ralign16( REG_EAX );
  1120 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1121 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
   1122 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   1123     }
  1124     IMULL_r32( REG_SAVE1 );
  1125     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1126     TESTL_r32_r32( REG_ECX, REG_ECX );
  1127     JE_label( nosat );
  1129     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1130     JNO_label( end );            // 2
  1131     MOVL_imm32_r32( 1, REG_EDX );         // 5
  1132     MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
  1133     JS_label( positive );        // 2
  1134     MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
  1135     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1136     JMP_label(end2);           // 2
  1138     JMP_TARGET(positive);
  1139     MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
  1140     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1141     JMP_label(end3);            // 2
  1143     JMP_TARGET(nosat);
  1144     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1145     ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
  1146     JMP_TARGET(end);
  1147     JMP_TARGET(end2);
  1148     JMP_TARGET(end3);
  1149     sh4_x86.tstate = TSTATE_NONE;
  1150 :}
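
/* The saturating MAC.W path in plain C (a sketch): with S set, the
 * 16x16 multiply-accumulate clamps MACL to 32 bits and writes 1 to MACH
 * to flag the overflow -- the JNO/JS ladder above. With S clear the
 * "nosat" path does a full 64-bit accumulate instead. */
#include <stdint.h>

static void mac_w_sat_model(int16_t a, int16_t b,
                            uint32_t *mach, uint32_t *macl)
{
    int64_t sum = (int64_t)(int32_t)*macl + (int32_t)a * (int32_t)b;
    if( sum > INT32_MAX )      { *macl = 0x7FFFFFFFu; *mach = 1; }
    else if( sum < INT32_MIN ) { *macl = 0x80000000u; *mach = 1; }
    else                       { *macl = (uint32_t)sum; }
}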
  1151 MOVT Rn {:  
  1152     COUNT_INST(I_MOVT);
  1153     MOVL_rbpdisp_r32( R_T, REG_EAX );
  1154     store_reg( REG_EAX, Rn );
  1155 :}
  1156 MUL.L Rm, Rn {:  
  1157     COUNT_INST(I_MULL);
  1158     load_reg( REG_EAX, Rm );
  1159     load_reg( REG_ECX, Rn );
  1160     MULL_r32( REG_ECX );
  1161     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1162     sh4_x86.tstate = TSTATE_NONE;
  1163 :}
  1164 MULS.W Rm, Rn {:
  1165     COUNT_INST(I_MULSW);
  1166     MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1167     MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1168     MULL_r32( REG_ECX );
  1169     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1170     sh4_x86.tstate = TSTATE_NONE;
  1171 :}
  1172 MULU.W Rm, Rn {:  
  1173     COUNT_INST(I_MULUW);
  1174     MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1175     MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1176     MULL_r32( REG_ECX );
  1177     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1178     sh4_x86.tstate = TSTATE_NONE;
  1179 :}
  1180 NEG Rm, Rn {:
  1181     COUNT_INST(I_NEG);
  1182     load_reg( REG_EAX, Rm );
  1183     NEGL_r32( REG_EAX );
  1184     store_reg( REG_EAX, Rn );
  1185     sh4_x86.tstate = TSTATE_NONE;
  1186 :}
  1187 NEGC Rm, Rn {:  
  1188     COUNT_INST(I_NEGC);
  1189     load_reg( REG_EAX, Rm );
  1190     XORL_r32_r32( REG_ECX, REG_ECX );
  1191     LDC_t();
  1192     SBBL_r32_r32( REG_EAX, REG_ECX );
  1193     store_reg( REG_ECX, Rn );
  1194     SETC_t();
  1195     sh4_x86.tstate = TSTATE_C;
  1196 :}
  1197 NOT Rm, Rn {:  
  1198     COUNT_INST(I_NOT);
  1199     load_reg( REG_EAX, Rm );
  1200     NOTL_r32( REG_EAX );
  1201     store_reg( REG_EAX, Rn );
  1202     sh4_x86.tstate = TSTATE_NONE;
  1203 :}
  1204 OR Rm, Rn {:  
  1205     COUNT_INST(I_OR);
  1206     load_reg( REG_EAX, Rm );
  1207     load_reg( REG_ECX, Rn );
  1208     ORL_r32_r32( REG_EAX, REG_ECX );
  1209     store_reg( REG_ECX, Rn );
  1210     sh4_x86.tstate = TSTATE_NONE;
  1211 :}
  1212 OR #imm, R0 {:
  1213     COUNT_INST(I_ORI);
  1214     load_reg( REG_EAX, 0 );
  1215     ORL_imms_r32(imm, REG_EAX);
  1216     store_reg( REG_EAX, 0 );
  1217     sh4_x86.tstate = TSTATE_NONE;
  1218 :}
  1219 OR.B #imm, @(R0, GBR) {:  
  1220     COUNT_INST(I_ORB);
  1221     load_reg( REG_EAX, 0 );
  1222     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1223     MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1224     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1225     MOVL_r32_r32( REG_SAVE1, REG_EAX );
  1226     ORL_imms_r32(imm, REG_EDX );
  1227     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1228     sh4_x86.tstate = TSTATE_NONE;
  1229 :}
  1230 ROTCL Rn {:
  1231     COUNT_INST(I_ROTCL);
  1232     load_reg( REG_EAX, Rn );
  1233     if( sh4_x86.tstate != TSTATE_C ) {
   1234 	LDC_t();
   1235     }
  1236     RCLL_imm_r32( 1, REG_EAX );
  1237     store_reg( REG_EAX, Rn );
  1238     SETC_t();
  1239     sh4_x86.tstate = TSTATE_C;
  1240 :}
  1241 ROTCR Rn {:  
  1242     COUNT_INST(I_ROTCR);
  1243     load_reg( REG_EAX, Rn );
  1244     if( sh4_x86.tstate != TSTATE_C ) {
   1245 	LDC_t();
   1246     }
  1247     RCRL_imm_r32( 1, REG_EAX );
  1248     store_reg( REG_EAX, Rn );
  1249     SETC_t();
  1250     sh4_x86.tstate = TSTATE_C;
  1251 :}
  1252 ROTL Rn {:  
  1253     COUNT_INST(I_ROTL);
  1254     load_reg( REG_EAX, Rn );
  1255     ROLL_imm_r32( 1, REG_EAX );
  1256     store_reg( REG_EAX, Rn );
  1257     SETC_t();
  1258     sh4_x86.tstate = TSTATE_C;
  1259 :}
  1260 ROTR Rn {:  
  1261     COUNT_INST(I_ROTR);
  1262     load_reg( REG_EAX, Rn );
  1263     RORL_imm_r32( 1, REG_EAX );
  1264     store_reg( REG_EAX, Rn );
  1265     SETC_t();
  1266     sh4_x86.tstate = TSTATE_C;
  1267 :}
  1268 SHAD Rm, Rn {:
  1269     COUNT_INST(I_SHAD);
  1270     /* Annoyingly enough, not directly convertible */
  1271     load_reg( REG_EAX, Rn );
  1272     load_reg( REG_ECX, Rm );
  1273     CMPL_imms_r32( 0, REG_ECX );
  1274     JGE_label(doshl);
  1276     NEGL_r32( REG_ECX );      // 2
  1277     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1278     JE_label(emptysar);     // 2
  1279     SARL_cl_r32( REG_EAX );       // 2
  1280     JMP_label(end);          // 2
  1282     JMP_TARGET(emptysar);
  1283     SARL_imm_r32(31, REG_EAX );  // 3
  1284     JMP_label(end2);
  1286     JMP_TARGET(doshl);
  1287     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1288     SHLL_cl_r32( REG_EAX );       // 2
  1289     JMP_TARGET(end);
  1290     JMP_TARGET(end2);
  1291     store_reg( REG_EAX, Rn );
  1292     sh4_x86.tstate = TSTATE_NONE;
  1293 :}
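
/* SHAD in plain C (a sketch): a non-negative Rm shifts left by Rm&31; a
 * negative Rm shifts right arithmetically by (-Rm)&31, where a masked
 * count of 0 (Rm == -32) must fill with the sign bit -- the "emptysar"
 * path above. (Right-shifting a negative int is arithmetic on the hosts
 * lxdream targets, though ISO C leaves it implementation-defined.) */
#include <stdint.h>

static int32_t shad_model(int32_t rn, int32_t rm)
{
    if( rm >= 0 )
        return rn << (rm & 0x1F);
    unsigned count = (0u - (uint32_t)rm) & 0x1F;    /* avoids negating INT_MIN */
    return count ? (rn >> count) : (rn >> 31);      /* >>32 == sign fill */
}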
  1294 SHLD Rm, Rn {:  
  1295     COUNT_INST(I_SHLD);
  1296     load_reg( REG_EAX, Rn );
  1297     load_reg( REG_ECX, Rm );
  1298     CMPL_imms_r32( 0, REG_ECX );
  1299     JGE_label(doshl);
  1301     NEGL_r32( REG_ECX );      // 2
  1302     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1303     JE_label(emptyshr );
  1304     SHRL_cl_r32( REG_EAX );       // 2
  1305     JMP_label(end);          // 2
  1307     JMP_TARGET(emptyshr);
  1308     XORL_r32_r32( REG_EAX, REG_EAX );
  1309     JMP_label(end2);
  1311     JMP_TARGET(doshl);
  1312     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1313     SHLL_cl_r32( REG_EAX );       // 2
  1314     JMP_TARGET(end);
  1315     JMP_TARGET(end2);
  1316     store_reg( REG_EAX, Rn );
  1317     sh4_x86.tstate = TSTATE_NONE;
  1318 :}
  1319 SHAL Rn {: 
  1320     COUNT_INST(I_SHAL);
  1321     load_reg( REG_EAX, Rn );
  1322     SHLL_imm_r32( 1, REG_EAX );
  1323     SETC_t();
  1324     store_reg( REG_EAX, Rn );
  1325     sh4_x86.tstate = TSTATE_C;
  1326 :}
  1327 SHAR Rn {:  
  1328     COUNT_INST(I_SHAR);
  1329     load_reg( REG_EAX, Rn );
  1330     SARL_imm_r32( 1, REG_EAX );
  1331     SETC_t();
  1332     store_reg( REG_EAX, Rn );
  1333     sh4_x86.tstate = TSTATE_C;
  1334 :}
  1335 SHLL Rn {:  
  1336     COUNT_INST(I_SHLL);
  1337     load_reg( REG_EAX, Rn );
  1338     SHLL_imm_r32( 1, REG_EAX );
  1339     SETC_t();
  1340     store_reg( REG_EAX, Rn );
  1341     sh4_x86.tstate = TSTATE_C;
  1342 :}
  1343 SHLL2 Rn {:
  1344     COUNT_INST(I_SHLL);
  1345     load_reg( REG_EAX, Rn );
  1346     SHLL_imm_r32( 2, REG_EAX );
  1347     store_reg( REG_EAX, Rn );
  1348     sh4_x86.tstate = TSTATE_NONE;
  1349 :}
  1350 SHLL8 Rn {:  
  1351     COUNT_INST(I_SHLL);
  1352     load_reg( REG_EAX, Rn );
  1353     SHLL_imm_r32( 8, REG_EAX );
  1354     store_reg( REG_EAX, Rn );
  1355     sh4_x86.tstate = TSTATE_NONE;
  1356 :}
  1357 SHLL16 Rn {:  
  1358     COUNT_INST(I_SHLL);
  1359     load_reg( REG_EAX, Rn );
  1360     SHLL_imm_r32( 16, REG_EAX );
  1361     store_reg( REG_EAX, Rn );
  1362     sh4_x86.tstate = TSTATE_NONE;
  1363 :}
  1364 SHLR Rn {:  
  1365     COUNT_INST(I_SHLR);
  1366     load_reg( REG_EAX, Rn );
  1367     SHRL_imm_r32( 1, REG_EAX );
  1368     SETC_t();
  1369     store_reg( REG_EAX, Rn );
  1370     sh4_x86.tstate = TSTATE_C;
  1371 :}
  1372 SHLR2 Rn {:  
  1373     COUNT_INST(I_SHLR);
  1374     load_reg( REG_EAX, Rn );
  1375     SHRL_imm_r32( 2, REG_EAX );
  1376     store_reg( REG_EAX, Rn );
  1377     sh4_x86.tstate = TSTATE_NONE;
  1378 :}
  1379 SHLR8 Rn {:  
  1380     COUNT_INST(I_SHLR);
  1381     load_reg( REG_EAX, Rn );
  1382     SHRL_imm_r32( 8, REG_EAX );
  1383     store_reg( REG_EAX, Rn );
  1384     sh4_x86.tstate = TSTATE_NONE;
  1385 :}
  1386 SHLR16 Rn {:  
  1387     COUNT_INST(I_SHLR);
  1388     load_reg( REG_EAX, Rn );
  1389     SHRL_imm_r32( 16, REG_EAX );
  1390     store_reg( REG_EAX, Rn );
  1391     sh4_x86.tstate = TSTATE_NONE;
  1392 :}
  1393 SUB Rm, Rn {:  
  1394     COUNT_INST(I_SUB);
  1395     load_reg( REG_EAX, Rm );
  1396     load_reg( REG_ECX, Rn );
  1397     SUBL_r32_r32( REG_EAX, REG_ECX );
  1398     store_reg( REG_ECX, Rn );
  1399     sh4_x86.tstate = TSTATE_NONE;
  1400 :}
  1401 SUBC Rm, Rn {:  
  1402     COUNT_INST(I_SUBC);
  1403     load_reg( REG_EAX, Rm );
  1404     load_reg( REG_ECX, Rn );
  1405     if( sh4_x86.tstate != TSTATE_C ) {
   1406 	LDC_t();
   1407     }
  1408     SBBL_r32_r32( REG_EAX, REG_ECX );
  1409     store_reg( REG_ECX, Rn );
  1410     SETC_t();
  1411     sh4_x86.tstate = TSTATE_C;
  1412 :}
  1413 SUBV Rm, Rn {:  
  1414     COUNT_INST(I_SUBV);
  1415     load_reg( REG_EAX, Rm );
  1416     load_reg( REG_ECX, Rn );
  1417     SUBL_r32_r32( REG_EAX, REG_ECX );
  1418     store_reg( REG_ECX, Rn );
  1419     SETO_t();
  1420     sh4_x86.tstate = TSTATE_O;
  1421 :}
  1422 SWAP.B Rm, Rn {:  
  1423     COUNT_INST(I_SWAPB);
  1424     load_reg( REG_EAX, Rm );
  1425     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1426     store_reg( REG_EAX, Rn );
  1427 :}
  1428 SWAP.W Rm, Rn {:  
  1429     COUNT_INST(I_SWAPB);
  1430     load_reg( REG_EAX, Rm );
  1431     MOVL_r32_r32( REG_EAX, REG_ECX );
  1432     SHLL_imm_r32( 16, REG_ECX );
  1433     SHRL_imm_r32( 16, REG_EAX );
  1434     ORL_r32_r32( REG_EAX, REG_ECX );
  1435     store_reg( REG_ECX, Rn );
  1436     sh4_x86.tstate = TSTATE_NONE;
  1437 :}
  1438 TAS.B @Rn {:  
  1439     COUNT_INST(I_TASB);
  1440     load_reg( REG_EAX, Rn );
  1441     MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1442     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1443     TESTB_r8_r8( REG_DL, REG_DL );
  1444     SETE_t();
  1445     ORB_imms_r8( 0x80, REG_DL );
  1446     MOVL_r32_r32( REG_SAVE1, REG_EAX );
  1447     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1448     sh4_x86.tstate = TSTATE_NONE;
  1449 :}
  1450 TST Rm, Rn {:  
  1451     COUNT_INST(I_TST);
  1452     load_reg( REG_EAX, Rm );
  1453     load_reg( REG_ECX, Rn );
  1454     TESTL_r32_r32( REG_EAX, REG_ECX );
  1455     SETE_t();
  1456     sh4_x86.tstate = TSTATE_E;
  1457 :}
  1458 TST #imm, R0 {:  
  1459     COUNT_INST(I_TSTI);
  1460     load_reg( REG_EAX, 0 );
  1461     TESTL_imms_r32( imm, REG_EAX );
  1462     SETE_t();
  1463     sh4_x86.tstate = TSTATE_E;
  1464 :}
  1465 TST.B #imm, @(R0, GBR) {:  
  1466     COUNT_INST(I_TSTB);
  1467     load_reg( REG_EAX, 0);
  1468     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1469     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1470     TESTB_imms_r8( imm, REG_AL );
  1471     SETE_t();
  1472     sh4_x86.tstate = TSTATE_E;
  1473 :}
  1474 XOR Rm, Rn {:  
  1475     COUNT_INST(I_XOR);
  1476     load_reg( REG_EAX, Rm );
  1477     load_reg( REG_ECX, Rn );
  1478     XORL_r32_r32( REG_EAX, REG_ECX );
  1479     store_reg( REG_ECX, Rn );
  1480     sh4_x86.tstate = TSTATE_NONE;
  1481 :}
  1482 XOR #imm, R0 {:  
  1483     COUNT_INST(I_XORI);
  1484     load_reg( REG_EAX, 0 );
  1485     XORL_imms_r32( imm, REG_EAX );
  1486     store_reg( REG_EAX, 0 );
  1487     sh4_x86.tstate = TSTATE_NONE;
  1488 :}
  1489 XOR.B #imm, @(R0, GBR) {:  
  1490     COUNT_INST(I_XORB);
  1491     load_reg( REG_EAX, 0 );
  1492     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1493     MOVL_r32_r32( REG_EAX, REG_SAVE1 );
  1494     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1495     MOVL_r32_r32( REG_SAVE1, REG_EAX );
  1496     XORL_imms_r32( imm, REG_EDX );
  1497     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1498     sh4_x86.tstate = TSTATE_NONE;
  1499 :}
  1500 XTRCT Rm, Rn {:
  1501     COUNT_INST(I_XTRCT);
  1502     load_reg( REG_EAX, Rm );
  1503     load_reg( REG_ECX, Rn );
  1504     SHLL_imm_r32( 16, REG_EAX );
  1505     SHRL_imm_r32( 16, REG_ECX );
  1506     ORL_r32_r32( REG_EAX, REG_ECX );
  1507     store_reg( REG_ECX, Rn );
  1508     sh4_x86.tstate = TSTATE_NONE;
  1509 :}
  1511 /* Data move instructions */
  1512 MOV Rm, Rn {:  
  1513     COUNT_INST(I_MOV);
  1514     load_reg( REG_EAX, Rm );
  1515     store_reg( REG_EAX, Rn );
  1516 :}
  1517 MOV #imm, Rn {:  
  1518     COUNT_INST(I_MOVI);
  1519     MOVL_imm32_r32( imm, REG_EAX );
  1520     store_reg( REG_EAX, Rn );
  1521 :}
  1522 MOV.B Rm, @Rn {:  
  1523     COUNT_INST(I_MOVB);
  1524     load_reg( REG_EAX, Rn );
  1525     load_reg( REG_EDX, Rm );
  1526     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1527     sh4_x86.tstate = TSTATE_NONE;
  1528 :}
  1529 MOV.B Rm, @-Rn {:  
  1530     COUNT_INST(I_MOVB);
  1531     load_reg( REG_EAX, Rn );
  1532     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1533     load_reg( REG_EDX, Rm );
  1534     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1535     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1536     sh4_x86.tstate = TSTATE_NONE;
  1537 :}
  1538 MOV.B Rm, @(R0, Rn) {:  
  1539     COUNT_INST(I_MOVB);
  1540     load_reg( REG_EAX, 0 );
  1541     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1542     load_reg( REG_EDX, Rm );
  1543     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1544     sh4_x86.tstate = TSTATE_NONE;
  1545 :}
  1546 MOV.B R0, @(disp, GBR) {:  
  1547     COUNT_INST(I_MOVB);
  1548     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1549     ADDL_imms_r32( disp, REG_EAX );
  1550     load_reg( REG_EDX, 0 );
  1551     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1552     sh4_x86.tstate = TSTATE_NONE;
  1553 :}
  1554 MOV.B R0, @(disp, Rn) {:  
  1555     COUNT_INST(I_MOVB);
  1556     load_reg( REG_EAX, Rn );
  1557     ADDL_imms_r32( disp, REG_EAX );
  1558     load_reg( REG_EDX, 0 );
  1559     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1560     sh4_x86.tstate = TSTATE_NONE;
  1561 :}
  1562 MOV.B @Rm, Rn {:  
  1563     COUNT_INST(I_MOVB);
  1564     load_reg( REG_EAX, Rm );
  1565     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1566     store_reg( REG_EAX, Rn );
  1567     sh4_x86.tstate = TSTATE_NONE;
  1568 :}
  1569 MOV.B @Rm+, Rn {:  
  1570     COUNT_INST(I_MOVB);
  1571     load_reg( REG_EAX, Rm );
  1572     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1573     if( Rm != Rn ) {
  1574     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1575     }
  1576     store_reg( REG_EAX, Rn );
  1577     sh4_x86.tstate = TSTATE_NONE;
  1578 :}
  1579 MOV.B @(R0, Rm), Rn {:  
  1580     COUNT_INST(I_MOVB);
  1581     load_reg( REG_EAX, 0 );
  1582     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1583     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1584     store_reg( REG_EAX, Rn );
  1585     sh4_x86.tstate = TSTATE_NONE;
  1586 :}
  1587 MOV.B @(disp, GBR), R0 {:  
  1588     COUNT_INST(I_MOVB);
  1589     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1590     ADDL_imms_r32( disp, REG_EAX );
  1591     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1592     store_reg( REG_EAX, 0 );
  1593     sh4_x86.tstate = TSTATE_NONE;
  1594 :}
  1595 MOV.B @(disp, Rm), R0 {:  
  1596     COUNT_INST(I_MOVB);
  1597     load_reg( REG_EAX, Rm );
  1598     ADDL_imms_r32( disp, REG_EAX );
  1599     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1600     store_reg( REG_EAX, 0 );
  1601     sh4_x86.tstate = TSTATE_NONE;
  1602 :}
  1603 MOV.L Rm, @Rn {:
  1604     COUNT_INST(I_MOVL);
  1605     load_reg( REG_EAX, Rn );
  1606     check_walign32(REG_EAX);
  1607     MOVL_r32_r32( REG_EAX, REG_ECX );
  1608     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1609     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1610     JNE_label( notsq );
  1611     ANDL_imms_r32( 0x3C, REG_EAX );
  1612     load_reg( REG_EDX, Rm );
  1613     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1614     JMP_label(end);
  1615     JMP_TARGET(notsq);
  1616     load_reg( REG_EDX, Rm );
  1617     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1618     JMP_TARGET(end);
  1619     sh4_x86.tstate = TSTATE_NONE;
  1620 :}
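       /* Note on the masked compare above: the SH4 store queues occupy
        * 0xE0000000-0xE3FFFFFF, so
        *     ( addr & 0xFC000000 ) == 0xE0000000
        * selects that region, and ( addr & 0x3C ) then indexes the 64-byte
        * sh4r.store_queue buffer directly instead of taking the normal
        * memory path. */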
  1621 MOV.L Rm, @-Rn {:  
  1622     COUNT_INST(I_MOVL);
  1623     load_reg( REG_EAX, Rn );
  1624     ADDL_imms_r32( -4, REG_EAX );
  1625     check_walign32( REG_EAX );
  1626     load_reg( REG_EDX, Rm );
  1627     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1628     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1629     sh4_x86.tstate = TSTATE_NONE;
  1630 :}
  1631 MOV.L Rm, @(R0, Rn) {:  
  1632     COUNT_INST(I_MOVL);
  1633     load_reg( REG_EAX, 0 );
  1634     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1635     check_walign32( REG_EAX );
  1636     load_reg( REG_EDX, Rm );
  1637     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1638     sh4_x86.tstate = TSTATE_NONE;
  1639 :}
  1640 MOV.L R0, @(disp, GBR) {:  
  1641     COUNT_INST(I_MOVL);
  1642     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1643     ADDL_imms_r32( disp, REG_EAX );
  1644     check_walign32( REG_EAX );
  1645     load_reg( REG_EDX, 0 );
  1646     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1647     sh4_x86.tstate = TSTATE_NONE;
  1648 :}
  1649 MOV.L Rm, @(disp, Rn) {:  
  1650     COUNT_INST(I_MOVL);
  1651     load_reg( REG_EAX, Rn );
  1652     ADDL_imms_r32( disp, REG_EAX );
  1653     check_walign32( REG_EAX );
  1654     MOVL_r32_r32( REG_EAX, REG_ECX );
  1655     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1656     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1657     JNE_label( notsq );
  1658     ANDL_imms_r32( 0x3C, REG_EAX );
  1659     load_reg( REG_EDX, Rm );
  1660     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1661     JMP_label(end);
  1662     JMP_TARGET(notsq);
  1663     load_reg( REG_EDX, Rm );
  1664     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1665     JMP_TARGET(end);
  1666     sh4_x86.tstate = TSTATE_NONE;
  1667 :}
  1668 MOV.L @Rm, Rn {:  
  1669     COUNT_INST(I_MOVL);
  1670     load_reg( REG_EAX, Rm );
  1671     check_ralign32( REG_EAX );
  1672     MEM_READ_LONG( REG_EAX, REG_EAX );
  1673     store_reg( REG_EAX, Rn );
  1674     sh4_x86.tstate = TSTATE_NONE;
  1675 :}
  1676 MOV.L @Rm+, Rn {:  
  1677     COUNT_INST(I_MOVL);
  1678     load_reg( REG_EAX, Rm );
  1679     check_ralign32( REG_EAX );
  1680     MEM_READ_LONG( REG_EAX, REG_EAX );
  1681     if( Rm != Rn ) {
  1682     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1683     }
  1684     store_reg( REG_EAX, Rn );
  1685     sh4_x86.tstate = TSTATE_NONE;
  1686 :}
  1687 MOV.L @(R0, Rm), Rn {:  
  1688     COUNT_INST(I_MOVL);
  1689     load_reg( REG_EAX, 0 );
  1690     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1691     check_ralign32( REG_EAX );
  1692     MEM_READ_LONG( REG_EAX, REG_EAX );
  1693     store_reg( REG_EAX, Rn );
  1694     sh4_x86.tstate = TSTATE_NONE;
  1695 :}
  1696 MOV.L @(disp, GBR), R0 {:
  1697     COUNT_INST(I_MOVL);
  1698     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1699     ADDL_imms_r32( disp, REG_EAX );
  1700     check_ralign32( REG_EAX );
  1701     MEM_READ_LONG( REG_EAX, REG_EAX );
  1702     store_reg( REG_EAX, 0 );
  1703     sh4_x86.tstate = TSTATE_NONE;
  1704 :}
  1705 MOV.L @(disp, PC), Rn {:  
  1706     COUNT_INST(I_MOVLPC);
  1707     if( sh4_x86.in_delay_slot ) {
  1708 	SLOTILLEGAL();
  1709     } else {
  1710 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1711 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1712 	    // If the target address is in the same page as the code, it's
  1713 	    // pretty safe to just ref it directly and circumvent the whole
  1714 	    // memory subsystem. (this is a big performance win)
  1716 	    // FIXME: There's a corner-case that's not handled here when
  1717 	    // the current code-page is in the ITLB but not in the UTLB.
  1718 	    // (this should generate a TLB miss, although SH4 behaviour needs
  1719 	    // testing to confirm). It's unlikely that anything depends on this
  1720 	    // behaviour, though.
  1721 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1722 	    MOVL_moffptr_eax( ptr );
  1723 	} else {
  1724 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1725 	    // different virtual address than the translation was done with,
  1726 	    // but we can safely assume that the low bits are the same.
  1727 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1728 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1729 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1730 	    sh4_x86.tstate = TSTATE_NONE;
  1731 	}
  1732 	store_reg( REG_EAX, Rn );
  1733     }
  1734 :}
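       /* Worked example for the slow path above (illustrative numbers): with
        * block_start_pc = 0x8C001000, pc = 0x8C001006 and disp = 8, the literal
        * lives at ( pc & ~3 ) + disp + 4 = 0x8C001010, and the emitted code
        * computes
        *     sh4r.pc + ( pc - block_start_pc ) + disp + 4 - ( pc & 3 )
        *   = sh4r.pc + 6 + 8 + 4 - 2 = sh4r.pc + 0x10
        * which is the same address (sh4r.pc holds the block's start address
        * while the block runs), and stays correct even if the block is entered
        * at a different virtual address with the same low bits. */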
  1735 MOV.L @(disp, Rm), Rn {:  
  1736     COUNT_INST(I_MOVL);
  1737     load_reg( REG_EAX, Rm );
  1738     ADDL_imms_r32( disp, REG_EAX );
  1739     check_ralign32( REG_EAX );
  1740     MEM_READ_LONG( REG_EAX, REG_EAX );
  1741     store_reg( REG_EAX, Rn );
  1742     sh4_x86.tstate = TSTATE_NONE;
  1743 :}
  1744 MOV.W Rm, @Rn {:  
  1745     COUNT_INST(I_MOVW);
  1746     load_reg( REG_EAX, Rn );
  1747     check_walign16( REG_EAX );
  1748     load_reg( REG_EDX, Rm );
  1749     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1750     sh4_x86.tstate = TSTATE_NONE;
  1751 :}
  1752 MOV.W Rm, @-Rn {:  
  1753     COUNT_INST(I_MOVW);
  1754     load_reg( REG_EAX, Rn );
  1755     check_walign16( REG_EAX );
  1756     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1757     load_reg( REG_EDX, Rm );
  1758     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1759     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1760     sh4_x86.tstate = TSTATE_NONE;
  1761 :}
  1762 MOV.W Rm, @(R0, Rn) {:  
  1763     COUNT_INST(I_MOVW);
  1764     load_reg( REG_EAX, 0 );
  1765     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1766     check_walign16( REG_EAX );
  1767     load_reg( REG_EDX, Rm );
  1768     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1769     sh4_x86.tstate = TSTATE_NONE;
  1770 :}
  1771 MOV.W R0, @(disp, GBR) {:  
  1772     COUNT_INST(I_MOVW);
  1773     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1774     ADDL_imms_r32( disp, REG_EAX );
  1775     check_walign16( REG_EAX );
  1776     load_reg( REG_EDX, 0 );
  1777     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1778     sh4_x86.tstate = TSTATE_NONE;
  1779 :}
  1780 MOV.W R0, @(disp, Rn) {:  
  1781     COUNT_INST(I_MOVW);
  1782     load_reg( REG_EAX, Rn );
  1783     ADDL_imms_r32( disp, REG_EAX );
  1784     check_walign16( REG_EAX );
  1785     load_reg( REG_EDX, 0 );
  1786     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1787     sh4_x86.tstate = TSTATE_NONE;
  1788 :}
  1789 MOV.W @Rm, Rn {:  
  1790     COUNT_INST(I_MOVW);
  1791     load_reg( REG_EAX, Rm );
  1792     check_ralign16( REG_EAX );
  1793     MEM_READ_WORD( REG_EAX, REG_EAX );
  1794     store_reg( REG_EAX, Rn );
  1795     sh4_x86.tstate = TSTATE_NONE;
  1796 :}
  1797 MOV.W @Rm+, Rn {:  
  1798     COUNT_INST(I_MOVW);
  1799     load_reg( REG_EAX, Rm );
  1800     check_ralign16( REG_EAX );
  1801     MEM_READ_WORD( REG_EAX, REG_EAX );
  1802     if( Rm != Rn ) {
  1803         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1804     }
  1805     store_reg( REG_EAX, Rn );
  1806     sh4_x86.tstate = TSTATE_NONE;
  1807 :}
  1808 MOV.W @(R0, Rm), Rn {:  
  1809     COUNT_INST(I_MOVW);
  1810     load_reg( REG_EAX, 0 );
  1811     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1812     check_ralign16( REG_EAX );
  1813     MEM_READ_WORD( REG_EAX, REG_EAX );
  1814     store_reg( REG_EAX, Rn );
  1815     sh4_x86.tstate = TSTATE_NONE;
  1816 :}
  1817 MOV.W @(disp, GBR), R0 {:  
  1818     COUNT_INST(I_MOVW);
  1819     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1820     ADDL_imms_r32( disp, REG_EAX );
  1821     check_ralign16( REG_EAX );
  1822     MEM_READ_WORD( REG_EAX, REG_EAX );
  1823     store_reg( REG_EAX, 0 );
  1824     sh4_x86.tstate = TSTATE_NONE;
  1825 :}
  1826 MOV.W @(disp, PC), Rn {:  
  1827     COUNT_INST(I_MOVW);
  1828     if( sh4_x86.in_delay_slot ) {
  1829 	SLOTILLEGAL();
  1830     } else {
  1831 	// See comments for MOV.L @(disp, PC), Rn
  1832 	uint32_t target = pc + disp + 4;
  1833 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1834 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1835 	    MOVL_moffptr_eax( ptr );
  1836 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1837 	} else {
  1838 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1839 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1840 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1841 	    sh4_x86.tstate = TSTATE_NONE;
  1842 	}
  1843 	store_reg( REG_EAX, Rn );
  1844     }
  1845 :}
  1846 MOV.W @(disp, Rm), R0 {:  
  1847     COUNT_INST(I_MOVW);
  1848     load_reg( REG_EAX, Rm );
  1849     ADDL_imms_r32( disp, REG_EAX );
  1850     check_ralign16( REG_EAX );
  1851     MEM_READ_WORD( REG_EAX, REG_EAX );
  1852     store_reg( REG_EAX, 0 );
  1853     sh4_x86.tstate = TSTATE_NONE;
  1854 :}
  1855 MOVA @(disp, PC), R0 {:  
  1856     COUNT_INST(I_MOVA);
  1857     if( sh4_x86.in_delay_slot ) {
  1858 	SLOTILLEGAL();
  1859     } else {
  1860 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1861 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1862 	store_reg( REG_ECX, 0 );
  1863 	sh4_x86.tstate = TSTATE_NONE;
  1864     }
  1865 :}
  1866 MOVCA.L R0, @Rn {:  
  1867     COUNT_INST(I_MOVCA);
  1868     load_reg( REG_EAX, Rn );
  1869     check_walign32( REG_EAX );
  1870     load_reg( REG_EDX, 0 );
  1871     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1872     sh4_x86.tstate = TSTATE_NONE;
  1873 :}
  1875 /* Control transfer instructions */
  1876 BF disp {:
  1877     COUNT_INST(I_BF);
  1878     if( sh4_x86.in_delay_slot ) {
  1879 	SLOTILLEGAL();
  1880     } else {
  1881 	sh4vma_t target = disp + pc + 4;
  1882 	JT_label( nottaken );
  1883 	exit_block_rel(target, pc+2 );
  1884 	JMP_TARGET(nottaken);
  1885 	return 2;
  1886     }
  1887 :}
  1888 BF/S disp {:
  1889     COUNT_INST(I_BFS);
  1890     if( sh4_x86.in_delay_slot ) {
  1891 	SLOTILLEGAL();
  1892     } else {
  1893 	sh4_x86.in_delay_slot = DELAY_PC;
  1894 	if( UNTRANSLATABLE(pc+2) ) {
  1895 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1896 	    JT_label(nottaken);
  1897 	    ADDL_imms_r32( disp, REG_EAX );
  1898 	    JMP_TARGET(nottaken);
  1899 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1900 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1901 	    exit_block_emu(pc+2);
  1902 	    sh4_x86.branch_taken = TRUE;
  1903 	    return 2;
  1904 	} else {
  1905 	    LOAD_t();
  1906 	    sh4vma_t target = disp + pc + 4;
  1907 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1908 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1909 	    int save_tstate = sh4_x86.tstate;
  1910 	    sh4_translate_instruction(pc+2);
  1911             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1912 	    exit_block_rel( target, pc+4 );
  1914 	    // not taken
  1915 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1916 	    sh4_x86.tstate = save_tstate;
  1917 	    sh4_translate_instruction(pc+2);
  1918 	    return 4;
  1919 	}
  1920     }
  1921 :}
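       /* The taken/not-taken split above uses a forward-jump backpatch; the
        * idiom, for reference:
        *
        *     JCC_cc_rel32( cc, 0 );                           // jcc with rel32 = 0
        *     uint32_t *patch = ((uint32_t *)xlat_output) - 1; // -> the rel32 field
        *     ... emit delay slot + taken-branch exit ...
        *     *patch = ( xlat_output - (uint8_t *)patch ) - 4; // distance from the
        *                                                      // end of the jcc
        *
        * The delay slot is translated twice, once on each path, since its side
        * effects must occur whether or not the branch is taken. */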
  1922 BRA disp {:  
  1923     COUNT_INST(I_BRA);
  1924     if( sh4_x86.in_delay_slot ) {
  1925 	SLOTILLEGAL();
  1926     } else {
  1927 	sh4_x86.in_delay_slot = DELAY_PC;
  1928 	sh4_x86.branch_taken = TRUE;
  1929 	if( UNTRANSLATABLE(pc+2) ) {
  1930 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1931 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1932 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1933 	    exit_block_emu(pc+2);
  1934 	    return 2;
  1935 	} else {
  1936 	    sh4_translate_instruction( pc + 2 );
  1937 	    exit_block_rel( disp + pc + 4, pc+4 );
  1938 	    return 4;
  1939 	}
  1940     }
  1941 :}
  1942 BRAF Rn {:  
  1943     COUNT_INST(I_BRAF);
  1944     if( sh4_x86.in_delay_slot ) {
  1945 	SLOTILLEGAL();
  1946     } else {
  1947 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1948 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1949 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1950 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1951 	sh4_x86.in_delay_slot = DELAY_PC;
  1952 	sh4_x86.tstate = TSTATE_NONE;
  1953 	sh4_x86.branch_taken = TRUE;
  1954 	if( UNTRANSLATABLE(pc+2) ) {
  1955 	    exit_block_emu(pc+2);
  1956 	    return 2;
  1957 	} else {
  1958 	    sh4_translate_instruction( pc + 2 );
  1959 	    exit_block_newpcset(pc+4);
  1960 	    return 4;
  1961 	}
  1962     }
  1963 :}
  1964 BSR disp {:  
  1965     COUNT_INST(I_BSR);
  1966     if( sh4_x86.in_delay_slot ) {
  1967 	SLOTILLEGAL();
  1968     } else {
  1969 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1970 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1971 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1972 	sh4_x86.in_delay_slot = DELAY_PC;
  1973 	sh4_x86.branch_taken = TRUE;
  1974 	sh4_x86.tstate = TSTATE_NONE;
  1975 	if( UNTRANSLATABLE(pc+2) ) {
  1976 	    ADDL_imms_r32( disp, REG_EAX );
  1977 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1978 	    exit_block_emu(pc+2);
  1979 	    return 2;
  1980 	} else {
  1981 	    sh4_translate_instruction( pc + 2 );
  1982 	    exit_block_rel( disp + pc + 4, pc+4 );
  1983 	    return 4;
  1984 	}
  1985     }
  1986 :}
  1987 BSRF Rn {:  
  1988     COUNT_INST(I_BSRF);
  1989     if( sh4_x86.in_delay_slot ) {
  1990 	SLOTILLEGAL();
  1991     } else {
  1992 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1993 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1994 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1995 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1996 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1998 	sh4_x86.in_delay_slot = DELAY_PC;
  1999 	sh4_x86.tstate = TSTATE_NONE;
  2000 	sh4_x86.branch_taken = TRUE;
  2001 	if( UNTRANSLATABLE(pc+2) ) {
  2002 	    exit_block_emu(pc+2);
  2003 	    return 2;
  2004 	} else {
  2005 	    sh4_translate_instruction( pc + 2 );
  2006 	    exit_block_newpcset(pc+4);
  2007 	    return 4;
  2008 	}
  2009     }
  2010 :}
  2011 BT disp {:
  2012     COUNT_INST(I_BT);
  2013     if( sh4_x86.in_delay_slot ) {
  2014 	SLOTILLEGAL();
  2015     } else {
  2016 	sh4vma_t target = disp + pc + 4;
  2017 	JF_label( nottaken );
  2018 	exit_block_rel(target, pc+2 );
  2019 	JMP_TARGET(nottaken);
  2020 	return 2;
  2021     }
  2022 :}
  2023 BT/S disp {:
  2024     COUNT_INST(I_BTS);
  2025     if( sh4_x86.in_delay_slot ) {
  2026 	SLOTILLEGAL();
  2027     } else {
  2028 	sh4_x86.in_delay_slot = DELAY_PC;
  2029 	if( UNTRANSLATABLE(pc+2) ) {
  2030 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2031 	    JF_label(nottaken);
  2032 	    ADDL_imms_r32( disp, REG_EAX );
  2033 	    JMP_TARGET(nottaken);
  2034 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2035 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2036 	    exit_block_emu(pc+2);
  2037 	    sh4_x86.branch_taken = TRUE;
  2038 	    return 2;
  2039 	} else {
  2040 	    LOAD_t();
  2041 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2042 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  2044 	    int save_tstate = sh4_x86.tstate;
  2045 	    sh4_translate_instruction(pc+2);
  2046             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2047 	    exit_block_rel( disp + pc + 4, pc+4 );
  2048 	    // not taken
  2049 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2050 	    sh4_x86.tstate = save_tstate;
  2051 	    sh4_translate_instruction(pc+2);
  2052 	    return 4;
  2053 	}
  2054     }
  2055 :}
  2056 JMP @Rn {:  
  2057     COUNT_INST(I_JMP);
  2058     if( sh4_x86.in_delay_slot ) {
  2059 	SLOTILLEGAL();
  2060     } else {
  2061 	load_reg( REG_ECX, Rn );
  2062 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2063 	sh4_x86.in_delay_slot = DELAY_PC;
  2064 	sh4_x86.branch_taken = TRUE;
  2065 	if( UNTRANSLATABLE(pc+2) ) {
  2066 	    exit_block_emu(pc+2);
  2067 	    return 2;
  2068 	} else {
  2069 	    sh4_translate_instruction(pc+2);
  2070 	    exit_block_newpcset(pc+4);
  2071 	    return 4;
  2072 	}
  2073     }
  2074 :}
  2075 JSR @Rn {:  
  2076     COUNT_INST(I_JSR);
  2077     if( sh4_x86.in_delay_slot ) {
  2078 	SLOTILLEGAL();
  2079     } else {
  2080 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2081 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2082 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2083 	load_reg( REG_ECX, Rn );
  2084 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2085 	sh4_x86.in_delay_slot = DELAY_PC;
  2086 	sh4_x86.branch_taken = TRUE;
  2087 	sh4_x86.tstate = TSTATE_NONE;
  2088 	if( UNTRANSLATABLE(pc+2) ) {
  2089 	    exit_block_emu(pc+2);
  2090 	    return 2;
  2091 	} else {
  2092 	    sh4_translate_instruction(pc+2);
  2093 	    exit_block_newpcset(pc+4);
  2094 	    return 4;
  2095 	}
  2096     }
  2097 :}
  2098 RTE {:  
  2099     COUNT_INST(I_RTE);
  2100     if( sh4_x86.in_delay_slot ) {
  2101 	SLOTILLEGAL();
  2102     } else {
  2103 	check_priv();
  2104 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2105 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2106 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2107 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2108 	sh4_x86.in_delay_slot = DELAY_PC;
  2109 	sh4_x86.fpuen_checked = FALSE;
  2110 	sh4_x86.tstate = TSTATE_NONE;
  2111 	sh4_x86.branch_taken = TRUE;
  2112     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2113 	if( UNTRANSLATABLE(pc+2) ) {
  2114 	    exit_block_emu(pc+2);
  2115 	    return 2;
  2116 	} else {
  2117 	    sh4_translate_instruction(pc+2);
  2118 	    exit_block_newpcset(pc+4);
  2119 	    return 4;
  2120 	}
  2121     }
  2122 :}
  2123 RTS {:  
  2124     COUNT_INST(I_RTS);
  2125     if( sh4_x86.in_delay_slot ) {
  2126 	SLOTILLEGAL();
  2127     } else {
  2128 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2129 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2130 	sh4_x86.in_delay_slot = DELAY_PC;
  2131 	sh4_x86.branch_taken = TRUE;
  2132 	if( UNTRANSLATABLE(pc+2) ) {
  2133 	    exit_block_emu(pc+2);
  2134 	    return 2;
  2135 	} else {
  2136 	    sh4_translate_instruction(pc+2);
  2137 	    exit_block_newpcset(pc+4);
  2138 	    return 4;
  2139 	}
  2140     }
  2141 :}
  2142 TRAPA #imm {:  
  2143     COUNT_INST(I_TRAPA);
  2144     if( sh4_x86.in_delay_slot ) {
  2145 	SLOTILLEGAL();
  2146     } else {
  2147 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2148 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2149 	MOVL_imm32_r32( imm, REG_EAX );
  2150 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2151 	sh4_x86.tstate = TSTATE_NONE;
  2152 	exit_block_pcset(pc+2);
  2153 	sh4_x86.branch_taken = TRUE;
  2154 	return 2;
  2155     }
  2156 :}
  2157 UNDEF {:  
  2158     COUNT_INST(I_UNDEF);
  2159     if( sh4_x86.in_delay_slot ) {
  2160 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
  2161     } else {
  2162 	exit_block_exc(EXC_ILLEGAL, pc, 2);    
  2163 	return 2;
  2164     }
  2165 :}
  2167 CLRMAC {:  
  2168     COUNT_INST(I_CLRMAC);
  2169     XORL_r32_r32(REG_EAX, REG_EAX);
  2170     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2171     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2172     sh4_x86.tstate = TSTATE_NONE;
  2173 :}
  2174 CLRS {:
  2175     COUNT_INST(I_CLRS);
  2176     CLC();
  2177     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2178     sh4_x86.tstate = TSTATE_NONE;
  2179 :}
  2180 CLRT {:  
  2181     COUNT_INST(I_CLRT);
  2182     CLC();
  2183     SETC_t();
  2184     sh4_x86.tstate = TSTATE_C;
  2185 :}
  2186 SETS {:  
  2187     COUNT_INST(I_SETS);
  2188     STC();
  2189     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2190     sh4_x86.tstate = TSTATE_NONE;
  2191 :}
  2192 SETT {:  
  2193     COUNT_INST(I_SETT);
  2194     STC();
  2195     SETC_t();
  2196     sh4_x86.tstate = TSTATE_C;
  2197 :}
  2199 /* Floating point moves */
  2200 FMOV FRm, FRn {:  
  2201     COUNT_INST(I_FMOV1);
  2202     check_fpuen();
  2203     if( sh4_x86.double_size ) {
  2204         load_dr0( REG_EAX, FRm );
  2205         load_dr1( REG_ECX, FRm );
  2206         store_dr0( REG_EAX, FRn );
  2207         store_dr1( REG_ECX, FRn );
  2208     } else {
  2209         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2210         store_fr( REG_EAX, FRn );
  2211     }
  2212 :}
  2213 FMOV FRm, @Rn {: 
  2214     COUNT_INST(I_FMOV2);
  2215     check_fpuen();
  2216     load_reg( REG_EAX, Rn );
  2217     if( sh4_x86.double_size ) {
  2218         check_walign64( REG_EAX );
  2219         load_dr0( REG_EDX, FRm );
  2220         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2221         load_reg( REG_EAX, Rn );
  2222         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2223         load_dr1( REG_EDX, FRm );
  2224         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2225     } else {
  2226         check_walign32( REG_EAX );
  2227         load_fr( REG_EDX, FRm );
  2228         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2229     }
  2230     sh4_x86.tstate = TSTATE_NONE;
  2231 :}
  2232 FMOV @Rm, FRn {:  
  2233     COUNT_INST(I_FMOV5);
  2234     check_fpuen();
  2235     load_reg( REG_EAX, Rm );
  2236     if( sh4_x86.double_size ) {
  2237         check_ralign64( REG_EAX );
  2238         MEM_READ_LONG( REG_EAX, REG_EAX );
  2239         store_dr0( REG_EAX, FRn );
  2240         load_reg( REG_EAX, Rm );
  2241         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2242         MEM_READ_LONG( REG_EAX, REG_EAX );
  2243         store_dr1( REG_EAX, FRn );
  2244     } else {
  2245         check_ralign32( REG_EAX );
  2246         MEM_READ_LONG( REG_EAX, REG_EAX );
  2247         store_fr( REG_EAX, FRn );
  2248     }
  2249     sh4_x86.tstate = TSTATE_NONE;
  2250 :}
  2251 FMOV FRm, @-Rn {:  
  2252     COUNT_INST(I_FMOV3);
  2253     check_fpuen();
  2254     load_reg( REG_EAX, Rn );
  2255     if( sh4_x86.double_size ) {
  2256         check_walign64( REG_EAX );
  2257         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2258         load_dr0( REG_EDX, FRm );
  2259         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2260         load_reg( REG_EAX, Rn );
  2261         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2262         load_dr1( REG_EDX, FRm );
  2263         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2264         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2265     } else {
  2266         check_walign32( REG_EAX );
  2267         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2268         load_fr( REG_EDX, FRm );
  2269         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2270         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2271     }
  2272     sh4_x86.tstate = TSTATE_NONE;
  2273 :}
  2274 FMOV @Rm+, FRn {:
  2275     COUNT_INST(I_FMOV6);
  2276     check_fpuen();
  2277     load_reg( REG_EAX, Rm );
  2278     if( sh4_x86.double_size ) {
  2279         check_ralign64( REG_EAX );
  2280         MEM_READ_LONG( REG_EAX, REG_EAX );
  2281         store_dr0( REG_EAX, FRn );
  2282         load_reg( REG_EAX, Rm );
  2283         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2284         MEM_READ_LONG( REG_EAX, REG_EAX );
  2285         store_dr1( REG_EAX, FRn );
  2286         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2287     } else {
  2288         check_ralign32( REG_EAX );
  2289         MEM_READ_LONG( REG_EAX, REG_EAX );
  2290         store_fr( REG_EAX, FRn );
  2291         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2292     }
  2293     sh4_x86.tstate = TSTATE_NONE;
  2294 :}
  2295 FMOV FRm, @(R0, Rn) {:  
  2296     COUNT_INST(I_FMOV4);
  2297     check_fpuen();
  2298     load_reg( REG_EAX, Rn );
  2299     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2300     if( sh4_x86.double_size ) {
  2301         check_walign64( REG_EAX );
  2302         load_dr0( REG_EDX, FRm );
  2303         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2304         load_reg( REG_EAX, Rn );
  2305         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2306         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2307         load_dr1( REG_EDX, FRm );
  2308         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2309     } else {
  2310         check_walign32( REG_EAX );
  2311         load_fr( REG_EDX, FRm );
  2312         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2313     }
  2314     sh4_x86.tstate = TSTATE_NONE;
  2315 :}
  2316 FMOV @(R0, Rm), FRn {:  
  2317     COUNT_INST(I_FMOV7);
  2318     check_fpuen();
  2319     load_reg( REG_EAX, Rm );
  2320     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2321     if( sh4_x86.double_size ) {
  2322         check_ralign64( REG_EAX );
  2323         MEM_READ_LONG( REG_EAX, REG_EAX );
  2324         store_dr0( REG_EAX, FRn );
  2325         load_reg( REG_EAX, Rm );
  2326         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2327         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2328         MEM_READ_LONG( REG_EAX, REG_EAX );
  2329         store_dr1( REG_EAX, FRn );
  2330     } else {
  2331         check_ralign32( REG_EAX );
  2332         MEM_READ_LONG( REG_EAX, REG_EAX );
  2333         store_fr( REG_EAX, FRn );
  2334     }
  2335     sh4_x86.tstate = TSTATE_NONE;
  2336 :}
  2337 FLDI0 FRn {:  /* IFF PR=0 */
  2338     COUNT_INST(I_FLDI0);
  2339     check_fpuen();
  2340     if( sh4_x86.double_prec == 0 ) {
  2341         XORL_r32_r32( REG_EAX, REG_EAX );
  2342         store_fr( REG_EAX, FRn );
  2343     }
  2344     sh4_x86.tstate = TSTATE_NONE;
  2345 :}
  2346 FLDI1 FRn {:  /* IFF PR=0 */
  2347     COUNT_INST(I_FLDI1);
  2348     check_fpuen();
  2349     if( sh4_x86.double_prec == 0 ) {
  2350         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2351         store_fr( REG_EAX, FRn );
  2352     }
  2353 :}
  2355 FLOAT FPUL, FRn {:  
  2356     COUNT_INST(I_FLOAT);
  2357     check_fpuen();
  2358     FILD_rbpdisp(R_FPUL);
  2359     if( sh4_x86.double_prec ) {
  2360         pop_dr( FRn );
  2361     } else {
  2362         pop_fr( FRn );
  2363     }
  2364 :}
  2365 FTRC FRm, FPUL {:  
  2366     COUNT_INST(I_FTRC);
  2367     check_fpuen();
  2368     if( sh4_x86.double_prec ) {
  2369         push_dr( FRm );
  2370     } else {
  2371         push_fr( FRm );
  2372     }
  2373     MOVP_immptr_rptr( &min_int, REG_ECX );
  2374     FILD_r32disp( REG_ECX, 0 );
  2375     FCOMIP_st(1);              
  2376     JAE_label( sat );     
  2377     JP_label( sat2 );       
  2378     MOVP_immptr_rptr( &max_int, REG_ECX );
  2379     FILD_r32disp( REG_ECX, 0 );
  2380     FCOMIP_st(1);
  2381     JNA_label( sat3 );
  2382     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2383     FNSTCW_r32disp( REG_EAX, 0 );
  2384     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2385     FLDCW_r32disp( REG_EDX, 0 );
  2386     FISTP_rbpdisp(R_FPUL);             
  2387     FLDCW_r32disp( REG_EAX, 0 );
  2388     JMP_label(end);             
  2390     JMP_TARGET(sat);
  2391     JMP_TARGET(sat2);
  2392     JMP_TARGET(sat3);
  2393     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2394     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2395     FPOP_st();
  2396     JMP_TARGET(end);
  2397     sh4_x86.tstate = TSTATE_NONE;
  2398 :}
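       /* FTRC must truncate toward zero regardless of the current x87 rounding
        * mode, hence the FNSTCW/FLDCW dance above; a rough C equivalent
        * (assuming trunc_fcw is a control word with RC set to truncate):
        *
        *     uint16_t saved;
        *     __asm__ volatile( "fnstcw %0" : "=m"(saved) );
        *     __asm__ volatile( "fldcw %0" : : "m"(trunc_fcw) );
        *     ... fistp ...
        *     __asm__ volatile( "fldcw %0" : : "m"(saved) );
        *
        * The JAE/JP branches catch too-small and unordered (NaN) inputs, JNA
        * too-large ones; those paths store the clamp value from min_int or
        * max_int directly and pop the stack. */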
  2399 FLDS FRm, FPUL {:  
  2400     COUNT_INST(I_FLDS);
  2401     check_fpuen();
  2402     load_fr( REG_EAX, FRm );
  2403     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2404 :}
  2405 FSTS FPUL, FRn {:  
  2406     COUNT_INST(I_FSTS);
  2407     check_fpuen();
  2408     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2409     store_fr( REG_EAX, FRn );
  2410 :}
  2411 FCNVDS FRm, FPUL {:  
  2412     COUNT_INST(I_FCNVDS);
  2413     check_fpuen();
  2414     if( sh4_x86.double_prec ) {
  2415         push_dr( FRm );
  2416         pop_fpul();
  2417     }
  2418 :}
  2419 FCNVSD FPUL, FRn {:  
  2420     COUNT_INST(I_FCNVSD);
  2421     check_fpuen();
  2422     if( sh4_x86.double_prec ) {
  2423         push_fpul();
  2424         pop_dr( FRn );
  2425     }
  2426 :}
  2428 /* Floating point instructions */
  2429 FABS FRn {:  
  2430     COUNT_INST(I_FABS);
  2431     check_fpuen();
  2432     if( sh4_x86.double_prec ) {
  2433         push_dr(FRn);
  2434         FABS_st0();
  2435         pop_dr(FRn);
  2436     } else {
  2437         push_fr(FRn);
  2438         FABS_st0();
  2439         pop_fr(FRn);
  2440     }
  2441 :}
  2442 FADD FRm, FRn {:  
  2443     COUNT_INST(I_FADD);
  2444     check_fpuen();
  2445     if( sh4_x86.double_prec ) {
  2446         push_dr(FRm);
  2447         push_dr(FRn);
  2448         FADDP_st(1);
  2449         pop_dr(FRn);
  2450     } else {
  2451         push_fr(FRm);
  2452         push_fr(FRn);
  2453         FADDP_st(1);
  2454         pop_fr(FRn);
  2455     }
  2456 :}
  2457 FDIV FRm, FRn {:  
  2458     COUNT_INST(I_FDIV);
  2459     check_fpuen();
  2460     if( sh4_x86.double_prec ) {
  2461         push_dr(FRn);
  2462         push_dr(FRm);
  2463         FDIVP_st(1);
  2464         pop_dr(FRn);
  2465     } else {
  2466         push_fr(FRn);
  2467         push_fr(FRm);
  2468         FDIVP_st(1);
  2469         pop_fr(FRn);
  2470     }
  2471 :}
  2472 FMAC FR0, FRm, FRn {:  
  2473     COUNT_INST(I_FMAC);
  2474     check_fpuen();
  2475     if( sh4_x86.double_prec ) {
  2476         push_dr( 0 );
  2477         push_dr( FRm );
  2478         FMULP_st(1);
  2479         push_dr( FRn );
  2480         FADDP_st(1);
  2481         pop_dr( FRn );
  2482     } else {
  2483         push_fr( 0 );
  2484         push_fr( FRm );
  2485         FMULP_st(1);
  2486         push_fr( FRn );
  2487         FADDP_st(1);
  2488         pop_fr( FRn );
  2489     }
  2490 :}
  2492 FMUL FRm, FRn {:  
  2493     COUNT_INST(I_FMUL);
  2494     check_fpuen();
  2495     if( sh4_x86.double_prec ) {
  2496         push_dr(FRm);
  2497         push_dr(FRn);
  2498         FMULP_st(1);
  2499         pop_dr(FRn);
  2500     } else {
  2501         push_fr(FRm);
  2502         push_fr(FRn);
  2503         FMULP_st(1);
  2504         pop_fr(FRn);
  2505     }
  2506 :}
  2507 FNEG FRn {:  
  2508     COUNT_INST(I_FNEG);
  2509     check_fpuen();
  2510     if( sh4_x86.double_prec ) {
  2511         push_dr(FRn);
  2512         FCHS_st0();
  2513         pop_dr(FRn);
  2514     } else {
  2515         push_fr(FRn);
  2516         FCHS_st0();
  2517         pop_fr(FRn);
  2518     }
  2519 :}
  2520 FSRRA FRn {:  
  2521     COUNT_INST(I_FSRRA);
  2522     check_fpuen();
  2523     if( sh4_x86.double_prec == 0 ) {
  2524         FLD1_st0();
  2525         push_fr(FRn);
  2526         FSQRT_st0();
  2527         FDIVP_st(1);
  2528         pop_fr(FRn);
  2529     }
  2530 :}
  2531 FSQRT FRn {:  
  2532     COUNT_INST(I_FSQRT);
  2533     check_fpuen();
  2534     if( sh4_x86.double_prec ) {
  2535         push_dr(FRn);
  2536         FSQRT_st0();
  2537         pop_dr(FRn);
  2538     } else {
  2539         push_fr(FRn);
  2540         FSQRT_st0();
  2541         pop_fr(FRn);
  2542     }
  2543 :}
  2544 FSUB FRm, FRn {:  
  2545     COUNT_INST(I_FSUB);
  2546     check_fpuen();
  2547     if( sh4_x86.double_prec ) {
  2548         push_dr(FRn);
  2549         push_dr(FRm);
  2550         FSUBP_st(1);
  2551         pop_dr(FRn);
  2552     } else {
  2553         push_fr(FRn);
  2554         push_fr(FRm);
  2555         FSUBP_st(1);
  2556         pop_fr(FRn);
  2557     }
  2558 :}
  2560 FCMP/EQ FRm, FRn {:  
  2561     COUNT_INST(I_FCMPEQ);
  2562     check_fpuen();
  2563     if( sh4_x86.double_prec ) {
  2564         push_dr(FRm);
  2565         push_dr(FRn);
  2566     } else {
  2567         push_fr(FRm);
  2568         push_fr(FRn);
  2569     }
  2570     XORL_r32_r32(REG_EAX, REG_EAX);
  2571     XORL_r32_r32(REG_EDX, REG_EDX);
  2572     FCOMIP_st(1);
  2573     SETCCB_cc_r8(X86_COND_NP, REG_DL);
  2574     CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
  2575     MOVL_r32_rbpdisp(REG_EAX, R_T);
  2576     FPOP_st();
  2577     sh4_x86.tstate = TSTATE_NONE;
  2578 :}
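       /* FCOMI sets ZF for equal but also for unordered, so a bare SETE would
        * make NaN == NaN come out true. The sequence above instead computes
        *     T = ZF ? !PF : 0
        * i.e. "equal and not unordered": EDX holds !PF via SETNP, and the
        * CMOVE copies it into the zeroed EAX only when ZF is set. */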
  2579 FCMP/GT FRm, FRn {:  
  2580     COUNT_INST(I_FCMPGT);
  2581     check_fpuen();
  2582     if( sh4_x86.double_prec ) {
  2583         push_dr(FRm);
  2584         push_dr(FRn);
  2585     } else {
  2586         push_fr(FRm);
  2587         push_fr(FRn);
  2588     }
  2589     FCOMIP_st(1);
  2590     SETA_t();
  2591     FPOP_st();
  2592     sh4_x86.tstate = TSTATE_A;
  2593 :}
  2595 FSCA FPUL, FRn {:  
  2596     COUNT_INST(I_FSCA);
  2597     check_fpuen();
  2598     if( sh4_x86.double_prec == 0 ) {
  2599         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2600         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2601         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2602     }
  2603     sh4_x86.tstate = TSTATE_NONE;
  2604 :}
  2605 FIPR FVm, FVn {:  
  2606     COUNT_INST(I_FIPR);
  2607     check_fpuen();
  2608     if( sh4_x86.double_prec == 0 ) {
  2609         if( sh4_x86.sse3_enabled ) {
  2610             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2611             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2612             HADDPS_xmm_xmm( 4, 4 ); 
  2613             HADDPS_xmm_xmm( 4, 4 );
  2614             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2615         } else {
  2616             push_fr( FVm<<2 );
  2617             push_fr( FVn<<2 );
  2618             FMULP_st(1);
  2619             push_fr( (FVm<<2)+1);
  2620             push_fr( (FVn<<2)+1);
  2621             FMULP_st(1);
  2622             FADDP_st(1);
  2623             push_fr( (FVm<<2)+2);
  2624             push_fr( (FVn<<2)+2);
  2625             FMULP_st(1);
  2626             FADDP_st(1);
  2627             push_fr( (FVm<<2)+3);
  2628             push_fr( (FVn<<2)+3);
  2629             FMULP_st(1);
  2630             FADDP_st(1);
  2631             pop_fr( (FVn<<2)+3);
  2632         }
  2633     }
  2634 :}
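       /* The SSE3 path computes the 4-element dot product with two horizontal
        * adds; in intrinsics form (illustrative only, fv_m/fv_n standing for
        * the two 16-byte-aligned vector registers, as with MOVAPS above):
        *
        *     __m128 p = _mm_mul_ps( _mm_load_ps(fv_m), _mm_load_ps(fv_n) );
        *     p = _mm_hadd_ps( p, p );
        *     p = _mm_hadd_ps( p, p );        // sum now in every lane
        *     _mm_store_ss( &fv_n[3], p );    // FIPR writes FR[(FVn<<2)+3]
        *
        * The raw +2 offset in the MOVSS above reflects the pair-swapped fr
        * bank storage (see R_FR); the x87 fallback accumulates the same four
        * products term by term. */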
  2635 FTRV XMTRX, FVn {:  
  2636     COUNT_INST(I_FTRV);
  2637     check_fpuen();
  2638     if( sh4_x86.double_prec == 0 ) {
  2639         if( sh4_x86.sse3_enabled && sh4_x86.begin_callback == NULL ) {
  2640         	/* FIXME: For now, disable this inlining when we're running in shadow mode -
  2641         	 * it gives slightly different results from the emu core. Need to
  2642         	 * fix the precision so both give the right results.
  2643         	 */
  2644             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2645             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2646             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2647             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2649             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2650             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2651             MOV_xmm_xmm( 4, 6 );
  2652             MOV_xmm_xmm( 5, 7 );
  2653             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2654             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2655             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2656             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2657             MULPS_xmm_xmm( 0, 4 );
  2658             MULPS_xmm_xmm( 1, 5 );
  2659             MULPS_xmm_xmm( 2, 6 );
  2660             MULPS_xmm_xmm( 3, 7 );
  2661             ADDPS_xmm_xmm( 5, 4 );
  2662             ADDPS_xmm_xmm( 7, 6 );
  2663             ADDPS_xmm_xmm( 6, 4 );
  2664             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2665         } else {
  2666             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2667             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2668         }
  2669     }
  2670     sh4_x86.tstate = TSTATE_NONE;
  2671 :}
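       /* Scalar reference for FTRV (FVn' = XMTRX * FVn), which is what both
        * paths above compute, ignoring the pair-swapped storage that the
        * "M1 M0 M3 M2" comments account for:
        *
        *     float r[4];
        *     for( int i = 0; i < 4; i++ )
        *         r[i] = xf[0+i]*v[0] + xf[4+i]*v[1] + xf[8+i]*v[2] + xf[12+i]*v[3];
        *
        * i.e. XMTRX is held column-major in the XF bank. */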
  2673 FRCHG {:  
  2674     COUNT_INST(I_FRCHG);
  2675     check_fpuen();
  2676     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2677     CALL_ptr( sh4_switch_fr_banks );
  2678     sh4_x86.tstate = TSTATE_NONE;
  2679 :}
  2680 FSCHG {:  
  2681     COUNT_INST(I_FSCHG);
  2682     check_fpuen();
  2683     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2684     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2685     sh4_x86.tstate = TSTATE_NONE;
  2686     sh4_x86.double_size = !sh4_x86.double_size;
  2687     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2688 :}
  2690 /* Processor control instructions */
  2691 LDC Rm, SR {:
  2692     COUNT_INST(I_LDCSR);
  2693     if( sh4_x86.in_delay_slot ) {
  2694 	SLOTILLEGAL();
  2695     } else {
  2696 	check_priv();
  2697 	load_reg( REG_EAX, Rm );
  2698 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2699 	sh4_x86.fpuen_checked = FALSE;
  2700 	sh4_x86.tstate = TSTATE_NONE;
  2701     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2702 	return 2;
  2703     }
  2704 :}
  2705 LDC Rm, GBR {: 
  2706     COUNT_INST(I_LDC);
  2707     load_reg( REG_EAX, Rm );
  2708     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2709 :}
  2710 LDC Rm, VBR {:  
  2711     COUNT_INST(I_LDC);
  2712     check_priv();
  2713     load_reg( REG_EAX, Rm );
  2714     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2715     sh4_x86.tstate = TSTATE_NONE;
  2716 :}
  2717 LDC Rm, SSR {:  
  2718     COUNT_INST(I_LDC);
  2719     check_priv();
  2720     load_reg( REG_EAX, Rm );
  2721     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2722     sh4_x86.tstate = TSTATE_NONE;
  2723 :}
  2724 LDC Rm, SGR {:  
  2725     COUNT_INST(I_LDC);
  2726     check_priv();
  2727     load_reg( REG_EAX, Rm );
  2728     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2729     sh4_x86.tstate = TSTATE_NONE;
  2730 :}
  2731 LDC Rm, SPC {:  
  2732     COUNT_INST(I_LDC);
  2733     check_priv();
  2734     load_reg( REG_EAX, Rm );
  2735     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2736     sh4_x86.tstate = TSTATE_NONE;
  2737 :}
  2738 LDC Rm, DBR {:  
  2739     COUNT_INST(I_LDC);
  2740     check_priv();
  2741     load_reg( REG_EAX, Rm );
  2742     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2743     sh4_x86.tstate = TSTATE_NONE;
  2744 :}
  2745 LDC Rm, Rn_BANK {:  
  2746     COUNT_INST(I_LDC);
  2747     check_priv();
  2748     load_reg( REG_EAX, Rm );
  2749     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2750     sh4_x86.tstate = TSTATE_NONE;
  2751 :}
  2752 LDC.L @Rm+, GBR {:  
  2753     COUNT_INST(I_LDCM);
  2754     load_reg( REG_EAX, Rm );
  2755     check_ralign32( REG_EAX );
  2756     MEM_READ_LONG( REG_EAX, REG_EAX );
  2757     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2758     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2759     sh4_x86.tstate = TSTATE_NONE;
  2760 :}
  2761 LDC.L @Rm+, SR {:
  2762     COUNT_INST(I_LDCSRM);
  2763     if( sh4_x86.in_delay_slot ) {
  2764 	SLOTILLEGAL();
  2765     } else {
  2766 	check_priv();
  2767 	load_reg( REG_EAX, Rm );
  2768 	check_ralign32( REG_EAX );
  2769 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2770 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2771 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2772 	sh4_x86.fpuen_checked = FALSE;
  2773 	sh4_x86.tstate = TSTATE_NONE;
  2774     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2775 	return 2;
  2776     }
  2777 :}
  2778 LDC.L @Rm+, VBR {:  
  2779     COUNT_INST(I_LDCM);
  2780     check_priv();
  2781     load_reg( REG_EAX, Rm );
  2782     check_ralign32( REG_EAX );
  2783     MEM_READ_LONG( REG_EAX, REG_EAX );
  2784     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2785     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2786     sh4_x86.tstate = TSTATE_NONE;
  2787 :}
  2788 LDC.L @Rm+, SSR {:
  2789     COUNT_INST(I_LDCM);
  2790     check_priv();
  2791     load_reg( REG_EAX, Rm );
  2792     check_ralign32( REG_EAX );
  2793     MEM_READ_LONG( REG_EAX, REG_EAX );
  2794     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2795     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2796     sh4_x86.tstate = TSTATE_NONE;
  2797 :}
  2798 LDC.L @Rm+, SGR {:  
  2799     COUNT_INST(I_LDCM);
  2800     check_priv();
  2801     load_reg( REG_EAX, Rm );
  2802     check_ralign32( REG_EAX );
  2803     MEM_READ_LONG( REG_EAX, REG_EAX );
  2804     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2805     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2806     sh4_x86.tstate = TSTATE_NONE;
  2807 :}
  2808 LDC.L @Rm+, SPC {:  
  2809     COUNT_INST(I_LDCM);
  2810     check_priv();
  2811     load_reg( REG_EAX, Rm );
  2812     check_ralign32( REG_EAX );
  2813     MEM_READ_LONG( REG_EAX, REG_EAX );
  2814     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2815     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2816     sh4_x86.tstate = TSTATE_NONE;
  2817 :}
  2818 LDC.L @Rm+, DBR {:  
  2819     COUNT_INST(I_LDCM);
  2820     check_priv();
  2821     load_reg( REG_EAX, Rm );
  2822     check_ralign32( REG_EAX );
  2823     MEM_READ_LONG( REG_EAX, REG_EAX );
  2824     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2825     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2826     sh4_x86.tstate = TSTATE_NONE;
  2827 :}
  2828 LDC.L @Rm+, Rn_BANK {:  
  2829     COUNT_INST(I_LDCM);
  2830     check_priv();
  2831     load_reg( REG_EAX, Rm );
  2832     check_ralign32( REG_EAX );
  2833     MEM_READ_LONG( REG_EAX, REG_EAX );
  2834     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2835     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2836     sh4_x86.tstate = TSTATE_NONE;
  2837 :}
  2838 LDS Rm, FPSCR {:
  2839     COUNT_INST(I_LDSFPSCR);
  2840     check_fpuen();
  2841     load_reg( REG_EAX, Rm );
  2842     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2843     sh4_x86.tstate = TSTATE_NONE;
  2844     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2845     return 2;
  2846 :}
  2847 LDS.L @Rm+, FPSCR {:  
  2848     COUNT_INST(I_LDSFPSCRM);
  2849     check_fpuen();
  2850     load_reg( REG_EAX, Rm );
  2851     check_ralign32( REG_EAX );
  2852     MEM_READ_LONG( REG_EAX, REG_EAX );
  2853     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2854     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2855     sh4_x86.tstate = TSTATE_NONE;
  2856     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2857     return 2;
  2858 :}
  2859 LDS Rm, FPUL {:  
  2860     COUNT_INST(I_LDS);
  2861     check_fpuen();
  2862     load_reg( REG_EAX, Rm );
  2863     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2864 :}
  2865 LDS.L @Rm+, FPUL {:  
  2866     COUNT_INST(I_LDSM);
  2867     check_fpuen();
  2868     load_reg( REG_EAX, Rm );
  2869     check_ralign32( REG_EAX );
  2870     MEM_READ_LONG( REG_EAX, REG_EAX );
  2871     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2872     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2873     sh4_x86.tstate = TSTATE_NONE;
  2874 :}
  2875 LDS Rm, MACH {: 
  2876     COUNT_INST(I_LDS);
  2877     load_reg( REG_EAX, Rm );
  2878     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2879 :}
  2880 LDS.L @Rm+, MACH {:  
  2881     COUNT_INST(I_LDSM);
  2882     load_reg( REG_EAX, Rm );
  2883     check_ralign32( REG_EAX );
  2884     MEM_READ_LONG( REG_EAX, REG_EAX );
  2885     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2886     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2887     sh4_x86.tstate = TSTATE_NONE;
  2888 :}
  2889 LDS Rm, MACL {:  
  2890     COUNT_INST(I_LDS);
  2891     load_reg( REG_EAX, Rm );
  2892     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2893 :}
  2894 LDS.L @Rm+, MACL {:  
  2895     COUNT_INST(I_LDSM);
  2896     load_reg( REG_EAX, Rm );
  2897     check_ralign32( REG_EAX );
  2898     MEM_READ_LONG( REG_EAX, REG_EAX );
  2899     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2900     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2901     sh4_x86.tstate = TSTATE_NONE;
  2902 :}
  2903 LDS Rm, PR {:  
  2904     COUNT_INST(I_LDS);
  2905     load_reg( REG_EAX, Rm );
  2906     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2907 :}
  2908 LDS.L @Rm+, PR {:  
  2909     COUNT_INST(I_LDSM);
  2910     load_reg( REG_EAX, Rm );
  2911     check_ralign32( REG_EAX );
  2912     MEM_READ_LONG( REG_EAX, REG_EAX );
  2913     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2914     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2915     sh4_x86.tstate = TSTATE_NONE;
  2916 :}
  2917 LDTLB {:  
  2918     COUNT_INST(I_LDTLB);
  2919     CALL_ptr( MMU_ldtlb );
  2920     sh4_x86.tstate = TSTATE_NONE;
  2921 :}
  2922 OCBI @Rn {:
  2923     COUNT_INST(I_OCBI);
  2924 :}
  2925 OCBP @Rn {:
  2926     COUNT_INST(I_OCBP);
  2927 :}
  2928 OCBWB @Rn {:
  2929     COUNT_INST(I_OCBWB);
  2930 :}
  2931 PREF @Rn {:
  2932     COUNT_INST(I_PREF);
  2933     load_reg( REG_EAX, Rn );
  2934     MEM_PREFETCH( REG_EAX );
  2935     sh4_x86.tstate = TSTATE_NONE;
  2936 :}
  2937 SLEEP {: 
  2938     COUNT_INST(I_SLEEP);
  2939     check_priv();
  2940     CALL_ptr( sh4_sleep );
  2941     sh4_x86.tstate = TSTATE_NONE;
  2942     sh4_x86.in_delay_slot = DELAY_NONE;
  2943     return 2;
  2944 :}
  2945 STC SR, Rn {:
  2946     COUNT_INST(I_STCSR);
  2947     check_priv();
  2948     CALL_ptr(sh4_read_sr);
  2949     store_reg( REG_EAX, Rn );
  2950     sh4_x86.tstate = TSTATE_NONE;
  2951 :}
  2952 STC GBR, Rn {:  
  2953     COUNT_INST(I_STC);
  2954     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2955     store_reg( REG_EAX, Rn );
  2956 :}
  2957 STC VBR, Rn {:  
  2958     COUNT_INST(I_STC);
  2959     check_priv();
  2960     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2961     store_reg( REG_EAX, Rn );
  2962     sh4_x86.tstate = TSTATE_NONE;
  2963 :}
  2964 STC SSR, Rn {:  
  2965     COUNT_INST(I_STC);
  2966     check_priv();
  2967     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2968     store_reg( REG_EAX, Rn );
  2969     sh4_x86.tstate = TSTATE_NONE;
  2970 :}
  2971 STC SPC, Rn {:  
  2972     COUNT_INST(I_STC);
  2973     check_priv();
  2974     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2975     store_reg( REG_EAX, Rn );
  2976     sh4_x86.tstate = TSTATE_NONE;
  2977 :}
  2978 STC SGR, Rn {:  
  2979     COUNT_INST(I_STC);
  2980     check_priv();
  2981     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2982     store_reg( REG_EAX, Rn );
  2983     sh4_x86.tstate = TSTATE_NONE;
  2984 :}
  2985 STC DBR, Rn {:  
  2986     COUNT_INST(I_STC);
  2987     check_priv();
  2988     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2989     store_reg( REG_EAX, Rn );
  2990     sh4_x86.tstate = TSTATE_NONE;
  2991 :}
  2992 STC Rm_BANK, Rn {:
  2993     COUNT_INST(I_STC);
  2994     check_priv();
  2995     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  2996     store_reg( REG_EAX, Rn );
  2997     sh4_x86.tstate = TSTATE_NONE;
  2998 :}
  2999 STC.L SR, @-Rn {:
  3000     COUNT_INST(I_STCSRM);
  3001     check_priv();
  3002     CALL_ptr( sh4_read_sr );
  3003     MOVL_r32_r32( REG_EAX, REG_EDX );
  3004     load_reg( REG_EAX, Rn );
  3005     check_walign32( REG_EAX );
  3006     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  3007     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3008     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3009     sh4_x86.tstate = TSTATE_NONE;
  3010 :}
  3011 STC.L VBR, @-Rn {:  
  3012     COUNT_INST(I_STCM);
  3013     check_priv();
  3014     load_reg( REG_EAX, Rn );
  3015     check_walign32( REG_EAX );
  3016     ADDL_imms_r32( -4, REG_EAX );
  3017     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  3018     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3019     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3020     sh4_x86.tstate = TSTATE_NONE;
  3021 :}
  3022 STC.L SSR, @-Rn {:  
  3023     COUNT_INST(I_STCM);
  3024     check_priv();
  3025     load_reg( REG_EAX, Rn );
  3026     check_walign32( REG_EAX );
  3027     ADDL_imms_r32( -4, REG_EAX );
  3028     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  3029     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3030     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3031     sh4_x86.tstate = TSTATE_NONE;
  3032 :}
  3033 STC.L SPC, @-Rn {:
  3034     COUNT_INST(I_STCM);
  3035     check_priv();
  3036     load_reg( REG_EAX, Rn );
  3037     check_walign32( REG_EAX );
  3038     ADDL_imms_r32( -4, REG_EAX );
  3039     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3040     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3041     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3042     sh4_x86.tstate = TSTATE_NONE;
  3043 :}
  3044 STC.L SGR, @-Rn {:  
  3045     COUNT_INST(I_STCM);
  3046     check_priv();
  3047     load_reg( REG_EAX, Rn );
  3048     check_walign32( REG_EAX );
  3049     ADDL_imms_r32( -4, REG_EAX );
  3050     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3051     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3052     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3053     sh4_x86.tstate = TSTATE_NONE;
  3054 :}
  3055 STC.L DBR, @-Rn {:  
  3056     COUNT_INST(I_STCM);
  3057     check_priv();
  3058     load_reg( REG_EAX, Rn );
  3059     check_walign32( REG_EAX );
  3060     ADDL_imms_r32( -4, REG_EAX );
  3061     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3062     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3063     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3064     sh4_x86.tstate = TSTATE_NONE;
  3065 :}
  3066 STC.L Rm_BANK, @-Rn {:  
  3067     COUNT_INST(I_STCM);
  3068     check_priv();
  3069     load_reg( REG_EAX, Rn );
  3070     check_walign32( REG_EAX );
  3071     ADDL_imms_r32( -4, REG_EAX );
  3072     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3073     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3074     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3075     sh4_x86.tstate = TSTATE_NONE;
  3076 :}
  3077 STC.L GBR, @-Rn {:  
  3078     COUNT_INST(I_STCM);
  3079     load_reg( REG_EAX, Rn );
  3080     check_walign32( REG_EAX );
  3081     ADDL_imms_r32( -4, REG_EAX );
  3082     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3083     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3084     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3085     sh4_x86.tstate = TSTATE_NONE;
  3086 :}
  3087 STS FPSCR, Rn {:  
  3088     COUNT_INST(I_STSFPSCR);
  3089     check_fpuen();
  3090     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3091     store_reg( REG_EAX, Rn );
  3092 :}
  3093 STS.L FPSCR, @-Rn {:  
  3094     COUNT_INST(I_STSFPSCRM);
  3095     check_fpuen();
  3096     load_reg( REG_EAX, Rn );
  3097     check_walign32( REG_EAX );
  3098     ADDL_imms_r32( -4, REG_EAX );
  3099     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3100     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3101     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3102     sh4_x86.tstate = TSTATE_NONE;
  3103 :}
  3104 STS FPUL, Rn {:  
  3105     COUNT_INST(I_STS);
  3106     check_fpuen();
  3107     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3108     store_reg( REG_EAX, Rn );
  3109 :}
  3110 STS.L FPUL, @-Rn {:  
  3111     COUNT_INST(I_STSM);
  3112     check_fpuen();
  3113     load_reg( REG_EAX, Rn );
  3114     check_walign32( REG_EAX );
  3115     ADDL_imms_r32( -4, REG_EAX );
  3116     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3117     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3118     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3119     sh4_x86.tstate = TSTATE_NONE;
  3120 :}
  3121 STS MACH, Rn {:  
  3122     COUNT_INST(I_STS);
  3123     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3124     store_reg( REG_EAX, Rn );
  3125 :}
  3126 STS.L MACH, @-Rn {:  
  3127     COUNT_INST(I_STSM);
  3128     load_reg( REG_EAX, Rn );
  3129     check_walign32( REG_EAX );
  3130     ADDL_imms_r32( -4, REG_EAX );
  3131     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3132     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3133     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3134     sh4_x86.tstate = TSTATE_NONE;
  3135 :}
  3136 STS MACL, Rn {:  
  3137     COUNT_INST(I_STS);
  3138     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3139     store_reg( REG_EAX, Rn );
  3140 :}
  3141 STS.L MACL, @-Rn {:  
  3142     COUNT_INST(I_STSM);
  3143     load_reg( REG_EAX, Rn );
  3144     check_walign32( REG_EAX );
  3145     ADDL_imms_r32( -4, REG_EAX );
  3146     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3147     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3148     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3149     sh4_x86.tstate = TSTATE_NONE;
  3150 :}
  3151 STS PR, Rn {:  
  3152     COUNT_INST(I_STS);
  3153     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3154     store_reg( REG_EAX, Rn );
  3155 :}
  3156 STS.L PR, @-Rn {:  
  3157     COUNT_INST(I_STSM);
  3158     load_reg( REG_EAX, Rn );
  3159     check_walign32( REG_EAX );
  3160     ADDL_imms_r32( -4, REG_EAX );
  3161     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3162     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3163     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3164     sh4_x86.tstate = TSTATE_NONE;
  3165 :}
  3167 NOP {: 
  3168     COUNT_INST(I_NOP);
  3169     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  3170 :}
  3171 %%
  3172     sh4_x86.in_delay_slot = DELAY_NONE;
  3173     return 0;
  3174 }
  3177 /**
  3178  * The unwind methods only work if we compiled with DWARF2 frame information
  3179  * (i.e. -fexceptions); otherwise we have to use the direct frame scan.
  3180  */
  3181 #ifdef HAVE_EXCEPTIONS
  3182 #include <unwind.h>
  3184 struct UnwindInfo {
  3185     uintptr_t block_start;
  3186     uintptr_t block_end;
  3187     void *pc;
  3188 };
  3190 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3191 {
  3192     struct UnwindInfo *info = arg;
  3193     void *pc = (void *)_Unwind_GetIP(context);
  3194     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3195         info->pc = pc;
  3196         return _URC_NORMAL_STOP;
  3197     }
  3198     return _URC_NO_REASON;
  3199 }
  3201 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3202 {
  3203     struct UnwindInfo info;
  3205     info.pc = NULL;
  3206     info.block_start = (uintptr_t)code;
  3207     info.block_end = info.block_start + code_size;
  3208     _Unwind_Backtrace( xlat_check_frame, &info );
  3209     return info.pc;
  3210 }
  3211 #else
  3212 /* Assume this is an ia32 build - amd64 should always have dwarf information */
  3213 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3214 {
  3215     void *result = NULL;
  3216     __asm__(
  3217         "mov %%ebp, %%eax\n\t"
  3218         "mov $0x8, %%ecx\n\t"
  3219         "mov %1, %%edx\n"
  3220         "frame_loop: test %%eax, %%eax\n\t"
  3221         "je frame_not_found\n\t"
  3222         "cmp (%%eax), %%edx\n\t"
  3223         "je frame_found\n\t"
  3224         "sub $0x1, %%ecx\n\t"
  3225         "je frame_not_found\n\t"
  3226         "movl (%%eax), %%eax\n\t"
  3227         "jmp frame_loop\n"
  3228         "frame_found: movl 0x4(%%eax), %0\n"
  3229         "frame_not_found:"
  3230         : "=r" (result)
  3231         : "r" (((uint8_t *)&sh4r) + 128 )
  3232         : "eax", "ecx", "edx" );
  3233     return result;
  3234 }
  3235 #endif
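       /* Hypothetical usage sketch (block_code/block_size are illustrative
        * placeholders for a block's start address and code length): called
        * while a translated block is live on the call stack, e.g. from a
        * memory-fault recovery path, this returns the host PC inside that
        * block, or NULL:
        *
        *     void *native_pc = xlat_get_native_pc( block_code, block_size );
        *     if( native_pc != NULL ) {
        *         // map native_pc back to an SH4 PC for exception recovery
        *     }
        */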