lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 1149:da6124fceec6
prev 1146:76c5d1064262
next 1176:70feb1749427
author nkeynes
date Thu Nov 11 17:51:37 2010 +1000
permissions -rw-r--r--
last change Add convenience gl_check_error() function
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4dasm.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/mmu.h"
    35 #include "xlat/xltcache.h"
    36 #include "xlat/x86/x86op.h"
    37 #include "x86dasm/x86dasm.h"
    38 #include "clock.h"
    40 #define DEFAULT_BACKPATCH_SIZE 4096
    42 /* Offset of a reg relative to the sh4r structure */
    43 #define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
    45 #define R_T      REG_OFFSET(t)
    46 #define R_Q      REG_OFFSET(q)
    47 #define R_S      REG_OFFSET(s)
    48 #define R_M      REG_OFFSET(m)
    49 #define R_SR     REG_OFFSET(sr)
    50 #define R_GBR    REG_OFFSET(gbr)
    51 #define R_SSR    REG_OFFSET(ssr)
    52 #define R_SPC    REG_OFFSET(spc)
    53 #define R_VBR    REG_OFFSET(vbr)
    54 #define R_MACH   REG_OFFSET(mac)+4
    55 #define R_MACL   REG_OFFSET(mac)
    56 #define R_PC     REG_OFFSET(pc)
    57 #define R_NEW_PC REG_OFFSET(new_pc)
    58 #define R_PR     REG_OFFSET(pr)
    59 #define R_SGR    REG_OFFSET(sgr)
    60 #define R_FPUL   REG_OFFSET(fpul)
    61 #define R_FPSCR  REG_OFFSET(fpscr)
    62 #define R_DBR    REG_OFFSET(dbr)
    63 #define R_R(rn)  REG_OFFSET(r[rn])
    64 #define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
    65 #define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
    66 #define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
    67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
    68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
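/* Editor's illustration (not part of the upstream file): the generated code
 * keeps REG_EBP pointing at ((char *)&sh4r)+128, so the -128 bias above puts
 * the most frequently used fields within a signed 8-bit displacement and
 * keeps the emitted mod r/m encodings short. The (f)^1 index on FR/XF
 * compensates for each register pair being stored as a host little-endian
 * double. A standalone peek at the computed offsets:
 */
#if 0 /* illustration only */
#include <stdio.h>
static void show_reg_offsets( void )
{
    /* R_PC is the byte offset of sh4r.pc minus 128, so an rbp-relative
     * access MOVL_rbpdisp_r32( R_PC, REG_EAX ) reads sh4r.pc. */
    printf( "R_PC=%ld R_R(4)=%ld R_FR(2)=%ld\n",
            (long)R_PC, (long)R_R(4), (long)R_FR(2) );
}
#endif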
    70 #define DELAY_NONE 0
    71 #define DELAY_PC 1
    72 #define DELAY_PC_PR 2
    74 #define SH4_MODE_UNKNOWN -1
    76 struct backpatch_record {
    77     uint32_t fixup_offset;
    78     uint32_t fixup_icount;
    79     int32_t exc_code;
    80 };
    82 /** 
    83  * Struct to manage internal translation state. This state is not saved -
    84  * it is only valid between calls to sh4_translate_begin_block() and
    85  * sh4_translate_end_block()
    86  */
    87 struct sh4_x86_state {
    88     int in_delay_slot;
    89     uint8_t *code;
    90     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    91     gboolean branch_taken; /* true if we branched unconditionally */
    92     gboolean double_prec; /* true if FPU is in double-precision mode */
    93     gboolean double_size; /* true if FPU is in double-size mode */
    94     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    95     uint32_t block_start_pc;
    96     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    97     uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    98     int tstate;
   100     /* mode settings */
   101     gboolean tlb_on; /* True if tlb translation is active */
   102     struct mem_region_fn **priv_address_space;
   103     struct mem_region_fn **user_address_space;
   105     /* Instrumentation */
   106     xlat_block_begin_callback_t begin_callback;
   107     xlat_block_end_callback_t end_callback;
   108     gboolean fastmem;
   110     /* Allocated memory for the (block-wide) back-patch list */
   111     struct backpatch_record *backpatch_list;
   112     uint32_t backpatch_posn;
   113     uint32_t backpatch_size;
   114 };
   116 static struct sh4_x86_state sh4_x86;
   118 static uint32_t max_int = 0x7FFFFFFF;
   119 static uint32_t min_int = 0x80000000;
   120 static uint32_t save_fcw; /* save value for fpu control word */
   121 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   123 static struct x86_symbol x86_symbol_table[] = {
   124     { "sh4r+128", ((char *)&sh4r)+128 },
   125     { "sh4_cpu_period", &sh4_cpu_period },
   126     { "sh4_address_space", NULL },
   127     { "sh4_user_address_space", NULL },
   128     { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
   129     { "sh4_write_fpscr", sh4_write_fpscr },
   130     { "sh4_write_sr", sh4_write_sr },
   131     { "sh4_read_sr", sh4_read_sr },
   132     { "sh4_sleep", sh4_sleep },
   133     { "sh4_fsca", sh4_fsca },
   134     { "sh4_ftrv", sh4_ftrv },
   135     { "sh4_switch_fr_banks", sh4_switch_fr_banks },
   136     { "sh4_execute_instruction", sh4_execute_instruction },
   137     { "signsat48", signsat48 },
   138     { "xlat_get_code_by_vma", xlat_get_code_by_vma },
   139     { "xlat_get_code", xlat_get_code }
   140 };
   143 gboolean is_sse3_supported()
   144 {
   145     uint32_t features;
   147     __asm__ __volatile__(
   148         "mov $0x01, %%eax\n\t"
   149         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   150     return (features & 1) ? TRUE : FALSE;
   151 }
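/* Editor's note (illustration): the inline asm above runs CPUID leaf 1 and
 * tests bit 0 of ECX, which is the SSE3 (PNI) feature flag. The same check
 * written against GCC/clang's <cpuid.h> helper (an assumption: that header
 * is not otherwise used by lxdream) would look like:
 */
#if 0 /* illustration only */
#include <cpuid.h>
static gboolean is_sse3_supported_alt( void )
{
    unsigned int eax, ebx, ecx, edx;
    if( __get_cpuid( 1, &eax, &ebx, &ecx, &edx ) == 0 )
        return FALSE;                    /* CPUID leaf 1 unsupported */
    return (ecx & 1) ? TRUE : FALSE;     /* ECX bit 0 = SSE3 */
}
#endif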
   153 void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
   154 {
   155     sh4_x86.priv_address_space = priv;
   156     sh4_x86.user_address_space = user;
   157     x86_symbol_table[2].ptr = priv;
   158     x86_symbol_table[3].ptr = user;
   159 }
   161 void sh4_translate_init(void)
   162 {
   163     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   164     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   165     sh4_x86.begin_callback = NULL;
   166     sh4_x86.end_callback = NULL;
   167     sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
   168     sh4_x86.fastmem = TRUE;
   169     sh4_x86.sse3_enabled = is_sse3_supported();
   170     x86_disasm_init();
   171     x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
   172 }
   174 void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
   175 {
   176     sh4_x86.begin_callback = begin;
   177     sh4_x86.end_callback = end;
   178 }
   180 void sh4_translate_set_fastmem( gboolean flag )
   181 {
   182     sh4_x86.fastmem = flag;
   183 }
   185 /**
   186  * Disassemble the given translated code block, and its source SH4 code block
   187  * side-by-side. The current native pc will be marked if non-null.
   188  */
   189 void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
   190 {
   191     char buf[256];
   192     char op[256];
   194     uintptr_t target_start = (uintptr_t)code, target_pc;
   195     uintptr_t target_end = target_start + xlat_get_code_size(code);
   196     uint32_t source_pc = source_start;
   197     uint32_t source_end = source_pc;
   198     xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
   199     xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;
   201     for( target_pc = target_start; target_pc < target_end;  ) {
   202         uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
   203 #if SIZEOF_VOID_P == 8
   204         fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
   205                       target_pc, op, buf );
   206 #else
   207         fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
   208                       target_pc, op, buf );
   209 #endif        
   210         if( source_recov_table < source_recov_end && 
   211             target_pc >= (target_start + source_recov_table->xlat_offset) ) {
   212             source_recov_table++;
   213             if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
   214                 source_end = source_start + (source_recov_table->sh4_icount)*2;
   215         }
   217         if( source_pc < source_end ) {
   218             uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
   219             fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
   220             source_pc = source_pc2;
   221         } else {
   222             fprintf( out, "\n" );
   223         }
   225         target_pc = pc2;
   226     }
   228     while( source_pc < source_end ) {
   229         uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
   230         fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
   231         source_pc = source_pc2;
   232     }
   233 }
   235 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   236 {
   237     int reloc_size = 4;
   239     if( exc_code == -2 ) {
   240         reloc_size = sizeof(void *);
   241     }
   243     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   244 	sh4_x86.backpatch_size <<= 1;
   245 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   246 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   247 	assert( sh4_x86.backpatch_list != NULL );
   248     }
   249     if( sh4_x86.in_delay_slot ) {
   250 	fixup_pc -= 2;
   251     }
   253     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   254 	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
   255     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   256     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   257     sh4_x86.backpatch_posn++;
   258 }
   260 #define TSTATE_NONE -1
   261 #define TSTATE_O    X86_COND_O
   262 #define TSTATE_C    X86_COND_C
   263 #define TSTATE_E    X86_COND_E
   264 #define TSTATE_NE   X86_COND_NE
   265 #define TSTATE_G    X86_COND_G
   266 #define TSTATE_GE   X86_COND_GE
   267 #define TSTATE_A    X86_COND_A
   268 #define TSTATE_AE   X86_COND_AE
   270 #define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
   271 #define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
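/* Editor's note (illustration): JCC_cc_rel8(cc,-1) emits a short jump whose
 * displacement byte is the -1 placeholder; MARK_JMP8 records that byte's
 * address, and JMP_TARGET adds (current output - placeholder address) to it.
 * A rel8 is relative to the end of the instruction (placeholder + 1), so
 * -1 + (target - placeholder) == target - (placeholder + 1), the correct
 * displacement. A standalone model of the arithmetic:
 */
#if 0 /* illustration only */
#include <assert.h>
#include <stdint.h>
static void jmp8_fixup_model( void )
{
    uint8_t buf[16] = { 0x74, 0xFF };     /* JE rel8 with placeholder -1 */
    uint8_t *mark   = &buf[1];            /* MARK_JMP8 */
    uint8_t *target = &buf[7];            /* where the label lands */
    *mark += (uint8_t)(target - mark);    /* JMP_TARGET */
    assert( buf[1] == 5 );                /* insn ends at 2; 2 + 5 == 7 */
}
#endif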
   273 /* Convenience instructions */
   274 #define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
   275 #define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
   276 #define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
   277 #define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
   278 #define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
   279 #define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
   280 #define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
   281 #define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
   282 #define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
   283 #define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
   284 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
   285 #define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
   286 #define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
   287 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
   288 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
   289 #define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
   290 #define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
   291 #define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
   292 #define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
   293 #define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
   295 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
   296 #define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   297 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   298     JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
   300 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
   301 #define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   302 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   303     JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
   306 #define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
   307 #define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
   309 /**
   310  * Load an FR register (single-precision floating point) into an integer x86
   311  * register (eg for register-to-register moves)
   312  */
   313 #define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
   314 #define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
   316 /**
   317  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   318  */
   319 #define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
   320 #define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
   322 /**
   323  * Store an FR register (single-precision floating point) from an integer x86+
   324  * register (eg for register-to-register moves)
   325  */
   326 #define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
   327 #define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
   329 #define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   330 #define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   333 #define push_fpul()  FLDF_rbpdisp(R_FPUL)
   334 #define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
   335 #define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   336 #define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   337 #define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   338 #define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   339 #define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   340 #define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   341 #define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   342 #define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   344 #ifdef ENABLE_SH4STATS
   345 #define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
   346 #else
   347 #define COUNT_INST(id)
   348 #endif
   351 /* Exception checks - Note that all exception checks will clobber EAX */
   353 #define check_priv( ) \
   354     if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
   355         if( sh4_x86.in_delay_slot ) { \
   356             exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
   357         } else { \
   358             exit_block_exc(EXC_ILLEGAL, pc); \
   359         } \
   360         sh4_x86.branch_taken = TRUE; \
   361         sh4_x86.in_delay_slot = DELAY_NONE; \
   362         return 2; \
   363     }
   365 #define check_fpuen( ) \
   366     if( !sh4_x86.fpuen_checked ) {\
   367 	sh4_x86.fpuen_checked = TRUE;\
   368 	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
   369 	ANDL_imms_r32( SR_FD, REG_EAX );\
   370 	if( sh4_x86.in_delay_slot ) {\
   371 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   372 	} else {\
   373 	    JNE_exc(EXC_FPU_DISABLED);\
   374 	}\
   375 	sh4_x86.tstate = TSTATE_NONE; \
   376     }
   378 #define check_ralign16( x86reg ) \
   379     TESTL_imms_r32( 0x00000001, x86reg ); \
   380     JNE_exc(EXC_DATA_ADDR_READ)
   382 #define check_walign16( x86reg ) \
   383     TESTL_imms_r32( 0x00000001, x86reg ); \
   384     JNE_exc(EXC_DATA_ADDR_WRITE);
   386 #define check_ralign32( x86reg ) \
   387     TESTL_imms_r32( 0x00000003, x86reg ); \
   388     JNE_exc(EXC_DATA_ADDR_READ)
   390 #define check_walign32( x86reg ) \
   391     TESTL_imms_r32( 0x00000003, x86reg ); \
   392     JNE_exc(EXC_DATA_ADDR_WRITE);
   394 #define check_ralign64( x86reg ) \
   395     TESTL_imms_r32( 0x00000007, x86reg ); \
   396     JNE_exc(EXC_DATA_ADDR_READ)
   398 #define check_walign64( x86reg ) \
   399     TESTL_imms_r32( 0x00000007, x86reg ); \
   400     JNE_exc(EXC_DATA_ADDR_WRITE);
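/* Editor's note (illustration): each check tests the low address bits that
 * must be clear for the access width (1 for 16-bit, 3 for 32-bit, 7 for
 * 64-bit) and branches to the exception backpatch when any are set:
 */
#if 0 /* illustration only */
#include <stdint.h>
static int is_misaligned( uint32_t addr, uint32_t access_bytes )
{
    return (addr & (access_bytes - 1)) != 0;   /* access_bytes: 2, 4 or 8 */
}
#endif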
   402 #define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)
   404 #define UNDEF(ir)
   405 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
   406  * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
   407  */
   408 #ifdef HAVE_FRAME_ADDRESS
   409 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   410 {
   411     decode_address(address_space(), addr_reg);
   412     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   413         CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
   414     } else {
   415         if( addr_reg != REG_ARG1 ) {
   416             MOVL_r32_r32( addr_reg, REG_ARG1 );
   417         }
   418         MOVP_immptr_rptr( 0, REG_ARG2 );
   419         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   420         CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
   421     }
   422     if( value_reg != REG_RESULT1 ) { 
   423         MOVL_r32_r32( REG_RESULT1, value_reg );
   424     }
   425 }
   427 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   428 {
   429     decode_address(address_space(), addr_reg);
   430     if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
   431         CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
   432     } else {
   433         if( value_reg != REG_ARG2 ) {
   434             MOVL_r32_r32( value_reg, REG_ARG2 );
   435 	}        
   436         if( addr_reg != REG_ARG1 ) {
   437             MOVL_r32_r32( addr_reg, REG_ARG1 );
   438         }
   439 #if MAX_REG_ARG > 2        
   440         MOVP_immptr_rptr( 0, REG_ARG3 );
   441         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   442         CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
   443 #else
   444         MOVL_imm32_rspdisp( 0, 0 );
   445         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   446         CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
   447 #endif
   448     }
   449 }
   450 #else
   451 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   452 {
   453     decode_address(address_space(), addr_reg);
   454     CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
   455     if( value_reg != REG_RESULT1 ) {
   456         MOVL_r32_r32( REG_RESULT1, value_reg );
   457     }
   458 }     
   460 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   461 {
   462     decode_address(address_space(), addr_reg);
   463     CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
   464 }
   465 #endif
   467 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   468 #define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
   469 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
   470 #define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
   471 #define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
   472 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
   473 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
   474 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
   475 #define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
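/* Editor's sketch (assumptions flagged below; not upstream code): the MEM_*
 * macros rely on decode_address() leaving a struct mem_region_fn * for the
 * target region in REG_ECX, after which each access is an indirect call
 * through that region's function table at the given offset. Roughly, in C:
 */
#if 0 /* illustration only; the table shape and page granularity are assumptions */
#include <stdint.h>
struct mem_region_fn_model {                    /* hypothetical mirror */
    int32_t (*read_long)( uint32_t addr );
    void    (*write_long)( uint32_t addr, uint32_t val );
    /* ... byte/word accessors, prefetch, etc ... */
};
static int32_t read_long_model( struct mem_region_fn_model **space, uint32_t addr )
{
    return space[addr >> 12]->read_long( addr );  /* assumed 4K granularity */
}
#endif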
   477 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   479 void sh4_translate_begin_block( sh4addr_t pc ) 
   480 {
   481 	sh4_x86.code = xlat_output;
   482     sh4_x86.in_delay_slot = FALSE;
   483     sh4_x86.fpuen_checked = FALSE;
   484     sh4_x86.branch_taken = FALSE;
   485     sh4_x86.backpatch_posn = 0;
   486     sh4_x86.block_start_pc = pc;
   487     sh4_x86.tlb_on = IS_TLB_ENABLED();
   488     sh4_x86.tstate = TSTATE_NONE;
   489     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   490     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   491     sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
   492     emit_prologue();
   493     if( sh4_x86.begin_callback ) {
   494         CALL_ptr( sh4_x86.begin_callback );
   495     }
   496 }
   499 uint32_t sh4_translate_end_block_size()
   500 {
   501     if( sh4_x86.backpatch_posn <= 3 ) {
   502         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
   503     } else {
   504         return EPILOGUE_SIZE + (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
   505     }
   506 }
   509 /**
   510  * Embed a breakpoint into the generated code
   511  */
   512 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   513 {
   514     MOVL_imm32_r32( pc, REG_EAX );
   515     CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
   516     sh4_x86.tstate = TSTATE_NONE;
   517 }
   520 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   522 /** Offset of xlat_sh4_mode field relative to the code pointer */ 
   523 #define XLAT_SH4_MODE_CODE_OFFSET  (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
   524 #define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
   526 /**
   527  * Test if the loaded target code pointer in %eax is valid, and if so jump
   528  * directly into it, bypassing the normal exit.
   529  */
   530 static void jump_next_block()
   531 {
   532 	uint8_t *ptr = xlat_output;
   533 	TESTP_rptr_rptr(REG_EAX, REG_EAX);
   534 	JE_label(nocode);
   535 	if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
   536 	    /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
   537 	    MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
   538 	    CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   539 	} else {
   540 	    CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
   541 	}
   542 	JNE_label(wrongmode);
   543 	LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
   544 	if( sh4_x86.end_callback ) {
   545 	    /* Note this leaves the stack out of alignment, but that doesn't matter
   546 	     * for what we're currently using it for.
   547 	     */
   548 	    PUSH_r32(REG_EAX);
   549 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   550 	    JMP_rptr(REG_ECX);
   551 	} else {
   552 	    JMP_rptr(REG_EAX);
   553 	}
   554 	JMP_TARGET(wrongmode);
   555 	MOVL_r32disp_r32( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
   556 	int rel = ptr - xlat_output;
   557     JMP_prerel(rel);
   558 	JMP_TARGET(nocode); 
   559 }
   561 static void exit_block()
   562 {
   563 	emit_epilogue();
   564 	if( sh4_x86.end_callback ) {
   565 	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
   566 	    JMP_rptr(REG_ECX);
   567 	} else {
   568 	    RET();
   569 	}
   570 }
   572 /**
   573  * Exit the block with sh4r.pc already written
   574  */
   575 void exit_block_pcset( sh4addr_t pc )
   576 {
   577     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   578     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   579     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   580     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   581     JBE_label(exitloop);
   582     MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
   583     if( sh4_x86.tlb_on ) {
   584         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   585     } else {
   586         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   587     }
   589     jump_next_block();
   590     JMP_TARGET(exitloop);
   591     exit_block();
   592 }
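/* Editor's note (illustration): SH4 instructions are 2 bytes each, so
 * (pc - block_start_pc) >> 1 is the number of instructions executed by the
 * time the block exits; scaled by sh4_cpu_period it becomes the time charged
 * to slice_cycle before event_pending is checked. As plain C:
 */
#if 0 /* illustration only */
#include <stdint.h>
static uint32_t block_cycles( uint32_t pc, uint32_t block_start_pc,
                              uint32_t cpu_period )
{
    return ((pc - block_start_pc) >> 1) * cpu_period;
}
#endif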
   594 /**
   595  * Exit the block with sh4r.new_pc written with the target pc
   596  */
   597 void exit_block_newpcset( sh4addr_t pc )
   598 {
   599     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   600     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   601     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   602     MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
   603     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   604     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   605     JBE_label(exitloop);
   606     if( sh4_x86.tlb_on ) {
   607         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   608     } else {
   609         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   610     }
   612 	jump_next_block();
   613     JMP_TARGET(exitloop);
   614     exit_block();
   615 }
   618 /**
   619  * Exit the block to an absolute PC
   620  */
   621 void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
   622 {
   623     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   624     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   625     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   627     MOVL_imm32_r32( pc, REG_ARG1 );
   628     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   629     CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   630     JBE_label(exitloop);
   632     if( IS_IN_ICACHE(pc) ) {
   633         MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
   634         ANDP_imms_rptr( -4, REG_EAX );
   635     } else if( sh4_x86.tlb_on ) {
   636         CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
   637     } else {
   638         CALL1_ptr_r32(xlat_get_code, REG_ARG1);
   639     }
   640     jump_next_block();
   641     JMP_TARGET(exitloop);
   642     exit_block();
   643 }
   645 /**
   646  * Exit the block to a relative PC
   647  */
   648 void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
   649 {
   650     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   651     ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
   652     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   654 	if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
   655 	    /* Special case for tight loops - the PC doesn't change, and
   656 	     * we already know the target address. Just check events pending before
   657 	     * looping.
   658 	     */
   659         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   660         uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
   661         JCC_cc_prerel(X86_COND_A, backdisp);
   662 	} else {
   663         MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
   664         ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
   665         MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   666         CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
   667         JBE_label(exitloop2);
   669         if( IS_IN_ICACHE(pc) ) {
   670             MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
   671             ANDP_imms_rptr( -4, REG_EAX );
   672         } else if( sh4_x86.tlb_on ) {
   673             CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
   674         } else {
   675             CALL1_ptr_r32(xlat_get_code, REG_ARG1);
   676         }
   677         jump_next_block();
   678         JMP_TARGET(exitloop2);
   679     }
   680     exit_block();
   681 }
   683 /**
   684  * Exit unconditionally with a general exception
   685  */
   686 void exit_block_exc( int code, sh4addr_t pc )
   687 {
   688     MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
   689     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   690     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   691     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   692     MOVL_imm32_r32( code, REG_ARG1 );
   693     CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   694     exit_block();
   695 }    
   697 /**
   698  * Embed a call to sh4_execute_instruction for situations that we
   699  * can't translate (just page-crossing delay slots at the moment).
   700  * Caller is responsible for setting new_pc before calling this function.
   701  *
   702  * Performs:
   703  *   Set PC = endpc
   704  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   705  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   706  *   Call sh4_execute_instruction
   707  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   708  */
   709 void exit_block_emu( sh4vma_t endpc )
   710 {
   711     MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
   712     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   714     MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
   715     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
   716     MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
   717     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
   719     CALL_ptr( sh4_execute_instruction );
   720     exit_block();
   721 } 
   723 /**
   724  * Write the block trailer (exception handling block)
   725  */
   726 void sh4_translate_end_block( sh4addr_t pc ) {
   727     if( sh4_x86.branch_taken == FALSE ) {
   728         // Didn't exit unconditionally already, so write the termination here
   729         exit_block_rel( pc, pc );
   730     }
   731     if( sh4_x86.backpatch_posn != 0 ) {
   732         unsigned int i;
   733         // Exception raised - cleanup and exit
   734         uint8_t *end_ptr = xlat_output;
   735         MOVL_r32_r32( REG_EDX, REG_ECX );
   736         ADDL_r32_r32( REG_EDX, REG_ECX );
   737         ADDL_r32_rbpdisp( REG_ECX, R_SPC );
   738         MOVL_moffptr_eax( &sh4_cpu_period );
   739         MULL_r32( REG_EDX );
   740         ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
   741         exit_block();
   743         for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
   744             uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
   745             if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
   746                 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
   747                     *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
   748                 } else {
   749                     *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   750                 }
   751                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   752                 int rel = end_ptr - xlat_output;
   753                 JMP_prerel(rel);
   754             } else {
   755                 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   756                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
   757                 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   758                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   759                 int rel = end_ptr - xlat_output;
   760                 JMP_prerel(rel);
   761             }
   762         }
   763     }
   764 }
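/* Editor's note (illustration): the "+= xlat_output - ... - 4" lines above
 * retarget a rel32 branch. The stored displacement is relative to the end of
 * its own 4-byte field, so pointing it at a new target means adding
 * (target - field_address - 4). Standalone:
 */
#if 0 /* illustration only */
#include <stdint.h>
#include <string.h>
static void retarget_rel32( uint8_t *disp_field, uint8_t *new_target )
{
    int32_t disp;
    memcpy( &disp, disp_field, 4 );                  /* unaligned-safe read */
    disp += (int32_t)(new_target - disp_field - 4);
    memcpy( disp_field, &disp, 4 );
}
#endif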
   766 /**
   767  * Translate a single instruction. Delayed branches are handled specially
   768  * by translating both the branch and the delayed instruction as a single
   769  * unit (the delay-slot instruction executes before the branch takes effect).
   770  * The instruction MUST be in the icache (assert check)
   771  *
   772  * @return true if the instruction marks the end of a basic block
   773  * (eg a branch or an instruction that otherwise ends the translated block)
   774  */
   775 uint32_t sh4_translate_instruction( sh4vma_t pc )
   776 {
   777     uint32_t ir;
   778     /* Read instruction from icache */
   779     assert( IS_IN_ICACHE(pc) );
   780     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   782     if( !sh4_x86.in_delay_slot ) {
   783 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   784     }
   786     /* check for breakpoints at this pc */
   787     for( int i=0; i<sh4_breakpoint_count; i++ ) {
   788         if( sh4_breakpoints[i].address == pc ) {
   789             sh4_translate_emit_breakpoint(pc);
   790             break;
   791         }
   792     }
   793 %%
   794 /* ALU operations */
   795 ADD Rm, Rn {:
   796     COUNT_INST(I_ADD);
   797     load_reg( REG_EAX, Rm );
   798     load_reg( REG_ECX, Rn );
   799     ADDL_r32_r32( REG_EAX, REG_ECX );
   800     store_reg( REG_ECX, Rn );
   801     sh4_x86.tstate = TSTATE_NONE;
   802 :}
   803 ADD #imm, Rn {:  
   804     COUNT_INST(I_ADDI);
   805     ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
   806     sh4_x86.tstate = TSTATE_NONE;
   807 :}
   808 ADDC Rm, Rn {:
   809     COUNT_INST(I_ADDC);
   810     if( sh4_x86.tstate != TSTATE_C ) {
   811         LDC_t();
   812     }
   813     load_reg( REG_EAX, Rm );
   814     load_reg( REG_ECX, Rn );
   815     ADCL_r32_r32( REG_EAX, REG_ECX );
   816     store_reg( REG_ECX, Rn );
   817     SETC_t();
   818     sh4_x86.tstate = TSTATE_C;
   819 :}
   820 ADDV Rm, Rn {:
   821     COUNT_INST(I_ADDV);
   822     load_reg( REG_EAX, Rm );
   823     load_reg( REG_ECX, Rn );
   824     ADDL_r32_r32( REG_EAX, REG_ECX );
   825     store_reg( REG_ECX, Rn );
   826     SETO_t();
   827     sh4_x86.tstate = TSTATE_O;
   828 :}
   829 AND Rm, Rn {:
   830     COUNT_INST(I_AND);
   831     load_reg( REG_EAX, Rm );
   832     load_reg( REG_ECX, Rn );
   833     ANDL_r32_r32( REG_EAX, REG_ECX );
   834     store_reg( REG_ECX, Rn );
   835     sh4_x86.tstate = TSTATE_NONE;
   836 :}
   837 AND #imm, R0 {:  
   838     COUNT_INST(I_ANDI);
   839     load_reg( REG_EAX, 0 );
   840     ANDL_imms_r32(imm, REG_EAX); 
   841     store_reg( REG_EAX, 0 );
   842     sh4_x86.tstate = TSTATE_NONE;
   843 :}
   844 AND.B #imm, @(R0, GBR) {: 
   845     COUNT_INST(I_ANDB);
   846     load_reg( REG_EAX, 0 );
   847     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   848     MOVL_r32_rspdisp(REG_EAX, 0);
   849     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   850     MOVL_rspdisp_r32(0, REG_EAX);
   851     ANDL_imms_r32(imm, REG_EDX );
   852     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   853     sh4_x86.tstate = TSTATE_NONE;
   854 :}
   855 CMP/EQ Rm, Rn {:  
   856     COUNT_INST(I_CMPEQ);
   857     load_reg( REG_EAX, Rm );
   858     load_reg( REG_ECX, Rn );
   859     CMPL_r32_r32( REG_EAX, REG_ECX );
   860     SETE_t();
   861     sh4_x86.tstate = TSTATE_E;
   862 :}
   863 CMP/EQ #imm, R0 {:  
   864     COUNT_INST(I_CMPEQI);
   865     load_reg( REG_EAX, 0 );
   866     CMPL_imms_r32(imm, REG_EAX);
   867     SETE_t();
   868     sh4_x86.tstate = TSTATE_E;
   869 :}
   870 CMP/GE Rm, Rn {:  
   871     COUNT_INST(I_CMPGE);
   872     load_reg( REG_EAX, Rm );
   873     load_reg( REG_ECX, Rn );
   874     CMPL_r32_r32( REG_EAX, REG_ECX );
   875     SETGE_t();
   876     sh4_x86.tstate = TSTATE_GE;
   877 :}
   878 CMP/GT Rm, Rn {: 
   879     COUNT_INST(I_CMPGT);
   880     load_reg( REG_EAX, Rm );
   881     load_reg( REG_ECX, Rn );
   882     CMPL_r32_r32( REG_EAX, REG_ECX );
   883     SETG_t();
   884     sh4_x86.tstate = TSTATE_G;
   885 :}
   886 CMP/HI Rm, Rn {:  
   887     COUNT_INST(I_CMPHI);
   888     load_reg( REG_EAX, Rm );
   889     load_reg( REG_ECX, Rn );
   890     CMPL_r32_r32( REG_EAX, REG_ECX );
   891     SETA_t();
   892     sh4_x86.tstate = TSTATE_A;
   893 :}
   894 CMP/HS Rm, Rn {: 
   895     COUNT_INST(I_CMPHS);
   896     load_reg( REG_EAX, Rm );
   897     load_reg( REG_ECX, Rn );
   898     CMPL_r32_r32( REG_EAX, REG_ECX );
   899     SETAE_t();
   900     sh4_x86.tstate = TSTATE_AE;
   901  :}
   902 CMP/PL Rn {: 
   903     COUNT_INST(I_CMPPL);
   904     load_reg( REG_EAX, Rn );
   905     CMPL_imms_r32( 0, REG_EAX );
   906     SETG_t();
   907     sh4_x86.tstate = TSTATE_G;
   908 :}
   909 CMP/PZ Rn {:  
   910     COUNT_INST(I_CMPPZ);
   911     load_reg( REG_EAX, Rn );
   912     CMPL_imms_r32( 0, REG_EAX );
   913     SETGE_t();
   914     sh4_x86.tstate = TSTATE_GE;
   915 :}
   916 CMP/STR Rm, Rn {:  
   917     COUNT_INST(I_CMPSTR);
   918     load_reg( REG_EAX, Rm );
   919     load_reg( REG_ECX, Rn );
   920     XORL_r32_r32( REG_ECX, REG_EAX );
   921     TESTB_r8_r8( REG_AL, REG_AL );
   922     JE_label(target1);
   923     TESTB_r8_r8( REG_AH, REG_AH );
   924     JE_label(target2);
   925     SHRL_imm_r32( 16, REG_EAX );
   926     TESTB_r8_r8( REG_AL, REG_AL );
   927     JE_label(target3);
   928     TESTB_r8_r8( REG_AH, REG_AH );
   929     JMP_TARGET(target1);
   930     JMP_TARGET(target2);
   931     JMP_TARGET(target3);
   932     SETE_t();
   933     sh4_x86.tstate = TSTATE_E;
   934 :}
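/* Editor's reference model (illustration only): CMP/STR sets T when any of
 * the four corresponding bytes of Rm and Rn are equal, i.e. when Rm^Rn has
 * a zero byte - exactly the byte tests emitted above:
 */
#if 0 /* illustration only */
#include <stdint.h>
static int cmpstr_t( uint32_t rm, uint32_t rn )
{
    uint32_t x = rm ^ rn;
    return ((x & 0x000000FF) == 0) || ((x & 0x0000FF00) == 0) ||
           ((x & 0x00FF0000) == 0) || ((x & 0xFF000000) == 0);
}
#endif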
   935 DIV0S Rm, Rn {:
   936     COUNT_INST(I_DIV0S);
   937     load_reg( REG_EAX, Rm );
   938     load_reg( REG_ECX, Rn );
   939     SHRL_imm_r32( 31, REG_EAX );
   940     SHRL_imm_r32( 31, REG_ECX );
   941     MOVL_r32_rbpdisp( REG_EAX, R_M );
   942     MOVL_r32_rbpdisp( REG_ECX, R_Q );
   943     CMPL_r32_r32( REG_EAX, REG_ECX );
   944     SETNE_t();
   945     sh4_x86.tstate = TSTATE_NE;
   946 :}
   947 DIV0U {:  
   948     COUNT_INST(I_DIV0U);
   949     XORL_r32_r32( REG_EAX, REG_EAX );
   950     MOVL_r32_rbpdisp( REG_EAX, R_Q );
   951     MOVL_r32_rbpdisp( REG_EAX, R_M );
   952     MOVL_r32_rbpdisp( REG_EAX, R_T );
   953     sh4_x86.tstate = TSTATE_C; // works for DIV1
   954 :}
   955 DIV1 Rm, Rn {:
   956     COUNT_INST(I_DIV1);
   957     MOVL_rbpdisp_r32( R_M, REG_ECX );
   958     load_reg( REG_EAX, Rn );
   959     if( sh4_x86.tstate != TSTATE_C ) {
   960 	LDC_t();
   961     }
   962     RCLL_imm_r32( 1, REG_EAX );
   963     SETC_r8( REG_DL ); // Q'
   964     CMPL_rbpdisp_r32( R_Q, REG_ECX );
   965     JE_label(mqequal);
   966     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
   967     JMP_label(end);
   968     JMP_TARGET(mqequal);
   969     SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
   970     JMP_TARGET(end);
   971     store_reg( REG_EAX, Rn ); // Done with Rn now
   972     SETC_r8(REG_AL); // tmp1
   973     XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
   974     XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
   975     MOVL_r32_rbpdisp( REG_ECX, R_Q );
   976     XORL_imms_r32( 1, REG_AL );   // T = !Q'
   977     MOVZXL_r8_r32( REG_AL, REG_EAX );
   978     MOVL_r32_rbpdisp( REG_EAX, R_T );
   979     sh4_x86.tstate = TSTATE_NONE;
   980 :}
   981 DMULS.L Rm, Rn {:  
   982     COUNT_INST(I_DMULS);
   983     load_reg( REG_EAX, Rm );
   984     load_reg( REG_ECX, Rn );
   985     IMULL_r32(REG_ECX);
   986     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
   987     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
   988     sh4_x86.tstate = TSTATE_NONE;
   989 :}
   990 DMULU.L Rm, Rn {:  
   991     COUNT_INST(I_DMULU);
   992     load_reg( REG_EAX, Rm );
   993     load_reg( REG_ECX, Rn );
   994     MULL_r32(REG_ECX);
   995     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
   996     MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
   997     sh4_x86.tstate = TSTATE_NONE;
   998 :}
   999 DT Rn {:  
  1000     COUNT_INST(I_DT);
  1001     load_reg( REG_EAX, Rn );
  1002     ADDL_imms_r32( -1, REG_EAX );
  1003     store_reg( REG_EAX, Rn );
  1004     SETE_t();
  1005     sh4_x86.tstate = TSTATE_E;
  1006 :}
  1007 EXTS.B Rm, Rn {:  
  1008     COUNT_INST(I_EXTSB);
  1009     load_reg( REG_EAX, Rm );
  1010     MOVSXL_r8_r32( REG_EAX, REG_EAX );
  1011     store_reg( REG_EAX, Rn );
  1012 :}
  1013 EXTS.W Rm, Rn {:  
  1014     COUNT_INST(I_EXTSW);
  1015     load_reg( REG_EAX, Rm );
  1016     MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1017     store_reg( REG_EAX, Rn );
  1018 :}
  1019 EXTU.B Rm, Rn {:  
  1020     COUNT_INST(I_EXTUB);
  1021     load_reg( REG_EAX, Rm );
  1022     MOVZXL_r8_r32( REG_EAX, REG_EAX );
  1023     store_reg( REG_EAX, Rn );
  1024 :}
  1025 EXTU.W Rm, Rn {:  
  1026     COUNT_INST(I_EXTUW);
  1027     load_reg( REG_EAX, Rm );
  1028     MOVZXL_r16_r32( REG_EAX, REG_EAX );
  1029     store_reg( REG_EAX, Rn );
  1030 :}
  1031 MAC.L @Rm+, @Rn+ {:
  1032     COUNT_INST(I_MACL);
  1033     if( Rm == Rn ) {
  1034 	load_reg( REG_EAX, Rm );
  1035 	check_ralign32( REG_EAX );
  1036 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1037 	MOVL_r32_rspdisp(REG_EAX, 0);
  1038 	load_reg( REG_EAX, Rm );
  1039 	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  1040 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1041         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
  1042     } else {
  1043 	load_reg( REG_EAX, Rm );
  1044 	check_ralign32( REG_EAX );
  1045 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1046 	MOVL_r32_rspdisp( REG_EAX, 0 );
  1047 	load_reg( REG_EAX, Rn );
  1048 	check_ralign32( REG_EAX );
  1049 	MEM_READ_LONG( REG_EAX, REG_EAX );
  1050 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
  1051 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1052     }
  1054     IMULL_rspdisp( 0 );
  1055     ADDL_r32_rbpdisp( REG_EAX, R_MACL );
  1056     ADCL_r32_rbpdisp( REG_EDX, R_MACH );
  1058     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1059     TESTL_r32_r32(REG_ECX, REG_ECX);
  1060     JE_label( nosat );
  1061     CALL_ptr( signsat48 );
  1062     JMP_TARGET( nosat );
  1063     sh4_x86.tstate = TSTATE_NONE;
  1064 :}
  1065 MAC.W @Rm+, @Rn+ {:  
  1066     COUNT_INST(I_MACW);
  1067     if( Rm == Rn ) {
  1068 	load_reg( REG_EAX, Rm );
  1069 	check_ralign16( REG_EAX );
  1070 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1071         MOVL_r32_rspdisp( REG_EAX, 0 );
  1072 	load_reg( REG_EAX, Rm );
  1073 	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
  1074 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1075 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
  1076 	// Note: we translate the address twice in case the pair of reads crosses a
  1077 	// page boundary. Maybe worth adding a page-boundary check to skip the second translation
  1078     } else {
  1079 	load_reg( REG_EAX, Rm );
  1080 	check_ralign16( REG_EAX );
  1081 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1082         MOVL_r32_rspdisp( REG_EAX, 0 );
  1083 	load_reg( REG_EAX, Rn );
  1084 	check_ralign16( REG_EAX );
  1085 	MEM_READ_WORD( REG_EAX, REG_EAX );
  1086 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
  1087 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1088     }
  1089     IMULL_rspdisp( 0 );
  1090     MOVL_rbpdisp_r32( R_S, REG_ECX );
  1091     TESTL_r32_r32( REG_ECX, REG_ECX );
  1092     JE_label( nosat );
  1094     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1095     JNO_label( end );            // 2
  1096     MOVL_imm32_r32( 1, REG_EDX );         // 5
  1097     MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
  1098     JS_label( positive );        // 2
  1099     MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
  1100     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1101     JMP_label(end2);           // 2
  1103     JMP_TARGET(positive);
  1104     MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
  1105     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
  1106     JMP_label(end3);            // 2
  1108     JMP_TARGET(nosat);
  1109     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1110     ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
  1111     JMP_TARGET(end);
  1112     JMP_TARGET(end2);
  1113     JMP_TARGET(end3);
  1114     sh4_x86.tstate = TSTATE_NONE;
  1115 :}
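/* Editor's reference model (illustration; mirrors the saturating path
 * emitted above, where S=1 clamps MACL to 32 bits and writes 1 to MACH to
 * flag the overflow):
 */
#if 0 /* illustration only */
#include <stdint.h>
static void macw_s1_model( int32_t *macl, uint32_t *mach, int32_t product )
{
    int64_t sum = (int64_t)*macl + product;
    if( sum > INT32_MAX )      { *macl = INT32_MAX; *mach = 1; }
    else if( sum < INT32_MIN ) { *macl = INT32_MIN; *mach = 1; }
    else                       { *macl = (int32_t)sum; }
}
#endif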
  1116 MOVT Rn {:  
  1117     COUNT_INST(I_MOVT);
  1118     MOVL_rbpdisp_r32( R_T, REG_EAX );
  1119     store_reg( REG_EAX, Rn );
  1120 :}
  1121 MUL.L Rm, Rn {:  
  1122     COUNT_INST(I_MULL);
  1123     load_reg( REG_EAX, Rm );
  1124     load_reg( REG_ECX, Rn );
  1125     MULL_r32( REG_ECX );
  1126     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1127     sh4_x86.tstate = TSTATE_NONE;
  1128 :}
  1129 MULS.W Rm, Rn {:
  1130     COUNT_INST(I_MULSW);
  1131     MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1132     MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1133     MULL_r32( REG_ECX );
  1134     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1135     sh4_x86.tstate = TSTATE_NONE;
  1136 :}
  1137 MULU.W Rm, Rn {:  
  1138     COUNT_INST(I_MULUW);
  1139     MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1140     MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1141     MULL_r32( REG_ECX );
  1142     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1143     sh4_x86.tstate = TSTATE_NONE;
  1144 :}
  1145 NEG Rm, Rn {:
  1146     COUNT_INST(I_NEG);
  1147     load_reg( REG_EAX, Rm );
  1148     NEGL_r32( REG_EAX );
  1149     store_reg( REG_EAX, Rn );
  1150     sh4_x86.tstate = TSTATE_NONE;
  1151 :}
  1152 NEGC Rm, Rn {:  
  1153     COUNT_INST(I_NEGC);
  1154     load_reg( REG_EAX, Rm );
  1155     XORL_r32_r32( REG_ECX, REG_ECX );
  1156     LDC_t();
  1157     SBBL_r32_r32( REG_EAX, REG_ECX );
  1158     store_reg( REG_ECX, Rn );
  1159     SETC_t();
  1160     sh4_x86.tstate = TSTATE_C;
  1161 :}
  1162 NOT Rm, Rn {:  
  1163     COUNT_INST(I_NOT);
  1164     load_reg( REG_EAX, Rm );
  1165     NOTL_r32( REG_EAX );
  1166     store_reg( REG_EAX, Rn );
  1167     sh4_x86.tstate = TSTATE_NONE;
  1168 :}
  1169 OR Rm, Rn {:  
  1170     COUNT_INST(I_OR);
  1171     load_reg( REG_EAX, Rm );
  1172     load_reg( REG_ECX, Rn );
  1173     ORL_r32_r32( REG_EAX, REG_ECX );
  1174     store_reg( REG_ECX, Rn );
  1175     sh4_x86.tstate = TSTATE_NONE;
  1176 :}
  1177 OR #imm, R0 {:
  1178     COUNT_INST(I_ORI);
  1179     load_reg( REG_EAX, 0 );
  1180     ORL_imms_r32(imm, REG_EAX);
  1181     store_reg( REG_EAX, 0 );
  1182     sh4_x86.tstate = TSTATE_NONE;
  1183 :}
  1184 OR.B #imm, @(R0, GBR) {:  
  1185     COUNT_INST(I_ORB);
  1186     load_reg( REG_EAX, 0 );
  1187     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1188     MOVL_r32_rspdisp( REG_EAX, 0 );
  1189     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1190     MOVL_rspdisp_r32( 0, REG_EAX );
  1191     ORL_imms_r32(imm, REG_EDX );
  1192     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1193     sh4_x86.tstate = TSTATE_NONE;
  1194 :}
  1195 ROTCL Rn {:
  1196     COUNT_INST(I_ROTCL);
  1197     load_reg( REG_EAX, Rn );
  1198     if( sh4_x86.tstate != TSTATE_C ) {
  1199 	LDC_t();
  1200     }
  1201     RCLL_imm_r32( 1, REG_EAX );
  1202     store_reg( REG_EAX, Rn );
  1203     SETC_t();
  1204     sh4_x86.tstate = TSTATE_C;
  1205 :}
  1206 ROTCR Rn {:  
  1207     COUNT_INST(I_ROTCR);
  1208     load_reg( REG_EAX, Rn );
  1209     if( sh4_x86.tstate != TSTATE_C ) {
  1210 	LDC_t();
  1211     }
  1212     RCRL_imm_r32( 1, REG_EAX );
  1213     store_reg( REG_EAX, Rn );
  1214     SETC_t();
  1215     sh4_x86.tstate = TSTATE_C;
  1216 :}
  1217 ROTL Rn {:  
  1218     COUNT_INST(I_ROTL);
  1219     load_reg( REG_EAX, Rn );
  1220     ROLL_imm_r32( 1, REG_EAX );
  1221     store_reg( REG_EAX, Rn );
  1222     SETC_t();
  1223     sh4_x86.tstate = TSTATE_C;
  1224 :}
  1225 ROTR Rn {:  
  1226     COUNT_INST(I_ROTR);
  1227     load_reg( REG_EAX, Rn );
  1228     RORL_imm_r32( 1, REG_EAX );
  1229     store_reg( REG_EAX, Rn );
  1230     SETC_t();
  1231     sh4_x86.tstate = TSTATE_C;
  1232 :}
  1233 SHAD Rm, Rn {:
  1234     COUNT_INST(I_SHAD);
  1235     /* Annoyingly enough, not directly convertible */
  1236     load_reg( REG_EAX, Rn );
  1237     load_reg( REG_ECX, Rm );
  1238     CMPL_imms_r32( 0, REG_ECX );
  1239     JGE_label(doshl);
  1241     NEGL_r32( REG_ECX );      // 2
  1242     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1243     JE_label(emptysar);     // 2
  1244     SARL_cl_r32( REG_EAX );       // 2
  1245     JMP_label(end);          // 2
  1247     JMP_TARGET(emptysar);
  1248     SARL_imm_r32(31, REG_EAX );  // 3
  1249     JMP_label(end2);
  1251     JMP_TARGET(doshl);
  1252     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1253     SHLL_cl_r32( REG_EAX );       // 2
  1254     JMP_TARGET(end);
  1255     JMP_TARGET(end2);
  1256     store_reg( REG_EAX, Rn );
  1257     sh4_x86.tstate = TSTATE_NONE;
  1258 :}
  1259 SHLD Rm, Rn {:  
  1260     COUNT_INST(I_SHLD);
  1261     load_reg( REG_EAX, Rn );
  1262     load_reg( REG_ECX, Rm );
  1263     CMPL_imms_r32( 0, REG_ECX );
  1264     JGE_label(doshl);
  1266     NEGL_r32( REG_ECX );      // 2
  1267     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1268     JE_label(emptyshr );
  1269     SHRL_cl_r32( REG_EAX );       // 2
  1270     JMP_label(end);          // 2
  1272     JMP_TARGET(emptyshr);
  1273     XORL_r32_r32( REG_EAX, REG_EAX );
  1274     JMP_label(end2);
  1276     JMP_TARGET(doshl);
  1277     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1278     SHLL_cl_r32( REG_EAX );       // 2
  1279     JMP_TARGET(end);
  1280     JMP_TARGET(end2);
  1281     store_reg( REG_EAX, Rn );
  1282     sh4_x86.tstate = TSTATE_NONE;
  1283 :}
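/* Editor's reference model (illustration only): SHAD/SHLD shift left by
 * Rm & 0x1F when Rm >= 0, otherwise right by (-Rm) & 0x1F, where a zero
 * right-count means a full 32-bit shift - sign-fill for SHAD (the SAR 31
 * above), zero for SHLD (the XOR above). Relies on arithmetic >> for
 * signed ints, as gcc provides:
 */
#if 0 /* illustration only */
#include <stdint.h>
static uint32_t shad_model( int32_t rn, int32_t rm )
{
    if( rm >= 0 ) return (uint32_t)rn << (rm & 0x1F);
    unsigned count = (unsigned)(-rm) & 0x1F;
    return count ? (uint32_t)(rn >> count) : (uint32_t)(rn >> 31);
}
static uint32_t shld_model( uint32_t rn, int32_t rm )
{
    if( rm >= 0 ) return rn << (rm & 0x1F);
    unsigned count = (unsigned)(-rm) & 0x1F;
    return count ? rn >> count : 0;
}
#endif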
  1284 SHAL Rn {: 
  1285     COUNT_INST(I_SHAL);
  1286     load_reg( REG_EAX, Rn );
  1287     SHLL_imm_r32( 1, REG_EAX );
  1288     SETC_t();
  1289     store_reg( REG_EAX, Rn );
  1290     sh4_x86.tstate = TSTATE_C;
  1291 :}
  1292 SHAR Rn {:  
  1293     COUNT_INST(I_SHAR);
  1294     load_reg( REG_EAX, Rn );
  1295     SARL_imm_r32( 1, REG_EAX );
  1296     SETC_t();
  1297     store_reg( REG_EAX, Rn );
  1298     sh4_x86.tstate = TSTATE_C;
  1299 :}
  1300 SHLL Rn {:  
  1301     COUNT_INST(I_SHLL);
  1302     load_reg( REG_EAX, Rn );
  1303     SHLL_imm_r32( 1, REG_EAX );
  1304     SETC_t();
  1305     store_reg( REG_EAX, Rn );
  1306     sh4_x86.tstate = TSTATE_C;
  1307 :}
  1308 SHLL2 Rn {:
  1309     COUNT_INST(I_SHLL);
  1310     load_reg( REG_EAX, Rn );
  1311     SHLL_imm_r32( 2, REG_EAX );
  1312     store_reg( REG_EAX, Rn );
  1313     sh4_x86.tstate = TSTATE_NONE;
  1314 :}
  1315 SHLL8 Rn {:  
  1316     COUNT_INST(I_SHLL);
  1317     load_reg( REG_EAX, Rn );
  1318     SHLL_imm_r32( 8, REG_EAX );
  1319     store_reg( REG_EAX, Rn );
  1320     sh4_x86.tstate = TSTATE_NONE;
  1321 :}
  1322 SHLL16 Rn {:  
  1323     COUNT_INST(I_SHLL);
  1324     load_reg( REG_EAX, Rn );
  1325     SHLL_imm_r32( 16, REG_EAX );
  1326     store_reg( REG_EAX, Rn );
  1327     sh4_x86.tstate = TSTATE_NONE;
  1328 :}
  1329 SHLR Rn {:  
  1330     COUNT_INST(I_SHLR);
  1331     load_reg( REG_EAX, Rn );
  1332     SHRL_imm_r32( 1, REG_EAX );
  1333     SETC_t();
  1334     store_reg( REG_EAX, Rn );
  1335     sh4_x86.tstate = TSTATE_C;
  1336 :}
  1337 SHLR2 Rn {:  
  1338     COUNT_INST(I_SHLR);
  1339     load_reg( REG_EAX, Rn );
  1340     SHRL_imm_r32( 2, REG_EAX );
  1341     store_reg( REG_EAX, Rn );
  1342     sh4_x86.tstate = TSTATE_NONE;
  1343 :}
  1344 SHLR8 Rn {:  
  1345     COUNT_INST(I_SHLR);
  1346     load_reg( REG_EAX, Rn );
  1347     SHRL_imm_r32( 8, REG_EAX );
  1348     store_reg( REG_EAX, Rn );
  1349     sh4_x86.tstate = TSTATE_NONE;
  1350 :}
  1351 SHLR16 Rn {:  
  1352     COUNT_INST(I_SHLR);
  1353     load_reg( REG_EAX, Rn );
  1354     SHRL_imm_r32( 16, REG_EAX );
  1355     store_reg( REG_EAX, Rn );
  1356     sh4_x86.tstate = TSTATE_NONE;
  1357 :}
  1358 SUB Rm, Rn {:  
  1359     COUNT_INST(I_SUB);
  1360     load_reg( REG_EAX, Rm );
  1361     load_reg( REG_ECX, Rn );
  1362     SUBL_r32_r32( REG_EAX, REG_ECX );
  1363     store_reg( REG_ECX, Rn );
  1364     sh4_x86.tstate = TSTATE_NONE;
  1365 :}
  1366 SUBC Rm, Rn {:  
  1367     COUNT_INST(I_SUBC);
  1368     load_reg( REG_EAX, Rm );
  1369     load_reg( REG_ECX, Rn );
  1370     if( sh4_x86.tstate != TSTATE_C ) {
  1371 	LDC_t();
  1372     }
  1373     SBBL_r32_r32( REG_EAX, REG_ECX );
  1374     store_reg( REG_ECX, Rn );
  1375     SETC_t();
  1376     sh4_x86.tstate = TSTATE_C;
  1377 :}
  1378 SUBV Rm, Rn {:  
  1379     COUNT_INST(I_SUBV);
  1380     load_reg( REG_EAX, Rm );
  1381     load_reg( REG_ECX, Rn );
  1382     SUBL_r32_r32( REG_EAX, REG_ECX );
  1383     store_reg( REG_ECX, Rn );
  1384     SETO_t();
  1385     sh4_x86.tstate = TSTATE_O;
  1386 :}
  1387 SWAP.B Rm, Rn {:  
  1388     COUNT_INST(I_SWAPB);
  1389     load_reg( REG_EAX, Rm );
  1390     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1391     store_reg( REG_EAX, Rn );
  1392 :}
  1393 SWAP.W Rm, Rn {:  
  1394     COUNT_INST(I_SWAPB);
  1395     load_reg( REG_EAX, Rm );
  1396     MOVL_r32_r32( REG_EAX, REG_ECX );
  1397     SHLL_imm_r32( 16, REG_ECX );
  1398     SHRL_imm_r32( 16, REG_EAX );
  1399     ORL_r32_r32( REG_EAX, REG_ECX );
  1400     store_reg( REG_ECX, Rn );
  1401     sh4_x86.tstate = TSTATE_NONE;
  1402 :}
  1403 TAS.B @Rn {:  
  1404     COUNT_INST(I_TASB);
  1405     load_reg( REG_EAX, Rn );
  1406     MOVL_r32_rspdisp( REG_EAX, 0 );
  1407     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1408     TESTB_r8_r8( REG_DL, REG_DL );
  1409     SETE_t();
  1410     ORB_imms_r8( 0x80, REG_DL );
  1411     MOVL_rspdisp_r32( 0, REG_EAX );
  1412     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1413     sh4_x86.tstate = TSTATE_NONE;
  1414 :}
  1415 TST Rm, Rn {:  
  1416     COUNT_INST(I_TST);
  1417     load_reg( REG_EAX, Rm );
  1418     load_reg( REG_ECX, Rn );
  1419     TESTL_r32_r32( REG_EAX, REG_ECX );
  1420     SETE_t();
  1421     sh4_x86.tstate = TSTATE_E;
  1422 :}
  1423 TST #imm, R0 {:  
  1424     COUNT_INST(I_TSTI);
  1425     load_reg( REG_EAX, 0 );
  1426     TESTL_imms_r32( imm, REG_EAX );
  1427     SETE_t();
  1428     sh4_x86.tstate = TSTATE_E;
  1429 :}
  1430 TST.B #imm, @(R0, GBR) {:  
  1431     COUNT_INST(I_TSTB);
  1432     load_reg( REG_EAX, 0);
  1433     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1434     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1435     TESTB_imms_r8( imm, REG_AL );
  1436     SETE_t();
  1437     sh4_x86.tstate = TSTATE_E;
  1438 :}
  1439 XOR Rm, Rn {:  
  1440     COUNT_INST(I_XOR);
  1441     load_reg( REG_EAX, Rm );
  1442     load_reg( REG_ECX, Rn );
  1443     XORL_r32_r32( REG_EAX, REG_ECX );
  1444     store_reg( REG_ECX, Rn );
  1445     sh4_x86.tstate = TSTATE_NONE;
  1446 :}
  1447 XOR #imm, R0 {:  
  1448     COUNT_INST(I_XORI);
  1449     load_reg( REG_EAX, 0 );
  1450     XORL_imms_r32( imm, REG_EAX );
  1451     store_reg( REG_EAX, 0 );
  1452     sh4_x86.tstate = TSTATE_NONE;
  1453 :}
  1454 XOR.B #imm, @(R0, GBR) {:  
  1455     COUNT_INST(I_XORB);
  1456     load_reg( REG_EAX, 0 );
  1457     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1458     MOVL_r32_rspdisp( REG_EAX, 0 );
  1459     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1460     MOVL_rspdisp_r32( 0, REG_EAX );
  1461     XORL_imms_r32( imm, REG_EDX );
  1462     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1463     sh4_x86.tstate = TSTATE_NONE;
  1464 :}
  1465 XTRCT Rm, Rn {:
  1466     COUNT_INST(I_XTRCT);
  1467     load_reg( REG_EAX, Rm );
  1468     load_reg( REG_ECX, Rn );
  1469     SHLL_imm_r32( 16, REG_EAX );
  1470     SHRL_imm_r32( 16, REG_ECX );
  1471     ORL_r32_r32( REG_EAX, REG_ECX );
  1472     store_reg( REG_ECX, Rn );
  1473     sh4_x86.tstate = TSTATE_NONE;
  1474 :}
  1476 /* Data move instructions */
  1477 MOV Rm, Rn {:  
  1478     COUNT_INST(I_MOV);
  1479     load_reg( REG_EAX, Rm );
  1480     store_reg( REG_EAX, Rn );
  1481 :}
  1482 MOV #imm, Rn {:  
  1483     COUNT_INST(I_MOVI);
  1484     MOVL_imm32_r32( imm, REG_EAX );
  1485     store_reg( REG_EAX, Rn );
  1486 :}
  1487 MOV.B Rm, @Rn {:  
  1488     COUNT_INST(I_MOVB);
  1489     load_reg( REG_EAX, Rn );
  1490     load_reg( REG_EDX, Rm );
  1491     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1492     sh4_x86.tstate = TSTATE_NONE;
  1493 :}
  1494 MOV.B Rm, @-Rn {:  
  1495     COUNT_INST(I_MOVB);
  1496     load_reg( REG_EAX, Rn );
  1497     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1498     load_reg( REG_EDX, Rm );
  1499     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1500     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1501     sh4_x86.tstate = TSTATE_NONE;
  1502 :}
  1503 MOV.B Rm, @(R0, Rn) {:  
  1504     COUNT_INST(I_MOVB);
  1505     load_reg( REG_EAX, 0 );
  1506     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1507     load_reg( REG_EDX, Rm );
  1508     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1509     sh4_x86.tstate = TSTATE_NONE;
  1510 :}
  1511 MOV.B R0, @(disp, GBR) {:  
  1512     COUNT_INST(I_MOVB);
  1513     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1514     ADDL_imms_r32( disp, REG_EAX );
  1515     load_reg( REG_EDX, 0 );
  1516     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1517     sh4_x86.tstate = TSTATE_NONE;
  1518 :}
  1519 MOV.B R0, @(disp, Rn) {:  
  1520     COUNT_INST(I_MOVB);
  1521     load_reg( REG_EAX, Rn );
  1522     ADDL_imms_r32( disp, REG_EAX );
  1523     load_reg( REG_EDX, 0 );
  1524     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1525     sh4_x86.tstate = TSTATE_NONE;
  1526 :}
  1527 MOV.B @Rm, Rn {:  
  1528     COUNT_INST(I_MOVB);
  1529     load_reg( REG_EAX, Rm );
  1530     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1531     store_reg( REG_EAX, Rn );
  1532     sh4_x86.tstate = TSTATE_NONE;
  1533 :}
  1534 MOV.B @Rm+, Rn {:  
  1535     COUNT_INST(I_MOVB);
  1536     load_reg( REG_EAX, Rm );
  1537     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1538     if( Rm != Rn ) {
  1539     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
   1540     }
  1541     store_reg( REG_EAX, Rn );
  1542     sh4_x86.tstate = TSTATE_NONE;
  1543 :}
  1544 MOV.B @(R0, Rm), Rn {:  
  1545     COUNT_INST(I_MOVB);
  1546     load_reg( REG_EAX, 0 );
  1547     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1548     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1549     store_reg( REG_EAX, Rn );
  1550     sh4_x86.tstate = TSTATE_NONE;
  1551 :}
  1552 MOV.B @(disp, GBR), R0 {:  
  1553     COUNT_INST(I_MOVB);
  1554     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1555     ADDL_imms_r32( disp, REG_EAX );
  1556     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1557     store_reg( REG_EAX, 0 );
  1558     sh4_x86.tstate = TSTATE_NONE;
  1559 :}
  1560 MOV.B @(disp, Rm), R0 {:  
  1561     COUNT_INST(I_MOVB);
  1562     load_reg( REG_EAX, Rm );
  1563     ADDL_imms_r32( disp, REG_EAX );
  1564     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1565     store_reg( REG_EAX, 0 );
  1566     sh4_x86.tstate = TSTATE_NONE;
  1567 :}
  1568 MOV.L Rm, @Rn {:
  1569     COUNT_INST(I_MOVL);
  1570     load_reg( REG_EAX, Rn );
  1571     check_walign32(REG_EAX);
  1572     MOVL_r32_r32( REG_EAX, REG_ECX );
  1573     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1574     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1575     JNE_label( notsq );
  1576     ANDL_imms_r32( 0x3C, REG_EAX );
  1577     load_reg( REG_EDX, Rm );
  1578     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1579     JMP_label(end);
  1580     JMP_TARGET(notsq);
  1581     load_reg( REG_EDX, Rm );
  1582     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1583     JMP_TARGET(end);
  1584     sh4_x86.tstate = TSTATE_NONE;
  1585 :}
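        /* Note: the store-queue fast path above (also used by
         * MOV.L Rm, @(disp, Rn) below) detects the SQ area by masking:
         * (addr & 0xFC000000) == 0xE0000000 covers 0xE0000000-0xE3FFFFFF,
         * and (addr & 0x3C) then indexes directly into sh4r.store_queue,
         * bypassing the memory subsystem. A C sketch of the effect:
         *     sh4r.store_queue[(addr & 0x3C) >> 2] = value;
         */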
  1586 MOV.L Rm, @-Rn {:  
  1587     COUNT_INST(I_MOVL);
  1588     load_reg( REG_EAX, Rn );
  1589     ADDL_imms_r32( -4, REG_EAX );
  1590     check_walign32( REG_EAX );
  1591     load_reg( REG_EDX, Rm );
  1592     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1593     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1594     sh4_x86.tstate = TSTATE_NONE;
  1595 :}
  1596 MOV.L Rm, @(R0, Rn) {:  
  1597     COUNT_INST(I_MOVL);
  1598     load_reg( REG_EAX, 0 );
  1599     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1600     check_walign32( REG_EAX );
  1601     load_reg( REG_EDX, Rm );
  1602     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1603     sh4_x86.tstate = TSTATE_NONE;
  1604 :}
  1605 MOV.L R0, @(disp, GBR) {:  
  1606     COUNT_INST(I_MOVL);
  1607     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1608     ADDL_imms_r32( disp, REG_EAX );
  1609     check_walign32( REG_EAX );
  1610     load_reg( REG_EDX, 0 );
  1611     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1612     sh4_x86.tstate = TSTATE_NONE;
  1613 :}
  1614 MOV.L Rm, @(disp, Rn) {:  
  1615     COUNT_INST(I_MOVL);
  1616     load_reg( REG_EAX, Rn );
  1617     ADDL_imms_r32( disp, REG_EAX );
  1618     check_walign32( REG_EAX );
  1619     MOVL_r32_r32( REG_EAX, REG_ECX );
  1620     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1621     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1622     JNE_label( notsq );
  1623     ANDL_imms_r32( 0x3C, REG_EAX );
  1624     load_reg( REG_EDX, Rm );
  1625     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1626     JMP_label(end);
  1627     JMP_TARGET(notsq);
  1628     load_reg( REG_EDX, Rm );
  1629     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1630     JMP_TARGET(end);
  1631     sh4_x86.tstate = TSTATE_NONE;
  1632 :}
  1633 MOV.L @Rm, Rn {:  
  1634     COUNT_INST(I_MOVL);
  1635     load_reg( REG_EAX, Rm );
  1636     check_ralign32( REG_EAX );
  1637     MEM_READ_LONG( REG_EAX, REG_EAX );
  1638     store_reg( REG_EAX, Rn );
  1639     sh4_x86.tstate = TSTATE_NONE;
  1640 :}
  1641 MOV.L @Rm+, Rn {:  
  1642     COUNT_INST(I_MOVL);
  1643     load_reg( REG_EAX, Rm );
  1644     check_ralign32( REG_EAX );
  1645     MEM_READ_LONG( REG_EAX, REG_EAX );
  1646     if( Rm != Rn ) {
  1647     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   1648     }
  1649     store_reg( REG_EAX, Rn );
  1650     sh4_x86.tstate = TSTATE_NONE;
  1651 :}
  1652 MOV.L @(R0, Rm), Rn {:  
  1653     COUNT_INST(I_MOVL);
  1654     load_reg( REG_EAX, 0 );
  1655     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1656     check_ralign32( REG_EAX );
  1657     MEM_READ_LONG( REG_EAX, REG_EAX );
  1658     store_reg( REG_EAX, Rn );
  1659     sh4_x86.tstate = TSTATE_NONE;
  1660 :}
  1661 MOV.L @(disp, GBR), R0 {:
  1662     COUNT_INST(I_MOVL);
  1663     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1664     ADDL_imms_r32( disp, REG_EAX );
  1665     check_ralign32( REG_EAX );
  1666     MEM_READ_LONG( REG_EAX, REG_EAX );
  1667     store_reg( REG_EAX, 0 );
  1668     sh4_x86.tstate = TSTATE_NONE;
  1669 :}
  1670 MOV.L @(disp, PC), Rn {:  
  1671     COUNT_INST(I_MOVLPC);
  1672     if( sh4_x86.in_delay_slot ) {
  1673 	SLOTILLEGAL();
  1674     } else {
  1675 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1676 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1677 	    // If the target address is in the same page as the code, it's
  1678 	    // pretty safe to just ref it directly and circumvent the whole
  1679 	    // memory subsystem. (this is a big performance win)
  1681 	    // FIXME: There's a corner-case that's not handled here when
  1682 	    // the current code-page is in the ITLB but not in the UTLB.
   1683 	    // (This should generate a TLB miss, though SH4 behaviour
   1684 	    // needs testing to confirm.) It's unlikely that anyone
   1685 	    // depends on this behaviour, though.
  1686 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1687 	    MOVL_moffptr_eax( ptr );
  1688 	} else {
  1689 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1690 	    // different virtual address than the translation was done with,
  1691 	    // but we can safely assume that the low bits are the same.
  1692 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1693 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1694 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1695 	    sh4_x86.tstate = TSTATE_NONE;
   1696 	}
  1697 	store_reg( REG_EAX, Rn );
   1698     }
  1699 :}
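        /* Note: the effective address here is target = (pc & 0xFFFFFFFC)
         * + disp + 4 (disp pre-scaled by the decoder). For example, with
         * pc = 0x8C000012 and disp = 8 the literal is read from
         * 0x8C000010 + 8 + 4 = 0x8C00001C. The fallback path computes the
         * same address from sh4r.pc at runtime, since only the low bits
         * are guaranteed to match the translation-time pc. */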
  1700 MOV.L @(disp, Rm), Rn {:  
  1701     COUNT_INST(I_MOVL);
  1702     load_reg( REG_EAX, Rm );
  1703     ADDL_imms_r32( disp, REG_EAX );
  1704     check_ralign32( REG_EAX );
  1705     MEM_READ_LONG( REG_EAX, REG_EAX );
  1706     store_reg( REG_EAX, Rn );
  1707     sh4_x86.tstate = TSTATE_NONE;
  1708 :}
  1709 MOV.W Rm, @Rn {:  
  1710     COUNT_INST(I_MOVW);
  1711     load_reg( REG_EAX, Rn );
  1712     check_walign16( REG_EAX );
  1713     load_reg( REG_EDX, Rm );
  1714     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1715     sh4_x86.tstate = TSTATE_NONE;
  1716 :}
  1717 MOV.W Rm, @-Rn {:  
  1718     COUNT_INST(I_MOVW);
  1719     load_reg( REG_EAX, Rn );
  1720     check_walign16( REG_EAX );
  1721     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1722     load_reg( REG_EDX, Rm );
  1723     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1724     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1725     sh4_x86.tstate = TSTATE_NONE;
  1726 :}
  1727 MOV.W Rm, @(R0, Rn) {:  
  1728     COUNT_INST(I_MOVW);
  1729     load_reg( REG_EAX, 0 );
  1730     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1731     check_walign16( REG_EAX );
  1732     load_reg( REG_EDX, Rm );
  1733     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1734     sh4_x86.tstate = TSTATE_NONE;
  1735 :}
  1736 MOV.W R0, @(disp, GBR) {:  
  1737     COUNT_INST(I_MOVW);
  1738     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1739     ADDL_imms_r32( disp, REG_EAX );
  1740     check_walign16( REG_EAX );
  1741     load_reg( REG_EDX, 0 );
  1742     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1743     sh4_x86.tstate = TSTATE_NONE;
  1744 :}
  1745 MOV.W R0, @(disp, Rn) {:  
  1746     COUNT_INST(I_MOVW);
  1747     load_reg( REG_EAX, Rn );
  1748     ADDL_imms_r32( disp, REG_EAX );
  1749     check_walign16( REG_EAX );
  1750     load_reg( REG_EDX, 0 );
  1751     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1752     sh4_x86.tstate = TSTATE_NONE;
  1753 :}
  1754 MOV.W @Rm, Rn {:  
  1755     COUNT_INST(I_MOVW);
  1756     load_reg( REG_EAX, Rm );
  1757     check_ralign16( REG_EAX );
  1758     MEM_READ_WORD( REG_EAX, REG_EAX );
  1759     store_reg( REG_EAX, Rn );
  1760     sh4_x86.tstate = TSTATE_NONE;
  1761 :}
  1762 MOV.W @Rm+, Rn {:  
  1763     COUNT_INST(I_MOVW);
  1764     load_reg( REG_EAX, Rm );
  1765     check_ralign16( REG_EAX );
  1766     MEM_READ_WORD( REG_EAX, REG_EAX );
  1767     if( Rm != Rn ) {
  1768         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   1769     }
  1770     store_reg( REG_EAX, Rn );
  1771     sh4_x86.tstate = TSTATE_NONE;
  1772 :}
  1773 MOV.W @(R0, Rm), Rn {:  
  1774     COUNT_INST(I_MOVW);
  1775     load_reg( REG_EAX, 0 );
  1776     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1777     check_ralign16( REG_EAX );
  1778     MEM_READ_WORD( REG_EAX, REG_EAX );
  1779     store_reg( REG_EAX, Rn );
  1780     sh4_x86.tstate = TSTATE_NONE;
  1781 :}
  1782 MOV.W @(disp, GBR), R0 {:  
  1783     COUNT_INST(I_MOVW);
  1784     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1785     ADDL_imms_r32( disp, REG_EAX );
  1786     check_ralign16( REG_EAX );
  1787     MEM_READ_WORD( REG_EAX, REG_EAX );
  1788     store_reg( REG_EAX, 0 );
  1789     sh4_x86.tstate = TSTATE_NONE;
  1790 :}
  1791 MOV.W @(disp, PC), Rn {:  
  1792     COUNT_INST(I_MOVW);
  1793     if( sh4_x86.in_delay_slot ) {
  1794 	SLOTILLEGAL();
  1795     } else {
  1796 	// See comments for MOV.L @(disp, PC), Rn
  1797 	uint32_t target = pc + disp + 4;
  1798 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1799 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1800 	    MOVL_moffptr_eax( ptr );
  1801 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1802 	} else {
  1803 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1804 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1805 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1806 	    sh4_x86.tstate = TSTATE_NONE;
   1807 	}
  1808 	store_reg( REG_EAX, Rn );
   1809     }
  1810 :}
  1811 MOV.W @(disp, Rm), R0 {:  
  1812     COUNT_INST(I_MOVW);
  1813     load_reg( REG_EAX, Rm );
  1814     ADDL_imms_r32( disp, REG_EAX );
  1815     check_ralign16( REG_EAX );
  1816     MEM_READ_WORD( REG_EAX, REG_EAX );
  1817     store_reg( REG_EAX, 0 );
  1818     sh4_x86.tstate = TSTATE_NONE;
  1819 :}
  1820 MOVA @(disp, PC), R0 {:  
  1821     COUNT_INST(I_MOVA);
  1822     if( sh4_x86.in_delay_slot ) {
  1823 	SLOTILLEGAL();
  1824     } else {
  1825 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1826 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1827 	store_reg( REG_ECX, 0 );
  1828 	sh4_x86.tstate = TSTATE_NONE;
   1829     }
  1830 :}
  1831 MOVCA.L R0, @Rn {:  
  1832     COUNT_INST(I_MOVCA);
  1833     load_reg( REG_EAX, Rn );
  1834     check_walign32( REG_EAX );
  1835     load_reg( REG_EDX, 0 );
  1836     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1837     sh4_x86.tstate = TSTATE_NONE;
  1838 :}
  1840 /* Control transfer instructions */
  1841 BF disp {:
  1842     COUNT_INST(I_BF);
  1843     if( sh4_x86.in_delay_slot ) {
  1844 	SLOTILLEGAL();
  1845     } else {
  1846 	sh4vma_t target = disp + pc + 4;
  1847 	JT_label( nottaken );
  1848 	exit_block_rel(target, pc+2 );
  1849 	JMP_TARGET(nottaken);
  1850 	return 2;
   1851     }
  1852 :}
  1853 BF/S disp {:
  1854     COUNT_INST(I_BFS);
  1855     if( sh4_x86.in_delay_slot ) {
  1856 	SLOTILLEGAL();
  1857     } else {
  1858 	sh4_x86.in_delay_slot = DELAY_PC;
  1859 	if( UNTRANSLATABLE(pc+2) ) {
  1860 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1861 	    JT_label(nottaken);
  1862 	    ADDL_imms_r32( disp, REG_EAX );
  1863 	    JMP_TARGET(nottaken);
  1864 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1865 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1866 	    exit_block_emu(pc+2);
  1867 	    sh4_x86.branch_taken = TRUE;
  1868 	    return 2;
  1869 	} else {
  1870 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1871 		CMPL_imms_rbpdisp( 1, R_T );
  1872 		sh4_x86.tstate = TSTATE_E;
   1873 	    }
  1874 	    sh4vma_t target = disp + pc + 4;
  1875 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1876 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1877 	    int save_tstate = sh4_x86.tstate;
  1878 	    sh4_translate_instruction(pc+2);
  1879             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1880 	    exit_block_rel( target, pc+4 );
  1882 	    // not taken
  1883 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1884 	    sh4_x86.tstate = save_tstate;
  1885 	    sh4_translate_instruction(pc+2);
  1886 	    return 4;
   1887 	}
   1888     }
  1889 :}
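        /* Note: BF/S above (and BT/S below) emit the conditional jump with
         * a zero rel32 (JCC_cc_rel32(cc, 0)), remember its location, emit
         * the taken path, then backpatch the displacement:
         *     *patch = (xlat_output - (uint8_t *)patch) - 4;
         * i.e. the distance from the end of the 4-byte field to the current
         * output point, which is what an x86 rel32 encodes. The delay slot
         * is translated twice, once on each path. */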
  1890 BRA disp {:  
  1891     COUNT_INST(I_BRA);
  1892     if( sh4_x86.in_delay_slot ) {
  1893 	SLOTILLEGAL();
  1894     } else {
  1895 	sh4_x86.in_delay_slot = DELAY_PC;
  1896 	sh4_x86.branch_taken = TRUE;
  1897 	if( UNTRANSLATABLE(pc+2) ) {
  1898 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1899 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1900 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1901 	    exit_block_emu(pc+2);
  1902 	    return 2;
  1903 	} else {
  1904 	    sh4_translate_instruction( pc + 2 );
  1905 	    exit_block_rel( disp + pc + 4, pc+4 );
  1906 	    return 4;
   1907 	}
   1908     }
  1909 :}
  1910 BRAF Rn {:  
  1911     COUNT_INST(I_BRAF);
  1912     if( sh4_x86.in_delay_slot ) {
  1913 	SLOTILLEGAL();
  1914     } else {
  1915 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1916 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1917 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1918 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1919 	sh4_x86.in_delay_slot = DELAY_PC;
  1920 	sh4_x86.tstate = TSTATE_NONE;
  1921 	sh4_x86.branch_taken = TRUE;
  1922 	if( UNTRANSLATABLE(pc+2) ) {
  1923 	    exit_block_emu(pc+2);
  1924 	    return 2;
  1925 	} else {
  1926 	    sh4_translate_instruction( pc + 2 );
  1927 	    exit_block_newpcset(pc+4);
  1928 	    return 4;
   1929 	}
   1930     }
  1931 :}
  1932 BSR disp {:  
  1933     COUNT_INST(I_BSR);
  1934     if( sh4_x86.in_delay_slot ) {
  1935 	SLOTILLEGAL();
  1936     } else {
  1937 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1938 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1939 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1940 	sh4_x86.in_delay_slot = DELAY_PC;
  1941 	sh4_x86.branch_taken = TRUE;
  1942 	sh4_x86.tstate = TSTATE_NONE;
  1943 	if( UNTRANSLATABLE(pc+2) ) {
  1944 	    ADDL_imms_r32( disp, REG_EAX );
  1945 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1946 	    exit_block_emu(pc+2);
  1947 	    return 2;
  1948 	} else {
  1949 	    sh4_translate_instruction( pc + 2 );
  1950 	    exit_block_rel( disp + pc + 4, pc+4 );
  1951 	    return 4;
   1952 	}
   1953     }
  1954 :}
  1955 BSRF Rn {:  
  1956     COUNT_INST(I_BSRF);
  1957     if( sh4_x86.in_delay_slot ) {
  1958 	SLOTILLEGAL();
  1959     } else {
  1960 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1961 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1962 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1963 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1964 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1966 	sh4_x86.in_delay_slot = DELAY_PC;
  1967 	sh4_x86.tstate = TSTATE_NONE;
  1968 	sh4_x86.branch_taken = TRUE;
  1969 	if( UNTRANSLATABLE(pc+2) ) {
  1970 	    exit_block_emu(pc+2);
  1971 	    return 2;
  1972 	} else {
  1973 	    sh4_translate_instruction( pc + 2 );
  1974 	    exit_block_newpcset(pc+4);
  1975 	    return 4;
   1976 	}
   1977     }
  1978 :}
  1979 BT disp {:
  1980     COUNT_INST(I_BT);
  1981     if( sh4_x86.in_delay_slot ) {
  1982 	SLOTILLEGAL();
  1983     } else {
  1984 	sh4vma_t target = disp + pc + 4;
  1985 	JF_label( nottaken );
  1986 	exit_block_rel(target, pc+2 );
  1987 	JMP_TARGET(nottaken);
  1988 	return 2;
   1989     }
  1990 :}
  1991 BT/S disp {:
  1992     COUNT_INST(I_BTS);
  1993     if( sh4_x86.in_delay_slot ) {
  1994 	SLOTILLEGAL();
  1995     } else {
  1996 	sh4_x86.in_delay_slot = DELAY_PC;
  1997 	if( UNTRANSLATABLE(pc+2) ) {
  1998 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1999 	    JF_label(nottaken);
  2000 	    ADDL_imms_r32( disp, REG_EAX );
  2001 	    JMP_TARGET(nottaken);
  2002 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2003 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2004 	    exit_block_emu(pc+2);
  2005 	    sh4_x86.branch_taken = TRUE;
  2006 	    return 2;
  2007 	} else {
  2008 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  2009 		CMPL_imms_rbpdisp( 1, R_T );
  2010 		sh4_x86.tstate = TSTATE_E;
   2011 	    }
  2012 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2013 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  2015 	    int save_tstate = sh4_x86.tstate;
  2016 	    sh4_translate_instruction(pc+2);
  2017             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2018 	    exit_block_rel( disp + pc + 4, pc+4 );
  2019 	    // not taken
  2020 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2021 	    sh4_x86.tstate = save_tstate;
  2022 	    sh4_translate_instruction(pc+2);
  2023 	    return 4;
   2024 	}
   2025     }
  2026 :}
  2027 JMP @Rn {:  
  2028     COUNT_INST(I_JMP);
  2029     if( sh4_x86.in_delay_slot ) {
  2030 	SLOTILLEGAL();
  2031     } else {
  2032 	load_reg( REG_ECX, Rn );
  2033 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2034 	sh4_x86.in_delay_slot = DELAY_PC;
  2035 	sh4_x86.branch_taken = TRUE;
  2036 	if( UNTRANSLATABLE(pc+2) ) {
  2037 	    exit_block_emu(pc+2);
  2038 	    return 2;
  2039 	} else {
  2040 	    sh4_translate_instruction(pc+2);
  2041 	    exit_block_newpcset(pc+4);
  2042 	    return 4;
   2043 	}
   2044     }
  2045 :}
  2046 JSR @Rn {:  
  2047     COUNT_INST(I_JSR);
  2048     if( sh4_x86.in_delay_slot ) {
  2049 	SLOTILLEGAL();
  2050     } else {
  2051 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2052 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2053 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2054 	load_reg( REG_ECX, Rn );
  2055 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2056 	sh4_x86.in_delay_slot = DELAY_PC;
  2057 	sh4_x86.branch_taken = TRUE;
  2058 	sh4_x86.tstate = TSTATE_NONE;
  2059 	if( UNTRANSLATABLE(pc+2) ) {
  2060 	    exit_block_emu(pc+2);
  2061 	    return 2;
  2062 	} else {
  2063 	    sh4_translate_instruction(pc+2);
  2064 	    exit_block_newpcset(pc+4);
  2065 	    return 4;
   2066 	}
   2067     }
  2068 :}
  2069 RTE {:  
  2070     COUNT_INST(I_RTE);
  2071     if( sh4_x86.in_delay_slot ) {
  2072 	SLOTILLEGAL();
  2073     } else {
  2074 	check_priv();
  2075 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2076 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2077 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2078 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2079 	sh4_x86.in_delay_slot = DELAY_PC;
  2080 	sh4_x86.fpuen_checked = FALSE;
  2081 	sh4_x86.tstate = TSTATE_NONE;
  2082 	sh4_x86.branch_taken = TRUE;
  2083     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2084 	if( UNTRANSLATABLE(pc+2) ) {
  2085 	    exit_block_emu(pc+2);
  2086 	    return 2;
  2087 	} else {
  2088 	    sh4_translate_instruction(pc+2);
  2089 	    exit_block_newpcset(pc+4);
  2090 	    return 4;
   2091 	}
   2092     }
  2093 :}
  2094 RTS {:  
  2095     COUNT_INST(I_RTS);
  2096     if( sh4_x86.in_delay_slot ) {
  2097 	SLOTILLEGAL();
  2098     } else {
  2099 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2100 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2101 	sh4_x86.in_delay_slot = DELAY_PC;
  2102 	sh4_x86.branch_taken = TRUE;
  2103 	if( UNTRANSLATABLE(pc+2) ) {
  2104 	    exit_block_emu(pc+2);
  2105 	    return 2;
  2106 	} else {
  2107 	    sh4_translate_instruction(pc+2);
  2108 	    exit_block_newpcset(pc+4);
  2109 	    return 4;
   2110 	}
   2111     }
  2112 :}
  2113 TRAPA #imm {:  
  2114     COUNT_INST(I_TRAPA);
  2115     if( sh4_x86.in_delay_slot ) {
  2116 	SLOTILLEGAL();
  2117     } else {
  2118 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2119 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2120 	MOVL_imm32_r32( imm, REG_EAX );
  2121 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2122 	sh4_x86.tstate = TSTATE_NONE;
  2123 	exit_block_pcset(pc+2);
  2124 	sh4_x86.branch_taken = TRUE;
  2125 	return 2;
   2126     }
  2127 :}
  2128 UNDEF {:  
  2129     COUNT_INST(I_UNDEF);
  2130     if( sh4_x86.in_delay_slot ) {
  2131 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
  2132     } else {
  2133 	exit_block_exc(EXC_ILLEGAL, pc);    
  2134 	return 2;
   2135     }
  2136 :}
  2138 CLRMAC {:  
  2139     COUNT_INST(I_CLRMAC);
  2140     XORL_r32_r32(REG_EAX, REG_EAX);
  2141     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2142     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2143     sh4_x86.tstate = TSTATE_NONE;
  2144 :}
  2145 CLRS {:
  2146     COUNT_INST(I_CLRS);
  2147     CLC();
  2148     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2149     sh4_x86.tstate = TSTATE_NONE;
  2150 :}
  2151 CLRT {:  
  2152     COUNT_INST(I_CLRT);
  2153     CLC();
  2154     SETC_t();
  2155     sh4_x86.tstate = TSTATE_C;
  2156 :}
  2157 SETS {:  
  2158     COUNT_INST(I_SETS);
  2159     STC();
  2160     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2161     sh4_x86.tstate = TSTATE_NONE;
  2162 :}
  2163 SETT {:  
  2164     COUNT_INST(I_SETT);
  2165     STC();
  2166     SETC_t();
  2167     sh4_x86.tstate = TSTATE_C;
  2168 :}
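        /* Note: CLRT/SETT go through the host carry flag: CLC/STC set the
         * carry and SETC_t stores it to sh4r.t, leaving tstate == TSTATE_C
         * so a following conditional can presumably reuse the host flag
         * rather than re-testing T. */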
  2170 /* Floating point moves */
  2171 FMOV FRm, FRn {:  
  2172     COUNT_INST(I_FMOV1);
  2173     check_fpuen();
  2174     if( sh4_x86.double_size ) {
  2175         load_dr0( REG_EAX, FRm );
  2176         load_dr1( REG_ECX, FRm );
  2177         store_dr0( REG_EAX, FRn );
  2178         store_dr1( REG_ECX, FRn );
  2179     } else {
  2180         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2181         store_fr( REG_EAX, FRn );
   2182     }
  2183 :}
  2184 FMOV FRm, @Rn {: 
  2185     COUNT_INST(I_FMOV2);
  2186     check_fpuen();
  2187     load_reg( REG_EAX, Rn );
  2188     if( sh4_x86.double_size ) {
  2189         check_walign64( REG_EAX );
  2190         load_dr0( REG_EDX, FRm );
  2191         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2192         load_reg( REG_EAX, Rn );
  2193         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2194         load_dr1( REG_EDX, FRm );
  2195         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2196     } else {
  2197         check_walign32( REG_EAX );
  2198         load_fr( REG_EDX, FRm );
  2199         MEM_WRITE_LONG( REG_EAX, REG_EDX );
   2200     }
  2201     sh4_x86.tstate = TSTATE_NONE;
  2202 :}
  2203 FMOV @Rm, FRn {:  
  2204     COUNT_INST(I_FMOV5);
  2205     check_fpuen();
  2206     load_reg( REG_EAX, Rm );
  2207     if( sh4_x86.double_size ) {
  2208         check_ralign64( REG_EAX );
  2209         MEM_READ_LONG( REG_EAX, REG_EAX );
  2210         store_dr0( REG_EAX, FRn );
  2211         load_reg( REG_EAX, Rm );
  2212         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2213         MEM_READ_LONG( REG_EAX, REG_EAX );
  2214         store_dr1( REG_EAX, FRn );
  2215     } else {
  2216         check_ralign32( REG_EAX );
  2217         MEM_READ_LONG( REG_EAX, REG_EAX );
  2218         store_fr( REG_EAX, FRn );
   2219     }
  2220     sh4_x86.tstate = TSTATE_NONE;
  2221 :}
  2222 FMOV FRm, @-Rn {:  
  2223     COUNT_INST(I_FMOV3);
  2224     check_fpuen();
  2225     load_reg( REG_EAX, Rn );
  2226     if( sh4_x86.double_size ) {
  2227         check_walign64( REG_EAX );
  2228         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2229         load_dr0( REG_EDX, FRm );
  2230         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2231         load_reg( REG_EAX, Rn );
  2232         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2233         load_dr1( REG_EDX, FRm );
  2234         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2235         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2236     } else {
  2237         check_walign32( REG_EAX );
  2238         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2239         load_fr( REG_EDX, FRm );
  2240         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2241         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
   2242     }
  2243     sh4_x86.tstate = TSTATE_NONE;
  2244 :}
  2245 FMOV @Rm+, FRn {:
  2246     COUNT_INST(I_FMOV6);
  2247     check_fpuen();
  2248     load_reg( REG_EAX, Rm );
  2249     if( sh4_x86.double_size ) {
  2250         check_ralign64( REG_EAX );
  2251         MEM_READ_LONG( REG_EAX, REG_EAX );
  2252         store_dr0( REG_EAX, FRn );
  2253         load_reg( REG_EAX, Rm );
  2254         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2255         MEM_READ_LONG( REG_EAX, REG_EAX );
  2256         store_dr1( REG_EAX, FRn );
  2257         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2258     } else {
  2259         check_ralign32( REG_EAX );
  2260         MEM_READ_LONG( REG_EAX, REG_EAX );
  2261         store_fr( REG_EAX, FRn );
  2262         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   2263     }
  2264     sh4_x86.tstate = TSTATE_NONE;
  2265 :}
  2266 FMOV FRm, @(R0, Rn) {:  
  2267     COUNT_INST(I_FMOV4);
  2268     check_fpuen();
  2269     load_reg( REG_EAX, Rn );
  2270     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2271     if( sh4_x86.double_size ) {
  2272         check_walign64( REG_EAX );
  2273         load_dr0( REG_EDX, FRm );
  2274         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2275         load_reg( REG_EAX, Rn );
  2276         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2277         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2278         load_dr1( REG_EDX, FRm );
  2279         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2280     } else {
  2281         check_walign32( REG_EAX );
  2282         load_fr( REG_EDX, FRm );
  2283         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
   2284     }
  2285     sh4_x86.tstate = TSTATE_NONE;
  2286 :}
  2287 FMOV @(R0, Rm), FRn {:  
  2288     COUNT_INST(I_FMOV7);
  2289     check_fpuen();
  2290     load_reg( REG_EAX, Rm );
  2291     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2292     if( sh4_x86.double_size ) {
  2293         check_ralign64( REG_EAX );
  2294         MEM_READ_LONG( REG_EAX, REG_EAX );
  2295         store_dr0( REG_EAX, FRn );
  2296         load_reg( REG_EAX, Rm );
  2297         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2298         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2299         MEM_READ_LONG( REG_EAX, REG_EAX );
  2300         store_dr1( REG_EAX, FRn );
  2301     } else {
  2302         check_ralign32( REG_EAX );
  2303         MEM_READ_LONG( REG_EAX, REG_EAX );
  2304         store_fr( REG_EAX, FRn );
   2305     }
  2306     sh4_x86.tstate = TSTATE_NONE;
  2307 :}
  2308 FLDI0 FRn {:  /* IFF PR=0 */
  2309     COUNT_INST(I_FLDI0);
  2310     check_fpuen();
  2311     if( sh4_x86.double_prec == 0 ) {
  2312         XORL_r32_r32( REG_EAX, REG_EAX );
  2313         store_fr( REG_EAX, FRn );
   2314     }
  2315     sh4_x86.tstate = TSTATE_NONE;
  2316 :}
  2317 FLDI1 FRn {:  /* IFF PR=0 */
  2318     COUNT_INST(I_FLDI1);
  2319     check_fpuen();
  2320     if( sh4_x86.double_prec == 0 ) {
  2321         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2322         store_fr( REG_EAX, FRn );
   2323     }
  2324 :}
  2326 FLOAT FPUL, FRn {:  
  2327     COUNT_INST(I_FLOAT);
  2328     check_fpuen();
  2329     FILD_rbpdisp(R_FPUL);
  2330     if( sh4_x86.double_prec ) {
  2331         pop_dr( FRn );
  2332     } else {
  2333         pop_fr( FRn );
   2334     }
  2335 :}
  2336 FTRC FRm, FPUL {:  
  2337     COUNT_INST(I_FTRC);
  2338     check_fpuen();
  2339     if( sh4_x86.double_prec ) {
  2340         push_dr( FRm );
  2341     } else {
  2342         push_fr( FRm );
   2343     }
  2344     MOVP_immptr_rptr( &max_int, REG_ECX );
  2345     FILD_r32disp( REG_ECX, 0 );
  2346     FCOMIP_st(1);
  2347     JNA_label( sat );
  2348     MOVP_immptr_rptr( &min_int, REG_ECX );
  2349     FILD_r32disp( REG_ECX, 0 );
  2350     FCOMIP_st(1);              
  2351     JAE_label( sat2 );            
  2352     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2353     FNSTCW_r32disp( REG_EAX, 0 );
  2354     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2355     FLDCW_r32disp( REG_EDX, 0 );
  2356     FISTP_rbpdisp(R_FPUL);             
  2357     FLDCW_r32disp( REG_EAX, 0 );
  2358     JMP_label(end);             
  2360     JMP_TARGET(sat);
  2361     JMP_TARGET(sat2);
  2362     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2363     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2364     FPOP_st();
  2365     JMP_TARGET(end);
  2366     sh4_x86.tstate = TSTATE_NONE;
  2367 :}
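        /* Note: FTRC above emulates SH4 saturating conversion on x87. The
         * value is first compared against max_int and min_int; if out of
         * range, the clamp constant is stored to FPUL directly. Otherwise
         * the current FPU control word is saved (FNSTCW), trunc_fcw is
         * loaded to force round-toward-zero, FISTP stores the integer, and
         * the original control word is restored. */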
  2368 FLDS FRm, FPUL {:  
  2369     COUNT_INST(I_FLDS);
  2370     check_fpuen();
  2371     load_fr( REG_EAX, FRm );
  2372     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2373 :}
  2374 FSTS FPUL, FRn {:  
  2375     COUNT_INST(I_FSTS);
  2376     check_fpuen();
  2377     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2378     store_fr( REG_EAX, FRn );
  2379 :}
  2380 FCNVDS FRm, FPUL {:  
  2381     COUNT_INST(I_FCNVDS);
  2382     check_fpuen();
  2383     if( sh4_x86.double_prec ) {
  2384         push_dr( FRm );
  2385         pop_fpul();
   2386     }
  2387 :}
  2388 FCNVSD FPUL, FRn {:  
  2389     COUNT_INST(I_FCNVSD);
  2390     check_fpuen();
  2391     if( sh4_x86.double_prec ) {
  2392         push_fpul();
  2393         pop_dr( FRn );
   2394     }
  2395 :}
  2397 /* Floating point instructions */
  2398 FABS FRn {:  
  2399     COUNT_INST(I_FABS);
  2400     check_fpuen();
  2401     if( sh4_x86.double_prec ) {
  2402         push_dr(FRn);
  2403         FABS_st0();
  2404         pop_dr(FRn);
  2405     } else {
  2406         push_fr(FRn);
  2407         FABS_st0();
  2408         pop_fr(FRn);
   2409     }
  2410 :}
  2411 FADD FRm, FRn {:  
  2412     COUNT_INST(I_FADD);
  2413     check_fpuen();
  2414     if( sh4_x86.double_prec ) {
  2415         push_dr(FRm);
  2416         push_dr(FRn);
  2417         FADDP_st(1);
  2418         pop_dr(FRn);
  2419     } else {
  2420         push_fr(FRm);
  2421         push_fr(FRn);
  2422         FADDP_st(1);
  2423         pop_fr(FRn);
   2424     }
  2425 :}
  2426 FDIV FRm, FRn {:  
  2427     COUNT_INST(I_FDIV);
  2428     check_fpuen();
  2429     if( sh4_x86.double_prec ) {
  2430         push_dr(FRn);
  2431         push_dr(FRm);
  2432         FDIVP_st(1);
  2433         pop_dr(FRn);
  2434     } else {
  2435         push_fr(FRn);
  2436         push_fr(FRm);
  2437         FDIVP_st(1);
  2438         pop_fr(FRn);
   2439     }
  2440 :}
  2441 FMAC FR0, FRm, FRn {:  
  2442     COUNT_INST(I_FMAC);
  2443     check_fpuen();
  2444     if( sh4_x86.double_prec ) {
  2445         push_dr( 0 );
  2446         push_dr( FRm );
  2447         FMULP_st(1);
  2448         push_dr( FRn );
  2449         FADDP_st(1);
  2450         pop_dr( FRn );
  2451     } else {
  2452         push_fr( 0 );
  2453         push_fr( FRm );
  2454         FMULP_st(1);
  2455         push_fr( FRn );
  2456         FADDP_st(1);
  2457         pop_fr( FRn );
   2458     }
  2459 :}
  2461 FMUL FRm, FRn {:  
  2462     COUNT_INST(I_FMUL);
  2463     check_fpuen();
  2464     if( sh4_x86.double_prec ) {
  2465         push_dr(FRm);
  2466         push_dr(FRn);
  2467         FMULP_st(1);
  2468         pop_dr(FRn);
  2469     } else {
  2470         push_fr(FRm);
  2471         push_fr(FRn);
  2472         FMULP_st(1);
  2473         pop_fr(FRn);
   2474     }
  2475 :}
  2476 FNEG FRn {:  
  2477     COUNT_INST(I_FNEG);
  2478     check_fpuen();
  2479     if( sh4_x86.double_prec ) {
  2480         push_dr(FRn);
  2481         FCHS_st0();
  2482         pop_dr(FRn);
  2483     } else {
  2484         push_fr(FRn);
  2485         FCHS_st0();
  2486         pop_fr(FRn);
   2487     }
  2488 :}
  2489 FSRRA FRn {:  
  2490     COUNT_INST(I_FSRRA);
  2491     check_fpuen();
  2492     if( sh4_x86.double_prec == 0 ) {
  2493         FLD1_st0();
  2494         push_fr(FRn);
  2495         FSQRT_st0();
  2496         FDIVP_st(1);
  2497         pop_fr(FRn);
   2498     }
  2499 :}
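        /* Note: FSRRA computes 1/sqrt(FRn) on the x87 stack: FLD1 pushes
         * 1.0, push_fr pushes FRn, FSQRT replaces it with sqrt(FRn), and
         * FDIVP leaves st(1)/st(0) = 1.0/sqrt(FRn) for pop_fr to write
         * back. The PR=1 (double) case is simply skipped here. */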
  2500 FSQRT FRn {:  
  2501     COUNT_INST(I_FSQRT);
  2502     check_fpuen();
  2503     if( sh4_x86.double_prec ) {
  2504         push_dr(FRn);
  2505         FSQRT_st0();
  2506         pop_dr(FRn);
  2507     } else {
  2508         push_fr(FRn);
  2509         FSQRT_st0();
  2510         pop_fr(FRn);
   2511     }
  2512 :}
  2513 FSUB FRm, FRn {:  
  2514     COUNT_INST(I_FSUB);
  2515     check_fpuen();
  2516     if( sh4_x86.double_prec ) {
  2517         push_dr(FRn);
  2518         push_dr(FRm);
  2519         FSUBP_st(1);
  2520         pop_dr(FRn);
  2521     } else {
  2522         push_fr(FRn);
  2523         push_fr(FRm);
  2524         FSUBP_st(1);
  2525         pop_fr(FRn);
   2526     }
  2527 :}
  2529 FCMP/EQ FRm, FRn {:  
  2530     COUNT_INST(I_FCMPEQ);
  2531     check_fpuen();
  2532     if( sh4_x86.double_prec ) {
  2533         push_dr(FRm);
  2534         push_dr(FRn);
  2535     } else {
  2536         push_fr(FRm);
  2537         push_fr(FRn);
   2538     }
  2539     FCOMIP_st(1);
  2540     SETE_t();
  2541     FPOP_st();
  2542     sh4_x86.tstate = TSTATE_E;
  2543 :}
  2544 FCMP/GT FRm, FRn {:  
  2545     COUNT_INST(I_FCMPGT);
  2546     check_fpuen();
  2547     if( sh4_x86.double_prec ) {
  2548         push_dr(FRm);
  2549         push_dr(FRn);
  2550     } else {
  2551         push_fr(FRm);
  2552         push_fr(FRn);
   2553     }
  2554     FCOMIP_st(1);
  2555     SETA_t();
  2556     FPOP_st();
  2557     sh4_x86.tstate = TSTATE_A;
  2558 :}
  2560 FSCA FPUL, FRn {:  
  2561     COUNT_INST(I_FSCA);
  2562     check_fpuen();
  2563     if( sh4_x86.double_prec == 0 ) {
  2564         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2565         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2566         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
   2567     }
  2568     sh4_x86.tstate = TSTATE_NONE;
  2569 :}
  2570 FIPR FVm, FVn {:  
  2571     COUNT_INST(I_FIPR);
  2572     check_fpuen();
  2573     if( sh4_x86.double_prec == 0 ) {
  2574         if( sh4_x86.sse3_enabled ) {
  2575             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2576             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2577             HADDPS_xmm_xmm( 4, 4 ); 
  2578             HADDPS_xmm_xmm( 4, 4 );
  2579             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2580         } else {
  2581             push_fr( FVm<<2 );
  2582             push_fr( FVn<<2 );
  2583             FMULP_st(1);
  2584             push_fr( (FVm<<2)+1);
  2585             push_fr( (FVn<<2)+1);
  2586             FMULP_st(1);
  2587             FADDP_st(1);
  2588             push_fr( (FVm<<2)+2);
  2589             push_fr( (FVn<<2)+2);
  2590             FMULP_st(1);
  2591             FADDP_st(1);
  2592             push_fr( (FVm<<2)+3);
  2593             push_fr( (FVn<<2)+3);
  2594             FMULP_st(1);
  2595             FADDP_st(1);
  2596             pop_fr( (FVn<<2)+3);
   2597         }
   2598     }
  2599 :}
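        /* Note: the SSE3 path above is a 4-wide dot product: MULPS forms
         * the elementwise products of FVm and FVn, and two HADDPS passes
         * reduce them ((a+b, c+d), then a+b+c+d in every lane); MOVSS then
         * stores the scalar into the destination element of FVn. The x87
         * fallback accumulates the same four products term by term. */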
  2600 FTRV XMTRX, FVn {:  
  2601     COUNT_INST(I_FTRV);
  2602     check_fpuen();
  2603     if( sh4_x86.double_prec == 0 ) {
  2604         if( sh4_x86.sse3_enabled ) {
  2605             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2606             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2607             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2608             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2610             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2611             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2612             MOV_xmm_xmm( 4, 6 );
  2613             MOV_xmm_xmm( 5, 7 );
  2614             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2615             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2616             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2617             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2618             MULPS_xmm_xmm( 0, 4 );
  2619             MULPS_xmm_xmm( 1, 5 );
  2620             MULPS_xmm_xmm( 2, 6 );
  2621             MULPS_xmm_xmm( 3, 7 );
  2622             ADDPS_xmm_xmm( 5, 4 );
  2623             ADDPS_xmm_xmm( 7, 6 );
  2624             ADDPS_xmm_xmm( 6, 4 );
  2625             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2626         } else {
  2627             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2628             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
   2629         }
   2630     }
  2631     sh4_x86.tstate = TSTATE_NONE;
  2632 :}
  2634 FRCHG {:  
  2635     COUNT_INST(I_FRCHG);
  2636     check_fpuen();
  2637     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2638     CALL_ptr( sh4_switch_fr_banks );
  2639     sh4_x86.tstate = TSTATE_NONE;
  2640 :}
  2641 FSCHG {:  
  2642     COUNT_INST(I_FSCHG);
  2643     check_fpuen();
  2644     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2645     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2646     sh4_x86.tstate = TSTATE_NONE;
  2647     sh4_x86.double_size = !sh4_x86.double_size;
  2648     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2649 :}
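        /* Note: FSCHG flips FPSCR.SZ both in sh4r.fpscr and in the cached
         * xlat_sh4_mode word, and toggles the translator's double_size
         * flag, so the remainder of the block is translated in the new
         * transfer-size mode without leaving translated code. */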
  2651 /* Processor control instructions */
  2652 LDC Rm, SR {:
  2653     COUNT_INST(I_LDCSR);
  2654     if( sh4_x86.in_delay_slot ) {
  2655 	SLOTILLEGAL();
  2656     } else {
  2657 	check_priv();
  2658 	load_reg( REG_EAX, Rm );
  2659 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2660 	sh4_x86.fpuen_checked = FALSE;
  2661 	sh4_x86.tstate = TSTATE_NONE;
  2662     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2663 	return 2;
   2664     }
  2665 :}
  2666 LDC Rm, GBR {: 
  2667     COUNT_INST(I_LDC);
  2668     load_reg( REG_EAX, Rm );
  2669     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2670 :}
  2671 LDC Rm, VBR {:  
  2672     COUNT_INST(I_LDC);
  2673     check_priv();
  2674     load_reg( REG_EAX, Rm );
  2675     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2676     sh4_x86.tstate = TSTATE_NONE;
  2677 :}
  2678 LDC Rm, SSR {:  
  2679     COUNT_INST(I_LDC);
  2680     check_priv();
  2681     load_reg( REG_EAX, Rm );
  2682     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2683     sh4_x86.tstate = TSTATE_NONE;
  2684 :}
  2685 LDC Rm, SGR {:  
  2686     COUNT_INST(I_LDC);
  2687     check_priv();
  2688     load_reg( REG_EAX, Rm );
  2689     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2690     sh4_x86.tstate = TSTATE_NONE;
  2691 :}
  2692 LDC Rm, SPC {:  
  2693     COUNT_INST(I_LDC);
  2694     check_priv();
  2695     load_reg( REG_EAX, Rm );
  2696     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2697     sh4_x86.tstate = TSTATE_NONE;
  2698 :}
  2699 LDC Rm, DBR {:  
  2700     COUNT_INST(I_LDC);
  2701     check_priv();
  2702     load_reg( REG_EAX, Rm );
  2703     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2704     sh4_x86.tstate = TSTATE_NONE;
  2705 :}
  2706 LDC Rm, Rn_BANK {:  
  2707     COUNT_INST(I_LDC);
  2708     check_priv();
  2709     load_reg( REG_EAX, Rm );
  2710     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2711     sh4_x86.tstate = TSTATE_NONE;
  2712 :}
  2713 LDC.L @Rm+, GBR {:  
  2714     COUNT_INST(I_LDCM);
  2715     load_reg( REG_EAX, Rm );
  2716     check_ralign32( REG_EAX );
  2717     MEM_READ_LONG( REG_EAX, REG_EAX );
  2718     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2719     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2720     sh4_x86.tstate = TSTATE_NONE;
  2721 :}
  2722 LDC.L @Rm+, SR {:
  2723     COUNT_INST(I_LDCSRM);
  2724     if( sh4_x86.in_delay_slot ) {
  2725 	SLOTILLEGAL();
  2726     } else {
  2727 	check_priv();
  2728 	load_reg( REG_EAX, Rm );
  2729 	check_ralign32( REG_EAX );
  2730 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2731 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2732 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2733 	sh4_x86.fpuen_checked = FALSE;
  2734 	sh4_x86.tstate = TSTATE_NONE;
  2735     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2736 	return 2;
   2737     }
  2738 :}
  2739 LDC.L @Rm+, VBR {:  
  2740     COUNT_INST(I_LDCM);
  2741     check_priv();
  2742     load_reg( REG_EAX, Rm );
  2743     check_ralign32( REG_EAX );
  2744     MEM_READ_LONG( REG_EAX, REG_EAX );
  2745     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2746     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2747     sh4_x86.tstate = TSTATE_NONE;
  2748 :}
  2749 LDC.L @Rm+, SSR {:
  2750     COUNT_INST(I_LDCM);
  2751     check_priv();
  2752     load_reg( REG_EAX, Rm );
  2753     check_ralign32( REG_EAX );
  2754     MEM_READ_LONG( REG_EAX, REG_EAX );
  2755     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2756     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2757     sh4_x86.tstate = TSTATE_NONE;
  2758 :}
  2759 LDC.L @Rm+, SGR {:  
  2760     COUNT_INST(I_LDCM);
  2761     check_priv();
  2762     load_reg( REG_EAX, Rm );
  2763     check_ralign32( REG_EAX );
  2764     MEM_READ_LONG( REG_EAX, REG_EAX );
  2765     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2766     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2767     sh4_x86.tstate = TSTATE_NONE;
  2768 :}
  2769 LDC.L @Rm+, SPC {:  
  2770     COUNT_INST(I_LDCM);
  2771     check_priv();
  2772     load_reg( REG_EAX, Rm );
  2773     check_ralign32( REG_EAX );
  2774     MEM_READ_LONG( REG_EAX, REG_EAX );
  2775     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2776     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2777     sh4_x86.tstate = TSTATE_NONE;
  2778 :}
  2779 LDC.L @Rm+, DBR {:  
  2780     COUNT_INST(I_LDCM);
  2781     check_priv();
  2782     load_reg( REG_EAX, Rm );
  2783     check_ralign32( REG_EAX );
  2784     MEM_READ_LONG( REG_EAX, REG_EAX );
  2785     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2786     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2787     sh4_x86.tstate = TSTATE_NONE;
  2788 :}
  2789 LDC.L @Rm+, Rn_BANK {:  
  2790     COUNT_INST(I_LDCM);
  2791     check_priv();
  2792     load_reg( REG_EAX, Rm );
  2793     check_ralign32( REG_EAX );
  2794     MEM_READ_LONG( REG_EAX, REG_EAX );
  2795     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2796     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2797     sh4_x86.tstate = TSTATE_NONE;
  2798 :}
  2799 LDS Rm, FPSCR {:
  2800     COUNT_INST(I_LDSFPSCR);
  2801     check_fpuen();
  2802     load_reg( REG_EAX, Rm );
  2803     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2804     sh4_x86.tstate = TSTATE_NONE;
  2805     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2806     return 2;
  2807 :}
  2808 LDS.L @Rm+, FPSCR {:  
  2809     COUNT_INST(I_LDSFPSCRM);
  2810     check_fpuen();
  2811     load_reg( REG_EAX, Rm );
  2812     check_ralign32( REG_EAX );
  2813     MEM_READ_LONG( REG_EAX, REG_EAX );
  2814     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2815     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2816     sh4_x86.tstate = TSTATE_NONE;
  2817     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2818     return 2;
  2819 :}
  2820 LDS Rm, FPUL {:  
  2821     COUNT_INST(I_LDS);
  2822     check_fpuen();
  2823     load_reg( REG_EAX, Rm );
  2824     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2825 :}
  2826 LDS.L @Rm+, FPUL {:  
  2827     COUNT_INST(I_LDSM);
  2828     check_fpuen();
  2829     load_reg( REG_EAX, Rm );
  2830     check_ralign32( REG_EAX );
  2831     MEM_READ_LONG( REG_EAX, REG_EAX );
  2832     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2833     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2834     sh4_x86.tstate = TSTATE_NONE;
  2835 :}
  2836 LDS Rm, MACH {: 
  2837     COUNT_INST(I_LDS);
  2838     load_reg( REG_EAX, Rm );
  2839     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2840 :}
  2841 LDS.L @Rm+, MACH {:  
  2842     COUNT_INST(I_LDSM);
  2843     load_reg( REG_EAX, Rm );
  2844     check_ralign32( REG_EAX );
  2845     MEM_READ_LONG( REG_EAX, REG_EAX );
  2846     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2847     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2848     sh4_x86.tstate = TSTATE_NONE;
  2849 :}
  2850 LDS Rm, MACL {:  
  2851     COUNT_INST(I_LDS);
  2852     load_reg( REG_EAX, Rm );
  2853     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2854 :}
  2855 LDS.L @Rm+, MACL {:  
  2856     COUNT_INST(I_LDSM);
  2857     load_reg( REG_EAX, Rm );
  2858     check_ralign32( REG_EAX );
  2859     MEM_READ_LONG( REG_EAX, REG_EAX );
  2860     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2861     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2862     sh4_x86.tstate = TSTATE_NONE;
  2863 :}
  2864 LDS Rm, PR {:  
  2865     COUNT_INST(I_LDS);
  2866     load_reg( REG_EAX, Rm );
  2867     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2868 :}
  2869 LDS.L @Rm+, PR {:  
  2870     COUNT_INST(I_LDSM);
  2871     load_reg( REG_EAX, Rm );
  2872     check_ralign32( REG_EAX );
  2873     MEM_READ_LONG( REG_EAX, REG_EAX );
  2874     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2875     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2876     sh4_x86.tstate = TSTATE_NONE;
  2877 :}
  2878 LDTLB {:  
  2879     COUNT_INST(I_LDTLB);
  2880     CALL_ptr( MMU_ldtlb );
  2881     sh4_x86.tstate = TSTATE_NONE;
  2882 :}
  2883 OCBI @Rn {:
  2884     COUNT_INST(I_OCBI);
  2885 :}
  2886 OCBP @Rn {:
  2887     COUNT_INST(I_OCBP);
  2888 :}
  2889 OCBWB @Rn {:
  2890     COUNT_INST(I_OCBWB);
  2891 :}
  2892 PREF @Rn {:
  2893     COUNT_INST(I_PREF);
  2894     load_reg( REG_EAX, Rn );
  2895     MEM_PREFETCH( REG_EAX );
  2896     sh4_x86.tstate = TSTATE_NONE;
  2897 :}
  2898 SLEEP {: 
  2899     COUNT_INST(I_SLEEP);
  2900     check_priv();
  2901     CALL_ptr( sh4_sleep );
  2902     sh4_x86.tstate = TSTATE_NONE;
  2903     sh4_x86.in_delay_slot = DELAY_NONE;
  2904     return 2;
  2905 :}
  2906 STC SR, Rn {:
  2907     COUNT_INST(I_STCSR);
  2908     check_priv();
  2909     CALL_ptr(sh4_read_sr);
  2910     store_reg( REG_EAX, Rn );
  2911     sh4_x86.tstate = TSTATE_NONE;
  2912 :}
  2913 STC GBR, Rn {:  
  2914     COUNT_INST(I_STC);
  2915     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2916     store_reg( REG_EAX, Rn );
  2917 :}
  2918 STC VBR, Rn {:  
  2919     COUNT_INST(I_STC);
  2920     check_priv();
  2921     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2922     store_reg( REG_EAX, Rn );
  2923     sh4_x86.tstate = TSTATE_NONE;
  2924 :}
  2925 STC SSR, Rn {:  
  2926     COUNT_INST(I_STC);
  2927     check_priv();
  2928     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2929     store_reg( REG_EAX, Rn );
  2930     sh4_x86.tstate = TSTATE_NONE;
  2931 :}
  2932 STC SPC, Rn {:  
  2933     COUNT_INST(I_STC);
  2934     check_priv();
  2935     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2936     store_reg( REG_EAX, Rn );
  2937     sh4_x86.tstate = TSTATE_NONE;
  2938 :}
  2939 STC SGR, Rn {:  
  2940     COUNT_INST(I_STC);
  2941     check_priv();
  2942     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2943     store_reg( REG_EAX, Rn );
  2944     sh4_x86.tstate = TSTATE_NONE;
  2945 :}
  2946 STC DBR, Rn {:  
  2947     COUNT_INST(I_STC);
  2948     check_priv();
  2949     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2950     store_reg( REG_EAX, Rn );
  2951     sh4_x86.tstate = TSTATE_NONE;
  2952 :}
  2953 STC Rm_BANK, Rn {:
  2954     COUNT_INST(I_STC);
  2955     check_priv();
  2956     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  2957     store_reg( REG_EAX, Rn );
  2958     sh4_x86.tstate = TSTATE_NONE;
  2959 :}
  2960 STC.L SR, @-Rn {:
  2961     COUNT_INST(I_STCSRM);
  2962     check_priv();
  2963     CALL_ptr( sh4_read_sr );
  2964     MOVL_r32_r32( REG_EAX, REG_EDX );
  2965     load_reg( REG_EAX, Rn );
  2966     check_walign32( REG_EAX );
  2967     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2968     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2969     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2970     sh4_x86.tstate = TSTATE_NONE;
  2971 :}
  2972 STC.L VBR, @-Rn {:  
  2973     COUNT_INST(I_STCM);
  2974     check_priv();
  2975     load_reg( REG_EAX, Rn );
  2976     check_walign32( REG_EAX );
  2977     ADDL_imms_r32( -4, REG_EAX );
  2978     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  2979     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2980     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2981     sh4_x86.tstate = TSTATE_NONE;
  2982 :}
  2983 STC.L SSR, @-Rn {:  
  2984     COUNT_INST(I_STCM);
  2985     check_priv();
  2986     load_reg( REG_EAX, Rn );
  2987     check_walign32( REG_EAX );
  2988     ADDL_imms_r32( -4, REG_EAX );
  2989     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  2990     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2991     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2992     sh4_x86.tstate = TSTATE_NONE;
  2993 :}
  2994 STC.L SPC, @-Rn {:
  2995     COUNT_INST(I_STCM);
  2996     check_priv();
  2997     load_reg( REG_EAX, Rn );
  2998     check_walign32( REG_EAX );
  2999     ADDL_imms_r32( -4, REG_EAX );
  3000     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3001     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3002     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3003     sh4_x86.tstate = TSTATE_NONE;
  3004 :}
  3005 STC.L SGR, @-Rn {:  
  3006     COUNT_INST(I_STCM);
  3007     check_priv();
  3008     load_reg( REG_EAX, Rn );
  3009     check_walign32( REG_EAX );
  3010     ADDL_imms_r32( -4, REG_EAX );
  3011     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3012     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3013     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3014     sh4_x86.tstate = TSTATE_NONE;
  3015 :}
  3016 STC.L DBR, @-Rn {:  
  3017     COUNT_INST(I_STCM);
  3018     check_priv();
  3019     load_reg( REG_EAX, Rn );
  3020     check_walign32( REG_EAX );
  3021     ADDL_imms_r32( -4, REG_EAX );
  3022     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3023     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3024     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3025     sh4_x86.tstate = TSTATE_NONE;
  3026 :}
  3027 STC.L Rm_BANK, @-Rn {:  
  3028     COUNT_INST(I_STCM);
  3029     check_priv();
  3030     load_reg( REG_EAX, Rn );
  3031     check_walign32( REG_EAX );
  3032     ADDL_imms_r32( -4, REG_EAX );
  3033     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3034     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3035     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3036     sh4_x86.tstate = TSTATE_NONE;
  3037 :}
  3038 STC.L GBR, @-Rn {:  
  3039     COUNT_INST(I_STCM);
  3040     load_reg( REG_EAX, Rn );
  3041     check_walign32( REG_EAX );
  3042     ADDL_imms_r32( -4, REG_EAX );
  3043     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3044     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3045     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3046     sh4_x86.tstate = TSTATE_NONE;
  3047 :}
  3048 STS FPSCR, Rn {:  
  3049     COUNT_INST(I_STSFPSCR);
  3050     check_fpuen();
  3051     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3052     store_reg( REG_EAX, Rn );
  3053 :}
  3054 STS.L FPSCR, @-Rn {:  
  3055     COUNT_INST(I_STSFPSCRM);
  3056     check_fpuen();
  3057     load_reg( REG_EAX, Rn );
  3058     check_walign32( REG_EAX );
  3059     ADDL_imms_r32( -4, REG_EAX );
  3060     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3061     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3062     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3063     sh4_x86.tstate = TSTATE_NONE;
  3064 :}
  3065 STS FPUL, Rn {:  
  3066     COUNT_INST(I_STS);
  3067     check_fpuen();
  3068     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3069     store_reg( REG_EAX, Rn );
  3070 :}
  3071 STS.L FPUL, @-Rn {:  
  3072     COUNT_INST(I_STSM);
  3073     check_fpuen();
  3074     load_reg( REG_EAX, Rn );
  3075     check_walign32( REG_EAX );
  3076     ADDL_imms_r32( -4, REG_EAX );
  3077     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3078     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3079     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3080     sh4_x86.tstate = TSTATE_NONE;
  3081 :}
  3082 STS MACH, Rn {:  
  3083     COUNT_INST(I_STS);
  3084     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3085     store_reg( REG_EAX, Rn );
  3086 :}
  3087 STS.L MACH, @-Rn {:  
  3088     COUNT_INST(I_STSM);
  3089     load_reg( REG_EAX, Rn );
  3090     check_walign32( REG_EAX );
  3091     ADDL_imms_r32( -4, REG_EAX );
  3092     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3093     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3094     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3095     sh4_x86.tstate = TSTATE_NONE;
  3096 :}
  3097 STS MACL, Rn {:  
  3098     COUNT_INST(I_STS);
  3099     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3100     store_reg( REG_EAX, Rn );
  3101 :}
  3102 STS.L MACL, @-Rn {:  
  3103     COUNT_INST(I_STSM);
  3104     load_reg( REG_EAX, Rn );
  3105     check_walign32( REG_EAX );
  3106     ADDL_imms_r32( -4, REG_EAX );
  3107     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3108     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3109     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3110     sh4_x86.tstate = TSTATE_NONE;
  3111 :}
  3112 STS PR, Rn {:  
  3113     COUNT_INST(I_STS);
  3114     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3115     store_reg( REG_EAX, Rn );
  3116 :}
  3117 STS.L PR, @-Rn {:  
  3118     COUNT_INST(I_STSM);
  3119     load_reg( REG_EAX, Rn );
  3120     check_walign32( REG_EAX );
  3121     ADDL_imms_r32( -4, REG_EAX );
  3122     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3123     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3124     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3125     sh4_x86.tstate = TSTATE_NONE;
  3126 :}
  3128 NOP {: 
  3129     COUNT_INST(I_NOP);
   3130     /* Do nothing. Well, we could emit a 0x90, but what would really be the point? */ 
  3131 :}
  3132 %%
  3133     sh4_x86.in_delay_slot = DELAY_NONE;
  3134     return 0;
   3135 }
  3138 /**
  3139  * The unwind methods only work if we compiled with DWARF2 frame information
   3140  * (i.e. -fexceptions); otherwise we have to use the direct frame scan.
  3141  */
  3142 #ifdef HAVE_EXCEPTIONS
  3143 #include <unwind.h>
  3145 struct UnwindInfo {
  3146     uintptr_t block_start;
  3147     uintptr_t block_end;
  3148     void *pc;
  3149 };
  3151 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
   3152 {
  3153     struct UnwindInfo *info = arg;
  3154     void *pc = (void *)_Unwind_GetIP(context);
  3155     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3156         info->pc = pc;
  3157         return _URC_NORMAL_STOP;
   3158     }
  3159     return _URC_NO_REASON;
   3160 }
  3162 void *xlat_get_native_pc( void *code, uint32_t code_size )
   3163 {
  3164     struct _Unwind_Exception exc;
  3165     struct UnwindInfo info;
  3167     info.pc = NULL;
  3168     info.block_start = (uintptr_t)code;
  3169     info.block_end = info.block_start + code_size;
  3170     void *result = NULL;
  3171     _Unwind_Backtrace( xlat_check_frame, &info );
  3172     return info.pc;
   3173 }
  3174 #else
  3175 /* Assume this is an ia32 build - amd64 should always have dwarf information */
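        /* Translated code runs with %ebp fixed at sh4r+128, so the frame
         * whose saved EBP equals that value belongs to generated code: the
         * scan below walks up to 8 saved-EBP links ((%eax) holds the
         * caller's EBP, 0x4(%eax) the return address) and returns the
         * return address of that frame. */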
  3176 void *xlat_get_native_pc( void *code, uint32_t code_size )
   3177 {
  3178     void *result = NULL;
  3179     __asm__(
  3180         "mov %%ebp, %%eax\n\t"
  3181         "mov $0x8, %%ecx\n\t"
  3182         "mov %1, %%edx\n"
  3183         "frame_loop: test %%eax, %%eax\n\t"
  3184         "je frame_not_found\n\t"
  3185         "cmp (%%eax), %%edx\n\t"
  3186         "je frame_found\n\t"
  3187         "sub $0x1, %%ecx\n\t"
  3188         "je frame_not_found\n\t"
  3189         "movl (%%eax), %%eax\n\t"
  3190         "jmp frame_loop\n"
  3191         "frame_found: movl 0x4(%%eax), %0\n"
  3192         "frame_not_found:"
  3193         : "=r" (result)
  3194         : "r" (((uint8_t *)&sh4r) + 128 )
  3195         : "eax", "ecx", "edx" );
  3196     return result;
   3197 }
  3198 #endif