lxdream.org :: lxdream/src/sh4/sh4x86.in
filename     src/sh4/sh4x86.in
changeset    1182:b38a327ad8fa
prev         1176:70feb1749427
next         1186:2dc47c67bb93
author       nkeynes
date         Sun Nov 27 18:20:21 2011 +1000
permissions  -rw-r--r--
last change  Add block profiling option to count the number of executions of
             each block, and dump them out from most-to-least used.
/**
 * $Id$
 *
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "x86dasm/x86dasm.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096

/* Offset of a reg relative to the sh4r structure */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
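
/* At runtime %rbp holds &sh4r + 128 (see the "sh4r+128" symbol table entry
 * below), so REG_OFFSET produces rbp-relative displacements biased by -128.
 * Presumably this keeps the first 256 bytes of sh4r - eg REG_OFFSET(r[3]),
 * ie sh4r.r[3] - reachable with a signed 8-bit displacement rather than a
 * 32-bit one. */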
#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
#define R_MACH   REG_OFFSET(mac)+4
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])

#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

#define SH4_MODE_UNKNOWN -1

struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};
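
/* exc_code semantics (see sh4_translate_end_block below): values >= 0 are
 * SH4 exception codes raised at the patched site; negative values are
 * internal, with -2 marking a pointer-sized relocation that receives the
 * absolute address of the recovery stub rather than a rel32 branch fixup. */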
/**
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    uint8_t *code;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;
    struct mem_region_fn **user_address_space;

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback;
    xlat_block_end_callback_t end_callback;
    gboolean fastmem;
    gboolean profile_blocks;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
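
/* 0x0F7F masks all x87 exceptions and sets the rounding-control field
 * (FCW bits 10-11) to 11b = round toward zero, ie truncation, as needed
 * when emulating FTRC. */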
static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },
    { "sh4_user_address_space", NULL },
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};

gboolean is_sse3_supported()
{
    uint32_t features;
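
    /* CPUID leaf 1 returns feature flags in ECX/EDX; bit 0 of ECX is the
     * SSE3 (PNI) flag. EBX is in the clobber list because CPUID writes it,
     * and on PIC ia32 builds it also serves as the GOT pointer. */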
    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}

void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
{
    sh4_x86.priv_address_space = priv;
    sh4_x86.user_address_space = user;
    x86_symbol_table[2].ptr = priv;
    x86_symbol_table[3].ptr = user;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.begin_callback = NULL;
    sh4_x86.end_callback = NULL;
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
    sh4_x86.fastmem = TRUE;
    sh4_x86.profile_blocks = FALSE;
    sh4_x86.sse3_enabled = is_sse3_supported();
    x86_disasm_init();
    x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
}

void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.begin_callback = begin;
    sh4_x86.end_callback = end;
}

void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}

void sh4_translate_set_profile_blocks( gboolean flag )
{
    sh4_x86.profile_blocks = flag;
}

gboolean sh4_translate_get_profile_blocks()
{
    return sh4_x86.profile_blocks;
}

/**
 * Disassemble the given translated code block, and its source SH4 code block
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end;  ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#else
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#endif
        if( source_recov_table < source_recov_end &&
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}

static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list,
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset =
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}

#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
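
/* Forward short-jump fixup scheme: each Jcc_label/JMP_label below emits a
 * rel8 placeholder of -1, and MARK_JMP8 records the address of that rel8
 * byte; JMP_TARGET then adds (current output - mark), giving a net value of
 * (target - mark - 1), which is exactly the rel8 displacement needed.
 * Sketch of typical use:
 *
 *     JE_label(skip);       // jcc with rel8 placeholder, marked 'skip'
 *     ...code to be skipped...
 *     JMP_TARGET(skip);     // patch rel8 to land here
 */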

/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
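
/* sh4_x86.tstate caches which x86 condition code currently mirrors sh4r.t:
 * while the comparison that computed T is still live in EFLAGS, JT/JF can
 * branch on it directly; only when tstate == TSTATE_NONE must T be re-tested
 * from memory. The ^1 in JF_label exploits the x86 condition encoding, in
 * which flipping the low bit of a cc yields its negation (eg E <-> NE). */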

#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )

#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif

/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) {
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif

#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc)
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET  (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
#define XLAT_CHAIN_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
#define XLAT_ACTIVE_CODE_OFFSET (uint32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
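
/* These offsets are taken relative to the code field, so that a bare code
 * pointer (as returned by xlat_get_code*) can reach the owning block's
 * metadata at small fixed displacements - jump_next_block() below uses
 * them to compare xlat_sh4_mode and to follow the chain pointer. */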

void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_x86.profile_blocks ) {
        MOVP_immptr_rptr( ((uintptr_t)sh4_x86.code) + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}

uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
    } else {
        return EPILOGUE_SIZE + (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
    }
}

/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}

#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    uint8_t *ptr = xlat_output;
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE, REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(wrongmode);
    MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
    int rel = ptr - xlat_output;
    JMP_prerel(rel);
    JMP_TARGET(nocode);
}

static void exit_block()
{
    emit_epilogue();
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}

/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
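    /* Charge the elapsed slice time: SH4 instructions are 2 bytes each, so
     * (pc - block_start_pc)>>1 instructions have executed, each costed at
     * sh4_cpu_period. */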
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);

    if( IS_IN_ICACHE(pc) ) {
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
        ANDP_imms_rptr( -4, REG_EAX );
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        if( IS_IN_ICACHE(pc) ) {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        } else if( sh4_x86.tlb_on ) {
            CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
        } else {
            CALL1_ptr_r32(xlat_get_code, REG_ARG1);
        }
        jump_next_block();
        JMP_TARGET(exitloop2);
    }
    exit_block();
}

/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}

/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}

/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();
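
        /* Emit one stub per backpatch record and point the recorded fixup
         * at it: for exc_code >= 0 the stub raises that exception and then
         * jumps back to the cleanup above with EDX = instruction count;
         * exc_code == -2 instead stores the stub's absolute address into a
         * pointer-sized slot (the recovery address passed to memory calls
         * that may fault). */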
        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output;
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}

/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both branch and delayed instruction as a single unit (as
 * the delay slot instruction logically executes before the branch is taken).
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an untranslatable instruction).
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
%%
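/* Everything above the %% marker is copied into the generated translator
 * verbatim; each "MNEMONIC operands {: ... :}" block below is an action
 * template that the instruction-decoder generator expands into the SH4
 * decode switch, binding fields such as Rm, Rn, imm and disp from the
 * instruction word. (Format description inferred from the file itself.) */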
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32(0, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {:
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {:
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
:}
CMP/PL Rn {:
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:
    COUNT_INST(I_CMPSTR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULS.L Rm, Rn {:
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTS.W Rm, Rn {:
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.B Rm, Rn {:
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.W Rm, Rn {:
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp(REG_EAX, 0);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:
    COUNT_INST(I_MACW);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }
    IMULL_rspdisp( 0 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );            // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
    JS_label( positive );        // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end2);           // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end3);            // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MUL.L Rm, Rn {:
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32( 0, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:
    COUNT_INST(I_ROTL);
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:
    COUNT_INST(I_ROTR);
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);     // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );  // 3
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr );
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {:
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
  1395 SUBV Rm, Rn {:  
  1396     COUNT_INST(I_SUBV);
  1397     load_reg( REG_EAX, Rm );
  1398     load_reg( REG_ECX, Rn );
  1399     SUBL_r32_r32( REG_EAX, REG_ECX );
  1400     store_reg( REG_ECX, Rn );
  1401     SETO_t();
  1402     sh4_x86.tstate = TSTATE_O;
  1403 :}
  1404 SWAP.B Rm, Rn {:  
  1405     COUNT_INST(I_SWAPB);
  1406     load_reg( REG_EAX, Rm );
  1407     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1408     store_reg( REG_EAX, Rn );
  1409 :}
  1410 SWAP.W Rm, Rn {:  
  1411     COUNT_INST(I_SWAPB);
  1412     load_reg( REG_EAX, Rm );
  1413     MOVL_r32_r32( REG_EAX, REG_ECX );
  1414     SHLL_imm_r32( 16, REG_ECX );
  1415     SHRL_imm_r32( 16, REG_EAX );
  1416     ORL_r32_r32( REG_EAX, REG_ECX );
  1417     store_reg( REG_ECX, Rn );
  1418     sh4_x86.tstate = TSTATE_NONE;
  1419 :}
  1420 TAS.B @Rn {:  
  1421     COUNT_INST(I_TASB);
  1422     load_reg( REG_EAX, Rn );
  1423     MOVL_r32_rspdisp( REG_EAX, 0 );
  1424     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1425     TESTB_r8_r8( REG_DL, REG_DL );
  1426     SETE_t();
  1427     ORB_imms_r8( 0x80, REG_DL );
  1428     MOVL_rspdisp_r32( 0, REG_EAX );
  1429     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1430     sh4_x86.tstate = TSTATE_NONE;
  1431 :}
  1432 TST Rm, Rn {:  
  1433     COUNT_INST(I_TST);
  1434     load_reg( REG_EAX, Rm );
  1435     load_reg( REG_ECX, Rn );
  1436     TESTL_r32_r32( REG_EAX, REG_ECX );
  1437     SETE_t();
  1438     sh4_x86.tstate = TSTATE_E;
  1439 :}
  1440 TST #imm, R0 {:  
  1441     COUNT_INST(I_TSTI);
  1442     load_reg( REG_EAX, 0 );
  1443     TESTL_imms_r32( imm, REG_EAX );
  1444     SETE_t();
  1445     sh4_x86.tstate = TSTATE_E;
  1446 :}
  1447 TST.B #imm, @(R0, GBR) {:  
  1448     COUNT_INST(I_TSTB);
  1449     load_reg( REG_EAX, 0);
  1450     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1451     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1452     TESTB_imms_r8( imm, REG_AL );
  1453     SETE_t();
  1454     sh4_x86.tstate = TSTATE_E;
  1455 :}
  1456 XOR Rm, Rn {:  
  1457     COUNT_INST(I_XOR);
  1458     load_reg( REG_EAX, Rm );
  1459     load_reg( REG_ECX, Rn );
  1460     XORL_r32_r32( REG_EAX, REG_ECX );
  1461     store_reg( REG_ECX, Rn );
  1462     sh4_x86.tstate = TSTATE_NONE;
  1463 :}
  1464 XOR #imm, R0 {:  
  1465     COUNT_INST(I_XORI);
  1466     load_reg( REG_EAX, 0 );
  1467     XORL_imms_r32( imm, REG_EAX );
  1468     store_reg( REG_EAX, 0 );
  1469     sh4_x86.tstate = TSTATE_NONE;
  1470 :}
  1471 XOR.B #imm, @(R0, GBR) {:  
  1472     COUNT_INST(I_XORB);
  1473     load_reg( REG_EAX, 0 );
  1474     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1475     MOVL_r32_rspdisp( REG_EAX, 0 );
  1476     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1477     MOVL_rspdisp_r32( 0, REG_EAX );
  1478     XORL_imms_r32( imm, REG_EDX );
  1479     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1480     sh4_x86.tstate = TSTATE_NONE;
  1481 :}
  1482 XTRCT Rm, Rn {:
  1483     COUNT_INST(I_XTRCT);
  1484     load_reg( REG_EAX, Rm );
  1485     load_reg( REG_ECX, Rn );
  1486     SHLL_imm_r32( 16, REG_EAX );
  1487     SHRL_imm_r32( 16, REG_ECX );
  1488     ORL_r32_r32( REG_EAX, REG_ECX );
  1489     store_reg( REG_ECX, Rn );
  1490     sh4_x86.tstate = TSTATE_NONE;
  1491 :}
  1493 /* Data move instructions */
  1494 MOV Rm, Rn {:  
  1495     COUNT_INST(I_MOV);
  1496     load_reg( REG_EAX, Rm );
  1497     store_reg( REG_EAX, Rn );
  1498 :}
  1499 MOV #imm, Rn {:  
  1500     COUNT_INST(I_MOVI);
  1501     MOVL_imm32_r32( imm, REG_EAX );
  1502     store_reg( REG_EAX, Rn );
  1503 :}
  1504 MOV.B Rm, @Rn {:  
  1505     COUNT_INST(I_MOVB);
  1506     load_reg( REG_EAX, Rn );
  1507     load_reg( REG_EDX, Rm );
  1508     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1509     sh4_x86.tstate = TSTATE_NONE;
  1510 :}
  1511 MOV.B Rm, @-Rn {:  
  1512     COUNT_INST(I_MOVB);
  1513     load_reg( REG_EAX, Rn );
  1514     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1515     load_reg( REG_EDX, Rm );
  1516     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1517     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1518     sh4_x86.tstate = TSTATE_NONE;
  1519 :}
  1520 MOV.B Rm, @(R0, Rn) {:  
  1521     COUNT_INST(I_MOVB);
  1522     load_reg( REG_EAX, 0 );
  1523     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1524     load_reg( REG_EDX, Rm );
  1525     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1526     sh4_x86.tstate = TSTATE_NONE;
  1527 :}
  1528 MOV.B R0, @(disp, GBR) {:  
  1529     COUNT_INST(I_MOVB);
  1530     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1531     ADDL_imms_r32( disp, REG_EAX );
  1532     load_reg( REG_EDX, 0 );
  1533     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1534     sh4_x86.tstate = TSTATE_NONE;
  1535 :}
  1536 MOV.B R0, @(disp, Rn) {:  
  1537     COUNT_INST(I_MOVB);
  1538     load_reg( REG_EAX, Rn );
  1539     ADDL_imms_r32( disp, REG_EAX );
  1540     load_reg( REG_EDX, 0 );
  1541     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1542     sh4_x86.tstate = TSTATE_NONE;
  1543 :}
  1544 MOV.B @Rm, Rn {:  
  1545     COUNT_INST(I_MOVB);
  1546     load_reg( REG_EAX, Rm );
  1547     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1548     store_reg( REG_EAX, Rn );
  1549     sh4_x86.tstate = TSTATE_NONE;
  1550 :}
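       /* In the post-increment forms below, the increment is skipped when
        * Rm == Rn so that the loaded value, rather than the incremented
        * address, ends up in the destination register. */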
  1551 MOV.B @Rm+, Rn {:  
  1552     COUNT_INST(I_MOVB);
  1553     load_reg( REG_EAX, Rm );
  1554     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1555     if( Rm != Rn ) {
  1556     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1557     }
  1558     store_reg( REG_EAX, Rn );
  1559     sh4_x86.tstate = TSTATE_NONE;
  1560 :}
  1561 MOV.B @(R0, Rm), Rn {:  
  1562     COUNT_INST(I_MOVB);
  1563     load_reg( REG_EAX, 0 );
  1564     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1565     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1566     store_reg( REG_EAX, Rn );
  1567     sh4_x86.tstate = TSTATE_NONE;
  1568 :}
  1569 MOV.B @(disp, GBR), R0 {:  
  1570     COUNT_INST(I_MOVB);
  1571     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1572     ADDL_imms_r32( disp, REG_EAX );
  1573     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1574     store_reg( REG_EAX, 0 );
  1575     sh4_x86.tstate = TSTATE_NONE;
  1576 :}
  1577 MOV.B @(disp, Rm), R0 {:  
  1578     COUNT_INST(I_MOVB);
  1579     load_reg( REG_EAX, Rm );
  1580     ADDL_imms_r32( disp, REG_EAX );
  1581     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1582     store_reg( REG_EAX, 0 );
  1583     sh4_x86.tstate = TSTATE_NONE;
  1584 :}
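       /* MOV.L stores test for the store-queue region inline: an address with
        * (addr & 0xFC000000) == 0xE0000000 (i.e. 0xE0000000-0xE3FFFFFF) is
        * written directly into sh4r.store_queue instead of going through the
        * memory subsystem. */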
  1585 MOV.L Rm, @Rn {:
  1586     COUNT_INST(I_MOVL);
  1587     load_reg( REG_EAX, Rn );
  1588     check_walign32(REG_EAX);
  1589     MOVL_r32_r32( REG_EAX, REG_ECX );
  1590     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1591     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1592     JNE_label( notsq );
  1593     ANDL_imms_r32( 0x3C, REG_EAX );
  1594     load_reg( REG_EDX, Rm );
  1595     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1596     JMP_label(end);
  1597     JMP_TARGET(notsq);
  1598     load_reg( REG_EDX, Rm );
  1599     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1600     JMP_TARGET(end);
  1601     sh4_x86.tstate = TSTATE_NONE;
  1602 :}
  1603 MOV.L Rm, @-Rn {:  
  1604     COUNT_INST(I_MOVL);
  1605     load_reg( REG_EAX, Rn );
  1606     ADDL_imms_r32( -4, REG_EAX );
  1607     check_walign32( REG_EAX );
  1608     load_reg( REG_EDX, Rm );
  1609     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1610     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1611     sh4_x86.tstate = TSTATE_NONE;
  1612 :}
  1613 MOV.L Rm, @(R0, Rn) {:  
  1614     COUNT_INST(I_MOVL);
  1615     load_reg( REG_EAX, 0 );
  1616     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1617     check_walign32( REG_EAX );
  1618     load_reg( REG_EDX, Rm );
  1619     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1620     sh4_x86.tstate = TSTATE_NONE;
  1621 :}
  1622 MOV.L R0, @(disp, GBR) {:  
  1623     COUNT_INST(I_MOVL);
  1624     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1625     ADDL_imms_r32( disp, REG_EAX );
  1626     check_walign32( REG_EAX );
  1627     load_reg( REG_EDX, 0 );
  1628     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1629     sh4_x86.tstate = TSTATE_NONE;
  1630 :}
  1631 MOV.L Rm, @(disp, Rn) {:  
  1632     COUNT_INST(I_MOVL);
  1633     load_reg( REG_EAX, Rn );
  1634     ADDL_imms_r32( disp, REG_EAX );
  1635     check_walign32( REG_EAX );
  1636     MOVL_r32_r32( REG_EAX, REG_ECX );
  1637     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1638     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1639     JNE_label( notsq );
  1640     ANDL_imms_r32( 0x3C, REG_EAX );
  1641     load_reg( REG_EDX, Rm );
  1642     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1643     JMP_label(end);
  1644     JMP_TARGET(notsq);
  1645     load_reg( REG_EDX, Rm );
  1646     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1647     JMP_TARGET(end);
  1648     sh4_x86.tstate = TSTATE_NONE;
  1649 :}
  1650 MOV.L @Rm, Rn {:  
  1651     COUNT_INST(I_MOVL);
  1652     load_reg( REG_EAX, Rm );
  1653     check_ralign32( REG_EAX );
  1654     MEM_READ_LONG( REG_EAX, REG_EAX );
  1655     store_reg( REG_EAX, Rn );
  1656     sh4_x86.tstate = TSTATE_NONE;
  1657 :}
  1658 MOV.L @Rm+, Rn {:  
  1659     COUNT_INST(I_MOVL);
  1660     load_reg( REG_EAX, Rm );
  1661     check_ralign32( REG_EAX );
  1662     MEM_READ_LONG( REG_EAX, REG_EAX );
  1663     if( Rm != Rn ) {
  1664     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1665     }
  1666     store_reg( REG_EAX, Rn );
  1667     sh4_x86.tstate = TSTATE_NONE;
  1668 :}
  1669 MOV.L @(R0, Rm), Rn {:  
  1670     COUNT_INST(I_MOVL);
  1671     load_reg( REG_EAX, 0 );
  1672     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1673     check_ralign32( REG_EAX );
  1674     MEM_READ_LONG( REG_EAX, REG_EAX );
  1675     store_reg( REG_EAX, Rn );
  1676     sh4_x86.tstate = TSTATE_NONE;
  1677 :}
  1678 MOV.L @(disp, GBR), R0 {:
  1679     COUNT_INST(I_MOVL);
  1680     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1681     ADDL_imms_r32( disp, REG_EAX );
  1682     check_ralign32( REG_EAX );
  1683     MEM_READ_LONG( REG_EAX, REG_EAX );
  1684     store_reg( REG_EAX, 0 );
  1685     sh4_x86.tstate = TSTATE_NONE;
  1686 :}
  1687 MOV.L @(disp, PC), Rn {:  
  1688     COUNT_INST(I_MOVLPC);
  1689     if( sh4_x86.in_delay_slot ) {
  1690 	SLOTILLEGAL();
  1691     } else {
  1692 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1693 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1694 	    // If the target address is in the same page as the code, it's
  1695 	    // pretty safe to just ref it directly and circumvent the whole
  1696 	    // memory subsystem. (this is a big performance win)
  1698 	    // FIXME: There's a corner-case that's not handled here when
  1699 	    // the current code-page is in the ITLB but not in the UTLB.
  1700 	    // (should generate a TLB miss although need to test SH4 
  1701 	    // behaviour to confirm) Unlikely to be anyone depending on this
  1702 	    // behaviour though.
  1703 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1704 	    MOVL_moffptr_eax( ptr );
  1705 	} else {
  1706 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1707 	    // different virtual address than the translation was done with,
  1708 	    // but we can safely assume that the low bits are the same.
  1709 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1710 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1711 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1712 	    sh4_x86.tstate = TSTATE_NONE;
  1713 	}
  1714 	store_reg( REG_EAX, Rn );
  1715     }
  1716 :}
  1717 MOV.L @(disp, Rm), Rn {:  
  1718     COUNT_INST(I_MOVL);
  1719     load_reg( REG_EAX, Rm );
  1720     ADDL_imms_r32( disp, REG_EAX );
  1721     check_ralign32( REG_EAX );
  1722     MEM_READ_LONG( REG_EAX, REG_EAX );
  1723     store_reg( REG_EAX, Rn );
  1724     sh4_x86.tstate = TSTATE_NONE;
  1725 :}
  1726 MOV.W Rm, @Rn {:  
  1727     COUNT_INST(I_MOVW);
  1728     load_reg( REG_EAX, Rn );
  1729     check_walign16( REG_EAX );
  1730     load_reg( REG_EDX, Rm );
  1731     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1732     sh4_x86.tstate = TSTATE_NONE;
  1733 :}
  1734 MOV.W Rm, @-Rn {:  
  1735     COUNT_INST(I_MOVW);
  1736     load_reg( REG_EAX, Rn );
  1737     check_walign16( REG_EAX );
  1738     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1739     load_reg( REG_EDX, Rm );
  1740     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1741     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1742     sh4_x86.tstate = TSTATE_NONE;
  1743 :}
  1744 MOV.W Rm, @(R0, Rn) {:  
  1745     COUNT_INST(I_MOVW);
  1746     load_reg( REG_EAX, 0 );
  1747     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1748     check_walign16( REG_EAX );
  1749     load_reg( REG_EDX, Rm );
  1750     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1751     sh4_x86.tstate = TSTATE_NONE;
  1752 :}
  1753 MOV.W R0, @(disp, GBR) {:  
  1754     COUNT_INST(I_MOVW);
  1755     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1756     ADDL_imms_r32( disp, REG_EAX );
  1757     check_walign16( REG_EAX );
  1758     load_reg( REG_EDX, 0 );
  1759     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1760     sh4_x86.tstate = TSTATE_NONE;
  1761 :}
  1762 MOV.W R0, @(disp, Rn) {:  
  1763     COUNT_INST(I_MOVW);
  1764     load_reg( REG_EAX, Rn );
  1765     ADDL_imms_r32( disp, REG_EAX );
  1766     check_walign16( REG_EAX );
  1767     load_reg( REG_EDX, 0 );
  1768     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1769     sh4_x86.tstate = TSTATE_NONE;
  1770 :}
  1771 MOV.W @Rm, Rn {:  
  1772     COUNT_INST(I_MOVW);
  1773     load_reg( REG_EAX, Rm );
  1774     check_ralign16( REG_EAX );
  1775     MEM_READ_WORD( REG_EAX, REG_EAX );
  1776     store_reg( REG_EAX, Rn );
  1777     sh4_x86.tstate = TSTATE_NONE;
  1778 :}
  1779 MOV.W @Rm+, Rn {:  
  1780     COUNT_INST(I_MOVW);
  1781     load_reg( REG_EAX, Rm );
  1782     check_ralign16( REG_EAX );
  1783     MEM_READ_WORD( REG_EAX, REG_EAX );
  1784     if( Rm != Rn ) {
  1785         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1786     }
  1787     store_reg( REG_EAX, Rn );
  1788     sh4_x86.tstate = TSTATE_NONE;
  1789 :}
  1790 MOV.W @(R0, Rm), Rn {:  
  1791     COUNT_INST(I_MOVW);
  1792     load_reg( REG_EAX, 0 );
  1793     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1794     check_ralign16( REG_EAX );
  1795     MEM_READ_WORD( REG_EAX, REG_EAX );
  1796     store_reg( REG_EAX, Rn );
  1797     sh4_x86.tstate = TSTATE_NONE;
  1798 :}
  1799 MOV.W @(disp, GBR), R0 {:  
  1800     COUNT_INST(I_MOVW);
  1801     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1802     ADDL_imms_r32( disp, REG_EAX );
  1803     check_ralign16( REG_EAX );
  1804     MEM_READ_WORD( REG_EAX, REG_EAX );
  1805     store_reg( REG_EAX, 0 );
  1806     sh4_x86.tstate = TSTATE_NONE;
  1807 :}
  1808 MOV.W @(disp, PC), Rn {:  
  1809     COUNT_INST(I_MOVW);
  1810     if( sh4_x86.in_delay_slot ) {
  1811 	SLOTILLEGAL();
  1812     } else {
  1813 	// See comments for MOV.L @(disp, PC), Rn
  1814 	uint32_t target = pc + disp + 4;
  1815 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1816 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1817 	    MOVL_moffptr_eax( ptr );
  1818 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1819 	} else {
  1820 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1821 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1822 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1823 	    sh4_x86.tstate = TSTATE_NONE;
  1824 	}
  1825 	store_reg( REG_EAX, Rn );
  1826     }
  1827 :}
  1828 MOV.W @(disp, Rm), R0 {:  
  1829     COUNT_INST(I_MOVW);
  1830     load_reg( REG_EAX, Rm );
  1831     ADDL_imms_r32( disp, REG_EAX );
  1832     check_ralign16( REG_EAX );
  1833     MEM_READ_WORD( REG_EAX, REG_EAX );
  1834     store_reg( REG_EAX, 0 );
  1835     sh4_x86.tstate = TSTATE_NONE;
  1836 :}
  1837 MOVA @(disp, PC), R0 {:  
  1838     COUNT_INST(I_MOVA);
  1839     if( sh4_x86.in_delay_slot ) {
  1840 	SLOTILLEGAL();
  1841     } else {
  1842 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1843 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1844 	store_reg( REG_ECX, 0 );
  1845 	sh4_x86.tstate = TSTATE_NONE;
  1846     }
  1847 :}
  1848 MOVCA.L R0, @Rn {:  
  1849     COUNT_INST(I_MOVCA);
  1850     load_reg( REG_EAX, Rn );
  1851     check_walign32( REG_EAX );
  1852     load_reg( REG_EDX, 0 );
  1853     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1854     sh4_x86.tstate = TSTATE_NONE;
  1855 :}
  1857 /* Control transfer instructions */
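       /* Common pattern for the branches below: record the branch in the
        * translation state, translate the delay-slot instruction inline, then
        * emit a block exit. When the delay slot is UNTRANSLATABLE, new_pc is
        * computed at runtime and the block exits to the emulation core
        * (exit_block_emu) instead. */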
  1858 BF disp {:
  1859     COUNT_INST(I_BF);
  1860     if( sh4_x86.in_delay_slot ) {
  1861 	SLOTILLEGAL();
  1862     } else {
  1863 	sh4vma_t target = disp + pc + 4;
  1864 	JT_label( nottaken );
  1865 	exit_block_rel(target, pc+2 );
  1866 	JMP_TARGET(nottaken);
  1867 	return 2;
  1868     }
  1869 :}
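       /* Conditional delayed branches emit the not-taken jump with a zero
        * displacement, remember where that displacement lives (patch),
        * translate the delay slot plus the taken exit, and then backpatch the
        * displacement to land on the not-taken fall-through path. */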
  1870 BF/S disp {:
  1871     COUNT_INST(I_BFS);
  1872     if( sh4_x86.in_delay_slot ) {
  1873 	SLOTILLEGAL();
  1874     } else {
  1875 	sh4_x86.in_delay_slot = DELAY_PC;
  1876 	if( UNTRANSLATABLE(pc+2) ) {
  1877 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1878 	    JT_label(nottaken);
  1879 	    ADDL_imms_r32( disp, REG_EAX );
  1880 	    JMP_TARGET(nottaken);
  1881 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1882 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1883 	    exit_block_emu(pc+2);
  1884 	    sh4_x86.branch_taken = TRUE;
  1885 	    return 2;
  1886 	} else {
  1887 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1888 		CMPL_imms_rbpdisp( 1, R_T );
  1889 		sh4_x86.tstate = TSTATE_E;
  1890 	    }
  1891 	    sh4vma_t target = disp + pc + 4;
  1892 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1893 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1894 	    int save_tstate = sh4_x86.tstate;
  1895 	    sh4_translate_instruction(pc+2);
  1896             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1897 	    exit_block_rel( target, pc+4 );
  1899 	    // not taken
  1900 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1901 	    sh4_x86.tstate = save_tstate;
  1902 	    sh4_translate_instruction(pc+2);
  1903 	    return 4;
  1904 	}
  1905     }
  1906 :}
  1907 BRA disp {:  
  1908     COUNT_INST(I_BRA);
  1909     if( sh4_x86.in_delay_slot ) {
  1910 	SLOTILLEGAL();
  1911     } else {
  1912 	sh4_x86.in_delay_slot = DELAY_PC;
  1913 	sh4_x86.branch_taken = TRUE;
  1914 	if( UNTRANSLATABLE(pc+2) ) {
  1915 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1916 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1917 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1918 	    exit_block_emu(pc+2);
  1919 	    return 2;
  1920 	} else {
  1921 	    sh4_translate_instruction( pc + 2 );
  1922 	    exit_block_rel( disp + pc + 4, pc+4 );
  1923 	    return 4;
  1924 	}
  1925     }
  1926 :}
  1927 BRAF Rn {:  
  1928     COUNT_INST(I_BRAF);
  1929     if( sh4_x86.in_delay_slot ) {
  1930 	SLOTILLEGAL();
  1931     } else {
  1932 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1933 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1934 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1935 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1936 	sh4_x86.in_delay_slot = DELAY_PC;
  1937 	sh4_x86.tstate = TSTATE_NONE;
  1938 	sh4_x86.branch_taken = TRUE;
  1939 	if( UNTRANSLATABLE(pc+2) ) {
  1940 	    exit_block_emu(pc+2);
  1941 	    return 2;
  1942 	} else {
  1943 	    sh4_translate_instruction( pc + 2 );
  1944 	    exit_block_newpcset(pc+4);
  1945 	    return 4;
  1946 	}
  1947     }
  1948 :}
  1949 BSR disp {:  
  1950     COUNT_INST(I_BSR);
  1951     if( sh4_x86.in_delay_slot ) {
  1952 	SLOTILLEGAL();
  1953     } else {
  1954 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1955 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1956 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1957 	sh4_x86.in_delay_slot = DELAY_PC;
  1958 	sh4_x86.branch_taken = TRUE;
  1959 	sh4_x86.tstate = TSTATE_NONE;
  1960 	if( UNTRANSLATABLE(pc+2) ) {
  1961 	    ADDL_imms_r32( disp, REG_EAX );
  1962 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1963 	    exit_block_emu(pc+2);
  1964 	    return 2;
  1965 	} else {
  1966 	    sh4_translate_instruction( pc + 2 );
  1967 	    exit_block_rel( disp + pc + 4, pc+4 );
  1968 	    return 4;
  1969 	}
  1970     }
  1971 :}
  1972 BSRF Rn {:  
  1973     COUNT_INST(I_BSRF);
  1974     if( sh4_x86.in_delay_slot ) {
  1975 	SLOTILLEGAL();
  1976     } else {
  1977 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1978 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1979 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1980 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1981 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1983 	sh4_x86.in_delay_slot = DELAY_PC;
  1984 	sh4_x86.tstate = TSTATE_NONE;
  1985 	sh4_x86.branch_taken = TRUE;
  1986 	if( UNTRANSLATABLE(pc+2) ) {
  1987 	    exit_block_emu(pc+2);
  1988 	    return 2;
  1989 	} else {
  1990 	    sh4_translate_instruction( pc + 2 );
  1991 	    exit_block_newpcset(pc+4);
  1992 	    return 4;
  1993 	}
  1994     }
  1995 :}
  1996 BT disp {:
  1997     COUNT_INST(I_BT);
  1998     if( sh4_x86.in_delay_slot ) {
  1999 	SLOTILLEGAL();
  2000     } else {
  2001 	sh4vma_t target = disp + pc + 4;
  2002 	JF_label( nottaken );
  2003 	exit_block_rel(target, pc+2 );
  2004 	JMP_TARGET(nottaken);
  2005 	return 2;
  2006     }
  2007 :}
  2008 BT/S disp {:
  2009     COUNT_INST(I_BTS);
  2010     if( sh4_x86.in_delay_slot ) {
  2011 	SLOTILLEGAL();
  2012     } else {
  2013 	sh4_x86.in_delay_slot = DELAY_PC;
  2014 	if( UNTRANSLATABLE(pc+2) ) {
  2015 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2016 	    JF_label(nottaken);
  2017 	    ADDL_imms_r32( disp, REG_EAX );
  2018 	    JMP_TARGET(nottaken);
  2019 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  2020 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  2021 	    exit_block_emu(pc+2);
  2022 	    sh4_x86.branch_taken = TRUE;
  2023 	    return 2;
  2024 	} else {
  2025 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  2026 		CMPL_imms_rbpdisp( 1, R_T );
  2027 		sh4_x86.tstate = TSTATE_E;
  2028 	    }
  2029 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2030 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  2032 	    int save_tstate = sh4_x86.tstate;
  2033 	    sh4_translate_instruction(pc+2);
  2034             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2035 	    exit_block_rel( disp + pc + 4, pc+4 );
  2036 	    // not taken
  2037 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2038 	    sh4_x86.tstate = save_tstate;
  2039 	    sh4_translate_instruction(pc+2);
  2040 	    return 4;
  2041 	}
  2042     }
  2043 :}
  2044 JMP @Rn {:  
  2045     COUNT_INST(I_JMP);
  2046     if( sh4_x86.in_delay_slot ) {
  2047 	SLOTILLEGAL();
  2048     } else {
  2049 	load_reg( REG_ECX, Rn );
  2050 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2051 	sh4_x86.in_delay_slot = DELAY_PC;
  2052 	sh4_x86.branch_taken = TRUE;
  2053 	if( UNTRANSLATABLE(pc+2) ) {
  2054 	    exit_block_emu(pc+2);
  2055 	    return 2;
  2056 	} else {
  2057 	    sh4_translate_instruction(pc+2);
  2058 	    exit_block_newpcset(pc+4);
  2059 	    return 4;
  2060 	}
  2061     }
  2062 :}
  2063 JSR @Rn {:  
  2064     COUNT_INST(I_JSR);
  2065     if( sh4_x86.in_delay_slot ) {
  2066 	SLOTILLEGAL();
  2067     } else {
  2068 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2069 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2070 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2071 	load_reg( REG_ECX, Rn );
  2072 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2073 	sh4_x86.in_delay_slot = DELAY_PC;
  2074 	sh4_x86.branch_taken = TRUE;
  2075 	sh4_x86.tstate = TSTATE_NONE;
  2076 	if( UNTRANSLATABLE(pc+2) ) {
  2077 	    exit_block_emu(pc+2);
  2078 	    return 2;
  2079 	} else {
  2080 	    sh4_translate_instruction(pc+2);
  2081 	    exit_block_newpcset(pc+4);
  2082 	    return 4;
  2083 	}
  2084     }
  2085 :}
  2086 RTE {:  
  2087     COUNT_INST(I_RTE);
  2088     if( sh4_x86.in_delay_slot ) {
  2089 	SLOTILLEGAL();
  2090     } else {
  2091 	check_priv();
  2092 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2093 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2094 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2095 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2096 	sh4_x86.in_delay_slot = DELAY_PC;
  2097 	sh4_x86.fpuen_checked = FALSE;
  2098 	sh4_x86.tstate = TSTATE_NONE;
  2099 	sh4_x86.branch_taken = TRUE;
  2100 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2101 	if( UNTRANSLATABLE(pc+2) ) {
  2102 	    exit_block_emu(pc+2);
  2103 	    return 2;
  2104 	} else {
  2105 	    sh4_translate_instruction(pc+2);
  2106 	    exit_block_newpcset(pc+4);
  2107 	    return 4;
  2108 	}
  2109     }
  2110 :}
  2111 RTS {:  
  2112     COUNT_INST(I_RTS);
  2113     if( sh4_x86.in_delay_slot ) {
  2114 	SLOTILLEGAL();
  2115     } else {
  2116 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2117 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2118 	sh4_x86.in_delay_slot = DELAY_PC;
  2119 	sh4_x86.branch_taken = TRUE;
  2120 	if( UNTRANSLATABLE(pc+2) ) {
  2121 	    exit_block_emu(pc+2);
  2122 	    return 2;
  2123 	} else {
  2124 	    sh4_translate_instruction(pc+2);
  2125 	    exit_block_newpcset(pc+4);
  2126 	    return 4;
  2127 	}
  2128     }
  2129 :}
  2130 TRAPA #imm {:  
  2131     COUNT_INST(I_TRAPA);
  2132     if( sh4_x86.in_delay_slot ) {
  2133 	SLOTILLEGAL();
  2134     } else {
  2135 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2136 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2137 	MOVL_imm32_r32( imm, REG_EAX );
  2138 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2139 	sh4_x86.tstate = TSTATE_NONE;
  2140 	exit_block_pcset(pc+2);
  2141 	sh4_x86.branch_taken = TRUE;
  2142 	return 2;
  2143     }
  2144 :}
  2145 UNDEF {:  
  2146     COUNT_INST(I_UNDEF);
  2147     if( sh4_x86.in_delay_slot ) {
  2148 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
  2149     } else {
  2150 	exit_block_exc(EXC_ILLEGAL, pc);    
  2151 	return 2;
  2152     }
  2153 :}
  2155 CLRMAC {:  
  2156     COUNT_INST(I_CLRMAC);
  2157     XORL_r32_r32(REG_EAX, REG_EAX);
  2158     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2159     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2160     sh4_x86.tstate = TSTATE_NONE;
  2161 :}
  2162 CLRS {:
  2163     COUNT_INST(I_CLRS);
  2164     CLC();
  2165     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2166     sh4_x86.tstate = TSTATE_NONE;
  2167 :}
  2168 CLRT {:  
  2169     COUNT_INST(I_CLRT);
  2170     CLC();
  2171     SETC_t();
  2172     sh4_x86.tstate = TSTATE_C;
  2173 :}
  2174 SETS {:  
  2175     COUNT_INST(I_SETS);
  2176     STC();
  2177     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2178     sh4_x86.tstate = TSTATE_NONE;
  2179 :}
  2180 SETT {:  
  2181     COUNT_INST(I_SETT);
  2182     STC();
  2183     SETC_t();
  2184     sh4_x86.tstate = TSTATE_C;
  2185 :}
  2187 /* Floating point moves */
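       /* When FPSCR.SZ is set (sh4_x86.double_size), FMOV moves a 64-bit
        * register pair; this is emitted as two 32-bit transfers of the high
        * and low halves. */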
  2188 FMOV FRm, FRn {:  
  2189     COUNT_INST(I_FMOV1);
  2190     check_fpuen();
  2191     if( sh4_x86.double_size ) {
  2192         load_dr0( REG_EAX, FRm );
  2193         load_dr1( REG_ECX, FRm );
  2194         store_dr0( REG_EAX, FRn );
  2195         store_dr1( REG_ECX, FRn );
  2196     } else {
  2197         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2198         store_fr( REG_EAX, FRn );
  2199     }
  2200 :}
  2201 FMOV FRm, @Rn {: 
  2202     COUNT_INST(I_FMOV2);
  2203     check_fpuen();
  2204     load_reg( REG_EAX, Rn );
  2205     if( sh4_x86.double_size ) {
  2206         check_walign64( REG_EAX );
  2207         load_dr0( REG_EDX, FRm );
  2208         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2209         load_reg( REG_EAX, Rn );
  2210         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2211         load_dr1( REG_EDX, FRm );
  2212         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2213     } else {
  2214         check_walign32( REG_EAX );
  2215         load_fr( REG_EDX, FRm );
  2216         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2217     }
  2218     sh4_x86.tstate = TSTATE_NONE;
  2219 :}
  2220 FMOV @Rm, FRn {:  
  2221     COUNT_INST(I_FMOV5);
  2222     check_fpuen();
  2223     load_reg( REG_EAX, Rm );
  2224     if( sh4_x86.double_size ) {
  2225         check_ralign64( REG_EAX );
  2226         MEM_READ_LONG( REG_EAX, REG_EAX );
  2227         store_dr0( REG_EAX, FRn );
  2228         load_reg( REG_EAX, Rm );
  2229         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2230         MEM_READ_LONG( REG_EAX, REG_EAX );
  2231         store_dr1( REG_EAX, FRn );
  2232     } else {
  2233         check_ralign32( REG_EAX );
  2234         MEM_READ_LONG( REG_EAX, REG_EAX );
  2235         store_fr( REG_EAX, FRn );
  2236     }
  2237     sh4_x86.tstate = TSTATE_NONE;
  2238 :}
  2239 FMOV FRm, @-Rn {:  
  2240     COUNT_INST(I_FMOV3);
  2241     check_fpuen();
  2242     load_reg( REG_EAX, Rn );
  2243     if( sh4_x86.double_size ) {
  2244         check_walign64( REG_EAX );
  2245         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2246         load_dr0( REG_EDX, FRm );
  2247         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2248         load_reg( REG_EAX, Rn );
  2249         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2250         load_dr1( REG_EDX, FRm );
  2251         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2252         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2253     } else {
  2254         check_walign32( REG_EAX );
  2255         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2256         load_fr( REG_EDX, FRm );
  2257         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2258         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2259     }
  2260     sh4_x86.tstate = TSTATE_NONE;
  2261 :}
  2262 FMOV @Rm+, FRn {:
  2263     COUNT_INST(I_FMOV6);
  2264     check_fpuen();
  2265     load_reg( REG_EAX, Rm );
  2266     if( sh4_x86.double_size ) {
  2267         check_ralign64( REG_EAX );
  2268         MEM_READ_LONG( REG_EAX, REG_EAX );
  2269         store_dr0( REG_EAX, FRn );
  2270         load_reg( REG_EAX, Rm );
  2271         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2272         MEM_READ_LONG( REG_EAX, REG_EAX );
  2273         store_dr1( REG_EAX, FRn );
  2274         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2275     } else {
  2276         check_ralign32( REG_EAX );
  2277         MEM_READ_LONG( REG_EAX, REG_EAX );
  2278         store_fr( REG_EAX, FRn );
  2279         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2280     }
  2281     sh4_x86.tstate = TSTATE_NONE;
  2282 :}
  2283 FMOV FRm, @(R0, Rn) {:  
  2284     COUNT_INST(I_FMOV4);
  2285     check_fpuen();
  2286     load_reg( REG_EAX, Rn );
  2287     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2288     if( sh4_x86.double_size ) {
  2289         check_walign64( REG_EAX );
  2290         load_dr0( REG_EDX, FRm );
  2291         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2292         load_reg( REG_EAX, Rn );
  2293         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2294         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2295         load_dr1( REG_EDX, FRm );
  2296         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2297     } else {
  2298         check_walign32( REG_EAX );
  2299         load_fr( REG_EDX, FRm );
  2300         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2301     }
  2302     sh4_x86.tstate = TSTATE_NONE;
  2303 :}
  2304 FMOV @(R0, Rm), FRn {:  
  2305     COUNT_INST(I_FMOV7);
  2306     check_fpuen();
  2307     load_reg( REG_EAX, Rm );
  2308     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2309     if( sh4_x86.double_size ) {
  2310         check_ralign64( REG_EAX );
  2311         MEM_READ_LONG( REG_EAX, REG_EAX );
  2312         store_dr0( REG_EAX, FRn );
  2313         load_reg( REG_EAX, Rm );
  2314         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2315         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2316         MEM_READ_LONG( REG_EAX, REG_EAX );
  2317         store_dr1( REG_EAX, FRn );
  2318     } else {
  2319         check_ralign32( REG_EAX );
  2320         MEM_READ_LONG( REG_EAX, REG_EAX );
  2321         store_fr( REG_EAX, FRn );
  2322     }
  2323     sh4_x86.tstate = TSTATE_NONE;
  2324 :}
  2325 FLDI0 FRn {:  /* IFF PR=0 */
  2326     COUNT_INST(I_FLDI0);
  2327     check_fpuen();
  2328     if( sh4_x86.double_prec == 0 ) {
  2329         XORL_r32_r32( REG_EAX, REG_EAX );
  2330         store_fr( REG_EAX, FRn );
  2331     }
  2332     sh4_x86.tstate = TSTATE_NONE;
  2333 :}
  2334 FLDI1 FRn {:  /* IFF PR=0 */
  2335     COUNT_INST(I_FLDI1);
  2336     check_fpuen();
  2337     if( sh4_x86.double_prec == 0 ) {
  2338         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2339         store_fr( REG_EAX, FRn );
  2340     }
  2341 :}
  2343 FLOAT FPUL, FRn {:  
  2344     COUNT_INST(I_FLOAT);
  2345     check_fpuen();
  2346     FILD_rbpdisp(R_FPUL);
  2347     if( sh4_x86.double_prec ) {
  2348         pop_dr( FRn );
  2349     } else {
  2350         pop_fr( FRn );
  2351     }
  2352 :}
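       /* FTRC saturates as on the real SH4: values above max_int or below
        * min_int are clamped to the corresponding limit; in-range values are
        * converted with the x87 rounding mode temporarily forced to
        * truncation (trunc_fcw), restoring save_fcw afterwards. */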
  2353 FTRC FRm, FPUL {:  
  2354     COUNT_INST(I_FTRC);
  2355     check_fpuen();
  2356     if( sh4_x86.double_prec ) {
  2357         push_dr( FRm );
  2358     } else {
  2359         push_fr( FRm );
  2360     }
  2361     MOVP_immptr_rptr( &max_int, REG_ECX );
  2362     FILD_r32disp( REG_ECX, 0 );
  2363     FCOMIP_st(1);
  2364     JNA_label( sat );
  2365     MOVP_immptr_rptr( &min_int, REG_ECX );
  2366     FILD_r32disp( REG_ECX, 0 );
  2367     FCOMIP_st(1);              
  2368     JAE_label( sat2 );            
  2369     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2370     FNSTCW_r32disp( REG_EAX, 0 );
  2371     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2372     FLDCW_r32disp( REG_EDX, 0 );
  2373     FISTP_rbpdisp(R_FPUL);             
  2374     FLDCW_r32disp( REG_EAX, 0 );
  2375     JMP_label(end);             
  2377     JMP_TARGET(sat);
  2378     JMP_TARGET(sat2);
  2379     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2380     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2381     FPOP_st();
  2382     JMP_TARGET(end);
  2383     sh4_x86.tstate = TSTATE_NONE;
  2384 :}
  2385 FLDS FRm, FPUL {:  
  2386     COUNT_INST(I_FLDS);
  2387     check_fpuen();
  2388     load_fr( REG_EAX, FRm );
  2389     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2390 :}
  2391 FSTS FPUL, FRn {:  
  2392     COUNT_INST(I_FSTS);
  2393     check_fpuen();
  2394     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2395     store_fr( REG_EAX, FRn );
  2396 :}
  2397 FCNVDS FRm, FPUL {:  
  2398     COUNT_INST(I_FCNVDS);
  2399     check_fpuen();
  2400     if( sh4_x86.double_prec ) {
  2401         push_dr( FRm );
  2402         pop_fpul();
  2403     }
  2404 :}
  2405 FCNVSD FPUL, FRn {:  
  2406     COUNT_INST(I_FCNVSD);
  2407     check_fpuen();
  2408     if( sh4_x86.double_prec ) {
  2409         push_fpul();
  2410         pop_dr( FRn );
  2411     }
  2412 :}
  2414 /* Floating point instructions */
  2415 FABS FRn {:  
  2416     COUNT_INST(I_FABS);
  2417     check_fpuen();
  2418     if( sh4_x86.double_prec ) {
  2419         push_dr(FRn);
  2420         FABS_st0();
  2421         pop_dr(FRn);
  2422     } else {
  2423         push_fr(FRn);
  2424         FABS_st0();
  2425         pop_fr(FRn);
  2426     }
  2427 :}
  2428 FADD FRm, FRn {:  
  2429     COUNT_INST(I_FADD);
  2430     check_fpuen();
  2431     if( sh4_x86.double_prec ) {
  2432         push_dr(FRm);
  2433         push_dr(FRn);
  2434         FADDP_st(1);
  2435         pop_dr(FRn);
  2436     } else {
  2437         push_fr(FRm);
  2438         push_fr(FRn);
  2439         FADDP_st(1);
  2440         pop_fr(FRn);
  2441     }
  2442 :}
  2443 FDIV FRm, FRn {:  
  2444     COUNT_INST(I_FDIV);
  2445     check_fpuen();
  2446     if( sh4_x86.double_prec ) {
  2447         push_dr(FRn);
  2448         push_dr(FRm);
  2449         FDIVP_st(1);
  2450         pop_dr(FRn);
  2451     } else {
  2452         push_fr(FRn);
  2453         push_fr(FRm);
  2454         FDIVP_st(1);
  2455         pop_fr(FRn);
  2456     }
  2457 :}
  2458 FMAC FR0, FRm, FRn {:  
  2459     COUNT_INST(I_FMAC);
  2460     check_fpuen();
  2461     if( sh4_x86.double_prec ) {
  2462         push_dr( 0 );
  2463         push_dr( FRm );
  2464         FMULP_st(1);
  2465         push_dr( FRn );
  2466         FADDP_st(1);
  2467         pop_dr( FRn );
  2468     } else {
  2469         push_fr( 0 );
  2470         push_fr( FRm );
  2471         FMULP_st(1);
  2472         push_fr( FRn );
  2473         FADDP_st(1);
  2474         pop_fr( FRn );
  2475     }
  2476 :}
  2478 FMUL FRm, FRn {:  
  2479     COUNT_INST(I_FMUL);
  2480     check_fpuen();
  2481     if( sh4_x86.double_prec ) {
  2482         push_dr(FRm);
  2483         push_dr(FRn);
  2484         FMULP_st(1);
  2485         pop_dr(FRn);
  2486     } else {
  2487         push_fr(FRm);
  2488         push_fr(FRn);
  2489         FMULP_st(1);
  2490         pop_fr(FRn);
  2491     }
  2492 :}
  2493 FNEG FRn {:  
  2494     COUNT_INST(I_FNEG);
  2495     check_fpuen();
  2496     if( sh4_x86.double_prec ) {
  2497         push_dr(FRn);
  2498         FCHS_st0();
  2499         pop_dr(FRn);
  2500     } else {
  2501         push_fr(FRn);
  2502         FCHS_st0();
  2503         pop_fr(FRn);
  2504     }
  2505 :}
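       /* Note: FSRRA is emitted as a full-precision 1/sqrt(FRn) via
        * FSQRT+FDIV; the hardware's lower-precision approximation is not
        * modelled here. */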
  2506 FSRRA FRn {:  
  2507     COUNT_INST(I_FSRRA);
  2508     check_fpuen();
  2509     if( sh4_x86.double_prec == 0 ) {
  2510         FLD1_st0();
  2511         push_fr(FRn);
  2512         FSQRT_st0();
  2513         FDIVP_st(1);
  2514         pop_fr(FRn);
  2515     }
  2516 :}
  2517 FSQRT FRn {:  
  2518     COUNT_INST(I_FSQRT);
  2519     check_fpuen();
  2520     if( sh4_x86.double_prec ) {
  2521         push_dr(FRn);
  2522         FSQRT_st0();
  2523         pop_dr(FRn);
  2524     } else {
  2525         push_fr(FRn);
  2526         FSQRT_st0();
  2527         pop_fr(FRn);
  2528     }
  2529 :}
  2530 FSUB FRm, FRn {:  
  2531     COUNT_INST(I_FSUB);
  2532     check_fpuen();
  2533     if( sh4_x86.double_prec ) {
  2534         push_dr(FRn);
  2535         push_dr(FRm);
  2536         FSUBP_st(1);
  2537         pop_dr(FRn);
  2538     } else {
  2539         push_fr(FRn);
  2540         push_fr(FRm);
  2541         FSUBP_st(1);
  2542         pop_fr(FRn);
  2543     }
  2544 :}
  2546 FCMP/EQ FRm, FRn {:  
  2547     COUNT_INST(I_FCMPEQ);
  2548     check_fpuen();
  2549     if( sh4_x86.double_prec ) {
  2550         push_dr(FRm);
  2551         push_dr(FRn);
  2552     } else {
  2553         push_fr(FRm);
  2554         push_fr(FRn);
  2555     }
  2556     FCOMIP_st(1);
  2557     SETE_t();
  2558     FPOP_st();
  2559     sh4_x86.tstate = TSTATE_E;
  2560 :}
  2561 FCMP/GT FRm, FRn {:  
  2562     COUNT_INST(I_FCMPGT);
  2563     check_fpuen();
  2564     if( sh4_x86.double_prec ) {
  2565         push_dr(FRm);
  2566         push_dr(FRn);
  2567     } else {
  2568         push_fr(FRm);
  2569         push_fr(FRn);
  2570     }
  2571     FCOMIP_st(1);
  2572     SETA_t();
  2573     FPOP_st();
  2574     sh4_x86.tstate = TSTATE_A;
  2575 :}
  2577 FSCA FPUL, FRn {:  
  2578     COUNT_INST(I_FSCA);
  2579     check_fpuen();
  2580     if( sh4_x86.double_prec == 0 ) {
  2581         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2582         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2583         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2584     }
  2585     sh4_x86.tstate = TSTATE_NONE;
  2586 :}
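       /* FIPR: 4-element dot product of FVm and FVn. The SSE3 path uses MULPS
        * plus two HADDPS passes; the fallback is a scalar x87
        * multiply-accumulate chain. Both store to the same physical slot -
        * the raw +2 offset vs the +3 passed to pop_fr() reflects the
        * word-swapped in-memory FR layout. */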
  2587 FIPR FVm, FVn {:  
  2588     COUNT_INST(I_FIPR);
  2589     check_fpuen();
  2590     if( sh4_x86.double_prec == 0 ) {
  2591         if( sh4_x86.sse3_enabled ) {
  2592             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2593             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2594             HADDPS_xmm_xmm( 4, 4 ); 
  2595             HADDPS_xmm_xmm( 4, 4 );
  2596             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2597         } else {
  2598             push_fr( FVm<<2 );
  2599             push_fr( FVn<<2 );
  2600             FMULP_st(1);
  2601             push_fr( (FVm<<2)+1);
  2602             push_fr( (FVn<<2)+1);
  2603             FMULP_st(1);
  2604             FADDP_st(1);
  2605             push_fr( (FVm<<2)+2);
  2606             push_fr( (FVn<<2)+2);
  2607             FMULP_st(1);
  2608             FADDP_st(1);
  2609             push_fr( (FVm<<2)+3);
  2610             push_fr( (FVn<<2)+3);
  2611             FMULP_st(1);
  2612             FADDP_st(1);
  2613             pop_fr( (FVn<<2)+3);
  2614         }
  2615     }
  2616 :}
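       /* FTRV: multiply vector FVn by XMTRX. With SSE3 the four matrix
        * columns are loaded into xmm0-3, each element of FVn is broadcast
        * across a register (MOVSLDUP/MOVSHDUP then MOVLHPS/MOVHLPS), and the
        * scaled columns are summed; without SSE3 it simply calls the
        * sh4_ftrv() helper. */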
  2617 FTRV XMTRX, FVn {:  
  2618     COUNT_INST(I_FTRV);
  2619     check_fpuen();
  2620     if( sh4_x86.double_prec == 0 ) {
  2621         if( sh4_x86.sse3_enabled ) {
  2622             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2623             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2624             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2625             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2627             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2628             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2629             MOV_xmm_xmm( 4, 6 );
  2630             MOV_xmm_xmm( 5, 7 );
  2631             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2632             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2633             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2634             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2635             MULPS_xmm_xmm( 0, 4 );
  2636             MULPS_xmm_xmm( 1, 5 );
  2637             MULPS_xmm_xmm( 2, 6 );
  2638             MULPS_xmm_xmm( 3, 7 );
  2639             ADDPS_xmm_xmm( 5, 4 );
  2640             ADDPS_xmm_xmm( 7, 6 );
  2641             ADDPS_xmm_xmm( 6, 4 );
  2642             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2643         } else {
  2644             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2645             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2646         }
  2647     }
  2648     sh4_x86.tstate = TSTATE_NONE;
  2649 :}
  2651 FRCHG {:  
  2652     COUNT_INST(I_FRCHG);
  2653     check_fpuen();
  2654     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2655     CALL_ptr( sh4_switch_fr_banks );
  2656     sh4_x86.tstate = TSTATE_NONE;
  2657 :}
  2658 FSCHG {:  
  2659     COUNT_INST(I_FSCHG);
  2660     check_fpuen();
  2661     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2662     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2663     sh4_x86.tstate = TSTATE_NONE;
  2664     sh4_x86.double_size = !sh4_x86.double_size;
  2665     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2666 :}
  2668 /* Processor control instructions */
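       /* A write to SR can change the register bank, privilege level and
        * interrupt mask, so it goes through sh4_write_sr() and terminates the
        * block with the translation mode marked unknown. */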
  2669 LDC Rm, SR {:
  2670     COUNT_INST(I_LDCSR);
  2671     if( sh4_x86.in_delay_slot ) {
  2672 	SLOTILLEGAL();
  2673     } else {
  2674 	check_priv();
  2675 	load_reg( REG_EAX, Rm );
  2676 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2677 	sh4_x86.fpuen_checked = FALSE;
  2678 	sh4_x86.tstate = TSTATE_NONE;
  2679 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2680 	return 2;
  2681     }
  2682 :}
  2683 LDC Rm, GBR {: 
  2684     COUNT_INST(I_LDC);
  2685     load_reg( REG_EAX, Rm );
  2686     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2687 :}
  2688 LDC Rm, VBR {:  
  2689     COUNT_INST(I_LDC);
  2690     check_priv();
  2691     load_reg( REG_EAX, Rm );
  2692     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2693     sh4_x86.tstate = TSTATE_NONE;
  2694 :}
  2695 LDC Rm, SSR {:  
  2696     COUNT_INST(I_LDC);
  2697     check_priv();
  2698     load_reg( REG_EAX, Rm );
  2699     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2700     sh4_x86.tstate = TSTATE_NONE;
  2701 :}
  2702 LDC Rm, SGR {:  
  2703     COUNT_INST(I_LDC);
  2704     check_priv();
  2705     load_reg( REG_EAX, Rm );
  2706     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2707     sh4_x86.tstate = TSTATE_NONE;
  2708 :}
  2709 LDC Rm, SPC {:  
  2710     COUNT_INST(I_LDC);
  2711     check_priv();
  2712     load_reg( REG_EAX, Rm );
  2713     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2714     sh4_x86.tstate = TSTATE_NONE;
  2715 :}
  2716 LDC Rm, DBR {:  
  2717     COUNT_INST(I_LDC);
  2718     check_priv();
  2719     load_reg( REG_EAX, Rm );
  2720     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2721     sh4_x86.tstate = TSTATE_NONE;
  2722 :}
  2723 LDC Rm, Rn_BANK {:  
  2724     COUNT_INST(I_LDC);
  2725     check_priv();
  2726     load_reg( REG_EAX, Rm );
  2727     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2728     sh4_x86.tstate = TSTATE_NONE;
  2729 :}
  2730 LDC.L @Rm+, GBR {:  
  2731     COUNT_INST(I_LDCM);
  2732     load_reg( REG_EAX, Rm );
  2733     check_ralign32( REG_EAX );
  2734     MEM_READ_LONG( REG_EAX, REG_EAX );
  2735     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2736     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2737     sh4_x86.tstate = TSTATE_NONE;
  2738 :}
  2739 LDC.L @Rm+, SR {:
  2740     COUNT_INST(I_LDCSRM);
  2741     if( sh4_x86.in_delay_slot ) {
  2742 	SLOTILLEGAL();
  2743     } else {
  2744 	check_priv();
  2745 	load_reg( REG_EAX, Rm );
  2746 	check_ralign32( REG_EAX );
  2747 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2748 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2749 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2750 	sh4_x86.fpuen_checked = FALSE;
  2751 	sh4_x86.tstate = TSTATE_NONE;
  2752 	sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2753 	return 2;
  2754     }
  2755 :}
  2756 LDC.L @Rm+, VBR {:  
  2757     COUNT_INST(I_LDCM);
  2758     check_priv();
  2759     load_reg( REG_EAX, Rm );
  2760     check_ralign32( REG_EAX );
  2761     MEM_READ_LONG( REG_EAX, REG_EAX );
  2762     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2763     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2764     sh4_x86.tstate = TSTATE_NONE;
  2765 :}
  2766 LDC.L @Rm+, SSR {:
  2767     COUNT_INST(I_LDCM);
  2768     check_priv();
  2769     load_reg( REG_EAX, Rm );
  2770     check_ralign32( REG_EAX );
  2771     MEM_READ_LONG( REG_EAX, REG_EAX );
  2772     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2773     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2774     sh4_x86.tstate = TSTATE_NONE;
  2775 :}
  2776 LDC.L @Rm+, SGR {:  
  2777     COUNT_INST(I_LDCM);
  2778     check_priv();
  2779     load_reg( REG_EAX, Rm );
  2780     check_ralign32( REG_EAX );
  2781     MEM_READ_LONG( REG_EAX, REG_EAX );
  2782     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2783     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2784     sh4_x86.tstate = TSTATE_NONE;
  2785 :}
  2786 LDC.L @Rm+, SPC {:  
  2787     COUNT_INST(I_LDCM);
  2788     check_priv();
  2789     load_reg( REG_EAX, Rm );
  2790     check_ralign32( REG_EAX );
  2791     MEM_READ_LONG( REG_EAX, REG_EAX );
  2792     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2793     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2794     sh4_x86.tstate = TSTATE_NONE;
  2795 :}
  2796 LDC.L @Rm+, DBR {:  
  2797     COUNT_INST(I_LDCM);
  2798     check_priv();
  2799     load_reg( REG_EAX, Rm );
  2800     check_ralign32( REG_EAX );
  2801     MEM_READ_LONG( REG_EAX, REG_EAX );
  2802     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2803     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2804     sh4_x86.tstate = TSTATE_NONE;
  2805 :}
  2806 LDC.L @Rm+, Rn_BANK {:  
  2807     COUNT_INST(I_LDCM);
  2808     check_priv();
  2809     load_reg( REG_EAX, Rm );
  2810     check_ralign32( REG_EAX );
  2811     MEM_READ_LONG( REG_EAX, REG_EAX );
  2812     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2813     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2814     sh4_x86.tstate = TSTATE_NONE;
  2815 :}
  2816 LDS Rm, FPSCR {:
  2817     COUNT_INST(I_LDSFPSCR);
  2818     check_fpuen();
  2819     load_reg( REG_EAX, Rm );
  2820     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2821     sh4_x86.tstate = TSTATE_NONE;
  2822     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2823     return 2;
  2824 :}
  2825 LDS.L @Rm+, FPSCR {:  
  2826     COUNT_INST(I_LDSFPSCRM);
  2827     check_fpuen();
  2828     load_reg( REG_EAX, Rm );
  2829     check_ralign32( REG_EAX );
  2830     MEM_READ_LONG( REG_EAX, REG_EAX );
  2831     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2832     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2833     sh4_x86.tstate = TSTATE_NONE;
  2834     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2835     return 2;
  2836 :}
  2837 LDS Rm, FPUL {:  
  2838     COUNT_INST(I_LDS);
  2839     check_fpuen();
  2840     load_reg( REG_EAX, Rm );
  2841     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2842 :}
  2843 LDS.L @Rm+, FPUL {:  
  2844     COUNT_INST(I_LDSM);
  2845     check_fpuen();
  2846     load_reg( REG_EAX, Rm );
  2847     check_ralign32( REG_EAX );
  2848     MEM_READ_LONG( REG_EAX, REG_EAX );
  2849     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2850     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2851     sh4_x86.tstate = TSTATE_NONE;
  2852 :}
  2853 LDS Rm, MACH {: 
  2854     COUNT_INST(I_LDS);
  2855     load_reg( REG_EAX, Rm );
  2856     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2857 :}
  2858 LDS.L @Rm+, MACH {:  
  2859     COUNT_INST(I_LDSM);
  2860     load_reg( REG_EAX, Rm );
  2861     check_ralign32( REG_EAX );
  2862     MEM_READ_LONG( REG_EAX, REG_EAX );
  2863     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2864     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2865     sh4_x86.tstate = TSTATE_NONE;
  2866 :}
  2867 LDS Rm, MACL {:  
  2868     COUNT_INST(I_LDS);
  2869     load_reg( REG_EAX, Rm );
  2870     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2871 :}
  2872 LDS.L @Rm+, MACL {:  
  2873     COUNT_INST(I_LDSM);
  2874     load_reg( REG_EAX, Rm );
  2875     check_ralign32( REG_EAX );
  2876     MEM_READ_LONG( REG_EAX, REG_EAX );
  2877     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2878     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2879     sh4_x86.tstate = TSTATE_NONE;
  2880 :}
  2881 LDS Rm, PR {:  
  2882     COUNT_INST(I_LDS);
  2883     load_reg( REG_EAX, Rm );
  2884     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2885 :}
  2886 LDS.L @Rm+, PR {:  
  2887     COUNT_INST(I_LDSM);
  2888     load_reg( REG_EAX, Rm );
  2889     check_ralign32( REG_EAX );
  2890     MEM_READ_LONG( REG_EAX, REG_EAX );
  2891     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2892     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2893     sh4_x86.tstate = TSTATE_NONE;
  2894 :}
  2895 LDTLB {:  
  2896     COUNT_INST(I_LDTLB);
  2897     CALL_ptr( MMU_ldtlb );
  2898     sh4_x86.tstate = TSTATE_NONE;
  2899 :}
  2900 OCBI @Rn {:
  2901     COUNT_INST(I_OCBI);
  2902 :}
  2903 OCBP @Rn {:
  2904     COUNT_INST(I_OCBP);
  2905 :}
  2906 OCBWB @Rn {:
  2907     COUNT_INST(I_OCBWB);
  2908 :}
  2909 PREF @Rn {:
  2910     COUNT_INST(I_PREF);
  2911     load_reg( REG_EAX, Rn );
  2912     MEM_PREFETCH( REG_EAX );
  2913     sh4_x86.tstate = TSTATE_NONE;
  2914 :}
  2915 SLEEP {: 
  2916     COUNT_INST(I_SLEEP);
  2917     check_priv();
  2918     CALL_ptr( sh4_sleep );
  2919     sh4_x86.tstate = TSTATE_NONE;
  2920     sh4_x86.in_delay_slot = DELAY_NONE;
  2921     return 2;
  2922 :}
  2923 STC SR, Rn {:
  2924     COUNT_INST(I_STCSR);
  2925     check_priv();
  2926     CALL_ptr(sh4_read_sr);
  2927     store_reg( REG_EAX, Rn );
  2928     sh4_x86.tstate = TSTATE_NONE;
  2929 :}
  2930 STC GBR, Rn {:  
  2931     COUNT_INST(I_STC);
  2932     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2933     store_reg( REG_EAX, Rn );
  2934 :}
  2935 STC VBR, Rn {:  
  2936     COUNT_INST(I_STC);
  2937     check_priv();
  2938     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2939     store_reg( REG_EAX, Rn );
  2940     sh4_x86.tstate = TSTATE_NONE;
  2941 :}
  2942 STC SSR, Rn {:  
  2943     COUNT_INST(I_STC);
  2944     check_priv();
  2945     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2946     store_reg( REG_EAX, Rn );
  2947     sh4_x86.tstate = TSTATE_NONE;
  2948 :}
  2949 STC SPC, Rn {:  
  2950     COUNT_INST(I_STC);
  2951     check_priv();
  2952     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2953     store_reg( REG_EAX, Rn );
  2954     sh4_x86.tstate = TSTATE_NONE;
  2955 :}
  2956 STC SGR, Rn {:  
  2957     COUNT_INST(I_STC);
  2958     check_priv();
  2959     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2960     store_reg( REG_EAX, Rn );
  2961     sh4_x86.tstate = TSTATE_NONE;
  2962 :}
  2963 STC DBR, Rn {:  
  2964     COUNT_INST(I_STC);
  2965     check_priv();
  2966     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2967     store_reg( REG_EAX, Rn );
  2968     sh4_x86.tstate = TSTATE_NONE;
  2969 :}
  2970 STC Rm_BANK, Rn {:
  2971     COUNT_INST(I_STC);
  2972     check_priv();
  2973     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  2974     store_reg( REG_EAX, Rn );
  2975     sh4_x86.tstate = TSTATE_NONE;
  2976 :}
  2977 STC.L SR, @-Rn {:
  2978     COUNT_INST(I_STCSRM);
  2979     check_priv();
  2980     CALL_ptr( sh4_read_sr );
  2981     MOVL_r32_r32( REG_EAX, REG_EDX );
  2982     load_reg( REG_EAX, Rn );
  2983     check_walign32( REG_EAX );
  2984     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2985     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2986     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2987     sh4_x86.tstate = TSTATE_NONE;
  2988 :}
  2989 STC.L VBR, @-Rn {:  
  2990     COUNT_INST(I_STCM);
  2991     check_priv();
  2992     load_reg( REG_EAX, Rn );
  2993     check_walign32( REG_EAX );
  2994     ADDL_imms_r32( -4, REG_EAX );
  2995     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  2996     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2997     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2998     sh4_x86.tstate = TSTATE_NONE;
  2999 :}
  3000 STC.L SSR, @-Rn {:  
  3001     COUNT_INST(I_STCM);
  3002     check_priv();
  3003     load_reg( REG_EAX, Rn );
  3004     check_walign32( REG_EAX );
  3005     ADDL_imms_r32( -4, REG_EAX );
  3006     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  3007     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3008     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3009     sh4_x86.tstate = TSTATE_NONE;
  3010 :}
  3011 STC.L SPC, @-Rn {:
  3012     COUNT_INST(I_STCM);
  3013     check_priv();
  3014     load_reg( REG_EAX, Rn );
  3015     check_walign32( REG_EAX );
  3016     ADDL_imms_r32( -4, REG_EAX );
  3017     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  3018     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3019     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3020     sh4_x86.tstate = TSTATE_NONE;
  3021 :}
  3022 STC.L SGR, @-Rn {:  
  3023     COUNT_INST(I_STCM);
  3024     check_priv();
  3025     load_reg( REG_EAX, Rn );
  3026     check_walign32( REG_EAX );
  3027     ADDL_imms_r32( -4, REG_EAX );
  3028     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3029     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3030     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3031     sh4_x86.tstate = TSTATE_NONE;
  3032 :}
  3033 STC.L DBR, @-Rn {:  
  3034     COUNT_INST(I_STCM);
  3035     check_priv();
  3036     load_reg( REG_EAX, Rn );
  3037     check_walign32( REG_EAX );
  3038     ADDL_imms_r32( -4, REG_EAX );
  3039     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3040     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3041     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3042     sh4_x86.tstate = TSTATE_NONE;
  3043 :}
  3044 STC.L Rm_BANK, @-Rn {:  
  3045     COUNT_INST(I_STCM);
  3046     check_priv();
  3047     load_reg( REG_EAX, Rn );
  3048     check_walign32( REG_EAX );
  3049     ADDL_imms_r32( -4, REG_EAX );
  3050     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3051     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3052     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3053     sh4_x86.tstate = TSTATE_NONE;
  3054 :}
  3055 STC.L GBR, @-Rn {:  
  3056     COUNT_INST(I_STCM);
  3057     load_reg( REG_EAX, Rn );
  3058     check_walign32( REG_EAX );
  3059     ADDL_imms_r32( -4, REG_EAX );
  3060     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3061     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3062     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3063     sh4_x86.tstate = TSTATE_NONE;
  3064 :}
  3065 STS FPSCR, Rn {:  
  3066     COUNT_INST(I_STSFPSCR);
  3067     check_fpuen();
  3068     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3069     store_reg( REG_EAX, Rn );
  3070 :}
  3071 STS.L FPSCR, @-Rn {:  
  3072     COUNT_INST(I_STSFPSCRM);
  3073     check_fpuen();
  3074     load_reg( REG_EAX, Rn );
  3075     check_walign32( REG_EAX );
  3076     ADDL_imms_r32( -4, REG_EAX );
  3077     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3078     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3079     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3080     sh4_x86.tstate = TSTATE_NONE;
  3081 :}
  3082 STS FPUL, Rn {:  
  3083     COUNT_INST(I_STS);
  3084     check_fpuen();
  3085     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3086     store_reg( REG_EAX, Rn );
  3087 :}
  3088 STS.L FPUL, @-Rn {:  
  3089     COUNT_INST(I_STSM);
  3090     check_fpuen();
  3091     load_reg( REG_EAX, Rn );
  3092     check_walign32( REG_EAX );
  3093     ADDL_imms_r32( -4, REG_EAX );
  3094     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3095     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3096     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3097     sh4_x86.tstate = TSTATE_NONE;
  3098 :}
  3099 STS MACH, Rn {:  
  3100     COUNT_INST(I_STS);
  3101     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3102     store_reg( REG_EAX, Rn );
  3103 :}
  3104 STS.L MACH, @-Rn {:  
  3105     COUNT_INST(I_STSM);
  3106     load_reg( REG_EAX, Rn );
  3107     check_walign32( REG_EAX );
  3108     ADDL_imms_r32( -4, REG_EAX );
  3109     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3110     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3111     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3112     sh4_x86.tstate = TSTATE_NONE;
  3113 :}
  3114 STS MACL, Rn {:  
  3115     COUNT_INST(I_STS);
  3116     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3117     store_reg( REG_EAX, Rn );
  3118 :}
  3119 STS.L MACL, @-Rn {:  
  3120     COUNT_INST(I_STSM);
  3121     load_reg( REG_EAX, Rn );
  3122     check_walign32( REG_EAX );
  3123     ADDL_imms_r32( -4, REG_EAX );
  3124     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3125     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3126     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3127     sh4_x86.tstate = TSTATE_NONE;
  3128 :}
  3129 STS PR, Rn {:  
  3130     COUNT_INST(I_STS);
  3131     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3132     store_reg( REG_EAX, Rn );
  3133 :}
  3134 STS.L PR, @-Rn {:  
  3135     COUNT_INST(I_STSM);
  3136     load_reg( REG_EAX, Rn );
  3137     check_walign32( REG_EAX );
  3138     ADDL_imms_r32( -4, REG_EAX );
  3139     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3140     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3141     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3142     sh4_x86.tstate = TSTATE_NONE;
  3143 :}
  3145 NOP {: 
  3146     COUNT_INST(I_NOP);
  3147     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  3148 :}
  3149 %%
  3150     sh4_x86.in_delay_slot = DELAY_NONE;
  3151     return 0;
  3152 }
  3155 /**
  3156  * The unwind methods only work if we were compiled with DWARF2 frame
  3157  * information (i.e. -fexceptions); otherwise we fall back to the direct frame scan.
  3158  */
  3159 #ifdef HAVE_EXCEPTIONS
  3160 #include <unwind.h>
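       /* _Unwind_Backtrace() walks the DWARF CFI frames and calls
        * xlat_check_frame() for each one; the walk stops at the first frame
        * whose IP falls inside the translated block, which gives us the
        * native PC. */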
  3162 struct UnwindInfo {
  3163     uintptr_t block_start;
  3164     uintptr_t block_end;
  3165     void *pc;
  3166 };
  3168 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3169 {
  3170     struct UnwindInfo *info = arg;
  3171     void *pc = (void *)_Unwind_GetIP(context);
  3172     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3173         info->pc = pc;
  3174         return _URC_NORMAL_STOP;
  3175     }
  3176     return _URC_NO_REASON;
  3177 }
  3179 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3180 {
  3181     struct _Unwind_Exception exc;
  3182     struct UnwindInfo info;
  3184     info.pc = NULL;
  3185     info.block_start = (uintptr_t)code;
  3186     info.block_end = info.block_start + code_size;
  3187     void *result = NULL;
  3188     _Unwind_Backtrace( xlat_check_frame, &info );
  3189     return info.pc;
  3190 }
  3191 #else
  3192 /* Assume this is an ia32 build - amd64 should always have dwarf information */
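       /* Fall back to walking the saved-EBP chain by hand (up to 8 frames):
        * translated code keeps &sh4r + 128 in %ebp, so the first frame whose
        * saved EBP equals that value was called from the translated block,
        * and its return address (at 4(%eax)) is the native PC we want. */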
  3193 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3194 {
  3195     void *result = NULL;
  3196     __asm__(
  3197         "mov %%ebp, %%eax\n\t"
  3198         "mov $0x8, %%ecx\n\t"
  3199         "mov %1, %%edx\n"
  3200         "frame_loop: test %%eax, %%eax\n\t"
  3201         "je frame_not_found\n\t"
  3202         "cmp (%%eax), %%edx\n\t"
  3203         "je frame_found\n\t"
  3204         "sub $0x1, %%ecx\n\t"
  3205         "je frame_not_found\n\t"
  3206         "movl (%%eax), %%eax\n\t"
  3207         "jmp frame_loop\n"
  3208         "frame_found: movl 0x4(%%eax), %0\n"
  3209         "frame_not_found:"
  3210         : "=r" (result)
  3211         : "r" (((uint8_t *)&sh4r) + 128 )
  3212         : "eax", "ecx", "edx" );
  3213     return result;
  3214 }
  3215 #endif