lxdream.org :: lxdream/src/sh4/sh4x86.in
filename    src/sh4/sh4x86.in
changeset   1125:9dd5dee45db9
prev        1120:7c40a0f687b3
next        1146:76c5d1064262
author      nkeynes
date        Mon Sep 13 10:13:42 2010 +1000 (13 years ago)
permissions -rw-r--r--
last change Implement shadow-execution 'core' to run translator + interpreter side by side (for testing)
/**
 * $Id$
 * 
 * SH4 => x86 translation. This version does no real optimization, it just
 * outputs straight-line x86 code - it mainly exists to provide a baseline
 * to test the optimizing versions against.
 *
 * Copyright (c) 2007 Nathan Keynes.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <assert.h>
#include <math.h>

#ifndef NDEBUG
#define DEBUG_JUMPS 1
#endif

#include "lxdream.h"
#include "sh4/sh4core.h"
#include "sh4/sh4dasm.h"
#include "sh4/sh4trans.h"
#include "sh4/sh4stat.h"
#include "sh4/sh4mmio.h"
#include "sh4/mmu.h"
#include "xlat/xltcache.h"
#include "xlat/x86/x86op.h"
#include "x86dasm/x86dasm.h"
#include "clock.h"

#define DEFAULT_BACKPATCH_SIZE 4096

/* Offset of a reg relative to the sh4r structure */
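/* The generated code addresses sh4r through %ebp/%rbp pointing at sh4r+128
 * (see the "sh4r+128" symbol table entry below), so the -128 bias here keeps
 * the most frequently used fields reachable with a signed 8-bit displacement.
 */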
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
#define R_MACH   REG_OFFSET(mac)+4
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
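/* Note: the FR banks are laid out as host-order 64-bit pairs, which is why
 * single-precision accesses flip the low index bit (^1) while the two halves
 * of a double use the |0x01 / &0x0E split. */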
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

#define SH4_MODE_UNKNOWN -1
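/* A backpatch record ties a fixup location in the generated code back to the
 * SH4 instruction that emitted it. exc_code >= 0 names an SH4 exception to be
 * raised from the recovery stub; the special value -2 instead marks a
 * pointer-sized fixup (the exception-return address passed to the memory
 * access functions). */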
struct backpatch_record {
    uint32_t fixup_offset;
    uint32_t fixup_icount;
    int32_t exc_code;
};

/** 
 * Struct to manage internal translation state. This state is not saved -
 * it is only valid between calls to sh4_translate_begin_block() and
 * sh4_translate_end_block()
 */
struct sh4_x86_state {
    int in_delay_slot;
    uint8_t *code;
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    gboolean branch_taken; /* true if we branched unconditionally */
    gboolean double_prec; /* true if FPU is in double-precision mode */
    gboolean double_size; /* true if FPU is in double-size mode */
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    uint32_t block_start_pc;
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
    int tstate;

    /* mode settings */
    gboolean tlb_on; /* True if tlb translation is active */
    struct mem_region_fn **priv_address_space;
    struct mem_region_fn **user_address_space;

    /* Instrumentation */
    xlat_block_begin_callback_t begin_callback;
    xlat_block_end_callback_t end_callback;
    gboolean fastmem;

    /* Allocated memory for the (block-wide) back-patch list */
    struct backpatch_record *backpatch_list;
    uint32_t backpatch_posn;
    uint32_t backpatch_size;
};

static struct sh4_x86_state sh4_x86;

static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;
static uint32_t save_fcw; /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */

static struct x86_symbol x86_symbol_table[] = {
    { "sh4r+128", ((char *)&sh4r)+128 },
    { "sh4_cpu_period", &sh4_cpu_period },
    { "sh4_address_space", NULL },
    { "sh4_user_address_space", NULL },
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
    { "sh4_write_fpscr", sh4_write_fpscr },
    { "sh4_write_sr", sh4_write_sr },
    { "sh4_read_sr", sh4_read_sr },
    { "sh4_sleep", sh4_sleep },
    { "sh4_fsca", sh4_fsca },
    { "sh4_ftrv", sh4_ftrv },
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
    { "sh4_execute_instruction", sh4_execute_instruction },
    { "signsat48", signsat48 },
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
    { "xlat_get_code", xlat_get_code }
};

gboolean is_sse3_supported()
{
    uint32_t features;
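    /* CPUID function 1: ECX bit 0 (PNI) indicates SSE3 support */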
    __asm__ __volatile__(
        "mov $0x01, %%eax\n\t"
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
    return (features & 1) ? TRUE : FALSE;
}
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
{
    sh4_x86.priv_address_space = priv;
    sh4_x86.user_address_space = user;
    x86_symbol_table[2].ptr = priv;
    x86_symbol_table[3].ptr = user;
}

void sh4_translate_init(void)
{
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
    sh4_x86.begin_callback = NULL;
    sh4_x86.end_callback = NULL;
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
    sh4_x86.fastmem = TRUE;
    sh4_x86.sse3_enabled = is_sse3_supported();
    x86_disasm_init();
    x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
}

void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.begin_callback = begin;
    sh4_x86.end_callback = end;
}

void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}
/**
 * Disassemble the given translated code block and its source SH4 code block
 * side-by-side. The current native pc will be marked if non-null.
 */
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
{
    char buf[256];
    char op[256];

    uintptr_t target_start = (uintptr_t)code, target_pc;
    uintptr_t target_end = target_start + xlat_get_code_size(code);
    uint32_t source_pc = source_start;
    uint32_t source_end = source_pc;
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;

    for( target_pc = target_start; target_pc < target_end;  ) {
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
#if SIZEOF_VOID_P == 8
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#else
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
                      target_pc, op, buf );
#endif
        if( source_recov_table < source_recov_end && 
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
            source_recov_table++;
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
                source_end = source_start + (source_recov_table->sh4_icount)*2;
        }

        if( source_pc < source_end ) {
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
            fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
            source_pc = source_pc2;
        } else {
            fprintf( out, "\n" );
        }

        target_pc = pc2;
    }

    while( source_pc < source_end ) {
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
        fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
        source_pc = source_pc2;
    }
}

static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
{
    int reloc_size = 4;

    if( exc_code == -2 ) {
        reloc_size = sizeof(void *);
    }

    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
        sh4_x86.backpatch_size <<= 1;
        sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
                                          sh4_x86.backpatch_size * sizeof(struct backpatch_record));
        assert( sh4_x86.backpatch_list != NULL );
    }
    if( sh4_x86.in_delay_slot ) {
        fixup_pc -= 2;
    }

    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
        (((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
    sh4_x86.backpatch_posn++;
}

#define TSTATE_NONE -1
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
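/* MARK_JMP8 records the address of the rel8 displacement byte just emitted
 * (as a -1 placeholder); JMP_TARGET then adds the distance to the current
 * output position, completing the forward jump. */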
/* Convenience instructions */
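/* LDC_t() loads sh4r.t into the host carry flag: the CMP leaves CF set only
 * when T == 0, and the CMC then inverts it so that CF == T. */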
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)


#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register 
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )


#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif


/* Exception checks - Note that all exception checks will clobber EAX */

#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

#define UNDEF(ir)
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
 */
#ifdef HAVE_FRAME_ADDRESS
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    } else {
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
        MOVP_immptr_rptr( 0, REG_ARG2 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
    }
    if( value_reg != REG_RESULT1 ) { 
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
    } else {
        if( value_reg != REG_ARG2 ) {
            MOVL_r32_r32( value_reg, REG_ARG2 );
        }
        if( addr_reg != REG_ARG1 ) {
            MOVL_r32_r32( addr_reg, REG_ARG1 );
        }
#if MAX_REG_ARG > 2
        MOVP_immptr_rptr( 0, REG_ARG3 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
#else
        MOVL_imm32_rspdisp( 0, 0 );
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
#endif
    }
}
#else
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
    if( value_reg != REG_RESULT1 ) {
        MOVL_r32_r32( REG_RESULT1, value_reg );
    }
}

static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
{
    decode_address(address_space(), addr_reg);
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
}
#endif

#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;

void sh4_translate_begin_block( sh4addr_t pc ) 
{
    sh4_x86.code = xlat_output;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
}
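
/**
 * Conservative upper bound on the size of the code emitted by
 * sh4_translate_end_block() - the epilogue plus the per-record
 * exception backpatch stubs.
 */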
uint32_t sh4_translate_end_block_size()
{
    if( sh4_x86.backpatch_posn <= 3 ) {
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*24);
    } else {
        return EPILOGUE_SIZE + 72 + (sh4_x86.backpatch_posn-3)*27;
    }
}


/**
 * Embed a breakpoint into the generated code
 */
void sh4_translate_emit_breakpoint( sh4vma_t pc )
{
    MOVL_imm32_r32( pc, REG_EAX );
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
    sh4_x86.tstate = TSTATE_NONE;
}


#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)

/** Offset of xlat_sh4_mode field relative to the code pointer */ 
#define XLAT_SH4_MODE_CODE_OFFSET  (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )

/**
 * Test if the loaded target code pointer in %eax is valid, and if so jump
 * directly into it, bypassing the normal exit.
 */
static void jump_next_block()
{
    TESTP_rptr_rptr(REG_EAX, REG_EAX);
    JE_label(nocode);
    if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
        /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
        MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
        CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    } else {
        CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
    }
    JNE_label(wrongmode);
    LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
    if( sh4_x86.end_callback ) {
        /* Note this does leave the stack out of alignment, but doesn't matter
         * for what we're currently using it for.
         */
        PUSH_r32(REG_EAX);
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        JMP_rptr(REG_EAX);
    }
    JMP_TARGET(nocode); JMP_TARGET(wrongmode);
}

static void exit_block()
{
    emit_epilogue();
    if( sh4_x86.end_callback ) {
        MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
        JMP_rptr(REG_ECX);
    } else {
        RET();
    }
}

/**
 * Exit the block with sh4r.pc already written
 */
void exit_block_pcset( sh4addr_t pc )
{
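    /* Charge sh4_cpu_period cycles per 16-bit instruction in the block
     * (hence the >>1 on the byte offset), then only look up the next block
     * if no event is pending. */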
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block with sh4r.new_pc written with the target pc
 */
void exit_block_newpcset( sh4addr_t pc )
{
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);
    if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
    }

    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}


/**
 * Exit the block to an absolute PC
 */
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    MOVL_imm32_r32( pc, REG_ARG1 );
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
    JBE_label(exitloop);

    if( IS_IN_ICACHE(pc) ) {
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
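        /* The low two bits of a LUT entry are used as flags; mask them off
         * to recover the code pointer before testing it. */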
        ANDP_imms_rptr( -4, REG_EAX );
    } else if( sh4_x86.tlb_on ) {
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
    } else {
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
    }
    jump_next_block();
    JMP_TARGET(exitloop);
    exit_block();
}

/**
 * Exit the block to a relative PC
 */
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
{
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );

    if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
        /* Special case for tight loops - the PC doesn't change, and
         * we already know the target address. Just check events pending before
         * looping.
         */
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
        JCC_cc_prerel(X86_COND_A, backdisp);
    } else {
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
        JBE_label(exitloop2);

        if( IS_IN_ICACHE(pc) ) {
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
            ANDP_imms_rptr( -4, REG_EAX );
        } else if( sh4_x86.tlb_on ) {
            CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
        } else {
            CALL1_ptr_r32(xlat_get_code, REG_ARG1);
        }
        jump_next_block();
        JMP_TARGET(exitloop2);
    }
    exit_block();
}

/**
 * Exit unconditionally with a general exception
 */
void exit_block_exc( int code, sh4addr_t pc )
{
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
    MOVL_imm32_r32( code, REG_ARG1 );
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
    exit_block();
}

/**
 * Embed a call to sh4_execute_instruction for situations that we
 * can't translate (just page-crossing delay slots at the moment).
 * Caller is responsible for setting new_pc before calling this function.
 *
 * Performs:
 *   Set PC = endpc
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
 *   Call sh4_execute_instruction
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
 */
void exit_block_emu( sh4vma_t endpc )
{
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
    ADDL_r32_rbpdisp( REG_ECX, R_PC );

    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );

    CALL_ptr( sh4_execute_instruction );
    exit_block();
}

/**
 * Write the block trailer (exception handling block)
 */
void sh4_translate_end_block( sh4addr_t pc ) {
    if( sh4_x86.branch_taken == FALSE ) {
        // Didn't exit unconditionally already, so write the termination here
        exit_block_rel( pc, pc );
    }
    if( sh4_x86.backpatch_posn != 0 ) {
        unsigned int i;
        // Exception raised - cleanup and exit
        uint8_t *end_ptr = xlat_output;
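        /* Common unwind code for all backpatch stubs: on entry %edx holds
         * the fixup_icount of the faulting instruction. Correct SPC by the
         * corresponding byte offset (2*icount) and charge the elapsed
         * cycles, then exit the block. */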
        MOVL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_r32( REG_EDX, REG_ECX );
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
        MOVL_moffptr_eax( &sh4_cpu_period );
        MULL_r32( REG_EDX );
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
        exit_block();

        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
                } else {
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                }
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            } else {
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
                int rel = end_ptr - xlat_output;
                JMP_prerel(rel);
            }
        }
    }
}
/**
 * Translate a single instruction. Delayed branches are handled specially
 * by translating both the branch and its delay-slot instruction as a
 * single unit.
 *
 * The instruction MUST be in the icache (assert check)
 *
 * @return true if the instruction marks the end of a basic block
 * (eg a branch or an instruction that requires the block to end)
 */
uint32_t sh4_translate_instruction( sh4vma_t pc )
{
    uint32_t ir;
    /* Read instruction from icache */
    assert( IS_IN_ICACHE(pc) );
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);

    if( !sh4_x86.in_delay_slot ) {
        sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
    }

    /* check for breakpoints at this pc */
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
        if( sh4_breakpoints[i].address == pc ) {
            sh4_translate_emit_breakpoint(pc);
            break;
        }
    }
%%
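/* Everything below this point is input to the instruction-decoder generator
 * (gendec): each "PATTERN {: ... :}" rule gives the translation action for
 * one SH4 instruction form, with operand fields such as Rm, Rn, imm and disp
 * bound by the matcher. */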
/* ALU operations */
ADD Rm, Rn {:
    COUNT_INST(I_ADD);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADD #imm, Rn {:  
    COUNT_INST(I_ADDI);
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
ADDC Rm, Rn {:
    COUNT_INST(I_ADDC);
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADCL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ADDV Rm, Rn {:
    COUNT_INST(I_ADDV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ADDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
AND Rm, Rn {:
    COUNT_INST(I_AND);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ANDL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND #imm, R0 {:  
    COUNT_INST(I_ANDI);
    load_reg( REG_EAX, 0 );
    ANDL_imms_r32(imm, REG_EAX); 
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
AND.B #imm, @(R0, GBR) {: 
    COUNT_INST(I_ANDB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp(REG_EAX, 0);
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32(0, REG_EAX);
    ANDL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
CMP/EQ Rm, Rn {:  
    COUNT_INST(I_CMPEQ);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/EQ #imm, R0 {:  
    COUNT_INST(I_CMPEQI);
    load_reg( REG_EAX, 0 );
    CMPL_imms_r32(imm, REG_EAX);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
CMP/GE Rm, Rn {:  
    COUNT_INST(I_CMPGE);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/GT Rm, Rn {: 
    COUNT_INST(I_CMPGT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/HI Rm, Rn {:  
    COUNT_INST(I_CMPHI);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETA_t();
    sh4_x86.tstate = TSTATE_A;
:}
CMP/HS Rm, Rn {: 
    COUNT_INST(I_CMPHS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETAE_t();
    sh4_x86.tstate = TSTATE_AE;
 :}
CMP/PL Rn {: 
    COUNT_INST(I_CMPPL);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETG_t();
    sh4_x86.tstate = TSTATE_G;
:}
CMP/PZ Rn {:  
    COUNT_INST(I_CMPPZ);
    load_reg( REG_EAX, Rn );
    CMPL_imms_r32( 0, REG_EAX );
    SETGE_t();
    sh4_x86.tstate = TSTATE_GE;
:}
CMP/STR Rm, Rn {:  
    COUNT_INST(I_CMPSTR);
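    /* T is set if any of the four corresponding bytes of Rm and Rn are equal:
     * XOR the two registers and test each byte of the result for zero; the
     * final SETE picks up ZF from whichever TEST branched (or fell through). */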
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_ECX, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target1);
    TESTB_r8_r8( REG_AH, REG_AH );
    JE_label(target2);
    SHRL_imm_r32( 16, REG_EAX );
    TESTB_r8_r8( REG_AL, REG_AL );
    JE_label(target3);
    TESTB_r8_r8( REG_AH, REG_AH );
    JMP_TARGET(target1);
    JMP_TARGET(target2);
    JMP_TARGET(target3);
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
DIV0S Rm, Rn {:
    COUNT_INST(I_DIV0S);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHRL_imm_r32( 31, REG_EAX );
    SHRL_imm_r32( 31, REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    CMPL_r32_r32( REG_EAX, REG_ECX );
    SETNE_t();
    sh4_x86.tstate = TSTATE_NE;
:}
DIV0U {:  
    COUNT_INST(I_DIV0U);
    XORL_r32_r32( REG_EAX, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
    MOVL_r32_rbpdisp( REG_EAX, R_M );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_C; // works for DIV1
:}
DIV1 Rm, Rn {:
    COUNT_INST(I_DIV1);
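    /* One step of SH4's non-restoring division: rotate the next dividend bit
     * into Rn through T, add or subtract Rm depending on whether Q == M, then
     * recompute Q and T from the carry out. */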
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULS.L Rm, Rn {:  
    COUNT_INST(I_DMULS);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    IMULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
DMULU.L Rm, Rn {:  
    COUNT_INST(I_DMULU);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32(REG_ECX);
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
    sh4_x86.tstate = TSTATE_NONE;
:}
DT Rn {:  
    COUNT_INST(I_DT);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
EXTS.B Rm, Rn {:  
    COUNT_INST(I_EXTSB);
    load_reg( REG_EAX, Rm );
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTS.W Rm, Rn {:  
    COUNT_INST(I_EXTSW);
    load_reg( REG_EAX, Rm );
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.B Rm, Rn {:  
    COUNT_INST(I_EXTUB);
    load_reg( REG_EAX, Rm );
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
EXTU.W Rm, Rn {:  
    COUNT_INST(I_EXTUW);
    load_reg( REG_EAX, Rm );
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MAC.L @Rm+, @Rn+ {:
    COUNT_INST(I_MACL);
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp(REG_EAX, 0);
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign32( REG_EAX );
        MEM_READ_LONG( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );

    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32(REG_ECX, REG_ECX);
    JE_label( nosat );
    CALL_ptr( signsat48 );
    JMP_TARGET( nosat );
    sh4_x86.tstate = TSTATE_NONE;
:}
MAC.W @Rm+, @Rn+ {:  
    COUNT_INST(I_MACW);
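    /* With S set, MAC.W saturates the result to 32 bits (MACH is set to 1 on
     * overflow); the 48-bit MAC.L case is handled by signsat48() above. */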
    if( Rm == Rn ) {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rm );
        LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
        // Note translate twice in case of page boundaries. Maybe worth
        // adding a page-boundary check to skip the second translation
    } else {
        load_reg( REG_EAX, Rm );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        MOVL_r32_rspdisp( REG_EAX, 0 );
        load_reg( REG_EAX, Rn );
        check_ralign16( REG_EAX );
        MEM_READ_WORD( REG_EAX, REG_EAX );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }

    IMULL_rspdisp( 0 );
    MOVL_rbpdisp_r32( R_S, REG_ECX );
    TESTL_r32_r32( REG_ECX, REG_ECX );
    JE_label( nosat );

    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    JNO_label( end );            // 2
    MOVL_imm32_r32( 1, REG_EDX );         // 5
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
    JS_label( positive );        // 2
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end2);           // 2

    JMP_TARGET(positive);
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
    JMP_label(end3);            // 2

    JMP_TARGET(nosat);
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
    JMP_TARGET(end);
    JMP_TARGET(end2);
    JMP_TARGET(end3);
    sh4_x86.tstate = TSTATE_NONE;
:}
MOVT Rn {:  
    COUNT_INST(I_MOVT);
    MOVL_rbpdisp_r32( R_T, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MUL.L Rm, Rn {:  
    COUNT_INST(I_MULL);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULS.W Rm, Rn {:
    COUNT_INST(I_MULSW);
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
MULU.W Rm, Rn {:  
    COUNT_INST(I_MULUW);
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
    MULL_r32( REG_ECX );
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEG Rm, Rn {:
    COUNT_INST(I_NEG);
    load_reg( REG_EAX, Rm );
    NEGL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
NEGC Rm, Rn {:  
    COUNT_INST(I_NEGC);
    load_reg( REG_EAX, Rm );
    XORL_r32_r32( REG_ECX, REG_ECX );
    LDC_t();
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
NOT Rm, Rn {:  
    COUNT_INST(I_NOT);
    load_reg( REG_EAX, Rm );
    NOTL_r32( REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR Rm, Rn {:  
    COUNT_INST(I_OR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR #imm, R0 {:
    COUNT_INST(I_ORI);
    load_reg( REG_EAX, 0 );
    ORL_imms_r32(imm, REG_EAX);
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
OR.B #imm, @(R0, GBR) {:  
    COUNT_INST(I_ORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    MOVL_rspdisp_r32( 0, REG_EAX );
    ORL_imms_r32(imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
ROTCL Rn {:
    COUNT_INST(I_ROTCL);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTCR Rn {:  
    COUNT_INST(I_ROTCR);
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCRL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTL Rn {:  
    COUNT_INST(I_ROTL);
    load_reg( REG_EAX, Rn );
    ROLL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
ROTR Rn {:  
    COUNT_INST(I_ROTR);
    load_reg( REG_EAX, Rn );
    RORL_imm_r32( 1, REG_EAX );
    store_reg( REG_EAX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* Annoyingly enough, not directly convertible */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptysar);     // 2
    SARL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );  // 3
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLD Rm, Rn {:  
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    NEGL_r32( REG_ECX );      // 2
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    JE_label(emptyshr );
    SHRL_cl_r32( REG_EAX );       // 2
    JMP_label(end);          // 2

    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
    SHLL_cl_r32( REG_EAX );       // 2
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHAL Rn {: 
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHAR Rn {:  
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL Rn {:  
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLL2 Rn {:
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL8 Rn {:  
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLL16 Rn {:  
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR Rn {:  
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
SHLR2 Rn {:  
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR8 Rn {:  
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SHLR16 Rn {:  
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUB Rm, Rn {:  
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
SUBC Rm, Rn {:  
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
SUBV Rm, Rn {:  
    COUNT_INST(I_SUBV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
SWAP.B Rm, Rn {:  
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
    store_reg( REG_EAX, Rn );
:}
SWAP.W Rm, Rn {:  
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    SHLL_imm_r32( 16, REG_ECX );
    SHRL_imm_r32( 16, REG_EAX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
TAS.B @Rn {:  
    COUNT_INST(I_TASB);
    load_reg( REG_EAX, Rn );
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    TESTB_r8_r8( REG_DL, REG_DL );
    SETE_t();
    ORB_imms_r8( 0x80, REG_DL );
    MOVL_rspdisp_r32( 0, REG_EAX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
TST Rm, Rn {:  
    COUNT_INST(I_TST);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    TESTL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST #imm, R0 {:  
    COUNT_INST(I_TSTI);
    load_reg( REG_EAX, 0 );
    TESTL_imms_r32( imm, REG_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
TST.B #imm, @(R0, GBR) {:  
    COUNT_INST(I_TSTB);
    load_reg( REG_EAX, 0);
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    TESTB_imms_r8( imm, REG_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
XOR Rm, Rn {:  
    COUNT_INST(I_XOR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR #imm, R0 {:  
    COUNT_INST(I_XORI);
    load_reg( REG_EAX, 0 );
    XORL_imms_r32( imm, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
XOR.B #imm, @(R0, GBR) {:  
    COUNT_INST(I_XORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
    MOVL_rspdisp_r32( 0, REG_EAX );
    XORL_imms_r32( imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
XTRCT Rm, Rn {:
    COUNT_INST(I_XTRCT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    SHRL_imm_r32( 16, REG_ECX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}

/* Data move instructions */
MOV Rm, Rn {:  
    COUNT_INST(I_MOV);
    load_reg( REG_EAX, Rm );
    store_reg( REG_EAX, Rn );
:}
MOV #imm, Rn {:  
    COUNT_INST(I_MOVI);
    MOVL_imm32_r32( imm, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
MOV.B Rm, @Rn {:  
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @-Rn {:  
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B Rm, @(R0, Rn) {:  
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, GBR) {:  
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B R0, @(disp, Rn) {:  
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm, Rn {:  
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
MOV.B @Rm+, Rn {:  
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    if( Rm != Rn ) {
  1533     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1534     }
  1535     store_reg( REG_EAX, Rn );
  1536     sh4_x86.tstate = TSTATE_NONE;
  1537 :}
  1538 MOV.B @(R0, Rm), Rn {:  
  1539     COUNT_INST(I_MOVB);
  1540     load_reg( REG_EAX, 0 );
  1541     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1542     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1543     store_reg( REG_EAX, Rn );
  1544     sh4_x86.tstate = TSTATE_NONE;
  1545 :}
  1546 MOV.B @(disp, GBR), R0 {:  
  1547     COUNT_INST(I_MOVB);
  1548     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1549     ADDL_imms_r32( disp, REG_EAX );
  1550     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1551     store_reg( REG_EAX, 0 );
  1552     sh4_x86.tstate = TSTATE_NONE;
  1553 :}
  1554 MOV.B @(disp, Rm), R0 {:  
  1555     COUNT_INST(I_MOVB);
  1556     load_reg( REG_EAX, Rm );
  1557     ADDL_imms_r32( disp, REG_EAX );
  1558     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1559     store_reg( REG_EAX, 0 );
  1560     sh4_x86.tstate = TSTATE_NONE;
  1561 :}
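        /* The MOV.L stores below special-case the store queue: addresses in
         * 0xE0000000-0xE3FFFFFF bypass the normal memory subsystem and write
         * straight into sh4r.store_queue, with bits 5:2 of the address
         * selecting the 32-bit slot. */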
  1562 MOV.L Rm, @Rn {:
  1563     COUNT_INST(I_MOVL);
  1564     load_reg( REG_EAX, Rn );
  1565     check_walign32(REG_EAX);
  1566     MOVL_r32_r32( REG_EAX, REG_ECX );
  1567     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1568     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1569     JNE_label( notsq );
  1570     ANDL_imms_r32( 0x3C, REG_EAX );
  1571     load_reg( REG_EDX, Rm );
  1572     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1573     JMP_label(end);
  1574     JMP_TARGET(notsq);
  1575     load_reg( REG_EDX, Rm );
  1576     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1577     JMP_TARGET(end);
  1578     sh4_x86.tstate = TSTATE_NONE;
  1579 :}
  1580 MOV.L Rm, @-Rn {:  
  1581     COUNT_INST(I_MOVL);
  1582     load_reg( REG_EAX, Rn );
  1583     ADDL_imms_r32( -4, REG_EAX );
  1584     check_walign32( REG_EAX );
  1585     load_reg( REG_EDX, Rm );
  1586     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1587     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1588     sh4_x86.tstate = TSTATE_NONE;
  1589 :}
  1590 MOV.L Rm, @(R0, Rn) {:  
  1591     COUNT_INST(I_MOVL);
  1592     load_reg( REG_EAX, 0 );
  1593     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1594     check_walign32( REG_EAX );
  1595     load_reg( REG_EDX, Rm );
  1596     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1597     sh4_x86.tstate = TSTATE_NONE;
  1598 :}
  1599 MOV.L R0, @(disp, GBR) {:  
  1600     COUNT_INST(I_MOVL);
  1601     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1602     ADDL_imms_r32( disp, REG_EAX );
  1603     check_walign32( REG_EAX );
  1604     load_reg( REG_EDX, 0 );
  1605     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1606     sh4_x86.tstate = TSTATE_NONE;
  1607 :}
  1608 MOV.L Rm, @(disp, Rn) {:  
  1609     COUNT_INST(I_MOVL);
  1610     load_reg( REG_EAX, Rn );
  1611     ADDL_imms_r32( disp, REG_EAX );
  1612     check_walign32( REG_EAX );
  1613     MOVL_r32_r32( REG_EAX, REG_ECX );
  1614     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1615     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1616     JNE_label( notsq );
  1617     ANDL_imms_r32( 0x3C, REG_EAX );
  1618     load_reg( REG_EDX, Rm );
  1619     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1620     JMP_label(end);
  1621     JMP_TARGET(notsq);
  1622     load_reg( REG_EDX, Rm );
  1623     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1624     JMP_TARGET(end);
  1625     sh4_x86.tstate = TSTATE_NONE;
  1626 :}
  1627 MOV.L @Rm, Rn {:  
  1628     COUNT_INST(I_MOVL);
  1629     load_reg( REG_EAX, Rm );
  1630     check_ralign32( REG_EAX );
  1631     MEM_READ_LONG( REG_EAX, REG_EAX );
  1632     store_reg( REG_EAX, Rn );
  1633     sh4_x86.tstate = TSTATE_NONE;
  1634 :}
  1635 MOV.L @Rm+, Rn {:  
  1636     COUNT_INST(I_MOVL);
  1637     load_reg( REG_EAX, Rm );
  1638     check_ralign32( REG_EAX );
  1639     MEM_READ_LONG( REG_EAX, REG_EAX );
  1640     if( Rm != Rn ) {
  1641     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  1642     }
  1643     store_reg( REG_EAX, Rn );
  1644     sh4_x86.tstate = TSTATE_NONE;
  1645 :}
  1646 MOV.L @(R0, Rm), Rn {:  
  1647     COUNT_INST(I_MOVL);
  1648     load_reg( REG_EAX, 0 );
  1649     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1650     check_ralign32( REG_EAX );
  1651     MEM_READ_LONG( REG_EAX, REG_EAX );
  1652     store_reg( REG_EAX, Rn );
  1653     sh4_x86.tstate = TSTATE_NONE;
  1654 :}
  1655 MOV.L @(disp, GBR), R0 {:
  1656     COUNT_INST(I_MOVL);
  1657     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1658     ADDL_imms_r32( disp, REG_EAX );
  1659     check_ralign32( REG_EAX );
  1660     MEM_READ_LONG( REG_EAX, REG_EAX );
  1661     store_reg( REG_EAX, 0 );
  1662     sh4_x86.tstate = TSTATE_NONE;
  1663 :}
  1664 MOV.L @(disp, PC), Rn {:  
  1665     COUNT_INST(I_MOVLPC);
  1666     if( sh4_x86.in_delay_slot ) {
  1667 	SLOTILLEGAL();
  1668     } else {
  1669 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1670 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1671 	    // If the target address is in the same page as the code, it's
  1672 	    // pretty safe to just ref it directly and circumvent the whole
  1673 	    // memory subsystem. (this is a big performance win)
  1675 	    // FIXME: There's a corner-case that's not handled here when
  1676 	    // the current code-page is in the ITLB but not in the UTLB.
  1677 	    // (this should generate a TLB miss, although SH4 behaviour needs
  1678 	    // testing to confirm). It's unlikely anything depends on this
  1679 	    // behaviour though.
  1680 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1681 	    MOVL_moffptr_eax( ptr );
  1682 	} else {
  1683 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1684 	    // different virtual address than the translation was done with,
  1685 	    // but we can safely assume that the low bits are the same.
  1686 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1687 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1688 	    MEM_READ_LONG( REG_EAX, REG_EAX );
  1689 	    sh4_x86.tstate = TSTATE_NONE;
  1690 	}
  1691 	store_reg( REG_EAX, Rn );
  1692     }
  1693 :}
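        /* Worked example of the literal-pool address above: with pc = 0x8C0100A2
         * and disp = 8, target = (0x8C0100A2 & 0xFFFFFFFC) + 8 + 4 = 0x8C0100AC.
         * The slow path computes the same address at runtime by adding the
         * block-relative displacement to sh4r.pc instead. */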
  1694 MOV.L @(disp, Rm), Rn {:  
  1695     COUNT_INST(I_MOVL);
  1696     load_reg( REG_EAX, Rm );
  1697     ADDL_imms_r32( disp, REG_EAX );
  1698     check_ralign32( REG_EAX );
  1699     MEM_READ_LONG( REG_EAX, REG_EAX );
  1700     store_reg( REG_EAX, Rn );
  1701     sh4_x86.tstate = TSTATE_NONE;
  1702 :}
  1703 MOV.W Rm, @Rn {:  
  1704     COUNT_INST(I_MOVW);
  1705     load_reg( REG_EAX, Rn );
  1706     check_walign16( REG_EAX );
  1707     load_reg( REG_EDX, Rm );
  1708     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1709     sh4_x86.tstate = TSTATE_NONE;
  1710 :}
  1711 MOV.W Rm, @-Rn {:  
  1712     COUNT_INST(I_MOVW);
  1713     load_reg( REG_EAX, Rn );
  1714     check_walign16( REG_EAX );
  1715     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1716     load_reg( REG_EDX, Rm );
  1717     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1718     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1719     sh4_x86.tstate = TSTATE_NONE;
  1720 :}
  1721 MOV.W Rm, @(R0, Rn) {:  
  1722     COUNT_INST(I_MOVW);
  1723     load_reg( REG_EAX, 0 );
  1724     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1725     check_walign16( REG_EAX );
  1726     load_reg( REG_EDX, Rm );
  1727     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1728     sh4_x86.tstate = TSTATE_NONE;
  1729 :}
  1730 MOV.W R0, @(disp, GBR) {:  
  1731     COUNT_INST(I_MOVW);
  1732     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1733     ADDL_imms_r32( disp, REG_EAX );
  1734     check_walign16( REG_EAX );
  1735     load_reg( REG_EDX, 0 );
  1736     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1737     sh4_x86.tstate = TSTATE_NONE;
  1738 :}
  1739 MOV.W R0, @(disp, Rn) {:  
  1740     COUNT_INST(I_MOVW);
  1741     load_reg( REG_EAX, Rn );
  1742     ADDL_imms_r32( disp, REG_EAX );
  1743     check_walign16( REG_EAX );
  1744     load_reg( REG_EDX, 0 );
  1745     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1746     sh4_x86.tstate = TSTATE_NONE;
  1747 :}
  1748 MOV.W @Rm, Rn {:  
  1749     COUNT_INST(I_MOVW);
  1750     load_reg( REG_EAX, Rm );
  1751     check_ralign16( REG_EAX );
  1752     MEM_READ_WORD( REG_EAX, REG_EAX );
  1753     store_reg( REG_EAX, Rn );
  1754     sh4_x86.tstate = TSTATE_NONE;
  1755 :}
  1756 MOV.W @Rm+, Rn {:  
  1757     COUNT_INST(I_MOVW);
  1758     load_reg( REG_EAX, Rm );
  1759     check_ralign16( REG_EAX );
  1760     MEM_READ_WORD( REG_EAX, REG_EAX );
  1761     if( Rm != Rn ) {
  1762         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
  1763     }
  1764     store_reg( REG_EAX, Rn );
  1765     sh4_x86.tstate = TSTATE_NONE;
  1766 :}
  1767 MOV.W @(R0, Rm), Rn {:  
  1768     COUNT_INST(I_MOVW);
  1769     load_reg( REG_EAX, 0 );
  1770     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1771     check_ralign16( REG_EAX );
  1772     MEM_READ_WORD( REG_EAX, REG_EAX );
  1773     store_reg( REG_EAX, Rn );
  1774     sh4_x86.tstate = TSTATE_NONE;
  1775 :}
  1776 MOV.W @(disp, GBR), R0 {:  
  1777     COUNT_INST(I_MOVW);
  1778     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1779     ADDL_imms_r32( disp, REG_EAX );
  1780     check_ralign16( REG_EAX );
  1781     MEM_READ_WORD( REG_EAX, REG_EAX );
  1782     store_reg( REG_EAX, 0 );
  1783     sh4_x86.tstate = TSTATE_NONE;
  1784 :}
  1785 MOV.W @(disp, PC), Rn {:  
  1786     COUNT_INST(I_MOVW);
  1787     if( sh4_x86.in_delay_slot ) {
  1788 	SLOTILLEGAL();
  1789     } else {
  1790 	// See comments for MOV.L @(disp, PC), Rn
  1791 	uint32_t target = pc + disp + 4;
  1792 	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
  1793 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1794 	    MOVL_moffptr_eax( ptr );
  1795 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1796 	} else {
  1797 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1798 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1799 	    MEM_READ_WORD( REG_EAX, REG_EAX );
  1800 	    sh4_x86.tstate = TSTATE_NONE;
  1801 	}
  1802 	store_reg( REG_EAX, Rn );
  1803     }
  1804 :}
  1805 MOV.W @(disp, Rm), R0 {:  
  1806     COUNT_INST(I_MOVW);
  1807     load_reg( REG_EAX, Rm );
  1808     ADDL_imms_r32( disp, REG_EAX );
  1809     check_ralign16( REG_EAX );
  1810     MEM_READ_WORD( REG_EAX, REG_EAX );
  1811     store_reg( REG_EAX, 0 );
  1812     sh4_x86.tstate = TSTATE_NONE;
  1813 :}
  1814 MOVA @(disp, PC), R0 {:  
  1815     COUNT_INST(I_MOVA);
  1816     if( sh4_x86.in_delay_slot ) {
  1817 	SLOTILLEGAL();
  1818     } else {
  1819 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1820 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1821 	store_reg( REG_ECX, 0 );
  1822 	sh4_x86.tstate = TSTATE_NONE;
  1823     }
  1824 :}
  1825 MOVCA.L R0, @Rn {:  
  1826     COUNT_INST(I_MOVCA);
  1827     load_reg( REG_EAX, Rn );
  1828     check_walign32( REG_EAX );
  1829     load_reg( REG_EDX, 0 );
  1830     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1831     sh4_x86.tstate = TSTATE_NONE;
  1832 :}
  1834 /* Control transfer instructions */
  1835 BF disp {:
  1836     COUNT_INST(I_BF);
  1837     if( sh4_x86.in_delay_slot ) {
  1838 	SLOTILLEGAL();
  1839     } else {
  1840 	sh4vma_t target = disp + pc + 4;
  1841 	JT_label( nottaken );
  1842 	exit_block_rel(target, pc+2 );
  1843 	JMP_TARGET(nottaken);
  1844 	return 2;
  1845     }
  1846 :}
  1847 BF/S disp {:
  1848     COUNT_INST(I_BFS);
  1849     if( sh4_x86.in_delay_slot ) {
  1850 	SLOTILLEGAL();
  1851     } else {
  1852 	sh4_x86.in_delay_slot = DELAY_PC;
  1853 	if( UNTRANSLATABLE(pc+2) ) {
  1854 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1855 	    JT_label(nottaken);
  1856 	    ADDL_imms_r32( disp, REG_EAX );
  1857 	    JMP_TARGET(nottaken);
  1858 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1859 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1860 	    exit_block_emu(pc+2);
  1861 	    sh4_x86.branch_taken = TRUE;
  1862 	    return 2;
  1863 	} else {
  1864 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1865 		CMPL_imms_rbpdisp( 1, R_T );
  1866 		sh4_x86.tstate = TSTATE_E;
  1867 	    }
  1868 	    sh4vma_t target = disp + pc + 4;
  1869 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1870 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1871 	    int save_tstate = sh4_x86.tstate;
  1872 	    sh4_translate_instruction(pc+2);
  1873             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1874 	    exit_block_rel( target, pc+4 );
  1876 	    // not taken
  1877 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1878 	    sh4_x86.tstate = save_tstate;
  1879 	    sh4_translate_instruction(pc+2);
  1880 	    return 4;
  1881 	}
  1882     }
  1883 :}
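        /* BF/S (and BT/S below) emit the delay-slot instruction twice - once on
         * the taken path, once on the fall-through - with a JCC rel32 that jumps
         * over the taken path when the branch is not taken; its displacement is
         * backpatched via *patch once the taken-path code length is known. */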
  1884 BRA disp {:  
  1885     COUNT_INST(I_BRA);
  1886     if( sh4_x86.in_delay_slot ) {
  1887 	SLOTILLEGAL();
  1888     } else {
  1889 	sh4_x86.in_delay_slot = DELAY_PC;
  1890 	sh4_x86.branch_taken = TRUE;
  1891 	if( UNTRANSLATABLE(pc+2) ) {
  1892 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1893 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1894 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1895 	    exit_block_emu(pc+2);
  1896 	    return 2;
  1897 	} else {
  1898 	    sh4_translate_instruction( pc + 2 );
  1899 	    exit_block_rel( disp + pc + 4, pc+4 );
  1900 	    return 4;
  1901 	}
  1902     }
  1903 :}
  1904 BRAF Rn {:  
  1905     COUNT_INST(I_BRAF);
  1906     if( sh4_x86.in_delay_slot ) {
  1907 	SLOTILLEGAL();
  1908     } else {
  1909 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1910 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1911 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1912 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1913 	sh4_x86.in_delay_slot = DELAY_PC;
  1914 	sh4_x86.tstate = TSTATE_NONE;
  1915 	sh4_x86.branch_taken = TRUE;
  1916 	if( UNTRANSLATABLE(pc+2) ) {
  1917 	    exit_block_emu(pc+2);
  1918 	    return 2;
  1919 	} else {
  1920 	    sh4_translate_instruction( pc + 2 );
  1921 	    exit_block_newpcset(pc+4);
  1922 	    return 4;
  1923 	}
  1924     }
  1925 :}
  1926 BSR disp {:  
  1927     COUNT_INST(I_BSR);
  1928     if( sh4_x86.in_delay_slot ) {
  1929 	SLOTILLEGAL();
  1930     } else {
  1931 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1932 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1933 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1934 	sh4_x86.in_delay_slot = DELAY_PC;
  1935 	sh4_x86.branch_taken = TRUE;
  1936 	sh4_x86.tstate = TSTATE_NONE;
  1937 	if( UNTRANSLATABLE(pc+2) ) {
  1938 	    ADDL_imms_r32( disp, REG_EAX );
  1939 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1940 	    exit_block_emu(pc+2);
  1941 	    return 2;
  1942 	} else {
  1943 	    sh4_translate_instruction( pc + 2 );
  1944 	    exit_block_rel( disp + pc + 4, pc+4 );
  1945 	    return 4;
  1946 	}
  1947     }
  1948 :}
  1949 BSRF Rn {:  
  1950     COUNT_INST(I_BSRF);
  1951     if( sh4_x86.in_delay_slot ) {
  1952 	SLOTILLEGAL();
  1953     } else {
  1954 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1955 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1956 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1957 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1958 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1960 	sh4_x86.in_delay_slot = DELAY_PC;
  1961 	sh4_x86.tstate = TSTATE_NONE;
  1962 	sh4_x86.branch_taken = TRUE;
  1963 	if( UNTRANSLATABLE(pc+2) ) {
  1964 	    exit_block_emu(pc+2);
  1965 	    return 2;
  1966 	} else {
  1967 	    sh4_translate_instruction( pc + 2 );
  1968 	    exit_block_newpcset(pc+4);
  1969 	    return 4;
  1970 	}
  1971     }
  1972 :}
  1973 BT disp {:
  1974     COUNT_INST(I_BT);
  1975     if( sh4_x86.in_delay_slot ) {
  1976 	SLOTILLEGAL();
  1977     } else {
  1978 	sh4vma_t target = disp + pc + 4;
  1979 	JF_label( nottaken );
  1980 	exit_block_rel(target, pc+2 );
  1981 	JMP_TARGET(nottaken);
  1982 	return 2;
  1983     }
  1984 :}
  1985 BT/S disp {:
  1986     COUNT_INST(I_BTS);
  1987     if( sh4_x86.in_delay_slot ) {
  1988 	SLOTILLEGAL();
  1989     } else {
  1990 	sh4_x86.in_delay_slot = DELAY_PC;
  1991 	if( UNTRANSLATABLE(pc+2) ) {
  1992 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1993 	    JF_label(nottaken);
  1994 	    ADDL_imms_r32( disp, REG_EAX );
  1995 	    JMP_TARGET(nottaken);
  1996 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1997 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1998 	    exit_block_emu(pc+2);
  1999 	    sh4_x86.branch_taken = TRUE;
  2000 	    return 2;
  2001 	} else {
  2002 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  2003 		CMPL_imms_rbpdisp( 1, R_T );
  2004 		sh4_x86.tstate = TSTATE_E;
  2005 	    }
  2006 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  2007 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  2009 	    int save_tstate = sh4_x86.tstate;
  2010 	    sh4_translate_instruction(pc+2);
  2011             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  2012 	    exit_block_rel( disp + pc + 4, pc+4 );
  2013 	    // not taken
  2014 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  2015 	    sh4_x86.tstate = save_tstate;
  2016 	    sh4_translate_instruction(pc+2);
  2017 	    return 4;
  2018 	}
  2019     }
  2020 :}
  2021 JMP @Rn {:  
  2022     COUNT_INST(I_JMP);
  2023     if( sh4_x86.in_delay_slot ) {
  2024 	SLOTILLEGAL();
  2025     } else {
  2026 	load_reg( REG_ECX, Rn );
  2027 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2028 	sh4_x86.in_delay_slot = DELAY_PC;
  2029 	sh4_x86.branch_taken = TRUE;
  2030 	if( UNTRANSLATABLE(pc+2) ) {
  2031 	    exit_block_emu(pc+2);
  2032 	    return 2;
  2033 	} else {
  2034 	    sh4_translate_instruction(pc+2);
  2035 	    exit_block_newpcset(pc+4);
  2036 	    return 4;
  2037 	}
  2038     }
  2039 :}
  2040 JSR @Rn {:  
  2041     COUNT_INST(I_JSR);
  2042     if( sh4_x86.in_delay_slot ) {
  2043 	SLOTILLEGAL();
  2044     } else {
  2045 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  2046 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  2047 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2048 	load_reg( REG_ECX, Rn );
  2049 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2050 	sh4_x86.in_delay_slot = DELAY_PC;
  2051 	sh4_x86.branch_taken = TRUE;
  2052 	sh4_x86.tstate = TSTATE_NONE;
  2053 	if( UNTRANSLATABLE(pc+2) ) {
  2054 	    exit_block_emu(pc+2);
  2055 	    return 2;
  2056 	} else {
  2057 	    sh4_translate_instruction(pc+2);
  2058 	    exit_block_newpcset(pc+4);
  2059 	    return 4;
  2060 	}
  2061     }
  2062 :}
  2063 RTE {:  
  2064     COUNT_INST(I_RTE);
  2065     if( sh4_x86.in_delay_slot ) {
  2066 	SLOTILLEGAL();
  2067     } else {
  2068 	check_priv();
  2069 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  2070 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2071 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2072 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2073 	sh4_x86.in_delay_slot = DELAY_PC;
  2074 	sh4_x86.fpuen_checked = FALSE;
  2075 	sh4_x86.tstate = TSTATE_NONE;
  2076 	sh4_x86.branch_taken = TRUE;
  2077     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2078 	if( UNTRANSLATABLE(pc+2) ) {
  2079 	    exit_block_emu(pc+2);
  2080 	    return 2;
  2081 	} else {
  2082 	    sh4_translate_instruction(pc+2);
  2083 	    exit_block_newpcset(pc+4);
  2084 	    return 4;
  2085 	}
  2086     }
  2087 :}
  2088 RTS {:  
  2089     COUNT_INST(I_RTS);
  2090     if( sh4_x86.in_delay_slot ) {
  2091 	SLOTILLEGAL();
  2092     } else {
  2093 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  2094 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  2095 	sh4_x86.in_delay_slot = DELAY_PC;
  2096 	sh4_x86.branch_taken = TRUE;
  2097 	if( UNTRANSLATABLE(pc+2) ) {
  2098 	    exit_block_emu(pc+2);
  2099 	    return 2;
  2100 	} else {
  2101 	    sh4_translate_instruction(pc+2);
  2102 	    exit_block_newpcset(pc+4);
  2103 	    return 4;
  2104 	}
  2105     }
  2106 :}
  2107 TRAPA #imm {:  
  2108     COUNT_INST(I_TRAPA);
  2109     if( sh4_x86.in_delay_slot ) {
  2110 	SLOTILLEGAL();
  2111     } else {
  2112 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2113 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2114 	MOVL_imm32_r32( imm, REG_EAX );
  2115 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2116 	sh4_x86.tstate = TSTATE_NONE;
  2117 	exit_block_pcset(pc+2);
  2118 	sh4_x86.branch_taken = TRUE;
  2119 	return 2;
  2120     }
  2121 :}
  2122 UNDEF {:  
  2123     COUNT_INST(I_UNDEF);
  2124     if( sh4_x86.in_delay_slot ) {
  2125 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
  2126     } else {
  2127 	exit_block_exc(EXC_ILLEGAL, pc);    
  2128 	return 2;
  2129     }
  2130 :}
  2132 CLRMAC {:  
  2133     COUNT_INST(I_CLRMAC);
  2134     XORL_r32_r32(REG_EAX, REG_EAX);
  2135     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2136     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2137     sh4_x86.tstate = TSTATE_NONE;
  2138 :}
  2139 CLRS {:
  2140     COUNT_INST(I_CLRS);
  2141     CLC();
  2142     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2143     sh4_x86.tstate = TSTATE_NONE;
  2144 :}
  2145 CLRT {:  
  2146     COUNT_INST(I_CLRT);
  2147     CLC();
  2148     SETC_t();
  2149     sh4_x86.tstate = TSTATE_C;
  2150 :}
  2151 SETS {:  
  2152     COUNT_INST(I_SETS);
  2153     STC();
  2154     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2155     sh4_x86.tstate = TSTATE_NONE;
  2156 :}
  2157 SETT {:  
  2158     COUNT_INST(I_SETT);
  2159     STC();
  2160     SETC_t();
  2161     sh4_x86.tstate = TSTATE_C;
  2162 :}
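        /* The S/T bit ops above route the constant through the host carry flag
         * (CLC/STC followed by SETcc): this lets CLRT/SETT leave
         * sh4_x86.tstate = TSTATE_C, so a following conditional branch can test
         * the carry flag directly instead of reloading T from memory. */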
  2164 /* Floating point moves */
  2165 FMOV FRm, FRn {:  
  2166     COUNT_INST(I_FMOV1);
  2167     check_fpuen();
  2168     if( sh4_x86.double_size ) {
  2169         load_dr0( REG_EAX, FRm );
  2170         load_dr1( REG_ECX, FRm );
  2171         store_dr0( REG_EAX, FRn );
  2172         store_dr1( REG_ECX, FRn );
  2173     } else {
  2174         load_fr( REG_EAX, FRm ); // SZ=0 branch
  2175         store_fr( REG_EAX, FRn );
  2176     }
  2177 :}
  2178 FMOV FRm, @Rn {: 
  2179     COUNT_INST(I_FMOV2);
  2180     check_fpuen();
  2181     load_reg( REG_EAX, Rn );
  2182     if( sh4_x86.double_size ) {
  2183         check_walign64( REG_EAX );
  2184         load_dr0( REG_EDX, FRm );
  2185         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2186         load_reg( REG_EAX, Rn );
  2187         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2188         load_dr1( REG_EDX, FRm );
  2189         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2190     } else {
  2191         check_walign32( REG_EAX );
  2192         load_fr( REG_EDX, FRm );
  2193         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2194     }
  2195     sh4_x86.tstate = TSTATE_NONE;
  2196 :}
  2197 FMOV @Rm, FRn {:  
  2198     COUNT_INST(I_FMOV5);
  2199     check_fpuen();
  2200     load_reg( REG_EAX, Rm );
  2201     if( sh4_x86.double_size ) {
  2202         check_ralign64( REG_EAX );
  2203         MEM_READ_LONG( REG_EAX, REG_EAX );
  2204         store_dr0( REG_EAX, FRn );
  2205         load_reg( REG_EAX, Rm );
  2206         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2207         MEM_READ_LONG( REG_EAX, REG_EAX );
  2208         store_dr1( REG_EAX, FRn );
  2209     } else {
  2210         check_ralign32( REG_EAX );
  2211         MEM_READ_LONG( REG_EAX, REG_EAX );
  2212         store_fr( REG_EAX, FRn );
  2213     }
  2214     sh4_x86.tstate = TSTATE_NONE;
  2215 :}
  2216 FMOV FRm, @-Rn {:  
  2217     COUNT_INST(I_FMOV3);
  2218     check_fpuen();
  2219     load_reg( REG_EAX, Rn );
  2220     if( sh4_x86.double_size ) {
  2221         check_walign64( REG_EAX );
  2222         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2223         load_dr0( REG_EDX, FRm );
  2224         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2225         load_reg( REG_EAX, Rn );
  2226         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2227         load_dr1( REG_EDX, FRm );
  2228         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2229         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2230     } else {
  2231         check_walign32( REG_EAX );
  2232         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2233         load_fr( REG_EDX, FRm );
  2234         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2235         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
  2236     }
  2237     sh4_x86.tstate = TSTATE_NONE;
  2238 :}
  2239 FMOV @Rm+, FRn {:
  2240     COUNT_INST(I_FMOV6);
  2241     check_fpuen();
  2242     load_reg( REG_EAX, Rm );
  2243     if( sh4_x86.double_size ) {
  2244         check_ralign64( REG_EAX );
  2245         MEM_READ_LONG( REG_EAX, REG_EAX );
  2246         store_dr0( REG_EAX, FRn );
  2247         load_reg( REG_EAX, Rm );
  2248         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2249         MEM_READ_LONG( REG_EAX, REG_EAX );
  2250         store_dr1( REG_EAX, FRn );
  2251         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2252     } else {
  2253         check_ralign32( REG_EAX );
  2254         MEM_READ_LONG( REG_EAX, REG_EAX );
  2255         store_fr( REG_EAX, FRn );
  2256         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2257     }
  2258     sh4_x86.tstate = TSTATE_NONE;
  2259 :}
  2260 FMOV FRm, @(R0, Rn) {:  
  2261     COUNT_INST(I_FMOV4);
  2262     check_fpuen();
  2263     load_reg( REG_EAX, Rn );
  2264     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2265     if( sh4_x86.double_size ) {
  2266         check_walign64( REG_EAX );
  2267         load_dr0( REG_EDX, FRm );
  2268         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2269         load_reg( REG_EAX, Rn );
  2270         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2271         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2272         load_dr1( REG_EDX, FRm );
  2273         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2274     } else {
  2275         check_walign32( REG_EAX );
  2276         load_fr( REG_EDX, FRm );
  2277         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
  2278     }
  2279     sh4_x86.tstate = TSTATE_NONE;
  2280 :}
  2281 FMOV @(R0, Rm), FRn {:  
  2282     COUNT_INST(I_FMOV7);
  2283     check_fpuen();
  2284     load_reg( REG_EAX, Rm );
  2285     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2286     if( sh4_x86.double_size ) {
  2287         check_ralign64( REG_EAX );
  2288         MEM_READ_LONG( REG_EAX, REG_EAX );
  2289         store_dr0( REG_EAX, FRn );
  2290         load_reg( REG_EAX, Rm );
  2291         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2292         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2293         MEM_READ_LONG( REG_EAX, REG_EAX );
  2294         store_dr1( REG_EAX, FRn );
  2295     } else {
  2296         check_ralign32( REG_EAX );
  2297         MEM_READ_LONG( REG_EAX, REG_EAX );
  2298         store_fr( REG_EAX, FRn );
  2299     }
  2300     sh4_x86.tstate = TSTATE_NONE;
  2301 :}
  2302 FLDI0 FRn {:  /* IFF PR=0 */
  2303     COUNT_INST(I_FLDI0);
  2304     check_fpuen();
  2305     if( sh4_x86.double_prec == 0 ) {
  2306         XORL_r32_r32( REG_EAX, REG_EAX );
  2307         store_fr( REG_EAX, FRn );
  2308     }
  2309     sh4_x86.tstate = TSTATE_NONE;
  2310 :}
  2311 FLDI1 FRn {:  /* IFF PR=0 */
  2312     COUNT_INST(I_FLDI1);
  2313     check_fpuen();
  2314     if( sh4_x86.double_prec == 0 ) {
  2315         MOVL_imm32_r32( 0x3F800000, REG_EAX );
  2316         store_fr( REG_EAX, FRn );
  2317     }
  2318 :}
  2320 FLOAT FPUL, FRn {:  
  2321     COUNT_INST(I_FLOAT);
  2322     check_fpuen();
  2323     FILD_rbpdisp(R_FPUL);
  2324     if( sh4_x86.double_prec ) {
  2325         pop_dr( FRn );
  2326     } else {
  2327         pop_fr( FRn );
  2328     }
  2329 :}
  2330 FTRC FRm, FPUL {:  
  2331     COUNT_INST(I_FTRC);
  2332     check_fpuen();
  2333     if( sh4_x86.double_prec ) {
  2334         push_dr( FRm );
  2335     } else {
  2336         push_fr( FRm );
  2337     }
  2338     MOVP_immptr_rptr( &max_int, REG_ECX );
  2339     FILD_r32disp( REG_ECX, 0 );
  2340     FCOMIP_st(1);
  2341     JNA_label( sat );
  2342     MOVP_immptr_rptr( &min_int, REG_ECX );
  2343     FILD_r32disp( REG_ECX, 0 );
  2344     FCOMIP_st(1);              
  2345     JAE_label( sat2 );            
  2346     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2347     FNSTCW_r32disp( REG_EAX, 0 );
  2348     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2349     FLDCW_r32disp( REG_EDX, 0 );
  2350     FISTP_rbpdisp(R_FPUL);             
  2351     FLDCW_r32disp( REG_EAX, 0 );
  2352     JMP_label(end);             
  2354     JMP_TARGET(sat);
  2355     JMP_TARGET(sat2);
  2356     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2357     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2358     FPOP_st();
  2359     JMP_TARGET(end);
  2360     sh4_x86.tstate = TSTATE_NONE;
  2361 :}
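        /* FTRC does its own saturation: the two FCOMIP tests against max_int and
         * min_int branch to the clamping path (NaN compares unordered and also
         * lands on the first saturation branch), storing the bound that REG_ECX
         * still points at; in-range values are converted under a temporary FPU
         * control word (trunc_fcw) that forces round-toward-zero, then the
         * saved control word is restored. */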
  2362 FLDS FRm, FPUL {:  
  2363     COUNT_INST(I_FLDS);
  2364     check_fpuen();
  2365     load_fr( REG_EAX, FRm );
  2366     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2367 :}
  2368 FSTS FPUL, FRn {:  
  2369     COUNT_INST(I_FSTS);
  2370     check_fpuen();
  2371     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2372     store_fr( REG_EAX, FRn );
  2373 :}
  2374 FCNVDS FRm, FPUL {:  
  2375     COUNT_INST(I_FCNVDS);
  2376     check_fpuen();
  2377     if( sh4_x86.double_prec ) {
  2378         push_dr( FRm );
  2379         pop_fpul();
  2380     }
  2381 :}
  2382 FCNVSD FPUL, FRn {:  
  2383     COUNT_INST(I_FCNVSD);
  2384     check_fpuen();
  2385     if( sh4_x86.double_prec ) {
  2386         push_fpul();
  2387         pop_dr( FRn );
  2388     }
  2389 :}
  2391 /* Floating point instructions */
  2392 FABS FRn {:  
  2393     COUNT_INST(I_FABS);
  2394     check_fpuen();
  2395     if( sh4_x86.double_prec ) {
  2396         push_dr(FRn);
  2397         FABS_st0();
  2398         pop_dr(FRn);
  2399     } else {
  2400         push_fr(FRn);
  2401         FABS_st0();
  2402         pop_fr(FRn);
  2403     }
  2404 :}
  2405 FADD FRm, FRn {:  
  2406     COUNT_INST(I_FADD);
  2407     check_fpuen();
  2408     if( sh4_x86.double_prec ) {
  2409         push_dr(FRm);
  2410         push_dr(FRn);
  2411         FADDP_st(1);
  2412         pop_dr(FRn);
  2413     } else {
  2414         push_fr(FRm);
  2415         push_fr(FRn);
  2416         FADDP_st(1);
  2417         pop_fr(FRn);
  2418     }
  2419 :}
  2420 FDIV FRm, FRn {:  
  2421     COUNT_INST(I_FDIV);
  2422     check_fpuen();
  2423     if( sh4_x86.double_prec ) {
  2424         push_dr(FRn);
  2425         push_dr(FRm);
  2426         FDIVP_st(1);
  2427         pop_dr(FRn);
  2428     } else {
  2429         push_fr(FRn);
  2430         push_fr(FRm);
  2431         FDIVP_st(1);
  2432         pop_fr(FRn);
  2433     }
  2434 :}
  2435 FMAC FR0, FRm, FRn {:  
  2436     COUNT_INST(I_FMAC);
  2437     check_fpuen();
  2438     if( sh4_x86.double_prec ) {
  2439         push_dr( 0 );
  2440         push_dr( FRm );
  2441         FMULP_st(1);
  2442         push_dr( FRn );
  2443         FADDP_st(1);
  2444         pop_dr( FRn );
  2445     } else {
  2446         push_fr( 0 );
  2447         push_fr( FRm );
  2448         FMULP_st(1);
  2449         push_fr( FRn );
  2450         FADDP_st(1);
  2451         pop_fr( FRn );
  2452     }
  2453 :}
  2455 FMUL FRm, FRn {:  
  2456     COUNT_INST(I_FMUL);
  2457     check_fpuen();
  2458     if( sh4_x86.double_prec ) {
  2459         push_dr(FRm);
  2460         push_dr(FRn);
  2461         FMULP_st(1);
  2462         pop_dr(FRn);
  2463     } else {
  2464         push_fr(FRm);
  2465         push_fr(FRn);
  2466         FMULP_st(1);
  2467         pop_fr(FRn);
  2468     }
  2469 :}
  2470 FNEG FRn {:  
  2471     COUNT_INST(I_FNEG);
  2472     check_fpuen();
  2473     if( sh4_x86.double_prec ) {
  2474         push_dr(FRn);
  2475         FCHS_st0();
  2476         pop_dr(FRn);
  2477     } else {
  2478         push_fr(FRn);
  2479         FCHS_st0();
  2480         pop_fr(FRn);
  2481     }
  2482 :}
  2483 FSRRA FRn {:  
  2484     COUNT_INST(I_FSRRA);
  2485     check_fpuen();
  2486     if( sh4_x86.double_prec == 0 ) {
  2487         FLD1_st0();
  2488         push_fr(FRn);
  2489         FSQRT_st0();
  2490         FDIVP_st(1);
  2491         pop_fr(FRn);
  2492     }
  2493 :}
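        /* FSRRA computes 1/sqrt(FRn) exactly as FLD1; FSQRT; FDIVP rather than
         * with a reciprocal estimate - presumably more accurate than the real
         * hardware instruction, which is itself only an approximation. */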
  2494 FSQRT FRn {:  
  2495     COUNT_INST(I_FSQRT);
  2496     check_fpuen();
  2497     if( sh4_x86.double_prec ) {
  2498         push_dr(FRn);
  2499         FSQRT_st0();
  2500         pop_dr(FRn);
  2501     } else {
  2502         push_fr(FRn);
  2503         FSQRT_st0();
  2504         pop_fr(FRn);
  2505     }
  2506 :}
  2507 FSUB FRm, FRn {:  
  2508     COUNT_INST(I_FSUB);
  2509     check_fpuen();
  2510     if( sh4_x86.double_prec ) {
  2511         push_dr(FRn);
  2512         push_dr(FRm);
  2513         FSUBP_st(1);
  2514         pop_dr(FRn);
  2515     } else {
  2516         push_fr(FRn);
  2517         push_fr(FRm);
  2518         FSUBP_st(1);
  2519         pop_fr(FRn);
  2520     }
  2521 :}
  2523 FCMP/EQ FRm, FRn {:  
  2524     COUNT_INST(I_FCMPEQ);
  2525     check_fpuen();
  2526     if( sh4_x86.double_prec ) {
  2527         push_dr(FRm);
  2528         push_dr(FRn);
  2529     } else {
  2530         push_fr(FRm);
  2531         push_fr(FRn);
  2532     }
  2533     FCOMIP_st(1);
  2534     SETE_t();
  2535     FPOP_st();
  2536     sh4_x86.tstate = TSTATE_E;
  2537 :}
  2538 FCMP/GT FRm, FRn {:  
  2539     COUNT_INST(I_FCMPGT);
  2540     check_fpuen();
  2541     if( sh4_x86.double_prec ) {
  2542         push_dr(FRm);
  2543         push_dr(FRn);
  2544     } else {
  2545         push_fr(FRm);
  2546         push_fr(FRn);
  2547     }
  2548     FCOMIP_st(1);
  2549     SETA_t();
  2550     FPOP_st();
  2551     sh4_x86.tstate = TSTATE_A;
  2552 :}
  2554 FSCA FPUL, FRn {:  
  2555     COUNT_INST(I_FSCA);
  2556     check_fpuen();
  2557     if( sh4_x86.double_prec == 0 ) {
  2558         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2559         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2560         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
  2561     }
  2562     sh4_x86.tstate = TSTATE_NONE;
  2563 :}
  2564 FIPR FVm, FVn {:  
  2565     COUNT_INST(I_FIPR);
  2566     check_fpuen();
  2567     if( sh4_x86.double_prec == 0 ) {
  2568         if( sh4_x86.sse3_enabled ) {
  2569             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2570             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2571             HADDPS_xmm_xmm( 4, 4 ); 
  2572             HADDPS_xmm_xmm( 4, 4 );
  2573             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2574         } else {
  2575             push_fr( FVm<<2 );
  2576             push_fr( FVn<<2 );
  2577             FMULP_st(1);
  2578             push_fr( (FVm<<2)+1);
  2579             push_fr( (FVn<<2)+1);
  2580             FMULP_st(1);
  2581             FADDP_st(1);
  2582             push_fr( (FVm<<2)+2);
  2583             push_fr( (FVn<<2)+2);
  2584             FMULP_st(1);
  2585             FADDP_st(1);
  2586             push_fr( (FVm<<2)+3);
  2587             push_fr( (FVn<<2)+3);
  2588             FMULP_st(1);
  2589             FADDP_st(1);
  2590             pop_fr( (FVn<<2)+3);
  2591         }
  2592     }
  2593 :}
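        /* FIPR via SSE3: one MULPS then two horizontal adds (HADDPS) reduce the
         * four products to a single sum in every lane, and MOVSS writes it back
         * into the FVn vector's result slot; the x87 fallback accumulates the
         * four products serially instead. */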
  2594 FTRV XMTRX, FVn {:  
  2595     COUNT_INST(I_FTRV);
  2596     check_fpuen();
  2597     if( sh4_x86.double_prec == 0 ) {
  2598         if( sh4_x86.sse3_enabled ) {
  2599             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2600             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2601             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2602             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2604             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2605             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2606             MOV_xmm_xmm( 4, 6 );
  2607             MOV_xmm_xmm( 5, 7 );
  2608             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2609             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2610             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2611             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2612             MULPS_xmm_xmm( 0, 4 );
  2613             MULPS_xmm_xmm( 1, 5 );
  2614             MULPS_xmm_xmm( 2, 6 );
  2615             MULPS_xmm_xmm( 3, 7 );
  2616             ADDPS_xmm_xmm( 5, 4 );
  2617             ADDPS_xmm_xmm( 7, 6 );
  2618             ADDPS_xmm_xmm( 6, 4 );
  2619             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2620         } else {
  2621             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
  2622             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
  2623         }
  2624     }
  2625     sh4_x86.tstate = TSTATE_NONE;
  2626 :}
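        /* FTRV via SSE3: the four XMTRX rows are loaded, each element of the
         * input vector is broadcast across a register (MOVSLDUP/MOVSHDUP plus
         * MOVLHPS/MOVHLPS), and four MULPS + three ADDPS form the 4x4
         * matrix-vector product in place. Without SSE3 it falls back to the
         * generic sh4_ftrv() helper. */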
  2628 FRCHG {:  
  2629     COUNT_INST(I_FRCHG);
  2630     check_fpuen();
  2631     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2632     CALL_ptr( sh4_switch_fr_banks );
  2633     sh4_x86.tstate = TSTATE_NONE;
  2634 :}
  2635 FSCHG {:  
  2636     COUNT_INST(I_FSCHG);
  2637     check_fpuen();
  2638     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2639     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2640     sh4_x86.tstate = TSTATE_NONE;
  2641     sh4_x86.double_size = !sh4_x86.double_size;
  2642     sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
  2643 :}
  2645 /* Processor control instructions */
  2646 LDC Rm, SR {:
  2647     COUNT_INST(I_LDCSR);
  2648     if( sh4_x86.in_delay_slot ) {
  2649 	SLOTILLEGAL();
  2650     } else {
  2651 	check_priv();
  2652 	load_reg( REG_EAX, Rm );
  2653 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2654 	sh4_x86.fpuen_checked = FALSE;
  2655 	sh4_x86.tstate = TSTATE_NONE;
  2656     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2657 	return 2;
  2658     }
  2659 :}
  2660 LDC Rm, GBR {: 
  2661     COUNT_INST(I_LDC);
  2662     load_reg( REG_EAX, Rm );
  2663     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2664 :}
  2665 LDC Rm, VBR {:  
  2666     COUNT_INST(I_LDC);
  2667     check_priv();
  2668     load_reg( REG_EAX, Rm );
  2669     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2670     sh4_x86.tstate = TSTATE_NONE;
  2671 :}
  2672 LDC Rm, SSR {:  
  2673     COUNT_INST(I_LDC);
  2674     check_priv();
  2675     load_reg( REG_EAX, Rm );
  2676     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2677     sh4_x86.tstate = TSTATE_NONE;
  2678 :}
  2679 LDC Rm, SGR {:  
  2680     COUNT_INST(I_LDC);
  2681     check_priv();
  2682     load_reg( REG_EAX, Rm );
  2683     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2684     sh4_x86.tstate = TSTATE_NONE;
  2685 :}
  2686 LDC Rm, SPC {:  
  2687     COUNT_INST(I_LDC);
  2688     check_priv();
  2689     load_reg( REG_EAX, Rm );
  2690     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2691     sh4_x86.tstate = TSTATE_NONE;
  2692 :}
  2693 LDC Rm, DBR {:  
  2694     COUNT_INST(I_LDC);
  2695     check_priv();
  2696     load_reg( REG_EAX, Rm );
  2697     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2698     sh4_x86.tstate = TSTATE_NONE;
  2699 :}
  2700 LDC Rm, Rn_BANK {:  
  2701     COUNT_INST(I_LDC);
  2702     check_priv();
  2703     load_reg( REG_EAX, Rm );
  2704     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2705     sh4_x86.tstate = TSTATE_NONE;
  2706 :}
  2707 LDC.L @Rm+, GBR {:  
  2708     COUNT_INST(I_LDCM);
  2709     load_reg( REG_EAX, Rm );
  2710     check_ralign32( REG_EAX );
  2711     MEM_READ_LONG( REG_EAX, REG_EAX );
  2712     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2713     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2714     sh4_x86.tstate = TSTATE_NONE;
  2715 :}
  2716 LDC.L @Rm+, SR {:
  2717     COUNT_INST(I_LDCSRM);
  2718     if( sh4_x86.in_delay_slot ) {
  2719 	SLOTILLEGAL();
  2720     } else {
  2721 	check_priv();
  2722 	load_reg( REG_EAX, Rm );
  2723 	check_ralign32( REG_EAX );
  2724 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2725 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2726 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2727 	sh4_x86.fpuen_checked = FALSE;
  2728 	sh4_x86.tstate = TSTATE_NONE;
  2729     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2730 	return 2;
  2731     }
  2732 :}
  2733 LDC.L @Rm+, VBR {:  
  2734     COUNT_INST(I_LDCM);
  2735     check_priv();
  2736     load_reg( REG_EAX, Rm );
  2737     check_ralign32( REG_EAX );
  2738     MEM_READ_LONG( REG_EAX, REG_EAX );
  2739     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2740     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2741     sh4_x86.tstate = TSTATE_NONE;
  2742 :}
  2743 LDC.L @Rm+, SSR {:
  2744     COUNT_INST(I_LDCM);
  2745     check_priv();
  2746     load_reg( REG_EAX, Rm );
  2747     check_ralign32( REG_EAX );
  2748     MEM_READ_LONG( REG_EAX, REG_EAX );
  2749     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2750     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2751     sh4_x86.tstate = TSTATE_NONE;
  2752 :}
  2753 LDC.L @Rm+, SGR {:  
  2754     COUNT_INST(I_LDCM);
  2755     check_priv();
  2756     load_reg( REG_EAX, Rm );
  2757     check_ralign32( REG_EAX );
  2758     MEM_READ_LONG( REG_EAX, REG_EAX );
  2759     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2760     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2761     sh4_x86.tstate = TSTATE_NONE;
  2762 :}
  2763 LDC.L @Rm+, SPC {:  
  2764     COUNT_INST(I_LDCM);
  2765     check_priv();
  2766     load_reg( REG_EAX, Rm );
  2767     check_ralign32( REG_EAX );
  2768     MEM_READ_LONG( REG_EAX, REG_EAX );
  2769     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2770     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2771     sh4_x86.tstate = TSTATE_NONE;
  2772 :}
  2773 LDC.L @Rm+, DBR {:  
  2774     COUNT_INST(I_LDCM);
  2775     check_priv();
  2776     load_reg( REG_EAX, Rm );
  2777     check_ralign32( REG_EAX );
  2778     MEM_READ_LONG( REG_EAX, REG_EAX );
  2779     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2780     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2781     sh4_x86.tstate = TSTATE_NONE;
  2782 :}
  2783 LDC.L @Rm+, Rn_BANK {:  
  2784     COUNT_INST(I_LDCM);
  2785     check_priv();
  2786     load_reg( REG_EAX, Rm );
  2787     check_ralign32( REG_EAX );
  2788     MEM_READ_LONG( REG_EAX, REG_EAX );
  2789     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2790     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2791     sh4_x86.tstate = TSTATE_NONE;
  2792 :}
  2793 LDS Rm, FPSCR {:
  2794     COUNT_INST(I_LDSFPSCR);
  2795     check_fpuen();
  2796     load_reg( REG_EAX, Rm );
  2797     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2798     sh4_x86.tstate = TSTATE_NONE;
  2799     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2800     return 2;
  2801 :}
  2802 LDS.L @Rm+, FPSCR {:  
  2803     COUNT_INST(I_LDSFPSCRM);
  2804     check_fpuen();
  2805     load_reg( REG_EAX, Rm );
  2806     check_ralign32( REG_EAX );
  2807     MEM_READ_LONG( REG_EAX, REG_EAX );
  2808     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2809     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2810     sh4_x86.tstate = TSTATE_NONE;
  2811     sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
  2812     return 2;
  2813 :}
  2814 LDS Rm, FPUL {:  
  2815     COUNT_INST(I_LDS);
  2816     check_fpuen();
  2817     load_reg( REG_EAX, Rm );
  2818     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2819 :}
  2820 LDS.L @Rm+, FPUL {:  
  2821     COUNT_INST(I_LDSM);
  2822     check_fpuen();
  2823     load_reg( REG_EAX, Rm );
  2824     check_ralign32( REG_EAX );
  2825     MEM_READ_LONG( REG_EAX, REG_EAX );
  2826     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2827     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2828     sh4_x86.tstate = TSTATE_NONE;
  2829 :}
  2830 LDS Rm, MACH {: 
  2831     COUNT_INST(I_LDS);
  2832     load_reg( REG_EAX, Rm );
  2833     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2834 :}
  2835 LDS.L @Rm+, MACH {:  
  2836     COUNT_INST(I_LDSM);
  2837     load_reg( REG_EAX, Rm );
  2838     check_ralign32( REG_EAX );
  2839     MEM_READ_LONG( REG_EAX, REG_EAX );
  2840     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2841     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2842     sh4_x86.tstate = TSTATE_NONE;
  2843 :}
  2844 LDS Rm, MACL {:  
  2845     COUNT_INST(I_LDS);
  2846     load_reg( REG_EAX, Rm );
  2847     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2848 :}
  2849 LDS.L @Rm+, MACL {:  
  2850     COUNT_INST(I_LDSM);
  2851     load_reg( REG_EAX, Rm );
  2852     check_ralign32( REG_EAX );
  2853     MEM_READ_LONG( REG_EAX, REG_EAX );
  2854     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2855     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2856     sh4_x86.tstate = TSTATE_NONE;
  2857 :}
  2858 LDS Rm, PR {:  
  2859     COUNT_INST(I_LDS);
  2860     load_reg( REG_EAX, Rm );
  2861     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2862 :}
  2863 LDS.L @Rm+, PR {:  
  2864     COUNT_INST(I_LDSM);
  2865     load_reg( REG_EAX, Rm );
  2866     check_ralign32( REG_EAX );
  2867     MEM_READ_LONG( REG_EAX, REG_EAX );
  2868     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2869     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2870     sh4_x86.tstate = TSTATE_NONE;
  2871 :}
  2872 LDTLB {:  
  2873     COUNT_INST(I_LDTLB);
  2874     CALL_ptr( MMU_ldtlb );
  2875     sh4_x86.tstate = TSTATE_NONE;
  2876 :}
  2877 OCBI @Rn {:
  2878     COUNT_INST(I_OCBI);
  2879 :}
  2880 OCBP @Rn {:
  2881     COUNT_INST(I_OCBP);
  2882 :}
  2883 OCBWB @Rn {:
  2884     COUNT_INST(I_OCBWB);
  2885 :}
  2886 PREF @Rn {:
  2887     COUNT_INST(I_PREF);
  2888     load_reg( REG_EAX, Rn );
  2889     MEM_PREFETCH( REG_EAX );
  2890     sh4_x86.tstate = TSTATE_NONE;
  2891 :}
  2892 SLEEP {: 
  2893     COUNT_INST(I_SLEEP);
  2894     check_priv();
  2895     CALL_ptr( sh4_sleep );
  2896     sh4_x86.tstate = TSTATE_NONE;
  2897     sh4_x86.in_delay_slot = DELAY_NONE;
  2898     return 2;
  2899 :}
  2900 STC SR, Rn {:
  2901     COUNT_INST(I_STCSR);
  2902     check_priv();
  2903     CALL_ptr(sh4_read_sr);
  2904     store_reg( REG_EAX, Rn );
  2905     sh4_x86.tstate = TSTATE_NONE;
  2906 :}
  2907 STC GBR, Rn {:  
  2908     COUNT_INST(I_STC);
  2909     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2910     store_reg( REG_EAX, Rn );
  2911 :}
  2912 STC VBR, Rn {:  
  2913     COUNT_INST(I_STC);
  2914     check_priv();
  2915     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2916     store_reg( REG_EAX, Rn );
  2917     sh4_x86.tstate = TSTATE_NONE;
  2918 :}
  2919 STC SSR, Rn {:  
  2920     COUNT_INST(I_STC);
  2921     check_priv();
  2922     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2923     store_reg( REG_EAX, Rn );
  2924     sh4_x86.tstate = TSTATE_NONE;
  2925 :}
  2926 STC SPC, Rn {:  
  2927     COUNT_INST(I_STC);
  2928     check_priv();
  2929     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2930     store_reg( REG_EAX, Rn );
  2931     sh4_x86.tstate = TSTATE_NONE;
  2932 :}
  2933 STC SGR, Rn {:  
  2934     COUNT_INST(I_STC);
  2935     check_priv();
  2936     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2937     store_reg( REG_EAX, Rn );
  2938     sh4_x86.tstate = TSTATE_NONE;
  2939 :}
  2940 STC DBR, Rn {:  
  2941     COUNT_INST(I_STC);
  2942     check_priv();
  2943     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2944     store_reg( REG_EAX, Rn );
  2945     sh4_x86.tstate = TSTATE_NONE;
  2946 :}
  2947 STC Rm_BANK, Rn {:
  2948     COUNT_INST(I_STC);
  2949     check_priv();
  2950     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  2951     store_reg( REG_EAX, Rn );
  2952     sh4_x86.tstate = TSTATE_NONE;
  2953 :}
  2954 STC.L SR, @-Rn {:
  2955     COUNT_INST(I_STCSRM);
  2956     check_priv();
  2957     CALL_ptr( sh4_read_sr );
  2958     MOVL_r32_r32( REG_EAX, REG_EDX );
  2959     load_reg( REG_EAX, Rn );
  2960     check_walign32( REG_EAX );
  2961     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2962     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2963     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2964     sh4_x86.tstate = TSTATE_NONE;
  2965 :}
  2966 STC.L VBR, @-Rn {:  
  2967     COUNT_INST(I_STCM);
  2968     check_priv();
  2969     load_reg( REG_EAX, Rn );
  2970     check_walign32( REG_EAX );
  2971     ADDL_imms_r32( -4, REG_EAX );
  2972     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  2973     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2974     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2975     sh4_x86.tstate = TSTATE_NONE;
  2976 :}
  2977 STC.L SSR, @-Rn {:  
  2978     COUNT_INST(I_STCM);
  2979     check_priv();
  2980     load_reg( REG_EAX, Rn );
  2981     check_walign32( REG_EAX );
  2982     ADDL_imms_r32( -4, REG_EAX );
  2983     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  2984     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2985     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2986     sh4_x86.tstate = TSTATE_NONE;
  2987 :}
  2988 STC.L SPC, @-Rn {:
  2989     COUNT_INST(I_STCM);
  2990     check_priv();
  2991     load_reg( REG_EAX, Rn );
  2992     check_walign32( REG_EAX );
  2993     ADDL_imms_r32( -4, REG_EAX );
  2994     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  2995     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2996     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2997     sh4_x86.tstate = TSTATE_NONE;
  2998 :}
  2999 STC.L SGR, @-Rn {:  
  3000     COUNT_INST(I_STCM);
  3001     check_priv();
  3002     load_reg( REG_EAX, Rn );
  3003     check_walign32( REG_EAX );
  3004     ADDL_imms_r32( -4, REG_EAX );
  3005     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  3006     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3007     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3008     sh4_x86.tstate = TSTATE_NONE;
  3009 :}
  3010 STC.L DBR, @-Rn {:  
  3011     COUNT_INST(I_STCM);
  3012     check_priv();
  3013     load_reg( REG_EAX, Rn );
  3014     check_walign32( REG_EAX );
  3015     ADDL_imms_r32( -4, REG_EAX );
  3016     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  3017     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3018     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3019     sh4_x86.tstate = TSTATE_NONE;
  3020 :}
  3021 STC.L Rm_BANK, @-Rn {:  
  3022     COUNT_INST(I_STCM);
  3023     check_priv();
  3024     load_reg( REG_EAX, Rn );
  3025     check_walign32( REG_EAX );
  3026     ADDL_imms_r32( -4, REG_EAX );
  3027     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  3028     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3029     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3030     sh4_x86.tstate = TSTATE_NONE;
  3031 :}
  3032 STC.L GBR, @-Rn {:  
  3033     COUNT_INST(I_STCM);
  3034     load_reg( REG_EAX, Rn );
  3035     check_walign32( REG_EAX );
  3036     ADDL_imms_r32( -4, REG_EAX );
  3037     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  3038     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3039     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3040     sh4_x86.tstate = TSTATE_NONE;
  3041 :}
  3042 STS FPSCR, Rn {:  
  3043     COUNT_INST(I_STSFPSCR);
  3044     check_fpuen();
  3045     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  3046     store_reg( REG_EAX, Rn );
  3047 :}
  3048 STS.L FPSCR, @-Rn {:  
  3049     COUNT_INST(I_STSFPSCRM);
  3050     check_fpuen();
  3051     load_reg( REG_EAX, Rn );
  3052     check_walign32( REG_EAX );
  3053     ADDL_imms_r32( -4, REG_EAX );
  3054     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  3055     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3056     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3057     sh4_x86.tstate = TSTATE_NONE;
  3058 :}
  3059 STS FPUL, Rn {:  
  3060     COUNT_INST(I_STS);
  3061     check_fpuen();
  3062     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  3063     store_reg( REG_EAX, Rn );
  3064 :}
  3065 STS.L FPUL, @-Rn {:  
  3066     COUNT_INST(I_STSM);
  3067     check_fpuen();
  3068     load_reg( REG_EAX, Rn );
  3069     check_walign32( REG_EAX );
  3070     ADDL_imms_r32( -4, REG_EAX );
  3071     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  3072     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3073     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3074     sh4_x86.tstate = TSTATE_NONE;
  3075 :}
  3076 STS MACH, Rn {:  
  3077     COUNT_INST(I_STS);
  3078     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  3079     store_reg( REG_EAX, Rn );
  3080 :}
  3081 STS.L MACH, @-Rn {:  
  3082     COUNT_INST(I_STSM);
  3083     load_reg( REG_EAX, Rn );
  3084     check_walign32( REG_EAX );
  3085     ADDL_imms_r32( -4, REG_EAX );
  3086     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  3087     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3088     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3089     sh4_x86.tstate = TSTATE_NONE;
  3090 :}
  3091 STS MACL, Rn {:  
  3092     COUNT_INST(I_STS);
  3093     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  3094     store_reg( REG_EAX, Rn );
  3095 :}
  3096 STS.L MACL, @-Rn {:  
  3097     COUNT_INST(I_STSM);
  3098     load_reg( REG_EAX, Rn );
  3099     check_walign32( REG_EAX );
  3100     ADDL_imms_r32( -4, REG_EAX );
  3101     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  3102     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3103     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3104     sh4_x86.tstate = TSTATE_NONE;
  3105 :}
  3106 STS PR, Rn {:  
  3107     COUNT_INST(I_STS);
  3108     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3109     store_reg( REG_EAX, Rn );
  3110 :}
  3111 STS.L PR, @-Rn {:  
  3112     COUNT_INST(I_STSM);
  3113     load_reg( REG_EAX, Rn );
  3114     check_walign32( REG_EAX );
  3115     ADDL_imms_r32( -4, REG_EAX );
  3116     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3117     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3118     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3119     sh4_x86.tstate = TSTATE_NONE;
  3120 :}
  3122 NOP {: 
  3123     COUNT_INST(I_NOP);
  3124     /* Do nothing. Well, we could emit an 0x90, but what would really be the point? */ 
  3125 :}
  3126 %%
  3127     sh4_x86.in_delay_slot = DELAY_NONE;
  3128     return 0;
  3129 }
  3132 /**
  3133  * The unwind methods only work if we compiled with DWARF2 frame information
  3134  * (ie -fexceptions), otherwise we have to use the direct frame scan.
  3135  */
  3136 #ifdef HAVE_EXCEPTIONS
  3137 #include <unwind.h>
  3139 struct UnwindInfo {
  3140     uintptr_t block_start;
  3141     uintptr_t block_end;
  3142     void *pc;
  3143 };
  3145 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
  3146 {
  3147     struct UnwindInfo *info = arg;
  3148     void *pc = (void *)_Unwind_GetIP(context);
  3149     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3150         info->pc = pc;
  3151         return _URC_NORMAL_STOP;
  3152     }
  3153     return _URC_NO_REASON;
  3154 }
  3156 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3157 {
  3158     struct _Unwind_Exception exc;
  3159     struct UnwindInfo info;
  3161     info.pc = NULL;
  3162     info.block_start = (uintptr_t)code;
  3163     info.block_end = info.block_start + code_size;
  3164     void *result = NULL;
  3165     _Unwind_Backtrace( xlat_check_frame, &info );
  3166     return info.pc;
  3167 }
  3168 #else
  3169 /* Assume this is an ia32 build - amd64 should always have dwarf information */
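        /* The scan below walks up to 8 saved frame pointers: the comparison
         * value is &sh4r + 128, which translated code uses as its frame pointer
         * (all the rbpdisp accesses above address sh4r through it), so the first
         * frame whose saved EBP matches is the translated block's frame, and the
         * return address at 0x4(%eax) is the native PC. */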
  3170 void *xlat_get_native_pc( void *code, uint32_t code_size )
  3171 {
  3172     void *result = NULL;
  3173     __asm__(
  3174         "mov %%ebp, %%eax\n\t"
  3175         "mov $0x8, %%ecx\n\t"
  3176         "mov %1, %%edx\n"
  3177         "frame_loop: test %%eax, %%eax\n\t"
  3178         "je frame_not_found\n\t"
  3179         "cmp (%%eax), %%edx\n\t"
  3180         "je frame_found\n\t"
  3181         "sub $0x1, %%ecx\n\t"
  3182         "je frame_not_found\n\t"
  3183         "movl (%%eax), %%eax\n\t"
  3184         "jmp frame_loop\n"
  3185         "frame_found: movl 0x4(%%eax), %0\n"
  3186         "frame_not_found:"
  3187         : "=r" (result)
  3188         : "r" (((uint8_t *)&sh4r) + 128 )
  3189         : "eax", "ecx", "edx" );
  3190     return result;
  3191 }
  3192 #endif