lxdream.org :: lxdream/src/sh4/sh4x86.in
filename src/sh4/sh4x86.in
changeset 1091:186558374345
prev 1067:d3c00ffccfcd
next 1092:7c4ffe27e7b5
author nkeynes
date Tue Dec 15 08:46:37 2009 +1000 (14 years ago)
permissions -rw-r--r--
last change Add side-by-side x86+sh4 disassembly output
Print SH4 state information and disassembly of the current block when
crashing.
Fix delay slot instruction in conditional branch not being marked as a
delay-slot instruction in the branch-not-taken path.
Rename REG_* defines in cpu.h to avoid conflict with translation defs
     1 /**
     2  * $Id$
     3  * 
     4  * SH4 => x86 translation. This version does no real optimization, it just
     5  * outputs straight-line x86 code - it mainly exists to provide a baseline
     6  * to test the optimizing versions against.
     7  *
     8  * Copyright (c) 2007 Nathan Keynes.
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License as published by
    12  * the Free Software Foundation; either version 2 of the License, or
    13  * (at your option) any later version.
    14  *
    15  * This program is distributed in the hope that it will be useful,
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18  * GNU General Public License for more details.
    19  */
    21 #include <assert.h>
    22 #include <math.h>
    24 #ifndef NDEBUG
    25 #define DEBUG_JUMPS 1
    26 #endif
    28 #include "lxdream.h"
    29 #include "sh4/sh4core.h"
    30 #include "sh4/sh4dasm.h"
    31 #include "sh4/sh4trans.h"
    32 #include "sh4/sh4stat.h"
    33 #include "sh4/sh4mmio.h"
    34 #include "sh4/mmu.h"
    35 #include "xlat/xltcache.h"
    36 #include "xlat/x86/x86op.h"
    37 #include "x86dasm/x86dasm.h"
    38 #include "clock.h"
    40 #define DEFAULT_BACKPATCH_SIZE 4096
    42 /* Offset of a reg relative to the sh4r structure */
    43 #define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
    45 #define R_T      REG_OFFSET(t)
    46 #define R_Q      REG_OFFSET(q)
    47 #define R_S      REG_OFFSET(s)
    48 #define R_M      REG_OFFSET(m)
    49 #define R_SR     REG_OFFSET(sr)
    50 #define R_GBR    REG_OFFSET(gbr)
    51 #define R_SSR    REG_OFFSET(ssr)
    52 #define R_SPC    REG_OFFSET(spc)
    53 #define R_VBR    REG_OFFSET(vbr)
    54 #define R_MACH   REG_OFFSET(mac)+4
    55 #define R_MACL   REG_OFFSET(mac)
    56 #define R_PC     REG_OFFSET(pc)
    57 #define R_NEW_PC REG_OFFSET(new_pc)
    58 #define R_PR     REG_OFFSET(pr)
    59 #define R_SGR    REG_OFFSET(sgr)
    60 #define R_FPUL   REG_OFFSET(fpul)
    61 #define R_FPSCR  REG_OFFSET(fpscr)
    62 #define R_DBR    REG_OFFSET(dbr)
    63 #define R_R(rn)  REG_OFFSET(r[rn])
    64 #define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
    65 #define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
    66 #define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
    67 #define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
    68 #define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
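// NOTE (annotation, not part of the original file): sh4_translate_begin_block()
// loads EBP with ((uint8_t *)&sh4r) + 128, so the -128 bias above keeps the
// core register offsets within [-128,127] and lets rbp-relative accesses
// encode with a one-byte displacement. Illustrative effect (the exact offsets
// depend on sh4r's layout):
//
//     MOVL_rbpdisp_r32( R_R(2), REG_EAX );   // mov disp8(%ebp), %eax
//     MOVL_r32_rbpdisp( REG_EAX, R_R(3) );   // mov %eax, disp8(%ebp)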
    70 #define DELAY_NONE 0
    71 #define DELAY_PC 1
    72 #define DELAY_PC_PR 2
    74 struct backpatch_record {
    75     uint32_t fixup_offset;
    76     uint32_t fixup_icount;
    77     int32_t exc_code;
    78 };
    80 /** 
    81  * Struct to manage internal translation state. This state is not saved -
    82  * it is only valid between calls to sh4_translate_begin_block() and
    83  * sh4_translate_end_block()
    84  */
    85 struct sh4_x86_state {
    86     int in_delay_slot;
    87     gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
    88     gboolean branch_taken; /* true if we branched unconditionally */
    89     gboolean double_prec; /* true if FPU is in double-precision mode */
    90     gboolean double_size; /* true if FPU is in double-size mode */
    91     gboolean sse3_enabled; /* true if host supports SSE3 instructions */
    92     uint32_t block_start_pc;
    93     uint32_t stack_posn;   /* Trace stack height for alignment purposes */
    94     int tstate;
    96     /* mode flags */
    97     gboolean tlb_on; /* True if tlb translation is active */
    99     /* Allocated memory for the (block-wide) back-patch list */
   100     struct backpatch_record *backpatch_list;
   101     uint32_t backpatch_posn;
   102     uint32_t backpatch_size;
   103 };
   105 static struct sh4_x86_state sh4_x86;
   107 static uint32_t max_int = 0x7FFFFFFF;
   108 static uint32_t min_int = 0x80000000;
   109 static uint32_t save_fcw; /* save value for fpu control word */
   110 static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
   112 static struct x86_symbol x86_symbol_table[] = {
   113     { "sh4r+128", ((char *)&sh4r)+128 },
   114     { "sh4_cpu_period", &sh4_cpu_period },
   115     { "sh4_address_space", NULL },
   116     { "sh4_user_address_space", NULL },
   117     { "sh4_write_fpscr", sh4_write_fpscr },
   118     { "sh4_write_sr", sh4_write_sr },
   119     { "sh4_read_sr", sh4_read_sr },
   120     { "sh4_sleep", sh4_sleep },
   121     { "sh4_fsca", sh4_fsca },
   122     { "sh4_ftrv", sh4_ftrv },
   123     { "sh4_switch_fr_banks", sh4_switch_fr_banks },
   124     { "sh4_execute_instruction", sh4_execute_instruction },
   125     { "signsat48", signsat48 },
   126     { "xlat_get_code_by_vma", xlat_get_code_by_vma },
   127     { "xlat_get_code", xlat_get_code }
   128 };
   131 gboolean is_sse3_supported()
   132 {
   133     uint32_t features;
   135     __asm__ __volatile__(
   136         "mov $0x01, %%eax\n\t"
   137         "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
   138     return (features & 1) ? TRUE : FALSE;
   139 }
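// NOTE (annotation, not part of the original file): the inline asm above runs
// CPUID leaf 1 and tests bit 0 of ECX, which is the SSE3 ("PNI") feature flag.
// An equivalent check using GCC's <cpuid.h> helpers, as an illustrative
// alternative rather than what this file does:
//
//     #include <cpuid.h>
//     gboolean is_sse3_supported_alt( void )
//     {
//         unsigned int eax, ebx, ecx, edx;
//         if( !__get_cpuid( 1, &eax, &ebx, &ecx, &edx ) )
//             return FALSE;                    // CPUID leaf 1 unsupported
//         return (ecx & bit_SSE3) ? TRUE : FALSE;
//     }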
   141 void sh4_translate_init(void)
   142 {
   143     sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
   144     sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
   145     sh4_x86.sse3_enabled = is_sse3_supported();
   146     x86_symbol_table[2].ptr = sh4_address_space;
   147     x86_symbol_table[3].ptr = sh4_user_address_space;    
   148     x86_disasm_init();
   149     x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
   150 }
   152 /**
   153  * Disassemble the given translated code block, and its source SH4 code block
   154  * side-by-side. The current native pc will be marked if non-null.
   155  */
   156 void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
   157 {
   158     char buf[256];
   159     char op[256];
   161     uintptr_t target_start = (uintptr_t)code, target_pc;
   162     uintptr_t target_end = target_start + xlat_get_code_size(code);
   163     uint32_t source_pc = source_start;
   164     uint32_t source_end = source_pc;
   165     xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
   166     xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size;
   168     for( target_pc = target_start; target_pc < target_end;  ) {
   169         uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
   170         fprintf( out, "%c%08X: %-20s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
   171                       (unsigned int)target_pc, op, buf );
   173         if( source_recov_table < source_recov_end && 
   174             target_pc >= (target_start + source_recov_table->xlat_offset) ) {
   175             source_recov_table++;
   176             if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
   177                 source_end = source_start + (source_recov_table->sh4_icount)*2;
   178         }
   180         if( source_pc < source_end ) {
   181             uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
   182             fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
   183             source_pc = source_pc2;
   184         } else {
   185             fprintf( out, "\n" );
   186         }
   188         target_pc = pc2;
   189     }
   191     while( source_pc < source_end ) {
   192         uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
   193         fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
   194         source_pc = source_pc2;
   195     }
   196 }
   198 static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
   199 {
   200     int reloc_size = 4;
   202     if( exc_code == -2 ) {
   203         reloc_size = sizeof(void *);
   204     }
   206     if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
   207 	sh4_x86.backpatch_size <<= 1;
   208 	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
   209 					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
   210 	assert( sh4_x86.backpatch_list != NULL );
   211     }
   212     if( sh4_x86.in_delay_slot ) {
   213 	fixup_pc -= 2;
   214     }
   216     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
   217 	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
   218     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
   219     sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
   220     sh4_x86.backpatch_posn++;
   221 }
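// NOTE (annotation, not part of the original file): each record remembers
// where a 4-byte rel32 (or, when exc_code == -2, a pointer-sized immediate)
// was emitted, as an offset from the start of the block's code, plus the SH4
// instruction count needed to rebuild PC and cycle state. JNE_exc() (defined
// below) shows the producing side:
//
//     JCC_cc_rel32( X86_COND_NE, 0 );               // displacement patched later
//     sh4_x86_add_backpatch( xlat_output, pc, exc );
//
// sh4_translate_end_block() later redirects each recorded site to the
// exception stub it emits after the block body.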
   223 #define TSTATE_NONE -1
   224 #define TSTATE_O    X86_COND_O
   225 #define TSTATE_C    X86_COND_C
   226 #define TSTATE_E    X86_COND_E
   227 #define TSTATE_NE   X86_COND_NE
   228 #define TSTATE_G    X86_COND_G
   229 #define TSTATE_GE   X86_COND_GE
   230 #define TSTATE_A    X86_COND_A
   231 #define TSTATE_AE   X86_COND_AE
   233 #define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
   234 #define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
   236 /* Convenience instructions */
   237 #define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
   238 #define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
   239 #define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
   240 #define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
   241 #define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
   242 #define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
   243 #define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
   244 #define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
   245 #define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
   246 #define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
   247 #define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
   248 #define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
   249 #define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
   250 #define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
   251 #define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
   252 #define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
   253 #define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
   254 #define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
   255 #define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
   257 /** Branch if T is set (either in the current cflags, or in sh4r.t) */
   258 #define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   259 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   260     JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
   262 /** Branch if T is clear (either in the current cflags or in sh4r.t) */
   263 #define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
   264 	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
   265     JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
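// NOTE (annotation, not part of the original file): sh4_x86.tstate caches
// which x86 condition code currently mirrors sh4r.t, so JT/JF can branch on
// the live flags instead of reloading the T register. Only when tstate ==
// TSTATE_NONE do they re-derive the condition by comparing sh4r.t against 1.
// Sketch of the fast path this enables:
//
//     CMPL_r32_r32( REG_EAX, REG_ECX );   // CMP/EQ Rm, Rn
//     SETE_t();                           // sh4r.t := ZF, tstate = TSTATE_E
//     // a following BT/BF now compiles to a plain JE/JNE on the same flags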
   268 #define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
   269 #define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
   271 /**
   272  * Load an FR register (single-precision floating point) into an integer x86
   273  * register (eg for register-to-register moves)
   274  */
   275 #define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
   276 #define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
   278 /**
   279  * Load the low half of a DR register (DR or XD) into an integer x86 register 
   280  */
   281 #define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm|0x01]), reg )
   282 #define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[frm&1][frm&0x0E]), reg )
   284 /**
   285  * Store an FR register (single-precision floating point) from an integer x86
   286  * register (eg for register-to-register moves)
   287  */
   288 #define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
   289 #define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
   291 #define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm|0x01]) )
   292 #define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[frm&1][frm&0x0E]) )
   295 #define push_fpul()  FLDF_rbpdisp(R_FPUL)
   296 #define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
   297 #define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   298 #define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
   299 #define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   300 #define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
   301 #define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   302 #define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
   303 #define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   304 #define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
   306 #ifdef ENABLE_SH4STATS
   307 #define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
   308 #else
   309 #define COUNT_INST(id)
   310 #endif
   313 /* Exception checks - Note that all exception checks will clobber EAX */
   315 #define check_priv( ) \
   316     if( (sh4r.xlat_sh4_mode & SR_MD) == 0 ) { \
   317         if( sh4_x86.in_delay_slot ) { \
   318             exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
   319         } else { \
   320             exit_block_exc(EXC_ILLEGAL, pc); \
   321         } \
   322         sh4_x86.branch_taken = TRUE; \
   323         sh4_x86.in_delay_slot = DELAY_NONE; \
   324         return 2; \
   325     }
   327 #define check_fpuen( ) \
   328     if( !sh4_x86.fpuen_checked ) {\
   329 	sh4_x86.fpuen_checked = TRUE;\
   330 	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
   331 	ANDL_imms_r32( SR_FD, REG_EAX );\
   332 	if( sh4_x86.in_delay_slot ) {\
   333 	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
   334 	} else {\
   335 	    JNE_exc(EXC_FPU_DISABLED);\
   336 	}\
   337 	sh4_x86.tstate = TSTATE_NONE; \
   338     }
   340 #define check_ralign16( x86reg ) \
   341     TESTL_imms_r32( 0x00000001, x86reg ); \
   342     JNE_exc(EXC_DATA_ADDR_READ)
   344 #define check_walign16( x86reg ) \
   345     TESTL_imms_r32( 0x00000001, x86reg ); \
   346     JNE_exc(EXC_DATA_ADDR_WRITE);
   348 #define check_ralign32( x86reg ) \
   349     TESTL_imms_r32( 0x00000003, x86reg ); \
   350     JNE_exc(EXC_DATA_ADDR_READ)
   352 #define check_walign32( x86reg ) \
   353     TESTL_imms_r32( 0x00000003, x86reg ); \
   354     JNE_exc(EXC_DATA_ADDR_WRITE);
   356 #define check_ralign64( x86reg ) \
   357     TESTL_imms_r32( 0x00000007, x86reg ); \
   358     JNE_exc(EXC_DATA_ADDR_READ)
   360 #define check_walign64( x86reg ) \
   361     TESTL_imms_r32( 0x00000007, x86reg ); \
   362     JNE_exc(EXC_DATA_ADDR_WRITE);
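// NOTE (annotation, not part of the original file): each check tests the low
// address bits and raises the matching alignment exception through the
// backpatch machinery. E.g. for 32-bit accesses:
//
//     0x8C001000 & 3 == 0   // aligned: falls through
//     0x8C001002 & 3 == 2   // misaligned: JNE_exc -> EXC_DATA_ADDR_READ/WRITE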
   364 #define address_space() ((sh4r.xlat_sh4_mode&SR_MD) ? (uintptr_t)sh4_address_space : (uintptr_t)sh4_user_address_space)
   366 #define UNDEF(ir)
   367 /* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
   368  * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
   369  */
   370 #ifdef HAVE_FRAME_ADDRESS
   371 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   372 {
   373     decode_address(address_space(), addr_reg);
   374     if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { 
   375         CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
   376     } else {
   377         if( addr_reg != REG_ARG1 ) {
   378             MOVL_r32_r32( addr_reg, REG_ARG1 );
   379         }
   380         MOVP_immptr_rptr( 0, REG_ARG2 );
   381         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   382         CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
   383     }
   384     if( value_reg != REG_RESULT1 ) { 
   385         MOVL_r32_r32( REG_RESULT1, value_reg );
   386     }
   387 }
   389 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   390 {
   391     decode_address(address_space(), addr_reg);
   392     if( !sh4_x86.tlb_on && (sh4r.xlat_sh4_mode & SR_MD) ) { 
   393         CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
   394     } else {
   395         if( value_reg != REG_ARG2 ) {
   396             MOVL_r32_r32( value_reg, REG_ARG2 );
   397 	}        
   398         if( addr_reg != REG_ARG1 ) {
   399             MOVL_r32_r32( addr_reg, REG_ARG1 );
   400         }
   401 #if MAX_REG_ARG > 2        
   402         MOVP_immptr_rptr( 0, REG_ARG3 );
   403         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   404         CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
   405 #else
   406         MOVL_imm32_rspdisp( 0, 0 );
   407         sh4_x86_add_backpatch( xlat_output, pc, -2 );
   408         CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
   409 #endif
   410     }
   411 }
   412 #else
   413 static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
   414 {
   415     decode_address(address_space(), addr_reg);
   416     CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
   417     if( value_reg != REG_RESULT1 ) {
   418         MOVL_r32_r32( REG_RESULT1, value_reg );
   419     }
   420 }     
   422 static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
   423 {
   424     decode_address(address_space(), addr_reg);
   425     CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
   426 }
   427 #endif
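// NOTE (annotation, not part of the original file): decode_address() resolves
// the page's struct mem_region_fn and leaves it in ECX, so every access
// becomes an indirect call through the region vtable at the given member
// offset. In the HAVE_FRAME_ADDRESS variants above, the zero immediate loaded
// into the final argument is a placeholder: the exc_code == -2 backpatch
// record rewrites it with the host code address to resume at if the handler
// raises an MMU exception. Rough shape of an emitted read, as a sketch:
//
//     mov   addr, ARG1          ; guest address
//     mov   $0, ARG2            ; patched to the host resume address
//     call  *offset(%ecx)       ; mem_region_fn->read_long(addr, exc)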
   429 #define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
   430 #define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
   431 #define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
   432 #define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
   433 #define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
   434 #define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
   435 #define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
   436 #define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
   437 #define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
   439 #define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
   441 void sh4_translate_begin_block( sh4addr_t pc ) 
   442 {
   443     enter_block();
   444     MOVP_immptr_rptr( ((uint8_t *)&sh4r) + 128, REG_EBP );
   445     sh4_x86.in_delay_slot = FALSE;
   446     sh4_x86.fpuen_checked = FALSE;
   447     sh4_x86.branch_taken = FALSE;
   448     sh4_x86.backpatch_posn = 0;
   449     sh4_x86.block_start_pc = pc;
   450     sh4_x86.tlb_on = IS_TLB_ENABLED();
   451     sh4_x86.tstate = TSTATE_NONE;
   452     sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
   453     sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
   454 }
   457 uint32_t sh4_translate_end_block_size()
   458 {
   459     if( sh4_x86.backpatch_posn <= 3 ) {
   460         return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*24);
   461     } else {
   462         return EPILOGUE_SIZE + 72 + (sh4_x86.backpatch_posn-3)*27;
   463     }
   464 }
   467 /**
   468  * Embed a breakpoint into the generated code
   469  */
   470 void sh4_translate_emit_breakpoint( sh4vma_t pc )
   471 {
   472     MOVL_imm32_r32( pc, REG_EAX );
   473     CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
   474     sh4_x86.tstate = TSTATE_NONE;
   475 }
   478 #define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
   480 /**
   481  * Exit the block with sh4r.pc already written
   482  */
   483 void exit_block_pcset( sh4addr_t pc )
   484 {
   485     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   486     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   487     MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
   488     if( sh4_x86.tlb_on ) {
   489         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   490     } else {
   491         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   492     }
   493     exit_block();
   494 }
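// NOTE (annotation, not part of the original file): all exit_block_* variants
// charge time the same way. SH4 instructions are 2 bytes each, so the block's
// cost in cycles is
//
//     sh4r.slice_cycle += ((pc - sh4_x86.block_start_pc) >> 1) * sh4_cpu_period;
//
// which the code above computes as an immediate and adds via REG_ECX.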
   496 /**
   497  * Exit the block with sh4r.new_pc written with the target pc
   498  */
   499 void exit_block_newpcset( sh4addr_t pc )
   500 {
   501     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   502     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   503     MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
   504     MOVL_r32_rbpdisp( REG_ARG1, R_PC );
   505     if( sh4_x86.tlb_on ) {
   506         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   507     } else {
   508         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   509     }
   510     exit_block();
   511 }
   514 /**
   515  * Exit the block to an absolute PC
   516  */
   517 void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
   518 {
   519     MOVL_imm32_r32( pc, REG_ECX );
   520     MOVL_r32_rbpdisp( REG_ECX, R_PC );
   521     if( IS_IN_ICACHE(pc) ) {
   522         MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
   523         ANDP_imms_rptr( -4, REG_EAX );
   524     } else if( sh4_x86.tlb_on ) {
   525         CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
   526     } else {
   527         CALL1_ptr_r32(xlat_get_code, REG_ECX);
   528     }
   529     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   530     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   531     exit_block();
   532 }
   534 /**
   535  * Exit the block to a relative PC
   536  */
   537 void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
   538 {
   539     MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
   540     ADDL_rbpdisp_r32( R_PC, REG_ECX );
   541     MOVL_r32_rbpdisp( REG_ECX, R_PC );
   542     if( IS_IN_ICACHE(pc) ) {
   543         MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
   544         ANDP_imms_rptr( -4, REG_EAX );
   545     } else if( sh4_x86.tlb_on ) {
   546         CALL1_ptr_r32(xlat_get_code_by_vma, REG_ECX);
   547     } else {
   548         CALL1_ptr_r32(xlat_get_code, REG_ECX);
   549     }
   550     MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   551     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   552     exit_block();
   553 }
   555 /**
   556  * Exit unconditionally with a general exception
   557  */
   558 void exit_block_exc( int code, sh4addr_t pc )
   559 {
   560     MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
   561     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   562     MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
   563     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
   564     MOVL_imm32_r32( code, REG_ARG1 );
   565     CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   566     MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
   567     if( sh4_x86.tlb_on ) {
   568         CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
   569     } else {
   570         CALL1_ptr_r32(xlat_get_code,REG_ARG1);
   571     }
   573     exit_block();
   574 }    
   576 /**
   577  * Embed a call to sh4_execute_instruction for situations that we
   578  * can't translate (just page-crossing delay slots at the moment).
   579  * Caller is responsible for setting new_pc before calling this function.
   580  *
   581  * Performs:
   582  *   Set PC = endpc
   583  *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
   584  *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
   585  *   Call sh4_execute_instruction
   586  *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
   587  */
   588 void exit_block_emu( sh4vma_t endpc )
   589 {
   590     MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
   591     ADDL_r32_rbpdisp( REG_ECX, R_PC );
   593     MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
   594     ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
   595     MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
   596     MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
   598     CALL_ptr( sh4_execute_instruction );    
   599     MOVL_rbpdisp_r32( R_PC, REG_EAX );
   600     if( sh4_x86.tlb_on ) {
   601 	CALL1_ptr_r32(xlat_get_code_by_vma,REG_EAX);
   602     } else {
   603 	CALL1_ptr_r32(xlat_get_code,REG_EAX);
   604     }
   605     exit_block();
   606 } 
   608 /**
   609  * Write the block trailer (exception handling block)
   610  */
   611 void sh4_translate_end_block( sh4addr_t pc ) {
   612     if( sh4_x86.branch_taken == FALSE ) {
   613         // Didn't exit unconditionally already, so write the termination here
   614         exit_block_rel( pc, pc );
   615     }
   616     if( sh4_x86.backpatch_posn != 0 ) {
   617         unsigned int i;
   618         // Exception raised - cleanup and exit
   619         uint8_t *end_ptr = xlat_output;
   620         MOVL_r32_r32( REG_EDX, REG_ECX );
   621         ADDL_r32_r32( REG_EDX, REG_ECX );
   622         ADDL_r32_rbpdisp( REG_ECX, R_SPC );
   623         MOVL_moffptr_eax( &sh4_cpu_period );
   624         MULL_r32( REG_EDX );
   625         ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
   626         MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
   627         if( sh4_x86.tlb_on ) {
   628             CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
   629         } else {
   630             CALL1_ptr_r32(xlat_get_code, REG_ARG1);
   631         }
   632         exit_block();
   634         for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
   635             uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
   636             if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
   637                 if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
   638                     *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
   639                 } else {
   640                     *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   641                 }
   642                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   643                 int rel = end_ptr - xlat_output;
   644                 JMP_prerel(rel);
   645             } else {
   646                 *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
   647                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
   648                 CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
   649                 MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
   650                 int rel = end_ptr - xlat_output;
   651                 JMP_prerel(rel);
   652             }
   653         }
   654     }
   655 }
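// NOTE (annotation, not part of the original file): the loop above emits one
// stub per backpatch record and points the recorded rel32 (or, for exc_code
// == -2, pointer immediate) at it. Stubs with exc_code >= 0 call
// sh4_raise_exception(code); negative codes mark MMU faults already raised by
// the memory handler. Every stub loads fixup_icount into EDX and jumps to the
// common tail at end_ptr, which applies
//
//     sh4r.spc         += fixup_icount * 2;                // completed bytes
//     sh4r.slice_cycle += fixup_icount * sh4_cpu_period;   // charge cycles
//
// before chaining to the exception handler's code via xlat_get_code*.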
   657 /**
   658  * Translate a single instruction. Delayed branches are handled specially
   659  * by translating both branch and delayed instruction as a single unit, as
   660  * the branch cannot be separated from its delay slot.
   661  * The instruction MUST be in the icache (assert check)
   662  *
   663  * @return true if the instruction marks the end of a basic block
   664  * (eg a branch or a serializing instruction)
   665  */
   666 uint32_t sh4_translate_instruction( sh4vma_t pc )
   667 {
   668     uint32_t ir;
   669     /* Read instruction from icache */
   670     assert( IS_IN_ICACHE(pc) );
   671     ir = *(uint16_t *)GET_ICACHE_PTR(pc);
   673     if( !sh4_x86.in_delay_slot ) {
   674 	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
   675     }
   677     /* check for breakpoints at this pc */
   678     for( int i=0; i<sh4_breakpoint_count; i++ ) {
   679         if( sh4_breakpoints[i].address == pc ) {
   680             sh4_translate_emit_breakpoint(pc);
   681             break;
   682         }
   683     }
   684 %%
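// NOTE (annotation, not part of the original file): everything above the %%
// marker is plain C; what follows are instruction-definition blocks in
// lxdream's generator syntax,
//
//     MNEMONIC operands {:  /* C code emitted for this opcode */  :}
//
// which the build's decoder generator expands into the sh4x86 decode/translate
// function. Within a block, the operand fields (Rm, Rn, imm, disp) and the
// current pc are available as C variables.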
   685 /* ALU operations */
   686 ADD Rm, Rn {:
   687     COUNT_INST(I_ADD);
   688     load_reg( REG_EAX, Rm );
   689     load_reg( REG_ECX, Rn );
   690     ADDL_r32_r32( REG_EAX, REG_ECX );
   691     store_reg( REG_ECX, Rn );
   692     sh4_x86.tstate = TSTATE_NONE;
   693 :}
   694 ADD #imm, Rn {:  
   695     COUNT_INST(I_ADDI);
   696     ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
   697     sh4_x86.tstate = TSTATE_NONE;
   698 :}
   699 ADDC Rm, Rn {:
   700     COUNT_INST(I_ADDC);
   701     if( sh4_x86.tstate != TSTATE_C ) {
   702         LDC_t();
   703     }
   704     load_reg( REG_EAX, Rm );
   705     load_reg( REG_ECX, Rn );
   706     ADCL_r32_r32( REG_EAX, REG_ECX );
   707     store_reg( REG_ECX, Rn );
   708     SETC_t();
   709     sh4_x86.tstate = TSTATE_C;
   710 :}
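// NOTE (annotation, not part of the original file): LDC_t() (defined above)
// transfers sh4r.t into the x86 carry flag with a compare-and-complement
// trick, letting ADDC use a plain ADC:
//
//     CMPB_imms_rbpdisp( 1, R_T );   // CF := (t < 1), i.e. CF = !t
//     CMC();                         // CF := t
//
// ADCL then computes Rn + Rm + T in one instruction, and SETC_t() writes the
// resulting carry back to sh4r.t; tstate = TSTATE_C records that the flags
// still hold T.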
   711 ADDV Rm, Rn {:
   712     COUNT_INST(I_ADDV);
   713     load_reg( REG_EAX, Rm );
   714     load_reg( REG_ECX, Rn );
   715     ADDL_r32_r32( REG_EAX, REG_ECX );
   716     store_reg( REG_ECX, Rn );
   717     SETO_t();
   718     sh4_x86.tstate = TSTATE_O;
   719 :}
   720 AND Rm, Rn {:
   721     COUNT_INST(I_AND);
   722     load_reg( REG_EAX, Rm );
   723     load_reg( REG_ECX, Rn );
   724     ANDL_r32_r32( REG_EAX, REG_ECX );
   725     store_reg( REG_ECX, Rn );
   726     sh4_x86.tstate = TSTATE_NONE;
   727 :}
   728 AND #imm, R0 {:  
   729     COUNT_INST(I_ANDI);
   730     load_reg( REG_EAX, 0 );
   731     ANDL_imms_r32(imm, REG_EAX); 
   732     store_reg( REG_EAX, 0 );
   733     sh4_x86.tstate = TSTATE_NONE;
   734 :}
   735 AND.B #imm, @(R0, GBR) {: 
   736     COUNT_INST(I_ANDB);
   737     load_reg( REG_EAX, 0 );
   738     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
   739     MOVL_r32_rspdisp(REG_EAX, 0);
   740     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
   741     MOVL_rspdisp_r32(0, REG_EAX);
   742     ANDL_imms_r32(imm, REG_EDX );
   743     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
   744     sh4_x86.tstate = TSTATE_NONE;
   745 :}
   746 CMP/EQ Rm, Rn {:  
   747     COUNT_INST(I_CMPEQ);
   748     load_reg( REG_EAX, Rm );
   749     load_reg( REG_ECX, Rn );
   750     CMPL_r32_r32( REG_EAX, REG_ECX );
   751     SETE_t();
   752     sh4_x86.tstate = TSTATE_E;
   753 :}
   754 CMP/EQ #imm, R0 {:  
   755     COUNT_INST(I_CMPEQI);
   756     load_reg( REG_EAX, 0 );
   757     CMPL_imms_r32(imm, REG_EAX);
   758     SETE_t();
   759     sh4_x86.tstate = TSTATE_E;
   760 :}
   761 CMP/GE Rm, Rn {:  
   762     COUNT_INST(I_CMPGE);
   763     load_reg( REG_EAX, Rm );
   764     load_reg( REG_ECX, Rn );
   765     CMPL_r32_r32( REG_EAX, REG_ECX );
   766     SETGE_t();
   767     sh4_x86.tstate = TSTATE_GE;
   768 :}
   769 CMP/GT Rm, Rn {: 
   770     COUNT_INST(I_CMPGT);
   771     load_reg( REG_EAX, Rm );
   772     load_reg( REG_ECX, Rn );
   773     CMPL_r32_r32( REG_EAX, REG_ECX );
   774     SETG_t();
   775     sh4_x86.tstate = TSTATE_G;
   776 :}
   777 CMP/HI Rm, Rn {:  
   778     COUNT_INST(I_CMPHI);
   779     load_reg( REG_EAX, Rm );
   780     load_reg( REG_ECX, Rn );
   781     CMPL_r32_r32( REG_EAX, REG_ECX );
   782     SETA_t();
   783     sh4_x86.tstate = TSTATE_A;
   784 :}
   785 CMP/HS Rm, Rn {: 
   786     COUNT_INST(I_CMPHS);
   787     load_reg( REG_EAX, Rm );
   788     load_reg( REG_ECX, Rn );
   789     CMPL_r32_r32( REG_EAX, REG_ECX );
   790     SETAE_t();
   791     sh4_x86.tstate = TSTATE_AE;
   792  :}
   793 CMP/PL Rn {: 
   794     COUNT_INST(I_CMPPL);
   795     load_reg( REG_EAX, Rn );
   796     CMPL_imms_r32( 0, REG_EAX );
   797     SETG_t();
   798     sh4_x86.tstate = TSTATE_G;
   799 :}
   800 CMP/PZ Rn {:  
   801     COUNT_INST(I_CMPPZ);
   802     load_reg( REG_EAX, Rn );
   803     CMPL_imms_r32( 0, REG_EAX );
   804     SETGE_t();
   805     sh4_x86.tstate = TSTATE_GE;
   806 :}
   807 CMP/STR Rm, Rn {:  
   808     COUNT_INST(I_CMPSTR);
   809     load_reg( REG_EAX, Rm );
   810     load_reg( REG_ECX, Rn );
   811     XORL_r32_r32( REG_ECX, REG_EAX );
   812     TESTB_r8_r8( REG_AL, REG_AL );
   813     JE_label(target1);
   814     TESTB_r8_r8( REG_AH, REG_AH );
   815     JE_label(target2);
   816     SHRL_imm_r32( 16, REG_EAX );
   817     TESTB_r8_r8( REG_AL, REG_AL );
   818     JE_label(target3);
   819     TESTB_r8_r8( REG_AH, REG_AH );
   820     JMP_TARGET(target1);
   821     JMP_TARGET(target2);
   822     JMP_TARGET(target3);
   823     SETE_t();
   824     sh4_x86.tstate = TSTATE_E;
   825 :}
   826 DIV0S Rm, Rn {:
   827     COUNT_INST(I_DIV0S);
   828     load_reg( REG_EAX, Rm );
   829     load_reg( REG_ECX, Rn );
   830     SHRL_imm_r32( 31, REG_EAX );
   831     SHRL_imm_r32( 31, REG_ECX );
   832     MOVL_r32_rbpdisp( REG_EAX, R_M );
   833     MOVL_r32_rbpdisp( REG_ECX, R_Q );
   834     CMPL_r32_r32( REG_EAX, REG_ECX );
   835     SETNE_t();
   836     sh4_x86.tstate = TSTATE_NE;
   837 :}
   838 DIV0U {:  
   839     COUNT_INST(I_DIV0U);
   840     XORL_r32_r32( REG_EAX, REG_EAX );
   841     MOVL_r32_rbpdisp( REG_EAX, R_Q );
   842     MOVL_r32_rbpdisp( REG_EAX, R_M );
   843     MOVL_r32_rbpdisp( REG_EAX, R_T );
   844     sh4_x86.tstate = TSTATE_C; // works for DIV1
   845 :}
   846 DIV1 Rm, Rn {:
   847     COUNT_INST(I_DIV1);
   848     MOVL_rbpdisp_r32( R_M, REG_ECX );
   849     load_reg( REG_EAX, Rn );
   850     if( sh4_x86.tstate != TSTATE_C ) {
   851 	LDC_t();
   852     }
   853     RCLL_imm_r32( 1, REG_EAX );
   854     SETC_r8( REG_DL ); // Q'
   855     CMPL_rbpdisp_r32( R_Q, REG_ECX );
   856     JE_label(mqequal);
   857     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
   858     JMP_label(end);
   859     JMP_TARGET(mqequal);
   860     SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
   861     JMP_TARGET(end);
   862     store_reg( REG_EAX, Rn ); // Done with Rn now
   863     SETC_r8(REG_AL); // tmp1
   864     XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
   865     XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
   866     MOVL_r32_rbpdisp( REG_ECX, R_Q );
   867     XORL_imms_r32( 1, REG_AL );   // T = !Q'
   868     MOVZXL_r8_r32( REG_AL, REG_EAX );
   869     MOVL_r32_rbpdisp( REG_EAX, R_T );
   870     sh4_x86.tstate = TSTATE_NONE;
   871 :}
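// NOTE (annotation, not part of the original file): DIV1 is one step of SH4's
// non-restoring 1-bit division: RCL shifts the previous T into Rn and captures
// Rn's old MSB as the trial quotient bit, Rm is added or subtracted depending
// on whether M == Q, and the XOR chain rebuilds Q and T from the carry out,
// the trial bit and M. Typical guest usage, as an illustrative sketch:
//
//     DIV0U              ! clear M, Q and T
//     .rept 32
//     DIV1  Rm, Rn       ! one quotient bit per step
//     .endr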
   872 DMULS.L Rm, Rn {:  
   873     COUNT_INST(I_DMULS);
   874     load_reg( REG_EAX, Rm );
   875     load_reg( REG_ECX, Rn );
   876     IMULL_r32(REG_ECX);
   877     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
   878     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
   879     sh4_x86.tstate = TSTATE_NONE;
   880 :}
   881 DMULU.L Rm, Rn {:  
   882     COUNT_INST(I_DMULU);
   883     load_reg( REG_EAX, Rm );
   884     load_reg( REG_ECX, Rn );
   885     MULL_r32(REG_ECX);
   886     MOVL_r32_rbpdisp( REG_EDX, R_MACH );
   887     MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
   888     sh4_x86.tstate = TSTATE_NONE;
   889 :}
   890 DT Rn {:  
   891     COUNT_INST(I_DT);
   892     load_reg( REG_EAX, Rn );
   893     ADDL_imms_r32( -1, REG_EAX );
   894     store_reg( REG_EAX, Rn );
   895     SETE_t();
   896     sh4_x86.tstate = TSTATE_E;
   897 :}
   898 EXTS.B Rm, Rn {:  
   899     COUNT_INST(I_EXTSB);
   900     load_reg( REG_EAX, Rm );
   901     MOVSXL_r8_r32( REG_EAX, REG_EAX );
   902     store_reg( REG_EAX, Rn );
   903 :}
   904 EXTS.W Rm, Rn {:  
   905     COUNT_INST(I_EXTSW);
   906     load_reg( REG_EAX, Rm );
   907     MOVSXL_r16_r32( REG_EAX, REG_EAX );
   908     store_reg( REG_EAX, Rn );
   909 :}
   910 EXTU.B Rm, Rn {:  
   911     COUNT_INST(I_EXTUB);
   912     load_reg( REG_EAX, Rm );
   913     MOVZXL_r8_r32( REG_EAX, REG_EAX );
   914     store_reg( REG_EAX, Rn );
   915 :}
   916 EXTU.W Rm, Rn {:  
   917     COUNT_INST(I_EXTUW);
   918     load_reg( REG_EAX, Rm );
   919     MOVZXL_r16_r32( REG_EAX, REG_EAX );
   920     store_reg( REG_EAX, Rn );
   921 :}
   922 MAC.L @Rm+, @Rn+ {:
   923     COUNT_INST(I_MACL);
   924     if( Rm == Rn ) {
   925 	load_reg( REG_EAX, Rm );
   926 	check_ralign32( REG_EAX );
   927 	MEM_READ_LONG( REG_EAX, REG_EAX );
   928 	MOVL_r32_rspdisp(REG_EAX, 0);
   929 	load_reg( REG_EAX, Rm );
   930 	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
   931 	MEM_READ_LONG( REG_EAX, REG_EAX );
   932         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
   933     } else {
   934 	load_reg( REG_EAX, Rm );
   935 	check_ralign32( REG_EAX );
   936 	MEM_READ_LONG( REG_EAX, REG_EAX );
   937 	MOVL_r32_rspdisp( REG_EAX, 0 );
   938 	load_reg( REG_EAX, Rn );
   939 	check_ralign32( REG_EAX );
   940 	MEM_READ_LONG( REG_EAX, REG_EAX );
   941 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   942 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   943     }
   945     IMULL_rspdisp( 0 );
   946     ADDL_r32_rbpdisp( REG_EAX, R_MACL );
   947     ADCL_r32_rbpdisp( REG_EDX, R_MACH );
   949     MOVL_rbpdisp_r32( R_S, REG_ECX );
   950     TESTL_r32_r32(REG_ECX, REG_ECX);
   951     JE_label( nosat );
   952     CALL_ptr( signsat48 );
   953     JMP_TARGET( nosat );
   954     sh4_x86.tstate = TSTATE_NONE;
   955 :}
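// NOTE (annotation, not part of the original file): MAC.L accumulates the
// signed 64-bit product into MACH:MACL with ADD/ADC. When the S flag is set,
// signsat48() then clamps the accumulator to the 48-bit saturation range
// (as per the SH4 manual):
//
//     0xFFFF800000000000 <= MACH:MACL <= 0x00007FFFFFFFFFFF
//
// The TESTL/JE pair skips the call entirely when S == 0.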
   956 MAC.W @Rm+, @Rn+ {:  
   957     COUNT_INST(I_MACW);
   958     if( Rm == Rn ) {
   959 	load_reg( REG_EAX, Rm );
   960 	check_ralign16( REG_EAX );
   961 	MEM_READ_WORD( REG_EAX, REG_EAX );
   962         MOVL_r32_rspdisp( REG_EAX, 0 );
   963 	load_reg( REG_EAX, Rm );
   964 	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
   965 	MEM_READ_WORD( REG_EAX, REG_EAX );
   966 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
   967 	// Note translate twice in case of page boundaries. Maybe worth
   968 	// adding a page-boundary check to skip the second translation
   969     } else {
   970 	load_reg( REG_EAX, Rm );
   971 	check_ralign16( REG_EAX );
   972 	MEM_READ_WORD( REG_EAX, REG_EAX );
   973         MOVL_r32_rspdisp( REG_EAX, 0 );
   974 	load_reg( REG_EAX, Rn );
   975 	check_ralign16( REG_EAX );
   976 	MEM_READ_WORD( REG_EAX, REG_EAX );
   977 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
   978 	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   979     }
   980     IMULL_rspdisp( 0 );
   981     MOVL_rbpdisp_r32( R_S, REG_ECX );
   982     TESTL_r32_r32( REG_ECX, REG_ECX );
   983     JE_label( nosat );
   985     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
   986     JNO_label( end );            // 2
   987     MOVL_imm32_r32( 1, REG_EDX );         // 5
   988     MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
   989     JS_label( positive );        // 2
   990     MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
   991     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
   992     JMP_label(end2);           // 2
   994     JMP_TARGET(positive);
   995     MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
   996     MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
   997     JMP_label(end3);            // 2
   999     JMP_TARGET(nosat);
  1000     ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
  1001     ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
  1002     JMP_TARGET(end);
  1003     JMP_TARGET(end2);
  1004     JMP_TARGET(end3);
  1005     sh4_x86.tstate = TSTATE_NONE;
  1006 :}
  1007 MOVT Rn {:  
  1008     COUNT_INST(I_MOVT);
  1009     MOVL_rbpdisp_r32( R_T, REG_EAX );
  1010     store_reg( REG_EAX, Rn );
  1011 :}
  1012 MUL.L Rm, Rn {:  
  1013     COUNT_INST(I_MULL);
  1014     load_reg( REG_EAX, Rm );
  1015     load_reg( REG_ECX, Rn );
  1016     MULL_r32( REG_ECX );
  1017     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1018     sh4_x86.tstate = TSTATE_NONE;
  1019 :}
  1020 MULS.W Rm, Rn {:
  1021     COUNT_INST(I_MULSW);
  1022     MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1023     MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1024     MULL_r32( REG_ECX );
  1025     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1026     sh4_x86.tstate = TSTATE_NONE;
  1027 :}
  1028 MULU.W Rm, Rn {:  
  1029     COUNT_INST(I_MULUW);
  1030     MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
  1031     MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
  1032     MULL_r32( REG_ECX );
  1033     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  1034     sh4_x86.tstate = TSTATE_NONE;
  1035 :}
  1036 NEG Rm, Rn {:
  1037     COUNT_INST(I_NEG);
  1038     load_reg( REG_EAX, Rm );
  1039     NEGL_r32( REG_EAX );
  1040     store_reg( REG_EAX, Rn );
  1041     sh4_x86.tstate = TSTATE_NONE;
  1042 :}
  1043 NEGC Rm, Rn {:  
  1044     COUNT_INST(I_NEGC);
  1045     load_reg( REG_EAX, Rm );
  1046     XORL_r32_r32( REG_ECX, REG_ECX );
  1047     LDC_t();
  1048     SBBL_r32_r32( REG_EAX, REG_ECX );
  1049     store_reg( REG_ECX, Rn );
  1050     SETC_t();
  1051     sh4_x86.tstate = TSTATE_C;
  1052 :}
  1053 NOT Rm, Rn {:  
  1054     COUNT_INST(I_NOT);
  1055     load_reg( REG_EAX, Rm );
  1056     NOTL_r32( REG_EAX );
  1057     store_reg( REG_EAX, Rn );
  1058     sh4_x86.tstate = TSTATE_NONE;
  1059 :}
  1060 OR Rm, Rn {:  
  1061     COUNT_INST(I_OR);
  1062     load_reg( REG_EAX, Rm );
  1063     load_reg( REG_ECX, Rn );
  1064     ORL_r32_r32( REG_EAX, REG_ECX );
  1065     store_reg( REG_ECX, Rn );
  1066     sh4_x86.tstate = TSTATE_NONE;
  1067 :}
  1068 OR #imm, R0 {:
  1069     COUNT_INST(I_ORI);
  1070     load_reg( REG_EAX, 0 );
  1071     ORL_imms_r32(imm, REG_EAX);
  1072     store_reg( REG_EAX, 0 );
  1073     sh4_x86.tstate = TSTATE_NONE;
  1074 :}
  1075 OR.B #imm, @(R0, GBR) {:  
  1076     COUNT_INST(I_ORB);
  1077     load_reg( REG_EAX, 0 );
  1078     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1079     MOVL_r32_rspdisp( REG_EAX, 0 );
  1080     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1081     MOVL_rspdisp_r32( 0, REG_EAX );
  1082     ORL_imms_r32(imm, REG_EDX );
  1083     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1084     sh4_x86.tstate = TSTATE_NONE;
  1085 :}
  1086 ROTCL Rn {:
  1087     COUNT_INST(I_ROTCL);
  1088     load_reg( REG_EAX, Rn );
  1089     if( sh4_x86.tstate != TSTATE_C ) {
  1090 	LDC_t();
  1091     }
  1092     RCLL_imm_r32( 1, REG_EAX );
  1093     store_reg( REG_EAX, Rn );
  1094     SETC_t();
  1095     sh4_x86.tstate = TSTATE_C;
  1096 :}
  1097 ROTCR Rn {:  
  1098     COUNT_INST(I_ROTCR);
  1099     load_reg( REG_EAX, Rn );
  1100     if( sh4_x86.tstate != TSTATE_C ) {
  1101 	LDC_t();
  1102     }
  1103     RCRL_imm_r32( 1, REG_EAX );
  1104     store_reg( REG_EAX, Rn );
  1105     SETC_t();
  1106     sh4_x86.tstate = TSTATE_C;
  1107 :}
  1108 ROTL Rn {:  
  1109     COUNT_INST(I_ROTL);
  1110     load_reg( REG_EAX, Rn );
  1111     ROLL_imm_r32( 1, REG_EAX );
  1112     store_reg( REG_EAX, Rn );
  1113     SETC_t();
  1114     sh4_x86.tstate = TSTATE_C;
  1115 :}
  1116 ROTR Rn {:  
  1117     COUNT_INST(I_ROTR);
  1118     load_reg( REG_EAX, Rn );
  1119     RORL_imm_r32( 1, REG_EAX );
  1120     store_reg( REG_EAX, Rn );
  1121     SETC_t();
  1122     sh4_x86.tstate = TSTATE_C;
  1123 :}
  1124 SHAD Rm, Rn {:
  1125     COUNT_INST(I_SHAD);
  1126     /* Annoyingly enough, not directly convertible */
  1127     load_reg( REG_EAX, Rn );
  1128     load_reg( REG_ECX, Rm );
  1129     CMPL_imms_r32( 0, REG_ECX );
  1130     JGE_label(doshl);
  1132     NEGL_r32( REG_ECX );      // 2
  1133     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1134     JE_label(emptysar);     // 2
  1135     SARL_cl_r32( REG_EAX );       // 2
  1136     JMP_label(end);          // 2
  1138     JMP_TARGET(emptysar);
  1139     SARL_imm_r32(31, REG_EAX );  // 3
  1140     JMP_label(end2);
  1142     JMP_TARGET(doshl);
  1143     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1144     SHLL_cl_r32( REG_EAX );       // 2
  1145     JMP_TARGET(end);
  1146     JMP_TARGET(end2);
  1147     store_reg( REG_EAX, Rn );
  1148     sh4_x86.tstate = TSTATE_NONE;
  1149 :}
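// NOTE (annotation, not part of the original file): SHAD cannot map to a
// single x86 shift because x86 masks shift counts to 5 bits, while SH4
// defines a negative count with zero low bits as a full 32-bit arithmetic
// shift. Hence the emptysar case above substitutes SAR 31, which replicates
// the sign bit. Net semantics, as a sketch:
//
//     if( Rm >= 0 )                Rn = Rn << (Rm & 0x1F);
//     else if( (-Rm & 0x1F) != 0 ) Rn = (int32_t)Rn >> (-Rm & 0x1F);
//     else                         Rn = (int32_t)Rn >> 31;   // sign fill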
  1150 SHLD Rm, Rn {:  
  1151     COUNT_INST(I_SHLD);
  1152     load_reg( REG_EAX, Rn );
  1153     load_reg( REG_ECX, Rm );
  1154     CMPL_imms_r32( 0, REG_ECX );
  1155     JGE_label(doshl);
  1157     NEGL_r32( REG_ECX );      // 2
  1158     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1159     JE_label(emptyshr );
  1160     SHRL_cl_r32( REG_EAX );       // 2
  1161     JMP_label(end);          // 2
  1163     JMP_TARGET(emptyshr);
  1164     XORL_r32_r32( REG_EAX, REG_EAX );
  1165     JMP_label(end2);
  1167     JMP_TARGET(doshl);
  1168     ANDB_imms_r8( 0x1F, REG_CL ); // 3
  1169     SHLL_cl_r32( REG_EAX );       // 2
  1170     JMP_TARGET(end);
  1171     JMP_TARGET(end2);
  1172     store_reg( REG_EAX, Rn );
  1173     sh4_x86.tstate = TSTATE_NONE;
  1174 :}
  1175 SHAL Rn {: 
  1176     COUNT_INST(I_SHAL);
  1177     load_reg( REG_EAX, Rn );
  1178     SHLL_imm_r32( 1, REG_EAX );
  1179     SETC_t();
  1180     store_reg( REG_EAX, Rn );
  1181     sh4_x86.tstate = TSTATE_C;
  1182 :}
  1183 SHAR Rn {:  
  1184     COUNT_INST(I_SHAR);
  1185     load_reg( REG_EAX, Rn );
  1186     SARL_imm_r32( 1, REG_EAX );
  1187     SETC_t();
  1188     store_reg( REG_EAX, Rn );
  1189     sh4_x86.tstate = TSTATE_C;
  1190 :}
  1191 SHLL Rn {:  
  1192     COUNT_INST(I_SHLL);
  1193     load_reg( REG_EAX, Rn );
  1194     SHLL_imm_r32( 1, REG_EAX );
  1195     SETC_t();
  1196     store_reg( REG_EAX, Rn );
  1197     sh4_x86.tstate = TSTATE_C;
  1198 :}
  1199 SHLL2 Rn {:
  1200     COUNT_INST(I_SHLL);
  1201     load_reg( REG_EAX, Rn );
  1202     SHLL_imm_r32( 2, REG_EAX );
  1203     store_reg( REG_EAX, Rn );
  1204     sh4_x86.tstate = TSTATE_NONE;
  1205 :}
  1206 SHLL8 Rn {:  
  1207     COUNT_INST(I_SHLL);
  1208     load_reg( REG_EAX, Rn );
  1209     SHLL_imm_r32( 8, REG_EAX );
  1210     store_reg( REG_EAX, Rn );
  1211     sh4_x86.tstate = TSTATE_NONE;
  1212 :}
  1213 SHLL16 Rn {:  
  1214     COUNT_INST(I_SHLL);
  1215     load_reg( REG_EAX, Rn );
  1216     SHLL_imm_r32( 16, REG_EAX );
  1217     store_reg( REG_EAX, Rn );
  1218     sh4_x86.tstate = TSTATE_NONE;
  1219 :}
  1220 SHLR Rn {:  
  1221     COUNT_INST(I_SHLR);
  1222     load_reg( REG_EAX, Rn );
  1223     SHRL_imm_r32( 1, REG_EAX );
  1224     SETC_t();
  1225     store_reg( REG_EAX, Rn );
  1226     sh4_x86.tstate = TSTATE_C;
  1227 :}
  1228 SHLR2 Rn {:  
  1229     COUNT_INST(I_SHLR);
  1230     load_reg( REG_EAX, Rn );
  1231     SHRL_imm_r32( 2, REG_EAX );
  1232     store_reg( REG_EAX, Rn );
  1233     sh4_x86.tstate = TSTATE_NONE;
  1234 :}
  1235 SHLR8 Rn {:  
  1236     COUNT_INST(I_SHLR);
  1237     load_reg( REG_EAX, Rn );
  1238     SHRL_imm_r32( 8, REG_EAX );
  1239     store_reg( REG_EAX, Rn );
  1240     sh4_x86.tstate = TSTATE_NONE;
  1241 :}
  1242 SHLR16 Rn {:  
  1243     COUNT_INST(I_SHLR);
  1244     load_reg( REG_EAX, Rn );
  1245     SHRL_imm_r32( 16, REG_EAX );
  1246     store_reg( REG_EAX, Rn );
  1247     sh4_x86.tstate = TSTATE_NONE;
  1248 :}
  1249 SUB Rm, Rn {:  
  1250     COUNT_INST(I_SUB);
  1251     load_reg( REG_EAX, Rm );
  1252     load_reg( REG_ECX, Rn );
  1253     SUBL_r32_r32( REG_EAX, REG_ECX );
  1254     store_reg( REG_ECX, Rn );
  1255     sh4_x86.tstate = TSTATE_NONE;
  1256 :}
  1257 SUBC Rm, Rn {:  
  1258     COUNT_INST(I_SUBC);
  1259     load_reg( REG_EAX, Rm );
  1260     load_reg( REG_ECX, Rn );
  1261     if( sh4_x86.tstate != TSTATE_C ) {
  1262 	LDC_t();
  1263     }
  1264     SBBL_r32_r32( REG_EAX, REG_ECX );
  1265     store_reg( REG_ECX, Rn );
  1266     SETC_t();
  1267     sh4_x86.tstate = TSTATE_C;
  1268 :}
  1269 SUBV Rm, Rn {:  
  1270     COUNT_INST(I_SUBV);
  1271     load_reg( REG_EAX, Rm );
  1272     load_reg( REG_ECX, Rn );
  1273     SUBL_r32_r32( REG_EAX, REG_ECX );
  1274     store_reg( REG_ECX, Rn );
  1275     SETO_t();
  1276     sh4_x86.tstate = TSTATE_O;
  1277 :}
  1278 SWAP.B Rm, Rn {:  
  1279     COUNT_INST(I_SWAPB);
  1280     load_reg( REG_EAX, Rm );
  1281     XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
  1282     store_reg( REG_EAX, Rn );
  1283 :}
  1284 SWAP.W Rm, Rn {:  
  1285     COUNT_INST(I_SWAPW);
  1286     load_reg( REG_EAX, Rm );
  1287     MOVL_r32_r32( REG_EAX, REG_ECX );
  1288     SHLL_imm_r32( 16, REG_ECX );
  1289     SHRL_imm_r32( 16, REG_EAX );
  1290     ORL_r32_r32( REG_EAX, REG_ECX );
  1291     store_reg( REG_ECX, Rn );
  1292     sh4_x86.tstate = TSTATE_NONE;
  1293 :}
  1294 TAS.B @Rn {:  
  1295     COUNT_INST(I_TASB);
  1296     load_reg( REG_EAX, Rn );
  1297     MOVL_r32_rspdisp( REG_EAX, 0 );
  1298     MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
  1299     TESTB_r8_r8( REG_DL, REG_DL );
  1300     SETE_t();
  1301     ORB_imms_r8( 0x80, REG_DL );
  1302     MOVL_rspdisp_r32( 0, REG_EAX );
  1303     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1304     sh4_x86.tstate = TSTATE_NONE;
  1305 :}
  1306 TST Rm, Rn {:  
  1307     COUNT_INST(I_TST);
  1308     load_reg( REG_EAX, Rm );
  1309     load_reg( REG_ECX, Rn );
  1310     TESTL_r32_r32( REG_EAX, REG_ECX );
  1311     SETE_t();
  1312     sh4_x86.tstate = TSTATE_E;
  1313 :}
  1314 TST #imm, R0 {:  
  1315     COUNT_INST(I_TSTI);
  1316     load_reg( REG_EAX, 0 );
  1317     TESTL_imms_r32( imm, REG_EAX );
  1318     SETE_t();
  1319     sh4_x86.tstate = TSTATE_E;
  1320 :}
  1321 TST.B #imm, @(R0, GBR) {:  
  1322     COUNT_INST(I_TSTB);
  1323     load_reg( REG_EAX, 0);
  1324     ADDL_rbpdisp_r32( R_GBR, REG_EAX );
  1325     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1326     TESTB_imms_r8( imm, REG_AL );
  1327     SETE_t();
  1328     sh4_x86.tstate = TSTATE_E;
  1329 :}
  1330 XOR Rm, Rn {:  
  1331     COUNT_INST(I_XOR);
  1332     load_reg( REG_EAX, Rm );
  1333     load_reg( REG_ECX, Rn );
  1334     XORL_r32_r32( REG_EAX, REG_ECX );
  1335     store_reg( REG_ECX, Rn );
  1336     sh4_x86.tstate = TSTATE_NONE;
  1337 :}
  1338 XOR #imm, R0 {:  
  1339     COUNT_INST(I_XORI);
  1340     load_reg( REG_EAX, 0 );
  1341     XORL_imms_r32( imm, REG_EAX );
  1342     store_reg( REG_EAX, 0 );
  1343     sh4_x86.tstate = TSTATE_NONE;
  1344 :}
  1345 XOR.B #imm, @(R0, GBR) {:  
  1346     COUNT_INST(I_XORB);
  1347     load_reg( REG_EAX, 0 );
  1348     ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
  1349     MOVL_r32_rspdisp( REG_EAX, 0 );
  1350     MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
  1351     MOVL_rspdisp_r32( 0, REG_EAX );
  1352     XORL_imms_r32( imm, REG_EDX );
  1353     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1354     sh4_x86.tstate = TSTATE_NONE;
  1355 :}
  1356 XTRCT Rm, Rn {:
  1357     COUNT_INST(I_XTRCT);
  1358     load_reg( REG_EAX, Rm );
  1359     load_reg( REG_ECX, Rn );
  1360     SHLL_imm_r32( 16, REG_EAX );
  1361     SHRL_imm_r32( 16, REG_ECX );
  1362     ORL_r32_r32( REG_EAX, REG_ECX );
  1363     store_reg( REG_ECX, Rn );
  1364     sh4_x86.tstate = TSTATE_NONE;
  1365 :}
  1367 /* Data move instructions */
  1368 MOV Rm, Rn {:  
  1369     COUNT_INST(I_MOV);
  1370     load_reg( REG_EAX, Rm );
  1371     store_reg( REG_EAX, Rn );
  1372 :}
  1373 MOV #imm, Rn {:  
  1374     COUNT_INST(I_MOVI);
  1375     MOVL_imm32_r32( imm, REG_EAX );
  1376     store_reg( REG_EAX, Rn );
  1377 :}
  1378 MOV.B Rm, @Rn {:  
  1379     COUNT_INST(I_MOVB);
  1380     load_reg( REG_EAX, Rn );
  1381     load_reg( REG_EDX, Rm );
  1382     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1383     sh4_x86.tstate = TSTATE_NONE;
  1384 :}
  1385 MOV.B Rm, @-Rn {:  
  1386     COUNT_INST(I_MOVB);
  1387     load_reg( REG_EAX, Rn );
  1388     LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
  1389     load_reg( REG_EDX, Rm );
  1390     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1391     ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
  1392     sh4_x86.tstate = TSTATE_NONE;
  1393 :}
  1394 MOV.B Rm, @(R0, Rn) {:  
  1395     COUNT_INST(I_MOVB);
  1396     load_reg( REG_EAX, 0 );
  1397     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1398     load_reg( REG_EDX, Rm );
  1399     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1400     sh4_x86.tstate = TSTATE_NONE;
  1401 :}
  1402 MOV.B R0, @(disp, GBR) {:  
  1403     COUNT_INST(I_MOVB);
  1404     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1405     ADDL_imms_r32( disp, REG_EAX );
  1406     load_reg( REG_EDX, 0 );
  1407     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1408     sh4_x86.tstate = TSTATE_NONE;
  1409 :}
  1410 MOV.B R0, @(disp, Rn) {:  
  1411     COUNT_INST(I_MOVB);
  1412     load_reg( REG_EAX, Rn );
  1413     ADDL_imms_r32( disp, REG_EAX );
  1414     load_reg( REG_EDX, 0 );
  1415     MEM_WRITE_BYTE( REG_EAX, REG_EDX );
  1416     sh4_x86.tstate = TSTATE_NONE;
  1417 :}
  1418 MOV.B @Rm, Rn {:  
  1419     COUNT_INST(I_MOVB);
  1420     load_reg( REG_EAX, Rm );
  1421     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1422     store_reg( REG_EAX, Rn );
  1423     sh4_x86.tstate = TSTATE_NONE;
  1424 :}
  1425 MOV.B @Rm+, Rn {:  
  1426     COUNT_INST(I_MOVB);
  1427     load_reg( REG_EAX, Rm );
  1428     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1429     if( Rm != Rn ) {
  1430     	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
  1431     }
  1432     store_reg( REG_EAX, Rn );
  1433     sh4_x86.tstate = TSTATE_NONE;
  1434 :}
  1435 MOV.B @(R0, Rm), Rn {:  
  1436     COUNT_INST(I_MOVB);
  1437     load_reg( REG_EAX, 0 );
  1438     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1439     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1440     store_reg( REG_EAX, Rn );
  1441     sh4_x86.tstate = TSTATE_NONE;
  1442 :}
  1443 MOV.B @(disp, GBR), R0 {:  
  1444     COUNT_INST(I_MOVB);
  1445     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1446     ADDL_imms_r32( disp, REG_EAX );
  1447     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1448     store_reg( REG_EAX, 0 );
  1449     sh4_x86.tstate = TSTATE_NONE;
  1450 :}
  1451 MOV.B @(disp, Rm), R0 {:  
  1452     COUNT_INST(I_MOVB);
  1453     load_reg( REG_EAX, Rm );
  1454     ADDL_imms_r32( disp, REG_EAX );
  1455     MEM_READ_BYTE( REG_EAX, REG_EAX );
  1456     store_reg( REG_EAX, 0 );
  1457     sh4_x86.tstate = TSTATE_NONE;
  1458 :}
  1459 MOV.L Rm, @Rn {:
  1460     COUNT_INST(I_MOVL);
  1461     load_reg( REG_EAX, Rn );
  1462     check_walign32(REG_EAX);
  1463     MOVL_r32_r32( REG_EAX, REG_ECX );
  1464     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1465     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1466     JNE_label( notsq );
  1467     ANDL_imms_r32( 0x3C, REG_EAX );
  1468     load_reg( REG_EDX, Rm );
  1469     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1470     JMP_label(end);
  1471     JMP_TARGET(notsq);
  1472     load_reg( REG_EDX, Rm );
  1473     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1474     JMP_TARGET(end);
  1475     sh4_x86.tstate = TSTATE_NONE;
  1476 :}
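// NOTE (annotation, not part of the original file): guest addresses
// 0xE0000000-0xE3FFFFFF are the SH4 store queues, so MOV.L stores test the
// top bits first (AND 0xFC000000 / CMP 0xE0000000). On a hit, AND 0x3C yields
// the byte offset into the 64-byte store-queue block and the SIB store writes
// it directly, bypassing the memory subsystem. Assuming sh4r.store_queue is
// laid out as 16 uint32_t, the effect is:
//
//     sh4r.store_queue[(addr & 0x3C) >> 2] = Rm;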
  1477 MOV.L Rm, @-Rn {:  
  1478     COUNT_INST(I_MOVL);
  1479     load_reg( REG_EAX, Rn );
  1480     ADDL_imms_r32( -4, REG_EAX );
  1481     check_walign32( REG_EAX );
  1482     load_reg( REG_EDX, Rm );
  1483     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1484     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  1485     sh4_x86.tstate = TSTATE_NONE;
  1486 :}
  1487 MOV.L Rm, @(R0, Rn) {:  
  1488     COUNT_INST(I_MOVL);
  1489     load_reg( REG_EAX, 0 );
  1490     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1491     check_walign32( REG_EAX );
  1492     load_reg( REG_EDX, Rm );
  1493     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1494     sh4_x86.tstate = TSTATE_NONE;
  1495 :}
  1496 MOV.L R0, @(disp, GBR) {:  
  1497     COUNT_INST(I_MOVL);
  1498     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1499     ADDL_imms_r32( disp, REG_EAX );
  1500     check_walign32( REG_EAX );
  1501     load_reg( REG_EDX, 0 );
  1502     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1503     sh4_x86.tstate = TSTATE_NONE;
  1504 :}
  1505 MOV.L Rm, @(disp, Rn) {:  
  1506     COUNT_INST(I_MOVL);
  1507     load_reg( REG_EAX, Rn );
  1508     ADDL_imms_r32( disp, REG_EAX );
  1509     check_walign32( REG_EAX );
  1510     MOVL_r32_r32( REG_EAX, REG_ECX );
  1511     ANDL_imms_r32( 0xFC000000, REG_ECX );
  1512     CMPL_imms_r32( 0xE0000000, REG_ECX );
  1513     JNE_label( notsq );
  1514     ANDL_imms_r32( 0x3C, REG_EAX );
  1515     load_reg( REG_EDX, Rm );
  1516     MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
  1517     JMP_label(end);
  1518     JMP_TARGET(notsq);
  1519     load_reg( REG_EDX, Rm );
  1520     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1521     JMP_TARGET(end);
  1522     sh4_x86.tstate = TSTATE_NONE;
  1523 :}
  1524 MOV.L @Rm, Rn {:  
  1525     COUNT_INST(I_MOVL);
  1526     load_reg( REG_EAX, Rm );
  1527     check_ralign32( REG_EAX );
  1528     MEM_READ_LONG( REG_EAX, REG_EAX );
  1529     store_reg( REG_EAX, Rn );
  1530     sh4_x86.tstate = TSTATE_NONE;
  1531 :}
  1532 MOV.L @Rm+, Rn {:  
  1533     COUNT_INST(I_MOVL);
  1534     load_reg( REG_EAX, Rm );
  1535     check_ralign32( REG_EAX );
  1536     MEM_READ_LONG( REG_EAX, REG_EAX );
  1537     if( Rm != Rn ) {
   1538     	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   1539     }
  1540     store_reg( REG_EAX, Rn );
  1541     sh4_x86.tstate = TSTATE_NONE;
  1542 :}
  1543 MOV.L @(R0, Rm), Rn {:  
  1544     COUNT_INST(I_MOVL);
  1545     load_reg( REG_EAX, 0 );
  1546     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1547     check_ralign32( REG_EAX );
  1548     MEM_READ_LONG( REG_EAX, REG_EAX );
  1549     store_reg( REG_EAX, Rn );
  1550     sh4_x86.tstate = TSTATE_NONE;
  1551 :}
  1552 MOV.L @(disp, GBR), R0 {:
  1553     COUNT_INST(I_MOVL);
  1554     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1555     ADDL_imms_r32( disp, REG_EAX );
  1556     check_ralign32( REG_EAX );
  1557     MEM_READ_LONG( REG_EAX, REG_EAX );
  1558     store_reg( REG_EAX, 0 );
  1559     sh4_x86.tstate = TSTATE_NONE;
  1560 :}
  1561 MOV.L @(disp, PC), Rn {:  
  1562     COUNT_INST(I_MOVLPC);
  1563     if( sh4_x86.in_delay_slot ) {
  1564 	SLOTILLEGAL();
  1565     } else {
  1566 	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
  1567 	if( IS_IN_ICACHE(target) ) {
   1568 	    // If the target address is in the same page as the code, it's
   1569 	    // safe to reference it directly, bypassing the whole memory
   1570 	    // subsystem. (this is a big performance win)
   1572 	    // FIXME: There's a corner case that's not handled here when
   1573 	    // the current code page is in the ITLB but not in the UTLB:
   1574 	    // this should generate a TLB miss (although real SH4 behaviour
   1575 	    // needs testing to confirm). It's unlikely that anything
   1576 	    // depends on this behaviour, though.
  1577 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1578 	    MOVL_moffptr_eax( ptr );
  1579 	} else {
  1580 	    // Note: we use sh4r.pc for the calc as we could be running at a
  1581 	    // different virtual address than the translation was done with,
  1582 	    // but we can safely assume that the low bits are the same.
  1583 	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
  1584 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1585 	    MEM_READ_LONG( REG_EAX, REG_EAX );
   1586 	    sh4_x86.tstate = TSTATE_NONE;
   1587 	}
   1588 	store_reg( REG_EAX, Rn );
   1589     }
  1590 :}
  1591 MOV.L @(disp, Rm), Rn {:  
  1592     COUNT_INST(I_MOVL);
  1593     load_reg( REG_EAX, Rm );
  1594     ADDL_imms_r32( disp, REG_EAX );
  1595     check_ralign32( REG_EAX );
  1596     MEM_READ_LONG( REG_EAX, REG_EAX );
  1597     store_reg( REG_EAX, Rn );
  1598     sh4_x86.tstate = TSTATE_NONE;
  1599 :}
  1600 MOV.W Rm, @Rn {:  
  1601     COUNT_INST(I_MOVW);
  1602     load_reg( REG_EAX, Rn );
  1603     check_walign16( REG_EAX );
  1604     load_reg( REG_EDX, Rm );
  1605     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1606     sh4_x86.tstate = TSTATE_NONE;
  1607 :}
  1608 MOV.W Rm, @-Rn {:  
  1609     COUNT_INST(I_MOVW);
  1610     load_reg( REG_EAX, Rn );
  1611     check_walign16( REG_EAX );
  1612     LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
  1613     load_reg( REG_EDX, Rm );
  1614     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1615     ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
  1616     sh4_x86.tstate = TSTATE_NONE;
  1617 :}
  1618 MOV.W Rm, @(R0, Rn) {:  
  1619     COUNT_INST(I_MOVW);
  1620     load_reg( REG_EAX, 0 );
  1621     ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1622     check_walign16( REG_EAX );
  1623     load_reg( REG_EDX, Rm );
  1624     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1625     sh4_x86.tstate = TSTATE_NONE;
  1626 :}
  1627 MOV.W R0, @(disp, GBR) {:  
  1628     COUNT_INST(I_MOVW);
  1629     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1630     ADDL_imms_r32( disp, REG_EAX );
  1631     check_walign16( REG_EAX );
  1632     load_reg( REG_EDX, 0 );
  1633     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1634     sh4_x86.tstate = TSTATE_NONE;
  1635 :}
  1636 MOV.W R0, @(disp, Rn) {:  
  1637     COUNT_INST(I_MOVW);
  1638     load_reg( REG_EAX, Rn );
  1639     ADDL_imms_r32( disp, REG_EAX );
  1640     check_walign16( REG_EAX );
  1641     load_reg( REG_EDX, 0 );
  1642     MEM_WRITE_WORD( REG_EAX, REG_EDX );
  1643     sh4_x86.tstate = TSTATE_NONE;
  1644 :}
  1645 MOV.W @Rm, Rn {:  
  1646     COUNT_INST(I_MOVW);
  1647     load_reg( REG_EAX, Rm );
  1648     check_ralign16( REG_EAX );
  1649     MEM_READ_WORD( REG_EAX, REG_EAX );
  1650     store_reg( REG_EAX, Rn );
  1651     sh4_x86.tstate = TSTATE_NONE;
  1652 :}
  1653 MOV.W @Rm+, Rn {:  
  1654     COUNT_INST(I_MOVW);
  1655     load_reg( REG_EAX, Rm );
  1656     check_ralign16( REG_EAX );
  1657     MEM_READ_WORD( REG_EAX, REG_EAX );
  1658     if( Rm != Rn ) {
   1659         ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
   1660     }
  1661     store_reg( REG_EAX, Rn );
  1662     sh4_x86.tstate = TSTATE_NONE;
  1663 :}
  1664 MOV.W @(R0, Rm), Rn {:  
  1665     COUNT_INST(I_MOVW);
  1666     load_reg( REG_EAX, 0 );
  1667     ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
  1668     check_ralign16( REG_EAX );
  1669     MEM_READ_WORD( REG_EAX, REG_EAX );
  1670     store_reg( REG_EAX, Rn );
  1671     sh4_x86.tstate = TSTATE_NONE;
  1672 :}
  1673 MOV.W @(disp, GBR), R0 {:  
  1674     COUNT_INST(I_MOVW);
  1675     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  1676     ADDL_imms_r32( disp, REG_EAX );
  1677     check_ralign16( REG_EAX );
  1678     MEM_READ_WORD( REG_EAX, REG_EAX );
  1679     store_reg( REG_EAX, 0 );
  1680     sh4_x86.tstate = TSTATE_NONE;
  1681 :}
  1682 MOV.W @(disp, PC), Rn {:  
  1683     COUNT_INST(I_MOVW);
  1684     if( sh4_x86.in_delay_slot ) {
  1685 	SLOTILLEGAL();
  1686     } else {
  1687 	// See comments for MOV.L @(disp, PC), Rn
  1688 	uint32_t target = pc + disp + 4;
  1689 	if( IS_IN_ICACHE(target) ) {
  1690 	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
  1691 	    MOVL_moffptr_eax( ptr );
  1692 	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
  1693 	} else {
  1694 	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
  1695 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1696 	    MEM_READ_WORD( REG_EAX, REG_EAX );
   1697 	    sh4_x86.tstate = TSTATE_NONE;
   1698 	}
   1699 	store_reg( REG_EAX, Rn );
   1700     }
  1701 :}
  1702 MOV.W @(disp, Rm), R0 {:  
  1703     COUNT_INST(I_MOVW);
  1704     load_reg( REG_EAX, Rm );
  1705     ADDL_imms_r32( disp, REG_EAX );
  1706     check_ralign16( REG_EAX );
  1707     MEM_READ_WORD( REG_EAX, REG_EAX );
  1708     store_reg( REG_EAX, 0 );
  1709     sh4_x86.tstate = TSTATE_NONE;
  1710 :}
  1711 MOVA @(disp, PC), R0 {:  
  1712     COUNT_INST(I_MOVA);
  1713     if( sh4_x86.in_delay_slot ) {
  1714 	SLOTILLEGAL();
  1715     } else {
  1716 	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
  1717 	ADDL_rbpdisp_r32( R_PC, REG_ECX );
  1718 	store_reg( REG_ECX, 0 );
   1719 	sh4_x86.tstate = TSTATE_NONE;
   1720     }
  1721 :}
  1722 MOVCA.L R0, @Rn {:  
  1723     COUNT_INST(I_MOVCA);
  1724     load_reg( REG_EAX, Rn );
  1725     check_walign32( REG_EAX );
  1726     load_reg( REG_EDX, 0 );
  1727     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  1728     sh4_x86.tstate = TSTATE_NONE;
  1729 :}
  1731 /* Control transfer instructions */
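        /* General branch strategy: the delay-slot instruction at pc+2 is
         * translated inline after the branch state has been set up. If the slot
         * can't be translated (UNTRANSLATABLE), the target goes into sh4r.new_pc
         * and the block exits to the emulator core instead. Conditional branches
         * emit a forward Jcc with a zero displacement, then backpatch it once
         * the not-taken path has been emitted.
         */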
  1732 BF disp {:
  1733     COUNT_INST(I_BF);
  1734     if( sh4_x86.in_delay_slot ) {
  1735 	SLOTILLEGAL();
  1736     } else {
  1737 	sh4vma_t target = disp + pc + 4;
  1738 	JT_label( nottaken );
  1739 	exit_block_rel(target, pc+2 );
  1740 	JMP_TARGET(nottaken);
   1741 	return 2;
   1742     }
  1743 :}
  1744 BF/S disp {:
  1745     COUNT_INST(I_BFS);
  1746     if( sh4_x86.in_delay_slot ) {
  1747 	SLOTILLEGAL();
  1748     } else {
  1749 	sh4_x86.in_delay_slot = DELAY_PC;
  1750 	if( UNTRANSLATABLE(pc+2) ) {
  1751 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1752 	    JT_label(nottaken);
  1753 	    ADDL_imms_r32( disp, REG_EAX );
  1754 	    JMP_TARGET(nottaken);
  1755 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1756 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1757 	    exit_block_emu(pc+2);
  1758 	    sh4_x86.branch_taken = TRUE;
  1759 	    return 2;
  1760 	} else {
  1761 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1762 		CMPL_imms_rbpdisp( 1, R_T );
   1763 		sh4_x86.tstate = TSTATE_E;
   1764 	    }
  1765 	    sh4vma_t target = disp + pc + 4;
  1766 	    JCC_cc_rel32(sh4_x86.tstate,0);
  1767 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1768 	    int save_tstate = sh4_x86.tstate;
  1769 	    sh4_translate_instruction(pc+2);
  1770             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1771 	    exit_block_rel( target, pc+4 );
  1773 	    // not taken
  1774 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1775 	    sh4_x86.tstate = save_tstate;
  1776 	    sh4_translate_instruction(pc+2);
   1777 	    return 4;
   1778 	}
   1779     }
  1780 :}
  1781 BRA disp {:  
  1782     COUNT_INST(I_BRA);
  1783     if( sh4_x86.in_delay_slot ) {
  1784 	SLOTILLEGAL();
  1785     } else {
  1786 	sh4_x86.in_delay_slot = DELAY_PC;
  1787 	sh4_x86.branch_taken = TRUE;
  1788 	if( UNTRANSLATABLE(pc+2) ) {
  1789 	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1790 	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
  1791 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1792 	    exit_block_emu(pc+2);
  1793 	    return 2;
  1794 	} else {
  1795 	    sh4_translate_instruction( pc + 2 );
  1796 	    exit_block_rel( disp + pc + 4, pc+4 );
   1797 	    return 4;
   1798 	}
   1799     }
  1800 :}
  1801 BRAF Rn {:  
  1802     COUNT_INST(I_BRAF);
  1803     if( sh4_x86.in_delay_slot ) {
  1804 	SLOTILLEGAL();
  1805     } else {
  1806 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1807 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1808 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1809 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1810 	sh4_x86.in_delay_slot = DELAY_PC;
  1811 	sh4_x86.tstate = TSTATE_NONE;
  1812 	sh4_x86.branch_taken = TRUE;
  1813 	if( UNTRANSLATABLE(pc+2) ) {
  1814 	    exit_block_emu(pc+2);
  1815 	    return 2;
  1816 	} else {
  1817 	    sh4_translate_instruction( pc + 2 );
  1818 	    exit_block_newpcset(pc+4);
   1819 	    return 4;
   1820 	}
   1821     }
  1822 :}
  1823 BSR disp {:  
  1824     COUNT_INST(I_BSR);
  1825     if( sh4_x86.in_delay_slot ) {
  1826 	SLOTILLEGAL();
  1827     } else {
  1828 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1829 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1830 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1831 	sh4_x86.in_delay_slot = DELAY_PC;
  1832 	sh4_x86.branch_taken = TRUE;
  1833 	sh4_x86.tstate = TSTATE_NONE;
  1834 	if( UNTRANSLATABLE(pc+2) ) {
  1835 	    ADDL_imms_r32( disp, REG_EAX );
  1836 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1837 	    exit_block_emu(pc+2);
  1838 	    return 2;
  1839 	} else {
  1840 	    sh4_translate_instruction( pc + 2 );
  1841 	    exit_block_rel( disp + pc + 4, pc+4 );
   1842 	    return 4;
   1843 	}
   1844     }
  1845 :}
  1846 BSRF Rn {:  
  1847     COUNT_INST(I_BSRF);
  1848     if( sh4_x86.in_delay_slot ) {
  1849 	SLOTILLEGAL();
  1850     } else {
  1851 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1852 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1853 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1854 	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
  1855 	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1857 	sh4_x86.in_delay_slot = DELAY_PC;
  1858 	sh4_x86.tstate = TSTATE_NONE;
  1859 	sh4_x86.branch_taken = TRUE;
  1860 	if( UNTRANSLATABLE(pc+2) ) {
  1861 	    exit_block_emu(pc+2);
  1862 	    return 2;
  1863 	} else {
  1864 	    sh4_translate_instruction( pc + 2 );
  1865 	    exit_block_newpcset(pc+4);
   1866 	    return 4;
   1867 	}
   1868     }
  1869 :}
  1870 BT disp {:
  1871     COUNT_INST(I_BT);
  1872     if( sh4_x86.in_delay_slot ) {
  1873 	SLOTILLEGAL();
  1874     } else {
  1875 	sh4vma_t target = disp + pc + 4;
  1876 	JF_label( nottaken );
  1877 	exit_block_rel(target, pc+2 );
  1878 	JMP_TARGET(nottaken);
   1879 	return 2;
   1880     }
  1881 :}
  1882 BT/S disp {:
  1883     COUNT_INST(I_BTS);
  1884     if( sh4_x86.in_delay_slot ) {
  1885 	SLOTILLEGAL();
  1886     } else {
  1887 	sh4_x86.in_delay_slot = DELAY_PC;
  1888 	if( UNTRANSLATABLE(pc+2) ) {
  1889 	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1890 	    JF_label(nottaken);
  1891 	    ADDL_imms_r32( disp, REG_EAX );
  1892 	    JMP_TARGET(nottaken);
  1893 	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
  1894 	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
  1895 	    exit_block_emu(pc+2);
  1896 	    sh4_x86.branch_taken = TRUE;
  1897 	    return 2;
  1898 	} else {
  1899 	    if( sh4_x86.tstate == TSTATE_NONE ) {
  1900 		CMPL_imms_rbpdisp( 1, R_T );
   1901 		sh4_x86.tstate = TSTATE_E;
   1902 	    }
  1903 	    JCC_cc_rel32(sh4_x86.tstate^1,0);
  1904 	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
  1906 	    int save_tstate = sh4_x86.tstate;
  1907 	    sh4_translate_instruction(pc+2);
  1908             sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
  1909 	    exit_block_rel( disp + pc + 4, pc+4 );
  1910 	    // not taken
  1911 	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
  1912 	    sh4_x86.tstate = save_tstate;
  1913 	    sh4_translate_instruction(pc+2);
   1914 	    return 4;
   1915 	}
   1916     }
  1917 :}
  1918 JMP @Rn {:  
  1919     COUNT_INST(I_JMP);
  1920     if( sh4_x86.in_delay_slot ) {
  1921 	SLOTILLEGAL();
  1922     } else {
  1923 	load_reg( REG_ECX, Rn );
  1924 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1925 	sh4_x86.in_delay_slot = DELAY_PC;
  1926 	sh4_x86.branch_taken = TRUE;
  1927 	if( UNTRANSLATABLE(pc+2) ) {
  1928 	    exit_block_emu(pc+2);
  1929 	    return 2;
  1930 	} else {
  1931 	    sh4_translate_instruction(pc+2);
  1932 	    exit_block_newpcset(pc+4);
   1933 	    return 4;
   1934 	}
   1935     }
  1936 :}
  1937 JSR @Rn {:  
  1938     COUNT_INST(I_JSR);
  1939     if( sh4_x86.in_delay_slot ) {
  1940 	SLOTILLEGAL();
  1941     } else {
  1942 	MOVL_rbpdisp_r32( R_PC, REG_EAX );
  1943 	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
  1944 	MOVL_r32_rbpdisp( REG_EAX, R_PR );
  1945 	load_reg( REG_ECX, Rn );
  1946 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1947 	sh4_x86.in_delay_slot = DELAY_PC;
  1948 	sh4_x86.branch_taken = TRUE;
  1949 	sh4_x86.tstate = TSTATE_NONE;
  1950 	if( UNTRANSLATABLE(pc+2) ) {
  1951 	    exit_block_emu(pc+2);
  1952 	    return 2;
  1953 	} else {
  1954 	    sh4_translate_instruction(pc+2);
  1955 	    exit_block_newpcset(pc+4);
   1956 	    return 4;
   1957 	}
   1958     }
  1959 :}
  1960 RTE {:  
  1961     COUNT_INST(I_RTE);
  1962     if( sh4_x86.in_delay_slot ) {
  1963 	SLOTILLEGAL();
  1964     } else {
  1965 	check_priv();
  1966 	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
  1967 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1968 	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  1969 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  1970 	sh4_x86.in_delay_slot = DELAY_PC;
  1971 	sh4_x86.fpuen_checked = FALSE;
  1972 	sh4_x86.tstate = TSTATE_NONE;
  1973 	sh4_x86.branch_taken = TRUE;
  1974 	if( UNTRANSLATABLE(pc+2) ) {
  1975 	    exit_block_emu(pc+2);
  1976 	    return 2;
  1977 	} else {
  1978 	    sh4_translate_instruction(pc+2);
  1979 	    exit_block_newpcset(pc+4);
   1980 	    return 4;
   1981 	}
   1982     }
  1983 :}
  1984 RTS {:  
  1985     COUNT_INST(I_RTS);
  1986     if( sh4_x86.in_delay_slot ) {
  1987 	SLOTILLEGAL();
  1988     } else {
  1989 	MOVL_rbpdisp_r32( R_PR, REG_ECX );
  1990 	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
  1991 	sh4_x86.in_delay_slot = DELAY_PC;
  1992 	sh4_x86.branch_taken = TRUE;
  1993 	if( UNTRANSLATABLE(pc+2) ) {
  1994 	    exit_block_emu(pc+2);
  1995 	    return 2;
  1996 	} else {
  1997 	    sh4_translate_instruction(pc+2);
  1998 	    exit_block_newpcset(pc+4);
   1999 	    return 4;
   2000 	}
   2001     }
  2002 :}
  2003 TRAPA #imm {:  
  2004     COUNT_INST(I_TRAPA);
  2005     if( sh4_x86.in_delay_slot ) {
  2006 	SLOTILLEGAL();
  2007     } else {
  2008 	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
  2009 	ADDL_r32_rbpdisp( REG_ECX, R_PC );
  2010 	MOVL_imm32_r32( imm, REG_EAX );
  2011 	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
  2012 	sh4_x86.tstate = TSTATE_NONE;
  2013 	exit_block_pcset(pc+2);
  2014 	sh4_x86.branch_taken = TRUE;
   2015 	return 2;
   2016     }
  2017 :}
  2018 UNDEF {:  
  2019     COUNT_INST(I_UNDEF);
  2020     if( sh4_x86.in_delay_slot ) {
  2021 	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
  2022     } else {
  2023 	exit_block_exc(EXC_ILLEGAL, pc);    
   2024 	return 2;
   2025     }
  2026 :}
  2028 CLRMAC {:  
  2029     COUNT_INST(I_CLRMAC);
  2030     XORL_r32_r32(REG_EAX, REG_EAX);
  2031     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2032     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2033     sh4_x86.tstate = TSTATE_NONE;
  2034 :}
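        /* CLRS/SETS and CLRT/SETT below route the new flag value through the
         * host carry flag: CLC/STC sets it, then SETCC/SETC_t materialises it
         * into the S or T field. For T this leaves sh4_x86.tstate = TSTATE_C,
         * so a following conditional branch can test the host flags directly.
         */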
  2035 CLRS {:
  2036     COUNT_INST(I_CLRS);
  2037     CLC();
  2038     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2039     sh4_x86.tstate = TSTATE_NONE;
  2040 :}
  2041 CLRT {:  
  2042     COUNT_INST(I_CLRT);
  2043     CLC();
  2044     SETC_t();
  2045     sh4_x86.tstate = TSTATE_C;
  2046 :}
  2047 SETS {:  
  2048     COUNT_INST(I_SETS);
  2049     STC();
  2050     SETCCB_cc_rbpdisp(X86_COND_C, R_S);
  2051     sh4_x86.tstate = TSTATE_NONE;
  2052 :}
  2053 SETT {:  
  2054     COUNT_INST(I_SETT);
  2055     STC();
  2056     SETC_t();
  2057     sh4_x86.tstate = TSTATE_C;
  2058 :}
  2060 /* Floating point moves */
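        /* FMOV semantics depend on FPSCR.SZ, tracked here as
         * sh4_x86.double_size: when set, each FMOV transfers a 64-bit register
         * pair via load_dr0/load_dr1 (which understand the banked fr[][]
         * layout), otherwise a single 32-bit register. FMOV never converts -
         * it's a raw bit copy in either mode.
         */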
  2061 FMOV FRm, FRn {:  
  2062     COUNT_INST(I_FMOV1);
  2063     check_fpuen();
  2064     if( sh4_x86.double_size ) {
  2065         load_dr0( REG_EAX, FRm );
  2066         load_dr1( REG_ECX, FRm );
  2067         store_dr0( REG_EAX, FRn );
  2068         store_dr1( REG_ECX, FRn );
  2069     } else {
  2070         load_fr( REG_EAX, FRm ); // SZ=0 branch
   2071         store_fr( REG_EAX, FRn );
   2072     }
  2073 :}
  2074 FMOV FRm, @Rn {: 
  2075     COUNT_INST(I_FMOV2);
  2076     check_fpuen();
  2077     load_reg( REG_EAX, Rn );
  2078     if( sh4_x86.double_size ) {
  2079         check_walign64( REG_EAX );
  2080         load_dr0( REG_EDX, FRm );
  2081         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2082         load_reg( REG_EAX, Rn );
  2083         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2084         load_dr1( REG_EDX, FRm );
  2085         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2086     } else {
  2087         check_walign32( REG_EAX );
  2088         load_fr( REG_EDX, FRm );
   2089         MEM_WRITE_LONG( REG_EAX, REG_EDX );
   2090     }
  2091     sh4_x86.tstate = TSTATE_NONE;
  2092 :}
  2093 FMOV @Rm, FRn {:  
  2094     COUNT_INST(I_FMOV5);
  2095     check_fpuen();
  2096     load_reg( REG_EAX, Rm );
  2097     if( sh4_x86.double_size ) {
  2098         check_ralign64( REG_EAX );
  2099         MEM_READ_LONG( REG_EAX, REG_EAX );
  2100         store_dr0( REG_EAX, FRn );
  2101         load_reg( REG_EAX, Rm );
  2102         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2103         MEM_READ_LONG( REG_EAX, REG_EAX );
  2104         store_dr1( REG_EAX, FRn );
  2105     } else {
  2106         check_ralign32( REG_EAX );
  2107         MEM_READ_LONG( REG_EAX, REG_EAX );
   2108         store_fr( REG_EAX, FRn );
   2109     }
  2110     sh4_x86.tstate = TSTATE_NONE;
  2111 :}
  2112 FMOV FRm, @-Rn {:  
  2113     COUNT_INST(I_FMOV3);
  2114     check_fpuen();
  2115     load_reg( REG_EAX, Rn );
  2116     if( sh4_x86.double_size ) {
  2117         check_walign64( REG_EAX );
  2118         LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
  2119         load_dr0( REG_EDX, FRm );
  2120         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2121         load_reg( REG_EAX, Rn );
  2122         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2123         load_dr1( REG_EDX, FRm );
  2124         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2125         ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
  2126     } else {
  2127         check_walign32( REG_EAX );
  2128         LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2129         load_fr( REG_EDX, FRm );
  2130         MEM_WRITE_LONG( REG_EAX, REG_EDX );
   2131         ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
   2132     }
  2133     sh4_x86.tstate = TSTATE_NONE;
  2134 :}
  2135 FMOV @Rm+, FRn {:
  2136     COUNT_INST(I_FMOV6);
  2137     check_fpuen();
  2138     load_reg( REG_EAX, Rm );
  2139     if( sh4_x86.double_size ) {
  2140         check_ralign64( REG_EAX );
  2141         MEM_READ_LONG( REG_EAX, REG_EAX );
  2142         store_dr0( REG_EAX, FRn );
  2143         load_reg( REG_EAX, Rm );
  2144         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2145         MEM_READ_LONG( REG_EAX, REG_EAX );
  2146         store_dr1( REG_EAX, FRn );
  2147         ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
  2148     } else {
  2149         check_ralign32( REG_EAX );
  2150         MEM_READ_LONG( REG_EAX, REG_EAX );
  2151         store_fr( REG_EAX, FRn );
   2152         ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
   2153     }
  2154     sh4_x86.tstate = TSTATE_NONE;
  2155 :}
  2156 FMOV FRm, @(R0, Rn) {:  
  2157     COUNT_INST(I_FMOV4);
  2158     check_fpuen();
  2159     load_reg( REG_EAX, Rn );
  2160     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2161     if( sh4_x86.double_size ) {
  2162         check_walign64( REG_EAX );
  2163         load_dr0( REG_EDX, FRm );
  2164         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2165         load_reg( REG_EAX, Rn );
  2166         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2167         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2168         load_dr1( REG_EDX, FRm );
  2169         MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2170     } else {
  2171         check_walign32( REG_EAX );
  2172         load_fr( REG_EDX, FRm );
   2173         MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
   2174     }
  2175     sh4_x86.tstate = TSTATE_NONE;
  2176 :}
  2177 FMOV @(R0, Rm), FRn {:  
  2178     COUNT_INST(I_FMOV7);
  2179     check_fpuen();
  2180     load_reg( REG_EAX, Rm );
  2181     ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2182     if( sh4_x86.double_size ) {
  2183         check_ralign64( REG_EAX );
  2184         MEM_READ_LONG( REG_EAX, REG_EAX );
  2185         store_dr0( REG_EAX, FRn );
  2186         load_reg( REG_EAX, Rm );
  2187         ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
  2188         LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
  2189         MEM_READ_LONG( REG_EAX, REG_EAX );
  2190         store_dr1( REG_EAX, FRn );
  2191     } else {
  2192         check_ralign32( REG_EAX );
  2193         MEM_READ_LONG( REG_EAX, REG_EAX );
   2194         store_fr( REG_EAX, FRn );
   2195     }
  2196     sh4_x86.tstate = TSTATE_NONE;
  2197 :}
  2198 FLDI0 FRn {:  /* IFF PR=0 */
  2199     COUNT_INST(I_FLDI0);
  2200     check_fpuen();
  2201     if( sh4_x86.double_prec == 0 ) {
  2202         XORL_r32_r32( REG_EAX, REG_EAX );
   2203         store_fr( REG_EAX, FRn );
   2204     }
  2205     sh4_x86.tstate = TSTATE_NONE;
  2206 :}
  2207 FLDI1 FRn {:  /* IFF PR=0 */
  2208     COUNT_INST(I_FLDI1);
  2209     check_fpuen();
  2210     if( sh4_x86.double_prec == 0 ) {
  2211         MOVL_imm32_r32( 0x3F800000, REG_EAX );
   2212         store_fr( REG_EAX, FRn );
   2213     }
  2214 :}
  2216 FLOAT FPUL, FRn {:  
  2217     COUNT_INST(I_FLOAT);
  2218     check_fpuen();
  2219     FILD_rbpdisp(R_FPUL);
  2220     if( sh4_x86.double_prec ) {
  2221         pop_dr( FRn );
  2222     } else {
   2223         pop_fr( FRn );
   2224     }
  2225 :}
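        /* FTRC below implements the SH4's saturating float->int conversion on
         * the x87 unit: compare against max_int/min_int and clamp when out of
         * range; otherwise temporarily switch the FPU control word to trunc_fcw
         * (round-toward-zero) for the FISTP, then restore it from save_fcw,
         * since SH4 FTRC always truncates regardless of the rounding mode.
         */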
  2226 FTRC FRm, FPUL {:  
  2227     COUNT_INST(I_FTRC);
  2228     check_fpuen();
  2229     if( sh4_x86.double_prec ) {
  2230         push_dr( FRm );
  2231     } else {
   2232         push_fr( FRm );
   2233     }
  2234     MOVP_immptr_rptr( &max_int, REG_ECX );
  2235     FILD_r32disp( REG_ECX, 0 );
  2236     FCOMIP_st(1);
  2237     JNA_label( sat );
  2238     MOVP_immptr_rptr( &min_int, REG_ECX );
  2239     FILD_r32disp( REG_ECX, 0 );
  2240     FCOMIP_st(1);              
  2241     JAE_label( sat2 );            
  2242     MOVP_immptr_rptr( &save_fcw, REG_EAX );
  2243     FNSTCW_r32disp( REG_EAX, 0 );
  2244     MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
  2245     FLDCW_r32disp( REG_EDX, 0 );
  2246     FISTP_rbpdisp(R_FPUL);             
  2247     FLDCW_r32disp( REG_EAX, 0 );
  2248     JMP_label(end);             
  2250     JMP_TARGET(sat);
  2251     JMP_TARGET(sat2);
  2252     MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
  2253     MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
  2254     FPOP_st();
  2255     JMP_TARGET(end);
  2256     sh4_x86.tstate = TSTATE_NONE;
  2257 :}
  2258 FLDS FRm, FPUL {:  
  2259     COUNT_INST(I_FLDS);
  2260     check_fpuen();
  2261     load_fr( REG_EAX, FRm );
  2262     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2263 :}
  2264 FSTS FPUL, FRn {:  
  2265     COUNT_INST(I_FSTS);
  2266     check_fpuen();
  2267     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2268     store_fr( REG_EAX, FRn );
  2269 :}
  2270 FCNVDS FRm, FPUL {:  
  2271     COUNT_INST(I_FCNVDS);
  2272     check_fpuen();
  2273     if( sh4_x86.double_prec ) {
  2274         push_dr( FRm );
   2275         pop_fpul();
   2276     }
  2277 :}
  2278 FCNVSD FPUL, FRn {:  
  2279     COUNT_INST(I_FCNVSD);
  2280     check_fpuen();
  2281     if( sh4_x86.double_prec ) {
  2282         push_fpul();
   2283         pop_dr( FRn );
   2284     }
  2285 :}
  2287 /* Floating point instructions */
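        /* The arithmetic ops below are all evaluated on the x87 stack via
         * push_fr/push_dr and pop_fr/pop_dr. Caveat: x87 computes internally in
         * extended precision, so single-precision results may differ in the
         * last bit or two from real SH4 hardware.
         */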
  2288 FABS FRn {:  
  2289     COUNT_INST(I_FABS);
  2290     check_fpuen();
  2291     if( sh4_x86.double_prec ) {
  2292         push_dr(FRn);
  2293         FABS_st0();
  2294         pop_dr(FRn);
  2295     } else {
  2296         push_fr(FRn);
  2297         FABS_st0();
   2298         pop_fr(FRn);
   2299     }
  2300 :}
  2301 FADD FRm, FRn {:  
  2302     COUNT_INST(I_FADD);
  2303     check_fpuen();
  2304     if( sh4_x86.double_prec ) {
  2305         push_dr(FRm);
  2306         push_dr(FRn);
  2307         FADDP_st(1);
  2308         pop_dr(FRn);
  2309     } else {
  2310         push_fr(FRm);
  2311         push_fr(FRn);
  2312         FADDP_st(1);
   2313         pop_fr(FRn);
   2314     }
  2315 :}
  2316 FDIV FRm, FRn {:  
  2317     COUNT_INST(I_FDIV);
  2318     check_fpuen();
  2319     if( sh4_x86.double_prec ) {
  2320         push_dr(FRn);
  2321         push_dr(FRm);
  2322         FDIVP_st(1);
  2323         pop_dr(FRn);
  2324     } else {
  2325         push_fr(FRn);
  2326         push_fr(FRm);
  2327         FDIVP_st(1);
   2328         pop_fr(FRn);
   2329     }
  2330 :}
  2331 FMAC FR0, FRm, FRn {:  
  2332     COUNT_INST(I_FMAC);
  2333     check_fpuen();
  2334     if( sh4_x86.double_prec ) {
  2335         push_dr( 0 );
  2336         push_dr( FRm );
  2337         FMULP_st(1);
  2338         push_dr( FRn );
  2339         FADDP_st(1);
  2340         pop_dr( FRn );
  2341     } else {
  2342         push_fr( 0 );
  2343         push_fr( FRm );
  2344         FMULP_st(1);
  2345         push_fr( FRn );
  2346         FADDP_st(1);
   2347         pop_fr( FRn );
   2348     }
  2349 :}
  2351 FMUL FRm, FRn {:  
  2352     COUNT_INST(I_FMUL);
  2353     check_fpuen();
  2354     if( sh4_x86.double_prec ) {
  2355         push_dr(FRm);
  2356         push_dr(FRn);
  2357         FMULP_st(1);
  2358         pop_dr(FRn);
  2359     } else {
  2360         push_fr(FRm);
  2361         push_fr(FRn);
  2362         FMULP_st(1);
   2363         pop_fr(FRn);
   2364     }
  2365 :}
  2366 FNEG FRn {:  
  2367     COUNT_INST(I_FNEG);
  2368     check_fpuen();
  2369     if( sh4_x86.double_prec ) {
  2370         push_dr(FRn);
  2371         FCHS_st0();
  2372         pop_dr(FRn);
  2373     } else {
  2374         push_fr(FRn);
  2375         FCHS_st0();
   2376         pop_fr(FRn);
   2377     }
  2378 :}
  2379 FSRRA FRn {:  
  2380     COUNT_INST(I_FSRRA);
  2381     check_fpuen();
  2382     if( sh4_x86.double_prec == 0 ) {
  2383         FLD1_st0();
  2384         push_fr(FRn);
  2385         FSQRT_st0();
  2386         FDIVP_st(1);
   2387         pop_fr(FRn);
   2388     }
  2389 :}
  2390 FSQRT FRn {:  
  2391     COUNT_INST(I_FSQRT);
  2392     check_fpuen();
  2393     if( sh4_x86.double_prec ) {
  2394         push_dr(FRn);
  2395         FSQRT_st0();
  2396         pop_dr(FRn);
  2397     } else {
  2398         push_fr(FRn);
  2399         FSQRT_st0();
   2400         pop_fr(FRn);
   2401     }
  2402 :}
  2403 FSUB FRm, FRn {:  
  2404     COUNT_INST(I_FSUB);
  2405     check_fpuen();
  2406     if( sh4_x86.double_prec ) {
  2407         push_dr(FRn);
  2408         push_dr(FRm);
  2409         FSUBP_st(1);
  2410         pop_dr(FRn);
  2411     } else {
  2412         push_fr(FRn);
  2413         push_fr(FRm);
  2414         FSUBP_st(1);
   2415         pop_fr(FRn);
   2416     }
  2417 :}
  2419 FCMP/EQ FRm, FRn {:  
  2420     COUNT_INST(I_FCMPEQ);
  2421     check_fpuen();
  2422     if( sh4_x86.double_prec ) {
  2423         push_dr(FRm);
  2424         push_dr(FRn);
  2425     } else {
  2426         push_fr(FRm);
   2427         push_fr(FRn);
   2428     }
  2429     FCOMIP_st(1);
  2430     SETE_t();
  2431     FPOP_st();
  2432     sh4_x86.tstate = TSTATE_E;
  2433 :}
  2434 FCMP/GT FRm, FRn {:  
  2435     COUNT_INST(I_FCMPGT);
  2436     check_fpuen();
  2437     if( sh4_x86.double_prec ) {
  2438         push_dr(FRm);
  2439         push_dr(FRn);
  2440     } else {
  2441         push_fr(FRm);
   2442         push_fr(FRn);
   2443     }
  2444     FCOMIP_st(1);
  2445     SETA_t();
  2446     FPOP_st();
  2447     sh4_x86.tstate = TSTATE_A;
  2448 :}
  2450 FSCA FPUL, FRn {:  
  2451     COUNT_INST(I_FSCA);
  2452     check_fpuen();
  2453     if( sh4_x86.double_prec == 0 ) {
  2454         LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
  2455         MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
   2456         CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
   2457     }
  2458     sh4_x86.tstate = TSTATE_NONE;
  2459 :}
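        /* FIPR: 4-element dot product of FVm and FVn, result in FR[(FVn<<2)+3].
         * The SSE3 path is MULPS plus two HADDPS horizontal reductions; the
         * fallback is a chain of x87 multiply-adds. (The raw fr[0][] index in
         * the SSE3 store is ^1-swapped relative to the architectural register
         * number - see the R_FR layout.)
         */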
  2460 FIPR FVm, FVn {:  
  2461     COUNT_INST(I_FIPR);
  2462     check_fpuen();
  2463     if( sh4_x86.double_prec == 0 ) {
  2464         if( sh4_x86.sse3_enabled ) {
  2465             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
  2466             MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
  2467             HADDPS_xmm_xmm( 4, 4 ); 
  2468             HADDPS_xmm_xmm( 4, 4 );
  2469             MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
  2470         } else {
  2471             push_fr( FVm<<2 );
  2472             push_fr( FVn<<2 );
  2473             FMULP_st(1);
  2474             push_fr( (FVm<<2)+1);
  2475             push_fr( (FVn<<2)+1);
  2476             FMULP_st(1);
  2477             FADDP_st(1);
  2478             push_fr( (FVm<<2)+2);
  2479             push_fr( (FVn<<2)+2);
  2480             FMULP_st(1);
  2481             FADDP_st(1);
  2482             push_fr( (FVm<<2)+3);
  2483             push_fr( (FVn<<2)+3);
  2484             FMULP_st(1);
  2485             FADDP_st(1);
   2486             pop_fr( (FVn<<2)+3);
   2487         }
   2488     }
  2489 :}
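        /* FTRV: FVn = XMTRX * FVn. The SSE3 version loads XMTRX in four 128-bit
         * chunks from the XF bank (fr[1][]), broadcasts each vector element with
         * the MOVS*DUP / MOVLHPS / MOVHLPS shuffles, and multiply-accumulates
         * the four partial products; without SSE3 it just calls the sh4_ftrv()
         * helper.
         */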
  2490 FTRV XMTRX, FVn {:  
  2491     COUNT_INST(I_FTRV);
  2492     check_fpuen();
  2493     if( sh4_x86.double_prec == 0 ) {
  2494         if( sh4_x86.sse3_enabled ) {
  2495             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
  2496             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
  2497             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
  2498             MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
  2500             MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
  2501             MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
  2502             MOV_xmm_xmm( 4, 6 );
  2503             MOV_xmm_xmm( 5, 7 );
  2504             MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
  2505             MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
  2506             MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
  2507             MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
  2508             MULPS_xmm_xmm( 0, 4 );
  2509             MULPS_xmm_xmm( 1, 5 );
  2510             MULPS_xmm_xmm( 2, 6 );
  2511             MULPS_xmm_xmm( 3, 7 );
  2512             ADDPS_xmm_xmm( 5, 4 );
  2513             ADDPS_xmm_xmm( 7, 6 );
  2514             ADDPS_xmm_xmm( 6, 4 );
  2515             MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
  2516         } else {
  2517             LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
   2518             CALL1_ptr_r32( sh4_ftrv, REG_EAX );
   2519         }
   2520     }
  2521     sh4_x86.tstate = TSTATE_NONE;
  2522 :}
  2524 FRCHG {:  
  2525     COUNT_INST(I_FRCHG);
  2526     check_fpuen();
  2527     XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
  2528     CALL_ptr( sh4_switch_fr_banks );
  2529     sh4_x86.tstate = TSTATE_NONE;
  2530 :}
  2531 FSCHG {:  
  2532     COUNT_INST(I_FSCHG);
  2533     check_fpuen();
  2534     XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
  2535     XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
  2536     sh4_x86.tstate = TSTATE_NONE;
  2537     sh4_x86.double_size = !sh4_x86.double_size;
  2538 :}
  2540 /* Processor control instructions */
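        /* Most of the LDC/STC variants below are privileged and start with
         * check_priv(), which raises an illegal-instruction exception in user
         * mode. SR writes go through sh4_write_sr() because they can switch
         * register banks and interrupt state; fpuen_checked is cleared since
         * the write may change SR.FD and invalidate the cached FPU-enable check.
         */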
  2541 LDC Rm, SR {:
  2542     COUNT_INST(I_LDCSR);
  2543     if( sh4_x86.in_delay_slot ) {
  2544 	SLOTILLEGAL();
  2545     } else {
  2546 	check_priv();
  2547 	load_reg( REG_EAX, Rm );
  2548 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2549 	sh4_x86.fpuen_checked = FALSE;
  2550 	sh4_x86.tstate = TSTATE_NONE;
   2551 	return 2;
   2552     }
  2553 :}
  2554 LDC Rm, GBR {: 
  2555     COUNT_INST(I_LDC);
  2556     load_reg( REG_EAX, Rm );
  2557     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2558 :}
  2559 LDC Rm, VBR {:  
  2560     COUNT_INST(I_LDC);
  2561     check_priv();
  2562     load_reg( REG_EAX, Rm );
  2563     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2564     sh4_x86.tstate = TSTATE_NONE;
  2565 :}
  2566 LDC Rm, SSR {:  
  2567     COUNT_INST(I_LDC);
  2568     check_priv();
  2569     load_reg( REG_EAX, Rm );
  2570     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2571     sh4_x86.tstate = TSTATE_NONE;
  2572 :}
  2573 LDC Rm, SGR {:  
  2574     COUNT_INST(I_LDC);
  2575     check_priv();
  2576     load_reg( REG_EAX, Rm );
  2577     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2578     sh4_x86.tstate = TSTATE_NONE;
  2579 :}
  2580 LDC Rm, SPC {:  
  2581     COUNT_INST(I_LDC);
  2582     check_priv();
  2583     load_reg( REG_EAX, Rm );
  2584     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2585     sh4_x86.tstate = TSTATE_NONE;
  2586 :}
  2587 LDC Rm, DBR {:  
  2588     COUNT_INST(I_LDC);
  2589     check_priv();
  2590     load_reg( REG_EAX, Rm );
  2591     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2592     sh4_x86.tstate = TSTATE_NONE;
  2593 :}
  2594 LDC Rm, Rn_BANK {:  
  2595     COUNT_INST(I_LDC);
  2596     check_priv();
  2597     load_reg( REG_EAX, Rm );
  2598     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2599     sh4_x86.tstate = TSTATE_NONE;
  2600 :}
  2601 LDC.L @Rm+, GBR {:  
  2602     COUNT_INST(I_LDCM);
  2603     load_reg( REG_EAX, Rm );
  2604     check_ralign32( REG_EAX );
  2605     MEM_READ_LONG( REG_EAX, REG_EAX );
  2606     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2607     MOVL_r32_rbpdisp( REG_EAX, R_GBR );
  2608     sh4_x86.tstate = TSTATE_NONE;
  2609 :}
  2610 LDC.L @Rm+, SR {:
  2611     COUNT_INST(I_LDCSRM);
  2612     if( sh4_x86.in_delay_slot ) {
  2613 	SLOTILLEGAL();
  2614     } else {
  2615 	check_priv();
  2616 	load_reg( REG_EAX, Rm );
  2617 	check_ralign32( REG_EAX );
  2618 	MEM_READ_LONG( REG_EAX, REG_EAX );
  2619 	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2620 	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
  2621 	sh4_x86.fpuen_checked = FALSE;
  2622 	sh4_x86.tstate = TSTATE_NONE;
   2623 	return 2;
   2624     }
  2625 :}
  2626 LDC.L @Rm+, VBR {:  
  2627     COUNT_INST(I_LDCM);
  2628     check_priv();
  2629     load_reg( REG_EAX, Rm );
  2630     check_ralign32( REG_EAX );
  2631     MEM_READ_LONG( REG_EAX, REG_EAX );
  2632     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2633     MOVL_r32_rbpdisp( REG_EAX, R_VBR );
  2634     sh4_x86.tstate = TSTATE_NONE;
  2635 :}
  2636 LDC.L @Rm+, SSR {:
  2637     COUNT_INST(I_LDCM);
  2638     check_priv();
  2639     load_reg( REG_EAX, Rm );
  2640     check_ralign32( REG_EAX );
  2641     MEM_READ_LONG( REG_EAX, REG_EAX );
  2642     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2643     MOVL_r32_rbpdisp( REG_EAX, R_SSR );
  2644     sh4_x86.tstate = TSTATE_NONE;
  2645 :}
  2646 LDC.L @Rm+, SGR {:  
  2647     COUNT_INST(I_LDCM);
  2648     check_priv();
  2649     load_reg( REG_EAX, Rm );
  2650     check_ralign32( REG_EAX );
  2651     MEM_READ_LONG( REG_EAX, REG_EAX );
  2652     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2653     MOVL_r32_rbpdisp( REG_EAX, R_SGR );
  2654     sh4_x86.tstate = TSTATE_NONE;
  2655 :}
  2656 LDC.L @Rm+, SPC {:  
  2657     COUNT_INST(I_LDCM);
  2658     check_priv();
  2659     load_reg( REG_EAX, Rm );
  2660     check_ralign32( REG_EAX );
  2661     MEM_READ_LONG( REG_EAX, REG_EAX );
  2662     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2663     MOVL_r32_rbpdisp( REG_EAX, R_SPC );
  2664     sh4_x86.tstate = TSTATE_NONE;
  2665 :}
  2666 LDC.L @Rm+, DBR {:  
  2667     COUNT_INST(I_LDCM);
  2668     check_priv();
  2669     load_reg( REG_EAX, Rm );
  2670     check_ralign32( REG_EAX );
  2671     MEM_READ_LONG( REG_EAX, REG_EAX );
  2672     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2673     MOVL_r32_rbpdisp( REG_EAX, R_DBR );
  2674     sh4_x86.tstate = TSTATE_NONE;
  2675 :}
  2676 LDC.L @Rm+, Rn_BANK {:  
  2677     COUNT_INST(I_LDCM);
  2678     check_priv();
  2679     load_reg( REG_EAX, Rm );
  2680     check_ralign32( REG_EAX );
  2681     MEM_READ_LONG( REG_EAX, REG_EAX );
  2682     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2683     MOVL_r32_rbpdisp( REG_EAX, REG_OFFSET(r_bank[Rn_BANK]) );
  2684     sh4_x86.tstate = TSTATE_NONE;
  2685 :}
  2686 LDS Rm, FPSCR {:
  2687     COUNT_INST(I_LDSFPSCR);
  2688     check_fpuen();
  2689     load_reg( REG_EAX, Rm );
  2690     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2691     sh4_x86.tstate = TSTATE_NONE;
  2692     return 2;
  2693 :}
  2694 LDS.L @Rm+, FPSCR {:  
  2695     COUNT_INST(I_LDSFPSCRM);
  2696     check_fpuen();
  2697     load_reg( REG_EAX, Rm );
  2698     check_ralign32( REG_EAX );
  2699     MEM_READ_LONG( REG_EAX, REG_EAX );
  2700     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2701     CALL1_ptr_r32( sh4_write_fpscr, REG_EAX );
  2702     sh4_x86.tstate = TSTATE_NONE;
  2703     return 2;
  2704 :}
  2705 LDS Rm, FPUL {:  
  2706     COUNT_INST(I_LDS);
  2707     check_fpuen();
  2708     load_reg( REG_EAX, Rm );
  2709     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2710 :}
  2711 LDS.L @Rm+, FPUL {:  
  2712     COUNT_INST(I_LDSM);
  2713     check_fpuen();
  2714     load_reg( REG_EAX, Rm );
  2715     check_ralign32( REG_EAX );
  2716     MEM_READ_LONG( REG_EAX, REG_EAX );
  2717     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2718     MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
  2719     sh4_x86.tstate = TSTATE_NONE;
  2720 :}
  2721 LDS Rm, MACH {: 
  2722     COUNT_INST(I_LDS);
  2723     load_reg( REG_EAX, Rm );
  2724     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2725 :}
  2726 LDS.L @Rm+, MACH {:  
  2727     COUNT_INST(I_LDSM);
  2728     load_reg( REG_EAX, Rm );
  2729     check_ralign32( REG_EAX );
  2730     MEM_READ_LONG( REG_EAX, REG_EAX );
  2731     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2732     MOVL_r32_rbpdisp( REG_EAX, R_MACH );
  2733     sh4_x86.tstate = TSTATE_NONE;
  2734 :}
  2735 LDS Rm, MACL {:  
  2736     COUNT_INST(I_LDS);
  2737     load_reg( REG_EAX, Rm );
  2738     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2739 :}
  2740 LDS.L @Rm+, MACL {:  
  2741     COUNT_INST(I_LDSM);
  2742     load_reg( REG_EAX, Rm );
  2743     check_ralign32( REG_EAX );
  2744     MEM_READ_LONG( REG_EAX, REG_EAX );
  2745     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2746     MOVL_r32_rbpdisp( REG_EAX, R_MACL );
  2747     sh4_x86.tstate = TSTATE_NONE;
  2748 :}
  2749 LDS Rm, PR {:  
  2750     COUNT_INST(I_LDS);
  2751     load_reg( REG_EAX, Rm );
  2752     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2753 :}
  2754 LDS.L @Rm+, PR {:  
  2755     COUNT_INST(I_LDSM);
  2756     load_reg( REG_EAX, Rm );
  2757     check_ralign32( REG_EAX );
  2758     MEM_READ_LONG( REG_EAX, REG_EAX );
  2759     ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
  2760     MOVL_r32_rbpdisp( REG_EAX, R_PR );
  2761     sh4_x86.tstate = TSTATE_NONE;
  2762 :}
  2763 LDTLB {:  
  2764     COUNT_INST(I_LDTLB);
  2765     CALL_ptr( MMU_ldtlb );
  2766     sh4_x86.tstate = TSTATE_NONE;
  2767 :}
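        /* The cache-control ops below (OCBI/OCBP/OCBWB) are no-ops here, as the
         * operand cache isn't modelled. PREF is implemented, since a PREF to the
         * store-queue region triggers the SQ burst write, which is handled
         * inside MEM_PREFETCH.
         */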
  2768 OCBI @Rn {:
  2769     COUNT_INST(I_OCBI);
  2770 :}
  2771 OCBP @Rn {:
  2772     COUNT_INST(I_OCBP);
  2773 :}
  2774 OCBWB @Rn {:
  2775     COUNT_INST(I_OCBWB);
  2776 :}
  2777 PREF @Rn {:
  2778     COUNT_INST(I_PREF);
  2779     load_reg( REG_EAX, Rn );
  2780     MEM_PREFETCH( REG_EAX );
  2781     sh4_x86.tstate = TSTATE_NONE;
  2782 :}
  2783 SLEEP {: 
  2784     COUNT_INST(I_SLEEP);
  2785     check_priv();
  2786     CALL_ptr( sh4_sleep );
  2787     sh4_x86.tstate = TSTATE_NONE;
  2788     sh4_x86.in_delay_slot = DELAY_NONE;
  2789     return 2;
  2790 :}
  2791 STC SR, Rn {:
  2792     COUNT_INST(I_STCSR);
  2793     check_priv();
  2794     CALL_ptr(sh4_read_sr);
  2795     store_reg( REG_EAX, Rn );
  2796     sh4_x86.tstate = TSTATE_NONE;
  2797 :}
  2798 STC GBR, Rn {:  
  2799     COUNT_INST(I_STC);
  2800     MOVL_rbpdisp_r32( R_GBR, REG_EAX );
  2801     store_reg( REG_EAX, Rn );
  2802 :}
  2803 STC VBR, Rn {:  
  2804     COUNT_INST(I_STC);
  2805     check_priv();
  2806     MOVL_rbpdisp_r32( R_VBR, REG_EAX );
  2807     store_reg( REG_EAX, Rn );
  2808     sh4_x86.tstate = TSTATE_NONE;
  2809 :}
  2810 STC SSR, Rn {:  
  2811     COUNT_INST(I_STC);
  2812     check_priv();
  2813     MOVL_rbpdisp_r32( R_SSR, REG_EAX );
  2814     store_reg( REG_EAX, Rn );
  2815     sh4_x86.tstate = TSTATE_NONE;
  2816 :}
  2817 STC SPC, Rn {:  
  2818     COUNT_INST(I_STC);
  2819     check_priv();
  2820     MOVL_rbpdisp_r32( R_SPC, REG_EAX );
  2821     store_reg( REG_EAX, Rn );
  2822     sh4_x86.tstate = TSTATE_NONE;
  2823 :}
  2824 STC SGR, Rn {:  
  2825     COUNT_INST(I_STC);
  2826     check_priv();
  2827     MOVL_rbpdisp_r32( R_SGR, REG_EAX );
  2828     store_reg( REG_EAX, Rn );
  2829     sh4_x86.tstate = TSTATE_NONE;
  2830 :}
  2831 STC DBR, Rn {:  
  2832     COUNT_INST(I_STC);
  2833     check_priv();
  2834     MOVL_rbpdisp_r32( R_DBR, REG_EAX );
  2835     store_reg( REG_EAX, Rn );
  2836     sh4_x86.tstate = TSTATE_NONE;
  2837 :}
  2838 STC Rm_BANK, Rn {:
  2839     COUNT_INST(I_STC);
  2840     check_priv();
  2841     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EAX );
  2842     store_reg( REG_EAX, Rn );
  2843     sh4_x86.tstate = TSTATE_NONE;
  2844 :}
  2845 STC.L SR, @-Rn {:
  2846     COUNT_INST(I_STCSRM);
  2847     check_priv();
  2848     CALL_ptr( sh4_read_sr );
  2849     MOVL_r32_r32( REG_EAX, REG_EDX );
  2850     load_reg( REG_EAX, Rn );
  2851     check_walign32( REG_EAX );
  2852     LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
  2853     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2854     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2855     sh4_x86.tstate = TSTATE_NONE;
  2856 :}
  2857 STC.L VBR, @-Rn {:  
  2858     COUNT_INST(I_STCM);
  2859     check_priv();
  2860     load_reg( REG_EAX, Rn );
  2861     check_walign32( REG_EAX );
  2862     ADDL_imms_r32( -4, REG_EAX );
  2863     MOVL_rbpdisp_r32( R_VBR, REG_EDX );
  2864     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2865     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2866     sh4_x86.tstate = TSTATE_NONE;
  2867 :}
  2868 STC.L SSR, @-Rn {:  
  2869     COUNT_INST(I_STCM);
  2870     check_priv();
  2871     load_reg( REG_EAX, Rn );
  2872     check_walign32( REG_EAX );
  2873     ADDL_imms_r32( -4, REG_EAX );
  2874     MOVL_rbpdisp_r32( R_SSR, REG_EDX );
  2875     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2876     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2877     sh4_x86.tstate = TSTATE_NONE;
  2878 :}
  2879 STC.L SPC, @-Rn {:
  2880     COUNT_INST(I_STCM);
  2881     check_priv();
  2882     load_reg( REG_EAX, Rn );
  2883     check_walign32( REG_EAX );
  2884     ADDL_imms_r32( -4, REG_EAX );
  2885     MOVL_rbpdisp_r32( R_SPC, REG_EDX );
  2886     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2887     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2888     sh4_x86.tstate = TSTATE_NONE;
  2889 :}
  2890 STC.L SGR, @-Rn {:  
  2891     COUNT_INST(I_STCM);
  2892     check_priv();
  2893     load_reg( REG_EAX, Rn );
  2894     check_walign32( REG_EAX );
  2895     ADDL_imms_r32( -4, REG_EAX );
  2896     MOVL_rbpdisp_r32( R_SGR, REG_EDX );
  2897     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2898     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2899     sh4_x86.tstate = TSTATE_NONE;
  2900 :}
  2901 STC.L DBR, @-Rn {:  
  2902     COUNT_INST(I_STCM);
  2903     check_priv();
  2904     load_reg( REG_EAX, Rn );
  2905     check_walign32( REG_EAX );
  2906     ADDL_imms_r32( -4, REG_EAX );
  2907     MOVL_rbpdisp_r32( R_DBR, REG_EDX );
  2908     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2909     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2910     sh4_x86.tstate = TSTATE_NONE;
  2911 :}
  2912 STC.L Rm_BANK, @-Rn {:  
  2913     COUNT_INST(I_STCM);
  2914     check_priv();
  2915     load_reg( REG_EAX, Rn );
  2916     check_walign32( REG_EAX );
  2917     ADDL_imms_r32( -4, REG_EAX );
  2918     MOVL_rbpdisp_r32( REG_OFFSET(r_bank[Rm_BANK]), REG_EDX );
  2919     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2920     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2921     sh4_x86.tstate = TSTATE_NONE;
  2922 :}
  2923 STC.L GBR, @-Rn {:  
  2924     COUNT_INST(I_STCM);
  2925     load_reg( REG_EAX, Rn );
  2926     check_walign32( REG_EAX );
  2927     ADDL_imms_r32( -4, REG_EAX );
  2928     MOVL_rbpdisp_r32( R_GBR, REG_EDX );
  2929     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2930     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2931     sh4_x86.tstate = TSTATE_NONE;
  2932 :}
  2933 STS FPSCR, Rn {:  
  2934     COUNT_INST(I_STSFPSCR);
  2935     check_fpuen();
  2936     MOVL_rbpdisp_r32( R_FPSCR, REG_EAX );
  2937     store_reg( REG_EAX, Rn );
  2938 :}
  2939 STS.L FPSCR, @-Rn {:  
  2940     COUNT_INST(I_STSFPSCRM);
  2941     check_fpuen();
  2942     load_reg( REG_EAX, Rn );
  2943     check_walign32( REG_EAX );
  2944     ADDL_imms_r32( -4, REG_EAX );
  2945     MOVL_rbpdisp_r32( R_FPSCR, REG_EDX );
  2946     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2947     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2948     sh4_x86.tstate = TSTATE_NONE;
  2949 :}
  2950 STS FPUL, Rn {:  
  2951     COUNT_INST(I_STS);
  2952     check_fpuen();
  2953     MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
  2954     store_reg( REG_EAX, Rn );
  2955 :}
  2956 STS.L FPUL, @-Rn {:  
  2957     COUNT_INST(I_STSM);
  2958     check_fpuen();
  2959     load_reg( REG_EAX, Rn );
  2960     check_walign32( REG_EAX );
  2961     ADDL_imms_r32( -4, REG_EAX );
  2962     MOVL_rbpdisp_r32( R_FPUL, REG_EDX );
  2963     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2964     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2965     sh4_x86.tstate = TSTATE_NONE;
  2966 :}
  2967 STS MACH, Rn {:  
  2968     COUNT_INST(I_STS);
  2969     MOVL_rbpdisp_r32( R_MACH, REG_EAX );
  2970     store_reg( REG_EAX, Rn );
  2971 :}
  2972 STS.L MACH, @-Rn {:  
  2973     COUNT_INST(I_STSM);
  2974     load_reg( REG_EAX, Rn );
  2975     check_walign32( REG_EAX );
  2976     ADDL_imms_r32( -4, REG_EAX );
  2977     MOVL_rbpdisp_r32( R_MACH, REG_EDX );
  2978     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2979     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2980     sh4_x86.tstate = TSTATE_NONE;
  2981 :}
  2982 STS MACL, Rn {:  
  2983     COUNT_INST(I_STS);
  2984     MOVL_rbpdisp_r32( R_MACL, REG_EAX );
  2985     store_reg( REG_EAX, Rn );
  2986 :}
  2987 STS.L MACL, @-Rn {:  
  2988     COUNT_INST(I_STSM);
  2989     load_reg( REG_EAX, Rn );
  2990     check_walign32( REG_EAX );
  2991     ADDL_imms_r32( -4, REG_EAX );
  2992     MOVL_rbpdisp_r32( R_MACL, REG_EDX );
  2993     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  2994     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  2995     sh4_x86.tstate = TSTATE_NONE;
  2996 :}
  2997 STS PR, Rn {:  
  2998     COUNT_INST(I_STS);
  2999     MOVL_rbpdisp_r32( R_PR, REG_EAX );
  3000     store_reg( REG_EAX, Rn );
  3001 :}
  3002 STS.L PR, @-Rn {:  
  3003     COUNT_INST(I_STSM);
  3004     load_reg( REG_EAX, Rn );
  3005     check_walign32( REG_EAX );
  3006     ADDL_imms_r32( -4, REG_EAX );
  3007     MOVL_rbpdisp_r32( R_PR, REG_EDX );
  3008     MEM_WRITE_LONG( REG_EAX, REG_EDX );
  3009     ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
  3010     sh4_x86.tstate = TSTATE_NONE;
  3011 :}
  3013 NOP {: 
  3014     COUNT_INST(I_NOP);
   3015     /* Do nothing. Well, we could emit a 0x90 (x86 NOP), but what would really be the point? */ 
  3016 :}
  3017 %%
  3018     sh4_x86.in_delay_slot = DELAY_NONE;
   3019     return 0;
   3020 }
  3023 /**
   3024  * The unwind methods only work if we compiled with DWARF2 frame information
   3025  * (i.e. -fexceptions); otherwise we have to fall back to the direct frame scan.
   3026  */
  3027 #ifdef HAVE_EXCEPTIONS
  3028 #include <unwind.h>
  3030 struct UnwindInfo {
  3031     uintptr_t block_start;
  3032     uintptr_t block_end;
  3033     void *pc;
  3034 };
   3036 static _Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
   3037 {
  3038     struct UnwindInfo *info = arg;
  3039     void *pc = (void *)_Unwind_GetIP(context);
  3040     if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
  3041         info->pc = pc;
   3042         return _URC_NORMAL_STOP;
   3043     }
   3044     return _URC_NO_REASON;
   3045 }
   3047 void *xlat_get_native_pc( void *code, uint32_t code_size )
   3048 {
  3050     struct UnwindInfo info;
  3052     info.pc = NULL;
  3053     info.block_start = (uintptr_t)code;
  3054     info.block_end = info.block_start + code_size;
  3056     _Unwind_Backtrace( xlat_check_frame, &info );
   3057     return info.pc;
   3058 }
  3059 #else
  3060 /* Assume this is an ia32 build - amd64 should always have dwarf information */
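        /* Rough outline of the scan below: translated code runs with %ebp
         * pointing at sh4r+128, so we walk up to 8 saved-frame links from the
         * current %ebp. The frame whose saved-%ebp slot holds that sh4r+128
         * value belongs to translated code, and the word above it (0x4(%eax))
         * is the return address, i.e. the native PC inside the block.
         */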
   3061 void *xlat_get_native_pc( void *code, uint32_t code_size )
   3062 {
  3063     void *result = NULL;
  3064     asm(
  3065         "mov %%ebp, %%eax\n\t"
  3066         "mov $0x8, %%ecx\n\t"
  3067         "mov %1, %%edx\n"
  3068         "frame_loop: test %%eax, %%eax\n\t"
  3069         "je frame_not_found\n\t"
  3070         "cmp (%%eax), %%edx\n\t"
  3071         "je frame_found\n\t"
  3072         "sub $0x1, %%ecx\n\t"
  3073         "je frame_not_found\n\t"
  3074         "movl (%%eax), %%eax\n\t"
  3075         "jmp frame_loop\n"
  3076         "frame_found: movl 0x4(%%eax), %0\n"
  3077         "frame_not_found:"
  3078         : "=r" (result)
  3079         : "r" (((uint8_t *)&sh4r) + 128 )
  3080         : "eax", "ecx", "edx" );
   3081     return result;
   3082 }
  3083 #endif
.