Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 1146:76c5d1064262
prev1125:9dd5dee45db9
next1149:da6124fceec6
author nkeynes
date Mon Nov 08 18:56:11 2010 +1000 (11 years ago)
permissions -rw-r--r--
last change Fix 32-bit non-fastcall build
file annotate diff log raw
nkeynes@359
     1
/**
nkeynes@586
     2
 * $Id$
nkeynes@359
     3
 * 
nkeynes@359
     4
 * SH4 => x86 translation. This version does no real optimization, it just
nkeynes@359
     5
 * outputs straight-line x86 code - it mainly exists to provide a baseline
nkeynes@359
     6
 * to test the optimizing versions against.
nkeynes@359
     7
 *
nkeynes@359
     8
 * Copyright (c) 2007 Nathan Keynes.
nkeynes@359
     9
 *
nkeynes@359
    10
 * This program is free software; you can redistribute it and/or modify
nkeynes@359
    11
 * it under the terms of the GNU General Public License as published by
nkeynes@359
    12
 * the Free Software Foundation; either version 2 of the License, or
nkeynes@359
    13
 * (at your option) any later version.
nkeynes@359
    14
 *
nkeynes@359
    15
 * This program is distributed in the hope that it will be useful,
nkeynes@359
    16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
nkeynes@359
    17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
nkeynes@359
    18
 * GNU General Public License for more details.
nkeynes@359
    19
 */
nkeynes@359
    20
nkeynes@368
    21
#include <assert.h>
nkeynes@388
    22
#include <math.h>
nkeynes@368
    23
nkeynes@380
    24
#ifndef NDEBUG
nkeynes@380
    25
#define DEBUG_JUMPS 1
nkeynes@380
    26
#endif
nkeynes@380
    27
nkeynes@905
    28
#include "lxdream.h"
nkeynes@368
    29
#include "sh4/sh4core.h"
nkeynes@1091
    30
#include "sh4/sh4dasm.h"
nkeynes@368
    31
#include "sh4/sh4trans.h"
nkeynes@671
    32
#include "sh4/sh4stat.h"
nkeynes@388
    33
#include "sh4/sh4mmio.h"
nkeynes@939
    34
#include "sh4/mmu.h"
nkeynes@991
    35
#include "xlat/xltcache.h"
nkeynes@991
    36
#include "xlat/x86/x86op.h"
nkeynes@1091
    37
#include "x86dasm/x86dasm.h"
nkeynes@368
    38
#include "clock.h"
nkeynes@368
    39
nkeynes@368
    40
#define DEFAULT_BACKPATCH_SIZE 4096
nkeynes@368
    41
nkeynes@991
    42
/*
 * Offset of a field of the sh4r structure, biased by -128: the value is the
 * field's byte offset from the start of sh4r, minus 128 (i.e. relative to
 * the address sh4r+128).
 * NOTE(review): presumably the generated code keeps sh4r+128 in its base
 * register so that more fields fit a signed 8-bit displacement - confirm.
 */
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)

/* Biased offsets of the individual SH4 registers */
#define R_T      REG_OFFSET(t)
#define R_Q      REG_OFFSET(q)
#define R_S      REG_OFFSET(s)
#define R_M      REG_OFFSET(m)
#define R_SR     REG_OFFSET(sr)
#define R_GBR    REG_OFFSET(gbr)
#define R_SSR    REG_OFFSET(ssr)
#define R_SPC    REG_OFFSET(spc)
#define R_VBR    REG_OFFSET(vbr)
/* Parenthesised so the "+4" cannot re-associate with surrounding operators */
#define R_MACH   (REG_OFFSET(mac)+4)
#define R_MACL   REG_OFFSET(mac)
#define R_PC     REG_OFFSET(pc)
#define R_NEW_PC REG_OFFSET(new_pc)
#define R_PR     REG_OFFSET(pr)
#define R_SGR    REG_OFFSET(sgr)
#define R_FPUL   REG_OFFSET(fpul)
#define R_FPSCR  REG_OFFSET(fpscr)
#define R_DBR    REG_OFFSET(dbr)
#define R_R(rn)  REG_OFFSET(r[rn])
/* Single-precision registers are stored with each pair swapped, hence the ^1 */
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
/* Double-precision registers: bit 0 of f selects the bank */
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])

/* Values of sh4_x86.in_delay_slot */
#define DELAY_NONE 0
#define DELAY_PC 1
#define DELAY_PC_PR 2

/* Value of sh4_x86.sh4_mode when the mode is not known at translation time */
#define SH4_MODE_UNKNOWN (-1)
nkeynes@1112
    75
nkeynes@586
    76
/**
 * One entry of the per-block back-patch list, as filled in by
 * sh4_x86_add_backpatch().
 */
struct backpatch_record {
    uint32_t fixup_offset; /* Offset of the relocation field from the start of the block's code */
    uint32_t fixup_icount; /* Instruction index of the fixup pc, counted from the block start */
    int32_t exc_code;      /* Exception code; -2 marks a pointer-sized relocation */
};
nkeynes@586
    81
nkeynes@368
    82
/** 
nkeynes@368
    83
 * Struct to manage internal translation state. This state is not saved -
nkeynes@368
    84
 * it is only valid between calls to sh4_translate_begin_block() and
nkeynes@368
    85
 * sh4_translate_end_block()
nkeynes@368
    86
 */
nkeynes@368
    87
struct sh4_x86_state {
nkeynes@590
    88
    int in_delay_slot;
nkeynes@1112
    89
    uint8_t *code;
nkeynes@368
    90
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
nkeynes@409
    91
    gboolean branch_taken; /* true if we branched unconditionally */
nkeynes@901
    92
    gboolean double_prec; /* true if FPU is in double-precision mode */
nkeynes@903
    93
    gboolean double_size; /* true if FPU is in double-size mode */
nkeynes@903
    94
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
nkeynes@408
    95
    uint32_t block_start_pc;
nkeynes@547
    96
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
nkeynes@1112
    97
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
nkeynes@417
    98
    int tstate;
nkeynes@368
    99
nkeynes@1125
   100
    /* mode settings */
nkeynes@586
   101
    gboolean tlb_on; /* True if tlb translation is active */
nkeynes@1125
   102
    struct mem_region_fn **priv_address_space;
nkeynes@1125
   103
    struct mem_region_fn **user_address_space;
nkeynes@586
   104
nkeynes@1125
   105
    /* Instrumentation */
nkeynes@1125
   106
    xlat_block_begin_callback_t begin_callback;
nkeynes@1125
   107
    xlat_block_end_callback_t end_callback;
nkeynes@1125
   108
    gboolean fastmem;
nkeynes@1125
   109
    
nkeynes@368
   110
    /* Allocated memory for the (block-wide) back-patch list */
nkeynes@586
   111
    struct backpatch_record *backpatch_list;
nkeynes@368
   112
    uint32_t backpatch_posn;
nkeynes@368
   113
    uint32_t backpatch_size;
nkeynes@368
   114
};
nkeynes@368
   115
nkeynes@368
   116
static struct sh4_x86_state sh4_x86;
nkeynes@368
   117
nkeynes@388
   118
/*
 * Largest and smallest 32-bit signed integer bit patterns.
 * NOTE(review): their users are outside this part of the file - presumably
 * the float-to-integer saturation code; confirm.
 */
static uint32_t max_int = 0x7FFFFFFF;
static uint32_t min_int = 0x80000000;

/* x87 control word handling */
static uint32_t save_fcw;           /* save value for fpu control word */
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
nkeynes@386
   122
nkeynes@1091
   123
static struct x86_symbol x86_symbol_table[] = {
nkeynes@1091
   124
    { "sh4r+128", ((char *)&sh4r)+128 },
nkeynes@1091
   125
    { "sh4_cpu_period", &sh4_cpu_period },
nkeynes@1091
   126
    { "sh4_address_space", NULL },
nkeynes@1125
   127
    { "sh4_user_address_space", NULL },
nkeynes@1120
   128
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
nkeynes@1091
   129
    { "sh4_write_fpscr", sh4_write_fpscr },
nkeynes@1091
   130
    { "sh4_write_sr", sh4_write_sr },
nkeynes@1091
   131
    { "sh4_read_sr", sh4_read_sr },
nkeynes@1091
   132
    { "sh4_sleep", sh4_sleep },
nkeynes@1091
   133
    { "sh4_fsca", sh4_fsca },
nkeynes@1091
   134
    { "sh4_ftrv", sh4_ftrv },
nkeynes@1091
   135
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
nkeynes@1091
   136
    { "sh4_execute_instruction", sh4_execute_instruction },
nkeynes@1091
   137
    { "signsat48", signsat48 },
nkeynes@1091
   138
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
nkeynes@1091
   139
    { "xlat_get_code", xlat_get_code }
nkeynes@1091
   140
};
nkeynes@1091
   141
nkeynes@1091
   142
nkeynes@903
   143
gboolean is_sse3_supported()
nkeynes@903
   144
{
nkeynes@903
   145
    uint32_t features;
nkeynes@903
   146
    
nkeynes@903
   147
    __asm__ __volatile__(
nkeynes@903
   148
        "mov $0x01, %%eax\n\t"
nkeynes@908
   149
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
nkeynes@903
   150
    return (features & 1) ? TRUE : FALSE;
nkeynes@903
   151
}
nkeynes@903
   152
nkeynes@1125
   153
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
nkeynes@1125
   154
{
nkeynes@1125
   155
    sh4_x86.priv_address_space = priv;
nkeynes@1125
   156
    sh4_x86.user_address_space = user;
nkeynes@1125
   157
    x86_symbol_table[2].ptr = priv;
nkeynes@1125
   158
    x86_symbol_table[3].ptr = user;
nkeynes@1125
   159
}
nkeynes@1125
   160
nkeynes@669
   161
void sh4_translate_init(void)
nkeynes@368
   162
{
nkeynes@368
   163
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
nkeynes@586
   164
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
nkeynes@1125
   165
    sh4_x86.begin_callback = NULL;
nkeynes@1125
   166
    sh4_x86.end_callback = NULL;
nkeynes@1125
   167
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
nkeynes@1125
   168
    sh4_x86.fastmem = TRUE;
nkeynes@903
   169
    sh4_x86.sse3_enabled = is_sse3_supported();
nkeynes@1091
   170
    x86_disasm_init();
nkeynes@1091
   171
    x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
nkeynes@368
   172
}
nkeynes@368
   173
nkeynes@1125
   174
/**
 * Install the instrumentation callbacks.
 * @param begin stored as the block-begin callback; sh4_translate_begin_block()
 *        emits a call to it when it is non-NULL
 * @param end stored as the block-end callback; the block exit paths jump
 *        through it when it is non-NULL
 */
void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.end_callback = end;
    sh4_x86.begin_callback = begin;
}
nkeynes@1125
   179
nkeynes@1125
   180
/**
 * Set the translator's fastmem flag.
 * @param flag new value of sh4_x86.fastmem
 */
void sh4_translate_set_fastmem( gboolean flag )
{
    sh4_x86.fastmem = flag;
}
nkeynes@1125
   184
nkeynes@1091
   185
/**
nkeynes@1091
   186
 * Disassemble the given translated code block, and it's source SH4 code block
nkeynes@1091
   187
 * side-by-side. The current native pc will be marked if non-null.
nkeynes@1091
   188
 */
nkeynes@1091
   189
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
nkeynes@1091
   190
{
nkeynes@1091
   191
    char buf[256];
nkeynes@1091
   192
    char op[256];
nkeynes@1091
   193
nkeynes@1091
   194
    uintptr_t target_start = (uintptr_t)code, target_pc;
nkeynes@1091
   195
    uintptr_t target_end = target_start + xlat_get_code_size(code);
nkeynes@1091
   196
    uint32_t source_pc = source_start;
nkeynes@1091
   197
    uint32_t source_end = source_pc;
nkeynes@1091
   198
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
nkeynes@1092
   199
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;
nkeynes@1091
   200
nkeynes@1091
   201
    for( target_pc = target_start; target_pc < target_end;  ) {
nkeynes@1091
   202
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
nkeynes@1092
   203
#if SIZEOF_VOID_P == 8
nkeynes@1092
   204
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
nkeynes@1092
   205
                      target_pc, op, buf );
nkeynes@1092
   206
#else
nkeynes@1112
   207
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
nkeynes@1092
   208
                      target_pc, op, buf );
nkeynes@1092
   209
#endif        
nkeynes@1091
   210
        if( source_recov_table < source_recov_end && 
nkeynes@1091
   211
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
nkeynes@1091
   212
            source_recov_table++;
nkeynes@1091
   213
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
nkeynes@1091
   214
                source_end = source_start + (source_recov_table->sh4_icount)*2;
nkeynes@1091
   215
        }
nkeynes@1091
   216
nkeynes@1091
   217
        if( source_pc < source_end ) {
nkeynes@1091
   218
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
nkeynes@1091
   219
            fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
nkeynes@1091
   220
            source_pc = source_pc2;
nkeynes@1091
   221
        } else {
nkeynes@1091
   222
            fprintf( out, "\n" );
nkeynes@1091
   223
        }
nkeynes@1091
   224
        
nkeynes@1091
   225
        target_pc = pc2;
nkeynes@1091
   226
    }
nkeynes@1091
   227
    
nkeynes@1091
   228
    while( source_pc < source_end ) {
nkeynes@1091
   229
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
nkeynes@1091
   230
        fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
nkeynes@1091
   231
        source_pc = source_pc2;
nkeynes@1091
   232
    }
nkeynes@1091
   233
}
nkeynes@368
   234
nkeynes@586
   235
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
nkeynes@368
   236
{
nkeynes@991
   237
    int reloc_size = 4;
nkeynes@991
   238
    
nkeynes@991
   239
    if( exc_code == -2 ) {
nkeynes@991
   240
        reloc_size = sizeof(void *);
nkeynes@991
   241
    }
nkeynes@991
   242
    
nkeynes@368
   243
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
nkeynes@368
   244
	sh4_x86.backpatch_size <<= 1;
nkeynes@586
   245
	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
nkeynes@586
   246
					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
nkeynes@368
   247
	assert( sh4_x86.backpatch_list != NULL );
nkeynes@368
   248
    }
nkeynes@586
   249
    if( sh4_x86.in_delay_slot ) {
nkeynes@586
   250
	fixup_pc -= 2;
nkeynes@586
   251
    }
nkeynes@991
   252
nkeynes@604
   253
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
nkeynes@991
   254
	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
nkeynes@586
   255
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
nkeynes@586
   256
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
nkeynes@586
   257
    sh4_x86.backpatch_posn++;
nkeynes@368
   258
}
nkeynes@368
   259
nkeynes@991
   260
/*
 * Values of sh4_x86.tstate: TSTATE_NONE, or the x86 condition code that
 * currently reflects T.
 */
#define TSTATE_NONE (-1)
#define TSTATE_O    X86_COND_O
#define TSTATE_C    X86_COND_C
#define TSTATE_E    X86_COND_E
#define TSTATE_NE   X86_COND_NE
#define TSTATE_G    X86_COND_G
#define TSTATE_GE   X86_COND_GE
#define TSTATE_A    X86_COND_A
#define TSTATE_AE   X86_COND_AE

/*
 * Forward 8-bit jumps. MARK_JMP8 declares a local pointing at the last byte
 * emitted; the *_label macros below invoke it right after emitting a jump
 * with displacement -1. JMP_TARGET then adds the distance from that byte to
 * the current output position, so -1 + (xlat_output - mark) is the distance
 * from the end of the jump instruction to the target.
 */
#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)

/* Convenience instructions */
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
/* Emits a 32-bit conditional jump and records it on the back-patch list
 * with the given exception code; uses the `pc` visible at the point of use. */
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)

/** Branch if T is set (either in the current cflags, or in sh4r.t) */
#define JT_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)

/** Branch if T is clear (either in the current cflags or in sh4r.t) */
#define JF_label(label) if( sh4_x86.tstate == TSTATE_NONE ) { \
        CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; } \
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
nkeynes@359
   304
nkeynes@939
   305
nkeynes@991
   306
/* Move a general SH4 register to/from an x86 register */
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )

/**
 * Load an FR register (single-precision floating point) into an integer x86
 * register (eg for register-to-register moves)
 */
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )

/**
 * Load the low half of a DR register (DR or XD) into an integer x86 register
 */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[(frm)&1][(frm)|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[(frm)&1][(frm)&0x0E]), reg )

/**
 * Store an FR register (single-precision floating point) from an integer x86
 * register (eg for register-to-register moves)
 */
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )

/* Store one half of a DR register (DR or XD) from an integer x86 register */
#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[(frm)&1][(frm)|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[(frm)&1][(frm)&0x0E]) )


/* Push onto / pop from the x87 stack */
#define push_fpul()  FLDF_rbpdisp(R_FPUL)
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )

/* Per-instruction statistics hook; emits nothing unless ENABLE_SH4STATS is set.
 * The emitted call invalidates the cached T state. */
#ifdef ENABLE_SH4STATS
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
#else
#define COUNT_INST(id)
#endif
nkeynes@377
   349
nkeynes@374
   350
nkeynes@368
   351
/* Exception checks - Note that all exception checks will clobber EAX */

/*
 * Privilege check, resolved at translation time from the mirrored mode: if
 * SR_MD is clear, emit an illegal-instruction exit (the slot-illegal variant
 * when in a delay slot), mark the block as ended and return 2 from the
 * enclosing function. Uses the `pc` visible at the point of use.
 */
#define check_priv( ) \
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
        if( sh4_x86.in_delay_slot ) { \
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2) ); \
        } else { \
            exit_block_exc(EXC_ILLEGAL, pc); \
        } \
        sh4_x86.branch_taken = TRUE; \
        sh4_x86.in_delay_slot = DELAY_NONE; \
        return 2; \
    }

/*
 * FPU-disabled check, emitted at most once per block: tests SR_FD in the
 * runtime SR value and raises the (slot) FPU-disabled exception if set.
 */
#define check_fpuen( ) \
    if( !sh4_x86.fpuen_checked ) {\
        sh4_x86.fpuen_checked = TRUE;\
        MOVL_rbpdisp_r32( R_SR, REG_EAX );\
        ANDL_imms_r32( SR_FD, REG_EAX );\
        if( sh4_x86.in_delay_slot ) {\
            JNE_exc(EXC_SLOT_FPU_DISABLED);\
        } else {\
            JNE_exc(EXC_FPU_DISABLED);\
        }\
        sh4_x86.tstate = TSTATE_NONE; \
    }

/*
 * Alignment checks on an address held in x86reg: raise a data address error
 * when any of the low 1, 2 or 3 bits is set.
 * NOTE(review): the check_walign* macros carry a trailing ';' while the
 * check_ralign* ones do not; kept as found because the call sites are not
 * visible from here.
 */
#define check_ralign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign16( x86reg ) \
    TESTL_imms_r32( 0x00000001, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign32( x86reg ) \
    TESTL_imms_r32( 0x00000003, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

#define check_ralign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_READ)

#define check_walign64( x86reg ) \
    TESTL_imms_r32( 0x00000007, x86reg ); \
    JNE_exc(EXC_DATA_ADDR_WRITE);

/* Address space table matching the mode the block is being translated for */
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)

/* Expands to nothing */
#define UNDEF(ir)
nkeynes@939
   405
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
nkeynes@939
   406
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
nkeynes@586
   407
 */
nkeynes@941
   408
#ifdef HAVE_FRAME_ADDRESS
nkeynes@995
   409
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   410
{
nkeynes@1004
   411
    decode_address(address_space(), addr_reg);
nkeynes@1112
   412
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
nkeynes@995
   413
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
nkeynes@995
   414
    } else {
nkeynes@995
   415
        if( addr_reg != REG_ARG1 ) {
nkeynes@995
   416
            MOVL_r32_r32( addr_reg, REG_ARG1 );
nkeynes@995
   417
        }
nkeynes@995
   418
        MOVP_immptr_rptr( 0, REG_ARG2 );
nkeynes@995
   419
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
nkeynes@995
   420
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
nkeynes@995
   421
    }
nkeynes@995
   422
    if( value_reg != REG_RESULT1 ) { 
nkeynes@995
   423
        MOVL_r32_r32( REG_RESULT1, value_reg );
nkeynes@995
   424
    }
nkeynes@995
   425
}
nkeynes@995
   426
nkeynes@995
   427
static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   428
{
nkeynes@1004
   429
    decode_address(address_space(), addr_reg);
nkeynes@1112
   430
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
nkeynes@995
   431
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
nkeynes@995
   432
    } else {
nkeynes@995
   433
        if( value_reg != REG_ARG2 ) {
nkeynes@995
   434
            MOVL_r32_r32( value_reg, REG_ARG2 );
nkeynes@995
   435
	}        
nkeynes@995
   436
        if( addr_reg != REG_ARG1 ) {
nkeynes@995
   437
            MOVL_r32_r32( addr_reg, REG_ARG1 );
nkeynes@995
   438
        }
nkeynes@995
   439
#if MAX_REG_ARG > 2        
nkeynes@995
   440
        MOVP_immptr_rptr( 0, REG_ARG3 );
nkeynes@995
   441
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
nkeynes@995
   442
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
nkeynes@995
   443
#else
nkeynes@995
   444
        MOVL_imm32_rspdisp( 0, 0 );
nkeynes@995
   445
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
nkeynes@995
   446
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
nkeynes@995
   447
#endif
nkeynes@995
   448
    }
nkeynes@995
   449
}
nkeynes@995
   450
#else
nkeynes@995
   451
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   452
{
nkeynes@1004
   453
    decode_address(address_space(), addr_reg);
nkeynes@995
   454
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
nkeynes@995
   455
    if( value_reg != REG_RESULT1 ) {
nkeynes@995
   456
        MOVL_r32_r32( REG_RESULT1, value_reg );
nkeynes@995
   457
    }
nkeynes@995
   458
}     
nkeynes@995
   459
nkeynes@996
   460
static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   461
{
nkeynes@1004
   462
    decode_address(address_space(), addr_reg);
nkeynes@995
   463
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
nkeynes@995
   464
}
nkeynes@941
   465
#endif
nkeynes@939
   466
                
nkeynes@995
   467
/* Offset of the named function pointer within struct mem_region_fn */
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )

/* Memory access helpers; each uses the `pc` visible at the point of use */
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)

/* Emit a slot-illegal exception exit for the preceding instruction, leave
 * delay-slot state and return 2 from the enclosing function */
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
nkeynes@539
   478
nkeynes@901
   479
/**
 * Start translating a new block: reset the per-block translator state,
 * snapshot the mode settings the block is translated under, then emit the
 * block prologue (plus a call to the begin callback, if one is installed).
 *
 * @param pc SH4 address of the first instruction of the block
 */
void sh4_translate_begin_block( sh4addr_t pc )
{
    sh4_x86.code = xlat_output;
    /* in_delay_slot holds a DELAY_* value rather than a boolean */
    sh4_x86.in_delay_slot = DELAY_NONE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.backpatch_posn = 0;
    sh4_x86.block_start_pc = pc;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.tstate = TSTATE_NONE;
    /* FPU mode bits are taken from the current FPSCR */
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
}
nkeynes@901
   497
nkeynes@901
   498
nkeynes@593
   499
uint32_t sh4_translate_end_block_size()
nkeynes@593
   500
{
nkeynes@596
   501
    if( sh4_x86.backpatch_posn <= 3 ) {
nkeynes@1146
   502
        return EPILOGUE_SIZE + (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
nkeynes@596
   503
    } else {
nkeynes@1146
   504
        return EPILOGUE_SIZE + (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
nkeynes@596
   505
    }
nkeynes@593
   506
}
nkeynes@593
   507
nkeynes@593
   508
nkeynes@590
   509
/**
nkeynes@590
   510
 * Embed a breakpoint into the generated code
nkeynes@590
   511
 */
nkeynes@586
   512
void sh4_translate_emit_breakpoint( sh4vma_t pc )
nkeynes@586
   513
{
nkeynes@995
   514
    MOVL_imm32_r32( pc, REG_EAX );
nkeynes@995
   515
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
nkeynes@875
   516
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@586
   517
}
nkeynes@590
   518
nkeynes@601
   519
nkeynes@601
   520
/* True when pc is not in the icache. Parenthesised so the negation cannot
 * bind to neighbouring operators at the point of use. */
#define UNTRANSLATABLE(pc) (!IS_IN_ICACHE(pc))

/** Offset of xlat_sh4_mode field relative to the code pointer */
#define XLAT_SH4_MODE_CODE_OFFSET  (uint32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
nkeynes@1112
   524
nkeynes@1112
   525
/**
nkeynes@1112
   526
 * Test if the loaded target code pointer in %eax is valid, and if so jump
nkeynes@1112
   527
 * directly into it, bypassing the normal exit.
nkeynes@1112
   528
 */
nkeynes@1112
   529
static void jump_next_block()
nkeynes@1112
   530
{
nkeynes@1112
   531
	TESTP_rptr_rptr(REG_EAX, REG_EAX);
nkeynes@1112
   532
	JE_label(nocode);
nkeynes@1112
   533
	if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
nkeynes@1112
   534
	    /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
nkeynes@1112
   535
	    MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
nkeynes@1112
   536
	    CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
nkeynes@1112
   537
	} else {
nkeynes@1112
   538
	    CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
nkeynes@1112
   539
	}
nkeynes@1112
   540
	JNE_label(wrongmode);
nkeynes@1112
   541
	LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
nkeynes@1125
   542
	if( sh4_x86.end_callback ) {
nkeynes@1125
   543
	    /* Note this does leave the stack out of alignment, but doesn't matter
nkeynes@1125
   544
	     * for what we're currently using it for.
nkeynes@1125
   545
	     */
nkeynes@1125
   546
	    PUSH_r32(REG_EAX);
nkeynes@1125
   547
	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
nkeynes@1125
   548
	    JMP_rptr(REG_ECX);
nkeynes@1125
   549
	} else {
nkeynes@1125
   550
	    JMP_rptr(REG_EAX);
nkeynes@1125
   551
	}
nkeynes@1112
   552
	JMP_TARGET(nocode); JMP_TARGET(wrongmode);
nkeynes@1112
   553
}
nkeynes@1112
   554
nkeynes@1125
   555
static void exit_block()
nkeynes@1125
   556
{
nkeynes@1125
   557
	emit_epilogue();
nkeynes@1125
   558
	if( sh4_x86.end_callback ) {
nkeynes@1125
   559
	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
nkeynes@1125
   560
	    JMP_rptr(REG_ECX);
nkeynes@1125
   561
	} else {
nkeynes@1125
   562
	    RET();
nkeynes@1125
   563
	}
nkeynes@1125
   564
}
nkeynes@1125
   565
nkeynes@590
   566
/**
nkeynes@995
   567
 * Exit the block with sh4r.pc already written
nkeynes@995
   568
 */
nkeynes@995
   569
void exit_block_pcset( sh4addr_t pc )
nkeynes@995
   570
{
nkeynes@995
   571
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   572
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   573
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@1112
   574
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   575
    JBE_label(exitloop);
nkeynes@995
   576
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
nkeynes@995
   577
    if( sh4_x86.tlb_on ) {
nkeynes@995
   578
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
nkeynes@995
   579
    } else {
nkeynes@995
   580
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
nkeynes@995
   581
    }
nkeynes@1112
   582
    
nkeynes@1112
   583
    jump_next_block();
nkeynes@1112
   584
    JMP_TARGET(exitloop);
nkeynes@995
   585
    exit_block();
nkeynes@995
   586
}
nkeynes@995
   587
nkeynes@995
   588
/**
nkeynes@995
   589
 * Exit the block with sh4r.new_pc written with the target pc
nkeynes@995
   590
 */
nkeynes@995
   591
void exit_block_newpcset( sh4addr_t pc )
nkeynes@995
   592
{
nkeynes@995
   593
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   594
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   595
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@995
   596
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
nkeynes@995
   597
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
nkeynes@1112
   598
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   599
    JBE_label(exitloop);
nkeynes@995
   600
    if( sh4_x86.tlb_on ) {
nkeynes@995
   601
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
nkeynes@995
   602
    } else {
nkeynes@995
   603
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
nkeynes@995
   604
    }
nkeynes@1112
   605
	
nkeynes@1112
   606
	jump_next_block();
nkeynes@1112
   607
    JMP_TARGET(exitloop);
nkeynes@995
   608
    exit_block();
nkeynes@995
   609
}
nkeynes@995
   610
nkeynes@995
   611
nkeynes@995
   612
/**
nkeynes@995
   613
 * Exit the block to an absolute PC
nkeynes@995
   614
 */
nkeynes@995
   615
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
nkeynes@995
   616
{
nkeynes@1112
   617
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   618
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   619
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@1112
   620
nkeynes@1112
   621
    MOVL_imm32_r32( pc, REG_ARG1 );
nkeynes@1112
   622
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
nkeynes@1112
   623
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   624
    JBE_label(exitloop);
nkeynes@1112
   625
nkeynes@995
   626
    if( IS_IN_ICACHE(pc) ) {
nkeynes@995
   627
        MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
nkeynes@995
   628
        ANDP_imms_rptr( -4, REG_EAX );
nkeynes@995
   629
    } else if( sh4_x86.tlb_on ) {
nkeynes@1112
   630
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
nkeynes@995
   631
    } else {
nkeynes@1112
   632
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
nkeynes@995
   633
    }
nkeynes@1112
   634
    jump_next_block();
nkeynes@1112
   635
    JMP_TARGET(exitloop);
nkeynes@995
   636
    exit_block();
nkeynes@995
   637
}
nkeynes@995
   638
nkeynes@995
   639
/**
nkeynes@995
   640
 * Exit the block to a relative PC
nkeynes@995
   641
 */
nkeynes@995
   642
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
nkeynes@995
   643
{
nkeynes@1112
   644
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   645
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   646
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@1112
   647
nkeynes@1112
   648
	if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
nkeynes@1112
   649
	    /* Special case for tight loops - the PC doesn't change, and
nkeynes@1112
   650
	     * we already know the target address. Just check events pending before
nkeynes@1112
   651
	     * looping.
nkeynes@1112
   652
	     */
nkeynes@1112
   653
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   654
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
nkeynes@1112
   655
        JCC_cc_prerel(X86_COND_A, backdisp);
nkeynes@1112
   656
	} else {
nkeynes@1112
   657
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
nkeynes@1112
   658
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
nkeynes@1112
   659
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
nkeynes@1112
   660
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   661
        JBE_label(exitloop2);
nkeynes@1112
   662
nkeynes@1112
   663
        if( IS_IN_ICACHE(pc) ) {
nkeynes@1112
   664
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
nkeynes@1112
   665
            ANDP_imms_rptr( -4, REG_EAX );
nkeynes@1112
   666
        } else if( sh4_x86.tlb_on ) {
nkeynes@1112
   667
            CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
nkeynes@1112
   668
        } else {
nkeynes@1112
   669
            CALL1_ptr_r32(xlat_get_code, REG_ARG1);
nkeynes@1112
   670
        }
nkeynes@1112
   671
        jump_next_block();
nkeynes@1112
   672
        JMP_TARGET(exitloop2);
nkeynes@995
   673
    }
nkeynes@995
   674
    exit_block();
nkeynes@995
   675
}
nkeynes@995
   676
nkeynes@995
   677
/**
nkeynes@995
   678
 * Exit unconditionally with a general exception
nkeynes@995
   679
 */
nkeynes@995
   680
void exit_block_exc( int code, sh4addr_t pc )
nkeynes@995
   681
{
nkeynes@995
   682
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
nkeynes@995
   683
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
nkeynes@995
   684
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@995
   685
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@995
   686
    MOVL_imm32_r32( code, REG_ARG1 );
nkeynes@995
   687
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
nkeynes@995
   688
    exit_block();
nkeynes@995
   689
}    
nkeynes@995
   690
nkeynes@995
   691
/**
nkeynes@590
   692
 * Embed a call to sh4_execute_instruction for situations that we
nkeynes@601
   693
 * can't translate (just page-crossing delay slots at the moment).
nkeynes@601
   694
 * Caller is responsible for setting new_pc before calling this function.
nkeynes@601
   695
 *
nkeynes@601
   696
 * Performs:
nkeynes@601
   697
 *   Set PC = endpc
nkeynes@601
   698
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
nkeynes@601
   699
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
nkeynes@601
   700
 *   Call sh4_execute_instruction
nkeynes@601
   701
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
nkeynes@590
   702
 */
nkeynes@601
   703
void exit_block_emu( sh4vma_t endpc )
nkeynes@590
   704
{
nkeynes@995
   705
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
nkeynes@991
   706
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
nkeynes@586
   707
    
nkeynes@995
   708
    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
nkeynes@991
   709
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
nkeynes@995
   710
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
nkeynes@995
   711
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
nkeynes@590
   712
nkeynes@1112
   713
    CALL_ptr( sh4_execute_instruction );
nkeynes@926
   714
    exit_block();
nkeynes@590
   715
} 
nkeynes@539
   716
nkeynes@359
   717
/**
nkeynes@995
   718
 * Write the block trailer (exception handling block)
nkeynes@995
   719
 */
nkeynes@995
   720
void sh4_translate_end_block( sh4addr_t pc ) {
nkeynes@995
   721
    if( sh4_x86.branch_taken == FALSE ) {
nkeynes@995
   722
        // Didn't exit unconditionally already, so write the termination here
nkeynes@995
   723
        exit_block_rel( pc, pc );
nkeynes@995
   724
    }
nkeynes@995
   725
    if( sh4_x86.backpatch_posn != 0 ) {
nkeynes@995
   726
        unsigned int i;
nkeynes@995
   727
        // Exception raised - cleanup and exit
nkeynes@995
   728
        uint8_t *end_ptr = xlat_output;
nkeynes@995
   729
        MOVL_r32_r32( REG_EDX, REG_ECX );
nkeynes@995
   730
        ADDL_r32_r32( REG_EDX, REG_ECX );
nkeynes@995
   731
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
nkeynes@995
   732
        MOVL_moffptr_eax( &sh4_cpu_period );
nkeynes@995
   733
        MULL_r32( REG_EDX );
nkeynes@995
   734
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
nkeynes@995
   735
        exit_block();
nkeynes@995
   736
nkeynes@995
   737
        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
nkeynes@995
   738
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
nkeynes@995
   739
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
nkeynes@995
   740
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
nkeynes@995
   741
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
nkeynes@995
   742
                } else {
nkeynes@995
   743
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
nkeynes@995
   744
                }
nkeynes@995
   745
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
nkeynes@995
   746
                int rel = end_ptr - xlat_output;
nkeynes@995
   747
                JMP_prerel(rel);
nkeynes@995
   748
            } else {
nkeynes@995
   749
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
nkeynes@995
   750
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
nkeynes@995
   751
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
nkeynes@995
   752
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
nkeynes@995
   753
                int rel = end_ptr - xlat_output;
nkeynes@995
   754
                JMP_prerel(rel);
nkeynes@995
   755
            }
nkeynes@995
   756
        }
nkeynes@995
   757
    }
nkeynes@995
   758
}
nkeynes@539
   759
nkeynes@359
   760
/**
nkeynes@359
   761
 * Translate a single instruction. Delayed branches are handled specially
nkeynes@359
   762
 * by translating both branch and delayed instruction as a single unit.
nkeynes@359
   763
 * 
nkeynes@586
   764
 * The instruction MUST be in the icache (assert check)
nkeynes@359
   765
 *
nkeynes@359
   766
 * @return true if the instruction marks the end of a basic block
nkeynes@359
   767
 * (e.g. a branch).
nkeynes@359
   768
 */
nkeynes@590
   769
uint32_t sh4_translate_instruction( sh4vma_t pc )
nkeynes@359
   770
{
nkeynes@388
   771
    uint32_t ir;
nkeynes@586
   772
    /* Read instruction from icache */
nkeynes@586
   773
    assert( IS_IN_ICACHE(pc) );
nkeynes@586
   774
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);
nkeynes@586
   775
    
nkeynes@586
   776
    if( !sh4_x86.in_delay_slot ) {
nkeynes@596
   777
	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
nkeynes@388
   778
    }
nkeynes@1003
   779
    
nkeynes@1003
   780
    /* check for breakpoints at this pc */
nkeynes@1003
   781
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
nkeynes@1003
   782
        if( sh4_breakpoints[i].address == pc ) {
nkeynes@1003
   783
            sh4_translate_emit_breakpoint(pc);
nkeynes@1003
   784
            break;
nkeynes@1003
   785
        }
nkeynes@571
   786
    }
nkeynes@359
   787
%%
nkeynes@359
   788
/* ALU operations */
nkeynes@359
   789
ADD Rm, Rn {:
nkeynes@671
   790
    COUNT_INST(I_ADD);
nkeynes@991
   791
    load_reg( REG_EAX, Rm );
nkeynes@991
   792
    load_reg( REG_ECX, Rn );
nkeynes@991
   793
    ADDL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
   794
    store_reg( REG_ECX, Rn );
nkeynes@417
   795
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   796
:}
nkeynes@359
   797
ADD #imm, Rn {:  
nkeynes@671
   798
    COUNT_INST(I_ADDI);
nkeynes@991
   799
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
nkeynes@417
   800
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   801
:}
nkeynes@359
   802
ADDC Rm, Rn {:
nkeynes@671
   803
    COUNT_INST(I_ADDC);
nkeynes@417
   804
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@911
   805
        LDC_t();
nkeynes@417
   806
    }
nkeynes@991
   807
    load_reg( REG_EAX, Rm );
nkeynes@991
   808
    load_reg( REG_ECX, Rn );
nkeynes@991
   809
    ADCL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
   810
    store_reg( REG_ECX, Rn );
nkeynes@359
   811
    SETC_t();
nkeynes@417
   812
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
   813
:}
nkeynes@359
   814
ADDV Rm, Rn {:
nkeynes@671
   815
    COUNT_INST(I_ADDV);
nkeynes@991
   816
    load_reg( REG_EAX, Rm );
nkeynes@991
   817
    load_reg( REG_ECX, Rn );
nkeynes@991
   818
    ADDL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
   819
    store_reg( REG_ECX, Rn );
nkeynes@359
   820
    SETO_t();
nkeynes@417
   821
    sh4_x86.tstate = TSTATE_O;
nkeynes@359
   822
:}
nkeynes@359
   823
AND Rm, Rn {:
nkeynes@671
   824
    COUNT_INST(I_AND);
nkeynes@991
   825
    load_reg( REG_EAX, Rm );
nkeynes@991
   826
    load_reg( REG_ECX, Rn );
nkeynes@991
   827
    ANDL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
   828
    store_reg( REG_ECX, Rn );
nkeynes@417
   829
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   830
:}
nkeynes@359
   831
AND #imm, R0 {:  
nkeynes@671
   832
    COUNT_INST(I_ANDI);
nkeynes@991
   833
    load_reg( REG_EAX, 0 );
nkeynes@991
   834
    ANDL_imms_r32(imm, REG_EAX); 
nkeynes@991
   835
    store_reg( REG_EAX, 0 );
nkeynes@417
   836
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   837
:}
nkeynes@359
   838
AND.B #imm, @(R0, GBR) {: 
nkeynes@671
   839
    COUNT_INST(I_ANDB);
nkeynes@991
   840
    load_reg( REG_EAX, 0 );
nkeynes@991
   841
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
   842
    MOVL_r32_rspdisp(REG_EAX, 0);
nkeynes@991
   843
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
nkeynes@991
   844
    MOVL_rspdisp_r32(0, REG_EAX);
nkeynes@991
   845
    ANDL_imms_r32(imm, REG_EDX );
nkeynes@991
   846
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
   847
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   848
:}
nkeynes@359
   849
CMP/EQ Rm, Rn {:  
nkeynes@671
   850
    COUNT_INST(I_CMPEQ);
nkeynes@991
   851
    load_reg( REG_EAX, Rm );
nkeynes@991
   852
    load_reg( REG_ECX, Rn );
nkeynes@991
   853
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   854
    SETE_t();
nkeynes@417
   855
    sh4_x86.tstate = TSTATE_E;
nkeynes@359
   856
:}
nkeynes@359
   857
CMP/EQ #imm, R0 {:  
nkeynes@671
   858
    COUNT_INST(I_CMPEQI);
nkeynes@991
   859
    load_reg( REG_EAX, 0 );
nkeynes@991
   860
    CMPL_imms_r32(imm, REG_EAX);
nkeynes@359
   861
    SETE_t();
nkeynes@417
   862
    sh4_x86.tstate = TSTATE_E;
nkeynes@359
   863
:}
nkeynes@359
   864
CMP/GE Rm, Rn {:  
nkeynes@671
   865
    COUNT_INST(I_CMPGE);
nkeynes@991
   866
    load_reg( REG_EAX, Rm );
nkeynes@991
   867
    load_reg( REG_ECX, Rn );
nkeynes@991
   868
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   869
    SETGE_t();
nkeynes@417
   870
    sh4_x86.tstate = TSTATE_GE;
nkeynes@359
   871
:}
nkeynes@359
   872
CMP/GT Rm, Rn {: 
nkeynes@671
   873
    COUNT_INST(I_CMPGT);
nkeynes@991
   874
    load_reg( REG_EAX, Rm );
nkeynes@991
   875
    load_reg( REG_ECX, Rn );
nkeynes@991
   876
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   877
    SETG_t();
nkeynes@417
   878
    sh4_x86.tstate = TSTATE_G;
nkeynes@359
   879
:}
nkeynes@359
   880
CMP/HI Rm, Rn {:  
nkeynes@671
   881
    COUNT_INST(I_CMPHI);
nkeynes@991
   882
    load_reg( REG_EAX, Rm );
nkeynes@991
   883
    load_reg( REG_ECX, Rn );
nkeynes@991
   884
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   885
    SETA_t();
nkeynes@417
   886
    sh4_x86.tstate = TSTATE_A;
nkeynes@359
   887
:}
nkeynes@359
   888
CMP/HS Rm, Rn {: 
nkeynes@671
   889
    COUNT_INST(I_CMPHS);
nkeynes@991
   890
    load_reg( REG_EAX, Rm );
nkeynes@991
   891
    load_reg( REG_ECX, Rn );
nkeynes@991
   892
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   893
    SETAE_t();
nkeynes@417
   894
    sh4_x86.tstate = TSTATE_AE;
nkeynes@359
   895
 :}
nkeynes@359
   896
CMP/PL Rn {: 
nkeynes@671
   897
    COUNT_INST(I_CMPPL);
nkeynes@991
   898
    load_reg( REG_EAX, Rn );
nkeynes@991
   899
    CMPL_imms_r32( 0, REG_EAX );
nkeynes@359
   900
    SETG_t();
nkeynes@417
   901
    sh4_x86.tstate = TSTATE_G;
nkeynes@359
   902
:}
nkeynes@359
   903
CMP/PZ Rn {:  
nkeynes@671
   904
    COUNT_INST(I_CMPPZ);
nkeynes@991
   905
    load_reg( REG_EAX, Rn );
nkeynes@991
   906
    CMPL_imms_r32( 0, REG_EAX );
nkeynes@359
   907
    SETGE_t();
nkeynes@417
   908
    sh4_x86.tstate = TSTATE_GE;
nkeynes@359
   909
:}
nkeynes@361
   910
CMP/STR Rm, Rn {:  
nkeynes@671
   911
    COUNT_INST(I_CMPSTR);
nkeynes@991
   912
    load_reg( REG_EAX, Rm );
nkeynes@991
   913
    load_reg( REG_ECX, Rn );
nkeynes@991
   914
    XORL_r32_r32( REG_ECX, REG_EAX );
nkeynes@991
   915
    TESTB_r8_r8( REG_AL, REG_AL );
nkeynes@991
   916
    JE_label(target1);
nkeynes@991
   917
    TESTB_r8_r8( REG_AH, REG_AH );
nkeynes@991
   918
    JE_label(target2);
nkeynes@991
   919
    SHRL_imm_r32( 16, REG_EAX );
nkeynes@991
   920
    TESTB_r8_r8( REG_AL, REG_AL );
nkeynes@991
   921
    JE_label(target3);
nkeynes@991
   922
    TESTB_r8_r8( REG_AH, REG_AH );
nkeynes@380
   923
    JMP_TARGET(target1);
nkeynes@380
   924
    JMP_TARGET(target2);
nkeynes@380
   925
    JMP_TARGET(target3);
nkeynes@368
   926
    SETE_t();
nkeynes@417
   927
    sh4_x86.tstate = TSTATE_E;
nkeynes@361
   928
:}
nkeynes@361
   929
DIV0S Rm, Rn {:
nkeynes@671
   930
    COUNT_INST(I_DIV0S);
nkeynes@991
   931
    load_reg( REG_EAX, Rm );
nkeynes@991
   932
    load_reg( REG_ECX, Rn );
nkeynes@991
   933
    SHRL_imm_r32( 31, REG_EAX );
nkeynes@991
   934
    SHRL_imm_r32( 31, REG_ECX );
nkeynes@995
   935
    MOVL_r32_rbpdisp( REG_EAX, R_M );
nkeynes@995
   936
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
nkeynes@991
   937
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@386
   938
    SETNE_t();
nkeynes@417
   939
    sh4_x86.tstate = TSTATE_NE;
nkeynes@361
   940
:}
nkeynes@361
   941
DIV0U {:  
nkeynes@671
   942
    COUNT_INST(I_DIV0U);
nkeynes@991
   943
    XORL_r32_r32( REG_EAX, REG_EAX );
nkeynes@995
   944
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
nkeynes@995
   945
    MOVL_r32_rbpdisp( REG_EAX, R_M );
nkeynes@995
   946
    MOVL_r32_rbpdisp( REG_EAX, R_T );
nkeynes@417
   947
    sh4_x86.tstate = TSTATE_C; // works for DIV1
nkeynes@361
   948
:}
nkeynes@386
   949
DIV1 Rm, Rn {:
    /* One step of the SH4 non-restoring divide. Uses the M and Q flags and
     * T, updates Rn, Q and T. */
    COUNT_INST(I_DIV1);
    MOVL_rbpdisp_r32( R_M, REG_ECX );
    load_reg( REG_EAX, Rn );
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    RCLL_imm_r32( 1, REG_EAX );   // Rn = (Rn << 1) | T, old MSB -> carry
    SETC_r8( REG_DL ); // Q'
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
    JE_label(mqequal);
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );   // Q != M: add divisor
    JMP_label(end);
    JMP_TARGET(mqequal);
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );   // Q == M: subtract divisor
    JMP_TARGET(end);
    store_reg( REG_EAX, Rn ); // Done with Rn now
    SETC_r8(REG_AL); // tmp1
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
    /* 32-bit XOR, so name the 32-bit register. The upper bits of EAX are
     * discarded by the MOVZXL that follows. NOTE(review): REG_AL and REG_EAX
     * presumably share register number 0, making this encoding-identical to
     * the previous REG_AL operand - confirm in the x86 op definitions. */
    XORL_imms_r32( 1, REG_EAX );   // T = !Q'
    MOVZXL_r8_r32( REG_AL, REG_EAX );
    MOVL_r32_rbpdisp( REG_EAX, R_T );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
   975
DMULS.L Rm, Rn {:  
nkeynes@671
   976
    COUNT_INST(I_DMULS);
nkeynes@991
   977
    load_reg( REG_EAX, Rm );
nkeynes@991
   978
    load_reg( REG_ECX, Rn );
nkeynes@991
   979
    IMULL_r32(REG_ECX);
nkeynes@995
   980
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
nkeynes@995
   981
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@417
   982
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
   983
:}
nkeynes@361
   984
DMULU.L Rm, Rn {:  
nkeynes@671
   985
    COUNT_INST(I_DMULU);
nkeynes@991
   986
    load_reg( REG_EAX, Rm );
nkeynes@991
   987
    load_reg( REG_ECX, Rn );
nkeynes@991
   988
    MULL_r32(REG_ECX);
nkeynes@995
   989
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
nkeynes@995
   990
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
nkeynes@417
   991
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
   992
:}
nkeynes@359
   993
DT Rn {:  
nkeynes@671
   994
    COUNT_INST(I_DT);
nkeynes@991
   995
    load_reg( REG_EAX, Rn );
nkeynes@991
   996
    ADDL_imms_r32( -1, REG_EAX );
nkeynes@991
   997
    store_reg( REG_EAX, Rn );
nkeynes@359
   998
    SETE_t();
nkeynes@417
   999
    sh4_x86.tstate = TSTATE_E;
nkeynes@359
  1000
:}
nkeynes@359
  1001
EXTS.B Rm, Rn {:  
nkeynes@671
  1002
    COUNT_INST(I_EXTSB);
nkeynes@991
  1003
    load_reg( REG_EAX, Rm );
nkeynes@991
  1004
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
nkeynes@991
  1005
    store_reg( REG_EAX, Rn );
nkeynes@359
  1006
:}
nkeynes@361
  1007
EXTS.W Rm, Rn {:  
nkeynes@671
  1008
    COUNT_INST(I_EXTSW);
nkeynes@991
  1009
    load_reg( REG_EAX, Rm );
nkeynes@991
  1010
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
nkeynes@991
  1011
    store_reg( REG_EAX, Rn );
nkeynes@361
  1012
:}
nkeynes@361
  1013
EXTU.B Rm, Rn {:  
nkeynes@671
  1014
    COUNT_INST(I_EXTUB);
nkeynes@991
  1015
    load_reg( REG_EAX, Rm );
nkeynes@991
  1016
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
nkeynes@991
  1017
    store_reg( REG_EAX, Rn );
nkeynes@361
  1018
:}
nkeynes@361
  1019
EXTU.W Rm, Rn {:  
nkeynes@671
  1020
    COUNT_INST(I_EXTUW);
nkeynes@991
  1021
    load_reg( REG_EAX, Rm );
nkeynes@991
  1022
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
nkeynes@991
  1023
    store_reg( REG_EAX, Rn );
nkeynes@361
  1024
:}
nkeynes@586
  1025
MAC.L @Rm+, @Rn+ {:
nkeynes@671
  1026
    COUNT_INST(I_MACL);
nkeynes@586
  1027
    if( Rm == Rn ) {
nkeynes@991
  1028
	load_reg( REG_EAX, Rm );
nkeynes@991
  1029
	check_ralign32( REG_EAX );
nkeynes@991
  1030
	MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1031
	MOVL_r32_rspdisp(REG_EAX, 0);
nkeynes@991
  1032
	load_reg( REG_EAX, Rm );
nkeynes@991
  1033
	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  1034
	MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1035
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
nkeynes@586
  1036
    } else {
nkeynes@991
  1037
	load_reg( REG_EAX, Rm );
nkeynes@991
  1038
	check_ralign32( REG_EAX );
nkeynes@991
  1039
	MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1040
	MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@991
  1041
	load_reg( REG_EAX, Rn );
nkeynes@991
  1042
	check_ralign32( REG_EAX );
nkeynes@991
  1043
	MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1044
	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
nkeynes@991
  1045
	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
nkeynes@586
  1046
    }
nkeynes@939
  1047
    
nkeynes@991
  1048
    IMULL_rspdisp( 0 );
nkeynes@991
  1049
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@991
  1050
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );
nkeynes@386
  1051
nkeynes@995
  1052
    MOVL_rbpdisp_r32( R_S, REG_ECX );
nkeynes@991
  1053
    TESTL_r32_r32(REG_ECX, REG_ECX);
nkeynes@991
  1054
    JE_label( nosat );
nkeynes@995
  1055
    CALL_ptr( signsat48 );
nkeynes@386
  1056
    JMP_TARGET( nosat );
nkeynes@417
  1057
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@386
  1058
:}
nkeynes@386
  1059
MAC.W @Rm+, @Rn+ {:  
nkeynes@671
  1060
    COUNT_INST(I_MACW);
nkeynes@586
  1061
    if( Rm == Rn ) {
nkeynes@991
  1062
	load_reg( REG_EAX, Rm );
nkeynes@991
  1063
	check_ralign16( REG_EAX );
nkeynes@991
  1064
	MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1065
        MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@991
  1066
	load_reg( REG_EAX, Rm );
nkeynes@991
  1067
	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
nkeynes@991
  1068
	MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1069
	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
nkeynes@586
  1070
	// Note translate twice in case of page boundaries. Maybe worth
nkeynes@586
  1071
	// adding a page-boundary check to skip the second translation
nkeynes@586
  1072
    } else {
nkeynes@991
  1073
	load_reg( REG_EAX, Rm );
nkeynes@991
  1074
	check_ralign16( REG_EAX );
nkeynes@991
  1075
	MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1076
        MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@991
  1077
	load_reg( REG_EAX, Rn );
nkeynes@991
  1078
	check_ralign16( REG_EAX );
nkeynes@991
  1079
	MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1080
	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
nkeynes@991
  1081
	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
nkeynes@586
  1082
    }
nkeynes@991
  1083
    IMULL_rspdisp( 0 );
nkeynes@995
  1084
    MOVL_rbpdisp_r32( R_S, REG_ECX );
nkeynes@991
  1085
    TESTL_r32_r32( REG_ECX, REG_ECX );
nkeynes@991
  1086
    JE_label( nosat );
nkeynes@386
  1087
nkeynes@991
  1088
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
nkeynes@991
  1089
    JNO_label( end );            // 2
nkeynes@995
  1090
    MOVL_imm32_r32( 1, REG_EDX );         // 5
nkeynes@995
  1091
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
nkeynes@991
  1092
    JS_label( positive );        // 2
nkeynes@995
  1093
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
nkeynes@995
  1094
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
nkeynes@991
  1095
    JMP_label(end2);           // 2
nkeynes@386
  1096
nkeynes@386
  1097
    JMP_TARGET(positive);
nkeynes@995
  1098
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
nkeynes@995
  1099
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
nkeynes@991
  1100
    JMP_label(end3);            // 2
nkeynes@386
  1101
nkeynes@386
  1102
    JMP_TARGET(nosat);
nkeynes@991
  1103
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
nkeynes@991
  1104
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
nkeynes@386
  1105
    JMP_TARGET(end);
nkeynes@386
  1106
    JMP_TARGET(end2);
nkeynes@386
  1107
    JMP_TARGET(end3);
nkeynes@417
  1108
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@386
  1109
:}
nkeynes@359
  1110
MOVT Rn {:  
nkeynes@671
  1111
    COUNT_INST(I_MOVT);
nkeynes@995
  1112
    MOVL_rbpdisp_r32( R_T, REG_EAX );
nkeynes@991
  1113
    store_reg( REG_EAX, Rn );
nkeynes@359
  1114
:}
nkeynes@361
  1115
MUL.L Rm, Rn {:  
nkeynes@671
  1116
    COUNT_INST(I_MULL);
nkeynes@991
  1117
    load_reg( REG_EAX, Rm );
nkeynes@991
  1118
    load_reg( REG_ECX, Rn );
nkeynes@991
  1119
    MULL_r32( REG_ECX );
nkeynes@995
  1120
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@417
  1121
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1122
:}
nkeynes@374
  1123
MULS.W Rm, Rn {:
nkeynes@671
  1124
    COUNT_INST(I_MULSW);
nkeynes@995
  1125
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
nkeynes@995
  1126
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
nkeynes@991
  1127
    MULL_r32( REG_ECX );
nkeynes@995
  1128
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@417
  1129
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1130
:}
nkeynes@374
  1131
MULU.W Rm, Rn {:  
nkeynes@671
  1132
    COUNT_INST(I_MULUW);
nkeynes@995
  1133
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
nkeynes@995
  1134
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
nkeynes@991
  1135
    MULL_r32( REG_ECX );
nkeynes@995
  1136
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@417
  1137
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@374
  1138
:}
nkeynes@359
  1139
NEG Rm, Rn {:
nkeynes@671
  1140
    COUNT_INST(I_NEG);
nkeynes@991
  1141
    load_reg( REG_EAX, Rm );
nkeynes@991
  1142
    NEGL_r32( REG_EAX );
nkeynes@991
  1143
    store_reg( REG_EAX, Rn );
nkeynes@417
  1144
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1145
:}
nkeynes@359
  1146
NEGC Rm, Rn {:  
nkeynes@671
  1147
    COUNT_INST(I_NEGC);
nkeynes@991
  1148
    load_reg( REG_EAX, Rm );
nkeynes@991
  1149
    XORL_r32_r32( REG_ECX, REG_ECX );
nkeynes@359
  1150
    LDC_t();
nkeynes@991
  1151
    SBBL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1152
    store_reg( REG_ECX, Rn );
nkeynes@359
  1153
    SETC_t();
nkeynes@417
  1154
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1155
:}
nkeynes@359
  1156
NOT Rm, Rn {:  
nkeynes@671
  1157
    COUNT_INST(I_NOT);
nkeynes@991
  1158
    load_reg( REG_EAX, Rm );
nkeynes@991
  1159
    NOTL_r32( REG_EAX );
nkeynes@991
  1160
    store_reg( REG_EAX, Rn );
nkeynes@417
  1161
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1162
:}
nkeynes@359
  1163
OR Rm, Rn {:  
nkeynes@671
  1164
    COUNT_INST(I_OR);
nkeynes@991
  1165
    load_reg( REG_EAX, Rm );
nkeynes@991
  1166
    load_reg( REG_ECX, Rn );
nkeynes@991
  1167
    ORL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1168
    store_reg( REG_ECX, Rn );
nkeynes@417
  1169
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1170
:}
nkeynes@359
  1171
OR #imm, R0 {:
nkeynes@671
  1172
    COUNT_INST(I_ORI);
nkeynes@991
  1173
    load_reg( REG_EAX, 0 );
nkeynes@991
  1174
    ORL_imms_r32(imm, REG_EAX);
nkeynes@991
  1175
    store_reg( REG_EAX, 0 );
nkeynes@417
  1176
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1177
:}
nkeynes@374
  1178
OR.B #imm, @(R0, GBR) {:  
nkeynes@671
  1179
    COUNT_INST(I_ORB);
nkeynes@991
  1180
    load_reg( REG_EAX, 0 );
nkeynes@991
  1181
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1182
    MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@991
  1183
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
nkeynes@991
  1184
    MOVL_rspdisp_r32( 0, REG_EAX );
nkeynes@991
  1185
    ORL_imms_r32(imm, REG_EDX );
nkeynes@991
  1186
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1187
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@374
  1188
:}
nkeynes@359
  1189
ROTCL Rn {:
nkeynes@671
  1190
    COUNT_INST(I_ROTCL);
nkeynes@991
  1191
    load_reg( REG_EAX, Rn );
nkeynes@417
  1192
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@417
  1193
	LDC_t();
nkeynes@417
  1194
    }
nkeynes@991
  1195
    RCLL_imm_r32( 1, REG_EAX );
nkeynes@991
  1196
    store_reg( REG_EAX, Rn );
nkeynes@359
  1197
    SETC_t();
nkeynes@417
  1198
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1199
:}
nkeynes@359
  1200
ROTCR Rn {:  
nkeynes@671
  1201
    COUNT_INST(I_ROTCR);
nkeynes@991
  1202
    load_reg( REG_EAX, Rn );
nkeynes@417
  1203
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@417
  1204
	LDC_t();
nkeynes@417
  1205
    }
nkeynes@991
  1206
    RCRL_imm_r32( 1, REG_EAX );
nkeynes@991
  1207
    store_reg( REG_EAX, Rn );
nkeynes@359
  1208
    SETC_t();
nkeynes@417
  1209
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1210
:}
nkeynes@359
  1211
ROTL Rn {:  
nkeynes@671
  1212
    COUNT_INST(I_ROTL);
nkeynes@991
  1213
    load_reg( REG_EAX, Rn );
nkeynes@991
  1214
    ROLL_imm_r32( 1, REG_EAX );
nkeynes@991
  1215
    store_reg( REG_EAX, Rn );
nkeynes@359
  1216
    SETC_t();
nkeynes@417
  1217
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1218
:}
nkeynes@359
  1219
ROTR Rn {:  
nkeynes@671
  1220
    COUNT_INST(I_ROTR);
nkeynes@991
  1221
    load_reg( REG_EAX, Rn );
nkeynes@991
  1222
    RORL_imm_r32( 1, REG_EAX );
nkeynes@991
  1223
    store_reg( REG_EAX, Rn );
nkeynes@359
  1224
    SETC_t();
nkeynes@417
  1225
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1226
:}
nkeynes@359
  1227
SHAD Rm, Rn {:
    COUNT_INST(I_SHAD);
    /* No single x86 instruction matches this, so the emitted code
     * branches on the sign of the shift count held in Rm. */
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    /* Rm < 0: arithmetic shift right by (-Rm & 0x1F) */
    NEGL_r32( REG_ECX );
    ANDB_imms_r8( 0x1F, REG_CL );
    JE_label(emptysar);
    SARL_cl_r32( REG_EAX );
    JMP_label(end);

    /* Masked count came out as zero: shift right by 31 instead */
    JMP_TARGET(emptysar);
    SARL_imm_r32(31, REG_EAX );
    JMP_label(end2);

    /* Rm >= 0: shift left by (Rm & 0x1F) */
    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL );
    SHLL_cl_r32( REG_EAX );

    /* Both forward jumps land here */
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1253
SHLD Rm, Rn {:
    /* Logical shift of Rn; direction is chosen by the sign of Rm. */
    COUNT_INST(I_SHLD);
    load_reg( REG_EAX, Rn );
    load_reg( REG_ECX, Rm );
    CMPL_imms_r32( 0, REG_ECX );
    JGE_label(doshl);

    /* Rm < 0: logical shift right by (-Rm & 0x1F) */
    NEGL_r32( REG_ECX );
    ANDB_imms_r8( 0x1F, REG_CL );
    JE_label(emptyshr );
    SHRL_cl_r32( REG_EAX );
    JMP_label(end);

    /* Masked count came out as zero: the result is cleared to 0 */
    JMP_TARGET(emptyshr);
    XORL_r32_r32( REG_EAX, REG_EAX );
    JMP_label(end2);

    /* Rm >= 0: shift left by (Rm & 0x1F) */
    JMP_TARGET(doshl);
    ANDB_imms_r8( 0x1F, REG_CL );
    SHLL_cl_r32( REG_EAX );

    /* Both forward jumps land here */
    JMP_TARGET(end);
    JMP_TARGET(end2);
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1278
SHAL Rn {:
    /* Shift Rn left by one bit; SETC_t() is emitted directly after the
     * shift, before the result is stored back. */
    COUNT_INST(I_SHAL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
nkeynes@359
  1286
SHAR Rn {:
    /* Arithmetic shift of Rn right by one bit (x86 SAR); SETC_t() is
     * emitted directly after the shift. */
    COUNT_INST(I_SHAR);
    load_reg( REG_EAX, Rn );
    SARL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
nkeynes@359
  1294
SHLL Rn {:
    /* Logical shift of Rn left by one bit; SETC_t() is emitted directly
     * after the shift. */
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
nkeynes@359
  1302
SHLL2 Rn {:
    /* Shift Rn left by 2 bits. Counted under I_SHLL, like the other
     * fixed-distance left shifts. T is not updated. */
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1309
SHLL8 Rn {:
    /* Shift Rn left by 8 bits. Counted under I_SHLL. T is not updated. */
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1316
SHLL16 Rn {:
    /* Shift Rn left by 16 bits. Counted under I_SHLL. T is not updated. */
    COUNT_INST(I_SHLL);
    load_reg( REG_EAX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1323
SHLR Rn {:
    /* Logical shift of Rn right by one bit (x86 SHR); SETC_t() is
     * emitted directly after the shift. */
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 1, REG_EAX );
    SETC_t();
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_C;
:}
nkeynes@359
  1331
SHLR2 Rn {:
    /* Logical shift of Rn right by 2 bits. Counted under I_SHLR, like
     * the other fixed-distance right shifts. T is not updated. */
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 2, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1338
SHLR8 Rn {:
    /* Logical shift of Rn right by 8 bits. Counted under I_SHLR.
     * T is not updated. */
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 8, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1345
SHLR16 Rn {:
    /* Logical shift of Rn right by 16 bits. Counted under I_SHLR.
     * T is not updated. */
    COUNT_INST(I_SHLR);
    load_reg( REG_EAX, Rn );
    SHRL_imm_r32( 16, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1352
SUB Rm, Rn {:
    /* Rn = Rn - Rm. The host flags are clobbered, so the tracked flag
     * state is reset. */
    COUNT_INST(I_SUB);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1360
SUBC Rm, Rn {:
    /* Subtract with borrow (x86 SBB): Rn = Rn - Rm - carry, and the
     * resulting carry is recorded via SETC_t(). */
    COUNT_INST(I_SUBC);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    /* LDC_t() is only emitted when the tracked flag state is not already
     * TSTATE_C (presumably it reloads the carry flag from T). */
    if( sh4_x86.tstate != TSTATE_C ) {
        LDC_t();
    }
    SBBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETC_t();
    sh4_x86.tstate = TSTATE_C;
:}
nkeynes@359
  1372
SUBV Rm, Rn {:
    /* Rn = Rn - Rm, with the overflow flag recorded via SETO_t(). */
    COUNT_INST(I_SUBV);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SUBL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    SETO_t();
    sh4_x86.tstate = TSTATE_O;
:}
nkeynes@359
  1381
SWAP.B Rm, Rn {:
    /* Exchange the two lowest bytes of Rm (AL and AH) and store the
     * result in Rn. */
    COUNT_INST(I_SWAPB);
    load_reg( REG_EAX, Rm );
    /* XCHG leaves EFLAGS alone, which is why the tracked flag state is
     * left as it was. */
    XCHGB_r8_r8( REG_AL, REG_AH );
    store_reg( REG_EAX, Rn );
:}
nkeynes@359
  1387
SWAP.W Rm, Rn {:
    /* Exchange the upper and lower 16-bit halves of Rm and store the
     * result in Rn. */
    /* Counted under its own statistic; this previously used I_SWAPB, so
     * SWAP.W executions were attributed to SWAP.B. */
    COUNT_INST(I_SWAPW);
    load_reg( REG_EAX, Rm );
    MOVL_r32_r32( REG_EAX, REG_ECX );
    SHLL_imm_r32( 16, REG_ECX );        /* ECX = low half moved to the top */
    SHRL_imm_r32( 16, REG_EAX );        /* EAX = high half moved to the bottom */
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    /* The shifts and OR clobber the host flags */
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1397
TAS.B @Rn {:
    /* Read the byte at @Rn, record via SETE_t() whether it was zero,
     * then write it back with bit 7 set. */
    COUNT_INST(I_TASB);
    load_reg( REG_EAX, Rn );
    /* Keep the address in the stack slot at offset 0; it is reloaded
     * into EAX after the read, before the write. */
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
    TESTB_r8_r8( REG_DL, REG_DL );
    SETE_t();
    ORB_imms_r8( 0x80, REG_DL );
    MOVL_rspdisp_r32( 0, REG_EAX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1409
TST Rm, Rn {:
    /* Test Rm AND Rn without storing the result; the zero outcome is
     * recorded via SETE_t(). */
    COUNT_INST(I_TST);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    TESTL_r32_r32( REG_EAX, REG_ECX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
nkeynes@368
  1417
TST #imm, R0 {:
    /* Test R0 AND imm without storing the result; the zero outcome is
     * recorded via SETE_t(). */
    COUNT_INST(I_TSTI);
    load_reg( REG_EAX, 0 );
    TESTL_imms_r32( imm, REG_EAX );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
nkeynes@368
  1424
TST.B #imm, @(R0, GBR) {:
    /* Test the byte at address R0 + GBR against imm; memory is only
     * read, never written. */
    COUNT_INST(I_TSTB);
    load_reg( REG_EAX, 0);
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    TESTB_imms_r8( imm, REG_AL );
    SETE_t();
    sh4_x86.tstate = TSTATE_E;
:}
nkeynes@359
  1433
XOR Rm, Rn {:
    /* Rn = Rn XOR Rm. */
    COUNT_INST(I_XOR);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    XORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1441
XOR #imm, R0 {:
    /* R0 = R0 XOR imm. */
    COUNT_INST(I_XORI);
    load_reg( REG_EAX, 0 );
    XORL_imms_r32( imm, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1448
XOR.B #imm, @(R0, GBR) {:
    /* Read-modify-write of the byte at address R0 + GBR: XOR it with
     * imm and write it back. */
    COUNT_INST(I_XORB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
    /* Keep the address in the stack slot at offset 0; it is reloaded
     * into EAX after the read, before the write. */
    MOVL_r32_rspdisp( REG_EAX, 0 );
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
    MOVL_rspdisp_r32( 0, REG_EAX );
    XORL_imms_r32( imm, REG_EDX );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1459
XTRCT Rm, Rn {:
    /* Rn = (Rm << 16) | (Rn >> 16): the low half of Rm becomes the high
     * half of the result, the high half of Rn becomes the low half. */
    COUNT_INST(I_XTRCT);
    load_reg( REG_EAX, Rm );
    load_reg( REG_ECX, Rn );
    SHLL_imm_r32( 16, REG_EAX );
    SHRL_imm_r32( 16, REG_ECX );
    ORL_r32_r32( REG_EAX, REG_ECX );
    store_reg( REG_ECX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1469
nkeynes@359
  1470
/* Data move instructions */
nkeynes@359
  1471
MOV Rm, Rn {:
    /* Register-to-register copy via EAX; the tracked flag state is
     * left untouched. */
    COUNT_INST(I_MOV);
    load_reg( REG_EAX, Rm );
    store_reg( REG_EAX, Rn );
:}
nkeynes@359
  1476
MOV #imm, Rn {:
    /* Load the immediate into Rn via EAX; the tracked flag state is
     * left untouched. */
    COUNT_INST(I_MOVI);
    MOVL_imm32_r32( imm, REG_EAX );
    store_reg( REG_EAX, Rn );
:}
nkeynes@359
  1481
MOV.B Rm, @Rn {:
    /* Store a byte from Rm at the address in Rn.
     * EAX = address, EDX = value. */
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1488
MOV.B Rm, @-Rn {:
    /* Pre-decrement byte store: write Rm's byte at Rn - 1, then
     * decrement Rn. */
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    /* The stored register is only adjusted once the write has been
     * emitted (presumably so that a faulting write leaves Rn as it was). */
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1497
MOV.B Rm, @(R0, Rn) {:
    /* Store a byte from Rm at address R0 + Rn. */
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1505
MOV.B R0, @(disp, GBR) {:
    /* Store a byte from R0 at address GBR + disp. */
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1513
MOV.B R0, @(disp, Rn) {:
    /* Store a byte from R0 at address Rn + disp. */
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1521
MOV.B @Rm, Rn {:
    /* Load the byte at the address in Rm into Rn. */
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1528
MOV.B @Rm+, Rn {:
    /* Post-increment byte load: read the byte at Rm into Rn, then
     * advance Rm by 1. */
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    /* When both operands name the same register the increment is
     * omitted; the loaded value is what ends up in it. */
    if( Rm != Rn ) {
        ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
    }
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1538
MOV.B @(R0, Rm), Rn {:
    /* Load the byte at address R0 + Rm into Rn. */
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1546
MOV.B @(disp, GBR), R0 {:
    /* Load the byte at address GBR + disp into R0. */
    COUNT_INST(I_MOVB);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1554
MOV.B @(disp, Rm), R0 {:
    /* Load the byte at address Rm + disp into R0. */
    COUNT_INST(I_MOVB);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    MEM_READ_BYTE( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@374
  1562
MOV.L Rm, @Rn {:
    /* Store the long in Rm at the address in Rn, with an inline path
     * for addresses in the 0xE0000000 region. */
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    check_walign32(REG_EAX);

    /* Does (address & 0xFC000000) equal 0xE0000000? */
    MOVL_r32_r32( REG_EAX, REG_ECX );
    ANDL_imms_r32( 0xFC000000, REG_ECX );
    CMPL_imms_r32( 0xE0000000, REG_ECX );
    JNE_label( notsq );

    /* Yes: write straight into store_queue, indexed by
     * (address & 0x3C) relative to EBP. */
    ANDL_imms_r32( 0x3C, REG_EAX );
    load_reg( REG_EDX, Rm );
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
    JMP_label(end);

    /* No: go through the regular long write. */
    JMP_TARGET(notsq);
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1580
MOV.L Rm, @-Rn {:
    /* Pre-decrement long store: write Rm at Rn - 4, then decrement Rn. */
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( -4, REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    /* The stored register is only adjusted once the write has been
     * emitted (presumably so that a faulting write leaves Rn as it was). */
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1590
MOV.L Rm, @(R0, Rn) {:
    /* Store the long in Rm at address R0 + Rn. */
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1599
MOV.L R0, @(disp, GBR) {:
    /* Store the long in R0 at address GBR + disp. */
    COUNT_INST(I_MOVL);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1608
MOV.L Rm, @(disp, Rn) {:
    /* Store the long in Rm at address Rn + disp, with an inline path
     * for addresses in the 0xE0000000 region. */
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign32( REG_EAX );

    /* Does (address & 0xFC000000) equal 0xE0000000? */
    MOVL_r32_r32( REG_EAX, REG_ECX );
    ANDL_imms_r32( 0xFC000000, REG_ECX );
    CMPL_imms_r32( 0xE0000000, REG_ECX );
    JNE_label( notsq );

    /* Yes: write straight into store_queue, indexed by
     * (address & 0x3C) relative to EBP. */
    ANDL_imms_r32( 0x3C, REG_EAX );
    load_reg( REG_EDX, Rm );
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
    JMP_label(end);

    /* No: go through the regular long write. */
    JMP_TARGET(notsq);
    load_reg( REG_EDX, Rm );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    JMP_TARGET(end);
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1627
MOV.L @Rm, Rn {:
    /* Load the long at the address in Rm into Rn. */
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1635
MOV.L @Rm+, Rn {:
    /* Post-increment long load: read the long at Rm into Rn, then
     * advance Rm by 4. */
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rm );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    /* When both operands name the same register the increment is
     * omitted; the loaded value is what ends up in it. */
    if( Rm != Rn ) {
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
    }
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1646
MOV.L @(R0, Rm), Rn {:
    /* Load the long at address R0 + Rm into Rn. */
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1655
MOV.L @(disp, GBR), R0 {:
    /* Load the long at address GBR + disp into R0. */
    COUNT_INST(I_MOVL);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1664
MOV.L @(disp, PC), Rn {:
    /* PC-relative long load; rejected when it sits in a delay slot. */
    COUNT_INST(I_MOVLPC);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        /* The base is pc with its low two bits cleared. */
        uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
        if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
            // When the literal lives in the same page as the code it is
            // pretty safe to reference it directly and bypass the memory
            // subsystem entirely (a big performance win).

            // FIXME: one corner case is not handled: the current code
            // page being in the ITLB but not in the UTLB. That should
            // raise a TLB miss (SH4 behaviour still to be confirmed by
            // testing), but it is unlikely that anything depends on it.
            sh4ptr_t literal = GET_ICACHE_PTR(target);
            MOVL_moffptr_eax( literal );
        } else {
            // sh4r.pc is used for the calculation because the block may
            // run at a different virtual address from the one it was
            // translated at; the low bits can safely be assumed equal.
            MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
            ADDL_rbpdisp_r32( R_PC, REG_EAX );
            MEM_READ_LONG( REG_EAX, REG_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( REG_EAX, Rn );
    }
:}
nkeynes@361
  1694
MOV.L @(disp, Rm), Rn {:
    /* Load the long at address Rm + disp into Rn. */
    COUNT_INST(I_MOVL);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign32( REG_EAX );
    MEM_READ_LONG( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1703
MOV.W Rm, @Rn {:
    /* Store the word in Rm at the address in Rn. */
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1711
MOV.W Rm, @-Rn {:
    /* Pre-decrement word store: write Rm's word at Rn - 2, then
     * decrement Rn. */
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    /* The alignment check is emitted on the undecremented address;
     * subtracting 2 does not change 16-bit alignment. */
    check_walign16( REG_EAX );
    LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    /* The stored register is only adjusted once the write has been
     * emitted (presumably so that a faulting write leaves Rn as it was). */
    ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1721
MOV.W Rm, @(R0, Rn) {:
    /* Store the word in Rm at address R0 + Rn. */
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, Rm );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1730
MOV.W R0, @(disp, GBR) {:
    /* Store the word in R0 at address GBR + disp. */
    COUNT_INST(I_MOVW);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1739
MOV.W R0, @(disp, Rn) {:
    /* Store the word in R0 at address Rn + disp. */
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rn );
    ADDL_imms_r32( disp, REG_EAX );
    check_walign16( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1748
MOV.W @Rm, Rn {:
    /* Load the word at the address in Rm into Rn. */
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rm );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1756
MOV.W @Rm+, Rn {:
    /* Post-increment word load: read the word at Rm into Rn, then
     * advance Rm by 2. */
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rm );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    /* When both operands name the same register the increment is
     * omitted; the loaded value is what ends up in it. */
    if( Rm != Rn ) {
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
    }
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1767
MOV.W @(R0, Rm), Rn {:
    /* Load the word at address R0 + Rm into Rn. */
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, 0 );
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, Rn );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1776
MOV.W @(disp, GBR), R0 {:
    /* Load the word at address GBR + disp into R0. */
    COUNT_INST(I_MOVW);
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1785
MOV.W @(disp, PC), Rn {:
    /* PC-relative word load; rejected when it sits in a delay slot.
     * Same two-path scheme as the PC-relative long load, except that pc
     * is used without clearing its low bits. */
    COUNT_INST(I_MOVW);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        uint32_t target = pc + disp + 4;
        if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
            /* Direct host-pointer read of the literal, followed by a
             * sign extension of the low 16 bits. */
            sh4ptr_t literal = GET_ICACHE_PTR(target);
            MOVL_moffptr_eax( literal );
            MOVSXL_r16_r32( REG_EAX, REG_EAX );
        } else {
            /* Address = runtime PC + offset of the literal from the
             * start of the block. */
            MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
            ADDL_rbpdisp_r32( R_PC, REG_EAX );
            MEM_READ_WORD( REG_EAX, REG_EAX );
            sh4_x86.tstate = TSTATE_NONE;
        }
        store_reg( REG_EAX, Rn );
    }
:}
nkeynes@361
  1805
MOV.W @(disp, Rm), R0 {:
    /* Load the word at address Rm + disp into R0. */
    COUNT_INST(I_MOVW);
    load_reg( REG_EAX, Rm );
    ADDL_imms_r32( disp, REG_EAX );
    check_ralign16( REG_EAX );
    MEM_READ_WORD( REG_EAX, REG_EAX );
    store_reg( REG_EAX, 0 );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@361
  1814
MOVA @(disp, PC), R0 {:
    /* Compute a PC-relative address into R0 without touching memory;
     * rejected when it sits in a delay slot. */
    COUNT_INST(I_MOVA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        /* R0 = runtime PC + block-relative offset, with the low two
         * bits of pc subtracted out. */
        MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
        ADDL_rbpdisp_r32( R_PC, REG_ECX );
        store_reg( REG_ECX, 0 );
        sh4_x86.tstate = TSTATE_NONE;
    }
:}
nkeynes@361
  1825
MOVCA.L R0, @Rn {:
    /* Emitted as a plain aligned long store of R0 at the address in Rn. */
    COUNT_INST(I_MOVCA);
    load_reg( REG_EAX, Rn );
    check_walign32( REG_EAX );
    load_reg( REG_EDX, 0 );
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
    sh4_x86.tstate = TSTATE_NONE;
:}
nkeynes@359
  1833
nkeynes@359
  1834
/* Control transfer instructions */
nkeynes@374
  1835
BF disp {:
    /* Conditional branch without a delay slot; rejected when it sits
     * in a delay slot itself. Returns 2 from the translator. */
    COUNT_INST(I_BF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4vma_t target = disp + pc + 4;
        /* The emitted JT skips over the block exit, so the exit to
         * target is only reached when that jump is not taken. */
        JT_label( nottaken );
        exit_block_rel(target, pc+2 );
        JMP_TARGET(nottaken);
        return 2;
    }
:}
nkeynes@374
  1847
BF/S disp {:
    /* Conditional branch with a delay slot; rejected when it sits in
     * a delay slot itself. */
    COUNT_INST(I_BFS);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        if( UNTRANSLATABLE(pc+2) ) {
            /* The delay slot is flagged UNTRANSLATABLE: compute the new
             * PC at runtime (disp is only added when the JT is not
             * taken), store it in R_NEW_PC and leave through
             * exit_block_emu(). */
            MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
            JT_label(nottaken);
            ADDL_imms_r32( disp, REG_EAX );
            JMP_TARGET(nottaken);
            ADDL_rbpdisp_r32( R_PC, REG_EAX );
            MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            sh4_x86.branch_taken = TRUE;
            return 2;
        } else {
            /* With no tracked flag state, compare T against 1 so there
             * is a condition to jump on. */
            if( sh4_x86.tstate == TSTATE_NONE ) {
                CMPL_imms_rbpdisp( 1, R_T );
                sh4_x86.tstate = TSTATE_E;
            }
            sh4vma_t target = disp + pc + 4;
            /* Emit the conditional jump with a zero rel32 and remember
             * where that displacement lives so it can be filled in. */
            JCC_cc_rel32(sh4_x86.tstate,0);
            uint32_t *fixup = ((uint32_t *)xlat_output)-1;
            int saved_tstate = sh4_x86.tstate;

            /* First copy of the delay slot, followed by the exit to
             * the branch target. */
            sh4_translate_instruction(pc+2);
            sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
            exit_block_rel( target, pc+4 );

            /* Resolve the jump to this point, restore the flag state
             * saved above, and emit the delay slot a second time for
             * the fall-through path. */
            *fixup = (xlat_output - ((uint8_t *)fixup)) - 4;
            sh4_x86.tstate = saved_tstate;
            sh4_translate_instruction(pc+2);
            return 4;
        }
    }
:}
nkeynes@374
  1884
BRA disp {:
    /* Unconditional branch with a delay slot; rejected when it sits
     * in a delay slot itself. */
    COUNT_INST(I_BRA);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            /* The delay slot is flagged UNTRANSLATABLE: set R_NEW_PC to
             * the runtime PC plus the block-relative target offset and
             * leave through exit_block_emu(). */
            MOVL_rbpdisp_r32( R_PC, REG_EAX );
            ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
            MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
            exit_block_emu(pc+2);
            return 2;
        } else {
            /* Translate the delay slot inline, then exit to the target. */
            sh4_translate_instruction( pc + 2 );
            exit_block_rel( disp + pc + 4, pc+4 );
            return 4;
        }
    }
:}
nkeynes@374
  1904
BRAF Rn {:
    /* Register-relative branch with a delay slot; rejected when it
     * sits in a delay slot itself. */
    COUNT_INST(I_BRAF);
    if( sh4_x86.in_delay_slot ) {
        SLOTILLEGAL();
    } else {
        /* R_NEW_PC = runtime PC + (pc + 4 - block start) + Rn. It is
         * written before the delay slot is translated. */
        MOVL_rbpdisp_r32( R_PC, REG_EAX );
        ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
        ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
        MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
        sh4_x86.in_delay_slot = DELAY_PC;
        sh4_x86.tstate = TSTATE_NONE;
        sh4_x86.branch_taken = TRUE;
        if( UNTRANSLATABLE(pc+2) ) {
            exit_block_emu(pc+2);
            return 2;
        } else {
            /* Translate the delay slot inline, then exit using the
             * R_NEW_PC value stored above. */
            sh4_translate_instruction( pc + 2 );
            exit_block_newpcset(pc+4);
            return 4;
        }
    }
:}
nkeynes@374
  1926
BSR disp {:  
    /* BSR: delayed branch to disp + pc + 4 that first records a return address.
     * The emitted code stores R_PC + (pc + 4 - block_start_pc) to R_PR.
     * In the UNTRANSLATABLE-slot case the same EAX value is reused: disp is
     * added to it and the result is stored to R_NEW_PC before
     * exit_block_emu(pc+2) (returns 2). Otherwise the slot instruction is
     * translated inline and the block exits via exit_block_rel() (returns 4). */
nkeynes@671
  1927
    COUNT_INST(I_BSR);
nkeynes@374
  1928
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1929
	SLOTILLEGAL();
nkeynes@374
  1930
    } else {
nkeynes@995
  1931
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1932
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  1933
	MOVL_r32_rbpdisp( REG_EAX, R_PR );
nkeynes@590
  1934
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  1935
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  1936
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@601
  1937
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@991
  1938
	    ADDL_imms_r32( disp, REG_EAX );
nkeynes@995
  1939
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  1940
	    exit_block_emu(pc+2);
nkeynes@601
  1941
	    return 2;
nkeynes@601
  1942
	} else {
nkeynes@601
  1943
	    sh4_translate_instruction( pc + 2 );
nkeynes@601
  1944
	    exit_block_rel( disp + pc + 4, pc+4 );
nkeynes@601
  1945
	    return 4;
nkeynes@601
  1946
	}
nkeynes@374
  1947
    }
nkeynes@374
  1948
:}
nkeynes@374
  1949
BSRF Rn {:  
    /* BSRF: delayed branch with a run-time target that also records a
     * return address. The emitted code stores
     * R_PC + (pc + 4 - block_start_pc) to R_PR, then adds r[Rn] to the same
     * EAX value and stores that to R_NEW_PC.
     * Returns 2 via exit_block_emu() when the slot instruction at pc+2 is
     * UNTRANSLATABLE, otherwise translates it inline, exits with
     * exit_block_newpcset(pc+4) and returns 4. */
nkeynes@671
  1950
    COUNT_INST(I_BSRF);
nkeynes@374
  1951
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1952
	SLOTILLEGAL();
nkeynes@374
  1953
    } else {
nkeynes@995
  1954
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1955
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  1956
	MOVL_r32_rbpdisp( REG_EAX, R_PR );
nkeynes@991
  1957
	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@995
  1958
	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@590
  1959
nkeynes@601
  1960
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@417
  1961
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@409
  1962
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  1963
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  1964
	    exit_block_emu(pc+2);
nkeynes@601
  1965
	    return 2;
nkeynes@601
  1966
	} else {
nkeynes@601
  1967
	    sh4_translate_instruction( pc + 2 );
nkeynes@974
  1968
	    exit_block_newpcset(pc+4);
nkeynes@601
  1969
	    return 4;
nkeynes@601
  1970
	}
nkeynes@374
  1971
    }
nkeynes@374
  1972
:}
nkeynes@374
  1973
BT disp {:
    /* BT: conditional branch to disp + pc + 4 without a delay slot.
     * JF_label(nottaken) emits a jump over the exit_block_rel() sequence, so
     * the block exit is only reached on the other outcome of the test.
     * Handled by SLOTILLEGAL() when in a delay slot; otherwise returns 2. */
nkeynes@671
  1974
    COUNT_INST(I_BT);
nkeynes@374
  1975
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1976
	SLOTILLEGAL();
nkeynes@374
  1977
    } else {
nkeynes@586
  1978
	sh4vma_t target = disp + pc + 4;
nkeynes@991
  1979
	JF_label( nottaken );
nkeynes@586
  1980
	exit_block_rel(target, pc+2 );
nkeynes@380
  1981
	JMP_TARGET(nottaken);
nkeynes@408
  1982
	return 2;
nkeynes@374
  1983
    }
nkeynes@374
  1984
:}
nkeynes@374
  1985
BT/S disp {:
    /* BT/S: conditional branch to disp + pc + 4 with a delay slot at pc+2.
     *
     * UNTRANSLATABLE slot: EAX starts as pc + 4 - block_start_pc; disp is
     * added only when the JF_label(nottaken) jump is not followed; R_PC is
     * then added and the result stored to R_NEW_PC before
     * exit_block_emu(pc+2). Returns 2.
     *
     * Translatable slot: a conditional jump on the inverted tstate is emitted
     * with a zero rel32 placeholder, followed by the slot instruction and the
     * exit for one outcome. The placeholder is then patched to land just
     * after that exit, tstate is restored, and the slot instruction is
     * translated a second time for the other outcome. Returns 4. */
nkeynes@671
  1986
    COUNT_INST(I_BTS);
nkeynes@374
  1987
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1988
	SLOTILLEGAL();
nkeynes@374
  1989
    } else {
nkeynes@590
  1990
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@601
  1991
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@995
  1992
	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@991
  1993
	    JF_label(nottaken);
nkeynes@991
  1994
	    ADDL_imms_r32( disp, REG_EAX );
nkeynes@601
  1995
	    JMP_TARGET(nottaken);
nkeynes@991
  1996
	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@995
  1997
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  1998
	    exit_block_emu(pc+2);
nkeynes@601
  1999
	    sh4_x86.branch_taken = TRUE;
nkeynes@601
  2000
	    return 2;
nkeynes@601
  2001
	} else {
nkeynes@601
  2002
	    if( sh4_x86.tstate == TSTATE_NONE ) {
nkeynes@991
  2003
		CMPL_imms_rbpdisp( 1, R_T );
nkeynes@601
  2004
		sh4_x86.tstate = TSTATE_E;
nkeynes@601
  2005
	    }
nkeynes@991
  2006
	    JCC_cc_rel32(sh4_x86.tstate^1,0);
nkeynes@991
  2007
	    /* patch addresses the 4 bytes just emitted, i.e. the rel32 field */
	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
nkeynes@991
  2008
nkeynes@879
  2009
	    int save_tstate = sh4_x86.tstate;
nkeynes@601
  2010
	    sh4_translate_instruction(pc+2);
nkeynes@1091
  2011
            sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
nkeynes@601
  2012
	    exit_block_rel( disp + pc + 4, pc+4 );
nkeynes@601
  2013
	    // not taken
nkeynes@601
  2014
	    /* displacement is measured from the end of the rel32 field */
	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
nkeynes@879
  2015
	    sh4_x86.tstate = save_tstate;
nkeynes@601
  2016
	    sh4_translate_instruction(pc+2);
nkeynes@601
  2017
	    return 4;
nkeynes@417
  2018
	}
nkeynes@374
  2019
    }
nkeynes@374
  2020
:}
nkeynes@374
  2021
JMP @Rn {:  
    /* JMP: delayed jump to the address held in Rn.
     * Rn is loaded into ECX and stored to R_NEW_PC before the delay slot is
     * dealt with. Returns 2 via exit_block_emu() when the slot instruction at
     * pc+2 is UNTRANSLATABLE, otherwise translates it inline, exits with
     * exit_block_newpcset(pc+4) and returns 4. */
nkeynes@671
  2022
    COUNT_INST(I_JMP);
nkeynes@374
  2023
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2024
	SLOTILLEGAL();
nkeynes@374
  2025
    } else {
nkeynes@991
  2026
	load_reg( REG_ECX, Rn );
nkeynes@995
  2027
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@590
  2028
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2029
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2030
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2031
	    exit_block_emu(pc+2);
nkeynes@601
  2032
	    return 2;
nkeynes@601
  2033
	} else {
nkeynes@601
  2034
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2035
	    exit_block_newpcset(pc+4);
nkeynes@601
  2036
	    return 4;
nkeynes@601
  2037
	}
nkeynes@374
  2038
    }
nkeynes@374
  2039
:}
nkeynes@374
  2040
JSR @Rn {:  
    /* JSR: delayed jump to the address held in Rn that also records a return
     * address. The emitted code stores R_PC + (pc + 4 - block_start_pc) to
     * R_PR, then loads Rn into ECX and stores it to R_NEW_PC.
     * Returns 2 via exit_block_emu() when the slot instruction at pc+2 is
     * UNTRANSLATABLE, otherwise translates it inline, exits with
     * exit_block_newpcset(pc+4) and returns 4. */
nkeynes@671
  2041
    COUNT_INST(I_JSR);
nkeynes@374
  2042
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2043
	SLOTILLEGAL();
nkeynes@374
  2044
    } else {
nkeynes@995
  2045
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  2046
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  2047
	MOVL_r32_rbpdisp( REG_EAX, R_PR );
nkeynes@991
  2048
	load_reg( REG_ECX, Rn );
nkeynes@995
  2049
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@601
  2050
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2051
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2052
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@601
  2053
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2054
	    exit_block_emu(pc+2);
nkeynes@601
  2055
	    return 2;
nkeynes@601
  2056
	} else {
nkeynes@601
  2057
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2058
	    exit_block_newpcset(pc+4);
nkeynes@601
  2059
	    return 4;
nkeynes@601
  2060
	}
nkeynes@374
  2061
    }
nkeynes@374
  2062
:}
nkeynes@374
  2063
RTE {:  
    /* RTE: delayed return that restores PC and SR from SPC and SSR.
     * After check_priv(), the emitted code copies R_SPC to R_NEW_PC and passes
     * R_SSR to sh4_write_sr(). The translator then drops its cached
     * assumptions (fpuen_checked, tstate, sh4_mode) - presumably because the
     * SR write can invalidate them; confirm against sh4_write_sr.
     * Returns 2 via exit_block_emu() when the slot instruction at pc+2 is
     * UNTRANSLATABLE, otherwise translates it inline, exits with
     * exit_block_newpcset(pc+4) and returns 4. */
nkeynes@671
  2064
    COUNT_INST(I_RTE);
nkeynes@374
  2065
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2066
	SLOTILLEGAL();
nkeynes@374
  2067
    } else {
nkeynes@408
  2068
	check_priv();
nkeynes@995
  2069
	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
nkeynes@995
  2070
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@995
  2071
	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
nkeynes@995
  2072
	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
nkeynes@590
  2073
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@377
  2074
	sh4_x86.fpuen_checked = FALSE;
nkeynes@417
  2075
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@409
  2076
	sh4_x86.branch_taken = TRUE;
nkeynes@1112
  2077
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
nkeynes@601
  2078
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2079
	    exit_block_emu(pc+2);
nkeynes@601
  2080
	    return 2;
nkeynes@601
  2081
	} else {
nkeynes@601
  2082
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2083
	    exit_block_newpcset(pc+4);
nkeynes@601
  2084
	    return 4;
nkeynes@601
  2085
	}
nkeynes@374
  2086
    }
nkeynes@374
  2087
:}
nkeynes@374
  2088
RTS {:  
    /* RTS: delayed return to the address held in PR.
     * The emitted code copies R_PR to R_NEW_PC through ECX.
     * Returns 2 via exit_block_emu() when the slot instruction at pc+2 is
     * UNTRANSLATABLE, otherwise translates it inline, exits with
     * exit_block_newpcset(pc+4) and returns 4. */
nkeynes@671
  2089
    COUNT_INST(I_RTS);
nkeynes@374
  2090
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2091
	SLOTILLEGAL();
nkeynes@374
  2092
    } else {
nkeynes@995
  2093
	MOVL_rbpdisp_r32( R_PR, REG_ECX );
nkeynes@995
  2094
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@590
  2095
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2096
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2097
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2098
	    exit_block_emu(pc+2);
nkeynes@601
  2099
	    return 2;
nkeynes@601
  2100
	} else {
nkeynes@601
  2101
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2102
	    exit_block_newpcset(pc+4);
nkeynes@601
  2103
	    return 4;
nkeynes@601
  2104
	}
nkeynes@374
  2105
    }
nkeynes@374
  2106
:}
nkeynes@374
  2107
TRAPA #imm {:  
    /* TRAPA: raise a trap with the immediate operand.
     * The emitted code first adds pc + 2 - block_start_pc to R_PC, then calls
     * sh4_raise_trap() with imm in EAX, and leaves the block through
     * exit_block_pcset(pc+2). Handled by SLOTILLEGAL() when in a delay slot;
     * otherwise ends the block (branch_taken) and returns 2. */
nkeynes@671
  2108
    COUNT_INST(I_TRAPA);
nkeynes@374
  2109
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2110
	SLOTILLEGAL();
nkeynes@374
  2111
    } else {
nkeynes@995
  2112
	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
nkeynes@991
  2113
	ADDL_r32_rbpdisp( REG_ECX, R_PC );
nkeynes@995
  2114
	MOVL_imm32_r32( imm, REG_EAX );
nkeynes@995
  2115
	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
nkeynes@417
  2116
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@974
  2117
	exit_block_pcset(pc+2);
nkeynes@409
  2118
	sh4_x86.branch_taken = TRUE;
nkeynes@408
  2119
	return 2;
nkeynes@374
  2120
    }
nkeynes@374
  2121
:}
nkeynes@374
  2122
UNDEF {:  
    /* UNDEF: undefined opcode. Emits an exception exit:
     * EXC_SLOT_ILLEGAL attributed to pc-2 when in a delay slot, otherwise
     * EXC_ILLEGAL attributed to pc.
     * NOTE(review): only the non-delay-slot branch has an explicit return 2;
     * the delay-slot branch falls out of the action - confirm this is the
     * intended behaviour of the generated code. */
nkeynes@671
  2123
    COUNT_INST(I_UNDEF);
nkeynes@374
  2124
    if( sh4_x86.in_delay_slot ) {
nkeynes@956
  2125
	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2);    
nkeynes@374
  2126
    } else {
nkeynes@956
  2127
	exit_block_exc(EXC_ILLEGAL, pc);    
nkeynes@408
  2128
	return 2;
nkeynes@374
  2129
    }
nkeynes@368
  2130
:}
nkeynes@374
  2131
nkeynes@374
  2132
CLRMAC {:  
    /* CLRMAC: zero EAX with an XOR and store it to both R_MACL and R_MACH.
     * tstate is reset to TSTATE_NONE afterwards. */
nkeynes@671
  2133
    COUNT_INST(I_CLRMAC);
nkeynes@991
  2134
    XORL_r32_r32(REG_EAX, REG_EAX);
nkeynes@995
  2135
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@995
  2136
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
nkeynes@417
  2137
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@368
  2138
:}
nkeynes@374
  2139
CLRS {:
    /* CLRS: emit CLC followed by a set-on-carry byte store to R_S, so R_S
     * receives the (cleared) carry condition. tstate is reset to TSTATE_NONE. */
nkeynes@671
  2140
    COUNT_INST(I_CLRS);
nkeynes@374
  2141
    CLC();
nkeynes@991
  2142
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
nkeynes@872
  2143
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@368
  2144
:}
nkeynes@374
  2145
CLRT {:  
    /* CLRT: emit CLC followed by SETC_t(), and record TSTATE_C as the
     * translator's tstate. */
nkeynes@671
  2146
    COUNT_INST(I_CLRT);
nkeynes@374
  2147
    CLC();
nkeynes@374
  2148
    SETC_t();
nkeynes@417
  2149
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  2150
:}
nkeynes@374
  2151
SETS {:  
    /* SETS: emit STC followed by a set-on-carry byte store to R_S, so R_S
     * receives the (set) carry condition. tstate is reset to TSTATE_NONE. */
nkeynes@671
  2152
    COUNT_INST(I_SETS);
nkeynes@374
  2153
    STC();
nkeynes@991
  2154
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
nkeynes@872
  2155
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  2156
:}
nkeynes@374
  2157
SETT {:  
    /* SETT: emit STC followed by SETC_t(), and record TSTATE_C as the
     * translator's tstate. */
nkeynes@671
  2158
    COUNT_INST(I_SETT);
nkeynes@374
  2159
    STC();
nkeynes@374
  2160
    SETC_t();
nkeynes@417
  2161
    sh4_x86.tstate = TSTATE_C;
nkeynes@374
  2162
:}
nkeynes@359
  2163
nkeynes@375
  2164
/* Floating point moves */
nkeynes@375
  2165
FMOV FRm, FRn {:  
    /* FMOV register-to-register, after check_fpuen().
     * When sh4_x86.double_size is set, both halves are copied through EAX/ECX
     * with the dr0/dr1 helpers; otherwise a single value is copied through
     * EAX with load_fr/store_fr. */
nkeynes@671
  2166
    COUNT_INST(I_FMOV1);
nkeynes@377
  2167
    check_fpuen();
nkeynes@901
  2168
    if( sh4_x86.double_size ) {
nkeynes@991
  2169
        load_dr0( REG_EAX, FRm );
nkeynes@991
  2170
        load_dr1( REG_ECX, FRm );
nkeynes@991
  2171
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2172
        store_dr1( REG_ECX, FRn );
nkeynes@901
  2173
    } else {
nkeynes@991
  2174
        load_fr( REG_EAX, FRm ); // SZ=0 branch
nkeynes@991
  2175
        store_fr( REG_EAX, FRn );
nkeynes@901
  2176
    }
nkeynes@375
  2177
:}
nkeynes@416
  2178
FMOV FRm, @Rn {: 
    /* FMOV store to the address in Rn, after check_fpuen().
     * double_size: 64-bit alignment check, then two long writes - the dr0
     * half at Rn and the dr1 half at Rn+4. Rn is reloaded into EAX before the
     * second write rather than reused. Otherwise: 32-bit alignment check and
     * one long write. tstate is reset to TSTATE_NONE. */
nkeynes@671
  2179
    COUNT_INST(I_FMOV2);
nkeynes@586
  2180
    check_fpuen();
nkeynes@991
  2181
    load_reg( REG_EAX, Rn );
nkeynes@901
  2182
    if( sh4_x86.double_size ) {
nkeynes@991
  2183
        check_walign64( REG_EAX );
nkeynes@991
  2184
        load_dr0( REG_EDX, FRm );
nkeynes@991
  2185
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2186
        load_reg( REG_EAX, Rn );
nkeynes@991
  2187
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2188
        load_dr1( REG_EDX, FRm );
nkeynes@991
  2189
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@901
  2190
    } else {
nkeynes@991
  2191
        check_walign32( REG_EAX );
nkeynes@991
  2192
        load_fr( REG_EDX, FRm );
nkeynes@991
  2193
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@901
  2194
    }
nkeynes@417
  2195
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@375
  2196
:}
nkeynes@375
  2197
FMOV @Rm, FRn {:  
    /* FMOV load from the address in Rm, after check_fpuen().
     * double_size: 64-bit alignment check, then two long reads - from Rm into
     * the dr0 half and from Rm+4 into the dr1 half. Rm is reloaded into EAX
     * before the second read because the first read overwrote EAX.
     * Otherwise: 32-bit alignment check and one long read into FRn.
     * tstate is reset to TSTATE_NONE. */
nkeynes@671
  2198
    COUNT_INST(I_FMOV5);
nkeynes@586
  2199
    check_fpuen();
nkeynes@991
  2200
    load_reg( REG_EAX, Rm );
nkeynes@901
  2201
    if( sh4_x86.double_size ) {
nkeynes@991
  2202
        check_ralign64( REG_EAX );
nkeynes@991
  2203
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2204
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2205
        load_reg( REG_EAX, Rm );
nkeynes@991
  2206
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2207
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2208
        store_dr1( REG_EAX, FRn );
nkeynes@901
  2209
    } else {
nkeynes@991
  2210
        check_ralign32( REG_EAX );
nkeynes@991
  2211
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2212
        store_fr( REG_EAX, FRn );
nkeynes@901
  2213
    }
nkeynes@417
  2214
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@375
  2215
:}
nkeynes@377
  2216
FMOV FRm, @-Rn {:  
    /* FMOV pre-decrement store, after check_fpuen().
     * double_size: writes the dr0 half at Rn-8 and the dr1 half at Rn-4, then
     * subtracts 8 from r[Rn]. Otherwise: writes FRm at Rn-4, then subtracts 4
     * from r[Rn]. In both cases the register update is emitted after the
     * memory writes, and the alignment check is done on the undecremented
     * address. tstate is reset to TSTATE_NONE. */
nkeynes@671
  2217
    COUNT_INST(I_FMOV3);
nkeynes@586
  2218
    check_fpuen();
nkeynes@991
  2219
    load_reg( REG_EAX, Rn );
nkeynes@901
  2220
    if( sh4_x86.double_size ) {
nkeynes@991
  2221
        check_walign64( REG_EAX );
nkeynes@991
  2222
        LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
nkeynes@991
  2223
        load_dr0( REG_EDX, FRm );
nkeynes@991
  2224
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2225
        load_reg( REG_EAX, Rn );
nkeynes@991
  2226
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
nkeynes@991
  2227
        load_dr1( REG_EDX, FRm );
nkeynes@991
  2228
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2229
        ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
nkeynes@901
  2230
    } else {
nkeynes@991
  2231
        check_walign32( REG_EAX );
nkeynes@991
  2232
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
nkeynes@991
  2233
        load_fr( REG_EDX, FRm );
nkeynes@991
  2234
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2235
        ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
nkeynes@901
  2236
    }
nkeynes@417
  2237
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2238
:}
nkeynes@416
  2239
FMOV @Rm+, FRn {:
    /* FMOV post-increment load, after check_fpuen().
     * double_size: reads Rm into the dr0 half and Rm+4 into the dr1 half,
     * then adds 8 to r[Rm]. Otherwise: reads Rm into FRn, then adds 4 to
     * r[Rm]. The register update is emitted after the memory reads.
     * tstate is reset to TSTATE_NONE. */
nkeynes@671
  2240
    COUNT_INST(I_FMOV6);
nkeynes@586
  2241
    check_fpuen();
nkeynes@991
  2242
    load_reg( REG_EAX, Rm );
nkeynes@901
  2243
    if( sh4_x86.double_size ) {
nkeynes@991
  2244
        check_ralign64( REG_EAX );
nkeynes@991
  2245
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2246
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2247
        load_reg( REG_EAX, Rm );
nkeynes@991
  2248
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2249
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2250
        store_dr1( REG_EAX, FRn );
nkeynes@991
  2251
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
nkeynes@901
  2252
    } else {
nkeynes@991
  2253
        check_ralign32( REG_EAX );
nkeynes@991
  2254
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2255
        store_fr( REG_EAX, FRn );
nkeynes@991
  2256
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
nkeynes@901
  2257
    }
nkeynes@417
  2258
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2259
:}
nkeynes@377
  2260
FMOV FRm, @(R0, Rn) {:  
    /* FMOV indexed store to address Rn + r[0], after check_fpuen().
     * double_size: writes the dr0 half at the address and the dr1 half at
     * address+4; the address is recomputed from Rn and r[0] before the second
     * write. Otherwise: one long write of FRm.
     * tstate is reset to TSTATE_NONE. */
nkeynes@671
  2261
    COUNT_INST(I_FMOV4);
nkeynes@586
  2262
    check_fpuen();
nkeynes@991
  2263
    load_reg( REG_EAX, Rn );
nkeynes@991
  2264
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@901
  2265
    if( sh4_x86.double_size ) {
nkeynes@991
  2266
        check_walign64( REG_EAX );
nkeynes@991
  2267
        load_dr0( REG_EDX, FRm );
nkeynes@991
  2268
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2269
        load_reg( REG_EAX, Rn );
nkeynes@991
  2270
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@991
  2271
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2272
        load_dr1( REG_EDX, FRm );
nkeynes@991
  2273
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@901
  2274
    } else {
nkeynes@991
  2275
        check_walign32( REG_EAX );
nkeynes@991
  2276
        load_fr( REG_EDX, FRm );
nkeynes@991
  2277
        MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
nkeynes@901
  2278
    }
nkeynes@417
  2279
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2280
:}
nkeynes@377
  2281
FMOV @(R0, Rm), FRn {:  
    /* FMOV indexed load from address Rm + r[0], after check_fpuen().
     * double_size: reads the address into the dr0 half and address+4 into the
     * dr1 half; the address is recomputed from Rm and r[0] before the second
     * read because the first read overwrote EAX. Otherwise: one long read
     * into FRn. tstate is reset to TSTATE_NONE. */
nkeynes@671
  2282
    COUNT_INST(I_FMOV7);
nkeynes@586
  2283
    check_fpuen();
nkeynes@991
  2284
    load_reg( REG_EAX, Rm );
nkeynes@991
  2285
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@901
  2286
    if( sh4_x86.double_size ) {
nkeynes@991
  2287
        check_ralign64( REG_EAX );
nkeynes@991
  2288
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2289
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2290
        load_reg( REG_EAX, Rm );
nkeynes@991
  2291
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@991
  2292
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2293
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2294
        store_dr1( REG_EAX, FRn );
nkeynes@901
  2295
    } else {
nkeynes@991
  2296
        check_ralign32( REG_EAX );
nkeynes@991
  2297
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2298
        store_fr( REG_EAX, FRn );
nkeynes@901
  2299
    }
nkeynes@417
  2300
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2301
:}
nkeynes@377
  2302
FLDI0 FRn {:  /* IFF PR=0 */
    /* FLDI0: store an all-zero bit pattern (EAX XORed with itself) to FRn.
     * Nothing is emitted for the store when sh4_x86.double_prec is non-zero.
     * tstate is reset to TSTATE_NONE in either case. */
nkeynes@671
  2303
    COUNT_INST(I_FLDI0);
nkeynes@377
  2304
    check_fpuen();
nkeynes@901
  2305
    if( sh4_x86.double_prec == 0 ) {
nkeynes@991
  2306
        XORL_r32_r32( REG_EAX, REG_EAX );
nkeynes@991
  2307
        store_fr( REG_EAX, FRn );
nkeynes@901
  2308
    }
nkeynes@417
  2309
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2310
:}
nkeynes@377
  2311
FLDI1 FRn {:  /* IFF PR=0 */
    /* FLDI1: store the constant 0x3F800000 (the IEEE-754 single-precision
     * encoding of 1.0) to FRn. Nothing is emitted for the store when
     * sh4_x86.double_prec is non-zero. tstate is left untouched here. */
nkeynes@671
  2312
    COUNT_INST(I_FLDI1);
nkeynes@377
  2313
    check_fpuen();
nkeynes@901
  2314
    if( sh4_x86.double_prec == 0 ) {
nkeynes@995
  2315
        MOVL_imm32_r32( 0x3F800000, REG_EAX );
nkeynes@991
  2316
        store_fr( REG_EAX, FRn );
nkeynes@901
  2317
    }
nkeynes@377
  2318
:}
nkeynes@377
  2319
nkeynes@377
  2320
FLOAT FPUL, FRn {:  
    /* FLOAT: integer-load R_FPUL onto the x87 stack (FILD) and pop it into
     * FRn, using pop_dr when sh4_x86.double_prec is set and pop_fr otherwise. */
nkeynes@671
  2321
    COUNT_INST(I_FLOAT);
nkeynes@377
  2322
    check_fpuen();
nkeynes@991
  2323
    FILD_rbpdisp(R_FPUL);
nkeynes@901
  2324
    if( sh4_x86.double_prec ) {
nkeynes@901
  2325
        pop_dr( FRn );
nkeynes@901
  2326
    } else {
nkeynes@901
  2327
        pop_fr( FRn );
nkeynes@901
  2328
    }
nkeynes@377
  2329
:}
nkeynes@377
  2330
FTRC FRm, FPUL {:  
    /* FTRC: convert FRm to an integer in R_FPUL, with saturation.
     * The value is pushed (push_dr or push_fr depending on double_prec) and
     * compared first against max_int and then against min_int; ECX holds the
     * address of whichever bound was compared last. Either out-of-range jump
     * lands on the shared sat/sat2 target, where the bound addressed by ECX
     * is copied to R_FPUL and the pushed value is discarded.
     * On the in-range path the current x87 control word is saved to save_fcw,
     * trunc_fcw is loaded for the FISTP into R_FPUL, and the saved control
     * word is restored afterwards. tstate is reset to TSTATE_NONE. */
nkeynes@671
  2331
    COUNT_INST(I_FTRC);
nkeynes@377
  2332
    check_fpuen();
nkeynes@901
  2333
    if( sh4_x86.double_prec ) {
nkeynes@901
  2334
        push_dr( FRm );
nkeynes@901
  2335
    } else {
nkeynes@901
  2336
        push_fr( FRm );
nkeynes@901
  2337
    }
nkeynes@995
  2338
    MOVP_immptr_rptr( &max_int, REG_ECX );
nkeynes@991
  2339
    FILD_r32disp( REG_ECX, 0 );
nkeynes@388
  2340
    FCOMIP_st(1);
nkeynes@991
  2341
    JNA_label( sat );
nkeynes@995
  2342
    MOVP_immptr_rptr( &min_int, REG_ECX );
nkeynes@995
  2343
    FILD_r32disp( REG_ECX, 0 );
nkeynes@995
  2344
    FCOMIP_st(1);              
nkeynes@995
  2345
    JAE_label( sat2 );            
nkeynes@995
  2346
    MOVP_immptr_rptr( &save_fcw, REG_EAX );
nkeynes@991
  2347
    FNSTCW_r32disp( REG_EAX, 0 );
nkeynes@995
  2348
    MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
nkeynes@991
  2349
    FLDCW_r32disp( REG_EDX, 0 );
nkeynes@995
  2350
    FISTP_rbpdisp(R_FPUL);             
nkeynes@991
  2351
    FLDCW_r32disp( REG_EAX, 0 );
nkeynes@995
  2352
    JMP_label(end);             
nkeynes@388
  2353
nkeynes@388
  2354
    JMP_TARGET(sat);
nkeynes@388
  2355
    JMP_TARGET(sat2);
nkeynes@991
  2356
    MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
nkeynes@995
  2357
    MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
nkeynes@388
  2358
    FPOP_st();
nkeynes@388
  2359
    JMP_TARGET(end);
nkeynes@417
  2360
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2361
:}
nkeynes@377
  2362
FLDS FRm, FPUL {:  
    /* FLDS: copy FRm to R_FPUL through EAX, after check_fpuen(). */
nkeynes@671
  2363
    COUNT_INST(I_FLDS);
nkeynes@377
  2364
    check_fpuen();
nkeynes@991
  2365
    load_fr( REG_EAX, FRm );
nkeynes@995
  2366
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
nkeynes@377
  2367
:}
nkeynes@377
  2368
FSTS FPUL, FRn {:  
    /* FSTS: copy R_FPUL to FRn through EAX, after check_fpuen(). */
nkeynes@671
  2369
    COUNT_INST(I_FSTS);
nkeynes@377
  2370
    check_fpuen();
nkeynes@995
  2371
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
nkeynes@991
  2372
    store_fr( REG_EAX, FRn );
nkeynes@377
  2373
:}
nkeynes@377
  2374
FCNVDS FRm, FPUL {:  
    /* FCNVDS: push FRm with push_dr and pop it with pop_fpul().
     * Only emitted when sh4_x86.double_prec is set; otherwise nothing is
     * generated beyond check_fpuen(). */
nkeynes@671
  2375
    COUNT_INST(I_FCNVDS);
nkeynes@377
  2376
    check_fpuen();
nkeynes@901
  2377
    if( sh4_x86.double_prec ) {
nkeynes@901
  2378
        push_dr( FRm );
nkeynes@901
  2379
        pop_fpul();
nkeynes@901
  2380
    }
nkeynes@377
  2381
:}
nkeynes@377
  2382
FCNVSD FPUL, FRn {:  
    /* FCNVSD: push with push_fpul() and pop into FRn with pop_dr.
     * Only emitted when sh4_x86.double_prec is set; otherwise nothing is
     * generated beyond check_fpuen(). */
nkeynes@671
  2383
    COUNT_INST(I_FCNVSD);
nkeynes@377
  2384
    check_fpuen();
nkeynes@901
  2385
    if( sh4_x86.double_prec ) {
nkeynes@901
  2386
        push_fpul();
nkeynes@901
  2387
        pop_dr( FRn );
nkeynes@901
  2388
    }
nkeynes@377
  2389
:}
nkeynes@375
  2390
nkeynes@359
  2391
/* Floating point instructions */
nkeynes@374
  2392
FABS FRn {:  
    /* FABS: push FRn, apply FABS to the stack top, pop back into FRn.
     * Uses the dr helpers when sh4_x86.double_prec is set, the fr helpers
     * otherwise. */
nkeynes@671
  2393
    COUNT_INST(I_FABS);
nkeynes@377
  2394
    check_fpuen();
nkeynes@901
  2395
    if( sh4_x86.double_prec ) {
nkeynes@901
  2396
        push_dr(FRn);
nkeynes@901
  2397
        FABS_st0();
nkeynes@901
  2398
        pop_dr(FRn);
nkeynes@901
  2399
    } else {
nkeynes@901
  2400
        push_fr(FRn);
nkeynes@901
  2401
        FABS_st0();
nkeynes@901
  2402
        pop_fr(FRn);
nkeynes@901
  2403
    }
nkeynes@374
  2404
:}
nkeynes@377
  2405
FADD FRm, FRn {:  
    /* FADD: push FRm then FRn, add-and-pop (FADDP), pop the sum into FRn.
     * Uses the dr helpers when sh4_x86.double_prec is set, the fr helpers
     * otherwise. */
nkeynes@671
  2406
    COUNT_INST(I_FADD);
nkeynes@377
  2407
    check_fpuen();
nkeynes@901
  2408
    if( sh4_x86.double_prec ) {
nkeynes@901
  2409
        push_dr(FRm);
nkeynes@901
  2410
        push_dr(FRn);
nkeynes@901
  2411
        FADDP_st(1);
nkeynes@901
  2412
        pop_dr(FRn);
nkeynes@901
  2413
    } else {
nkeynes@901
  2414
        push_fr(FRm);
nkeynes@901
  2415
        push_fr(FRn);
nkeynes@901
  2416
        FADDP_st(1);
nkeynes@901
  2417
        pop_fr(FRn);
nkeynes@901
  2418
    }
nkeynes@375
  2419
:}
nkeynes@377
  2420
FDIV FRm, FRn {:  
    /* FDIV: push FRn then FRm, divide-and-pop (FDIVP), pop the result into
     * FRn. FRn is pushed first; the push order matters because the operation
     * is not commutative. Uses the dr helpers when sh4_x86.double_prec is
     * set, the fr helpers otherwise. */
nkeynes@671
  2421
    COUNT_INST(I_FDIV);
nkeynes@377
  2422
    check_fpuen();
nkeynes@901
  2423
    if( sh4_x86.double_prec ) {
nkeynes@901
  2424
        push_dr(FRn);
nkeynes@901
  2425
        push_dr(FRm);
nkeynes@901
  2426
        FDIVP_st(1);
nkeynes@901
  2427
        pop_dr(FRn);
nkeynes@901
  2428
    } else {
nkeynes@901
  2429
        push_fr(FRn);
nkeynes@901
  2430
        push_fr(FRm);
nkeynes@901
  2431
        FDIVP_st(1);
nkeynes@901
  2432
        pop_fr(FRn);
nkeynes@901
  2433
    }
nkeynes@375
  2434
:}
nkeynes@375
  2435
FMAC FR0, FRm, FRn {:  
    /* FMAC: multiply register 0 by FRm, add FRn, and pop the result into FRn.
     * Emitted as push 0, push FRm, FMULP, push FRn, FADDP, pop FRn.
     * Uses the dr helpers when sh4_x86.double_prec is set, the fr helpers
     * otherwise. */
nkeynes@671
  2436
    COUNT_INST(I_FMAC);
nkeynes@377
  2437
    check_fpuen();
nkeynes@901
  2438
    if( sh4_x86.double_prec ) {
nkeynes@901
  2439
        push_dr( 0 );
nkeynes@901
  2440
        push_dr( FRm );
nkeynes@901
  2441
        FMULP_st(1);
nkeynes@901
  2442
        push_dr( FRn );
nkeynes@901
  2443
        FADDP_st(1);
nkeynes@901
  2444
        pop_dr( FRn );
nkeynes@901
  2445
    } else {
nkeynes@901
  2446
        push_fr( 0 );
nkeynes@901
  2447
        push_fr( FRm );
nkeynes@901
  2448
        FMULP_st(1);
nkeynes@901
  2449
        push_fr( FRn );
nkeynes@901
  2450
        FADDP_st(1);
nkeynes@901
  2451
        pop_fr( FRn );
nkeynes@901
  2452
    }
nkeynes@375
  2453
:}
nkeynes@375
  2454
nkeynes@377
  2455
FMUL FRm, FRn {:  
    /* FMUL: push FRm then FRn, multiply-and-pop (FMULP), pop the product into
     * FRn. Uses the dr helpers when sh4_x86.double_prec is set, the fr
     * helpers otherwise. */
nkeynes@671
  2456
    COUNT_INST(I_FMUL);
nkeynes@377
  2457
    check_fpuen();
nkeynes@901
  2458
    if( sh4_x86.double_prec ) {
nkeynes@901
  2459
        push_dr(FRm);
nkeynes@901
  2460
        push_dr(FRn);
nkeynes@901
  2461
        FMULP_st(1);
nkeynes@901
  2462
        pop_dr(FRn);
nkeynes@901
  2463
    } else {
nkeynes@901
  2464
        push_fr(FRm);
nkeynes@901
  2465
        push_fr(FRn);
nkeynes@901
  2466
        FMULP_st(1);
nkeynes@901
  2467
        pop_fr(FRn);
nkeynes@901
  2468
    }
nkeynes@377
  2469
:}
nkeynes@377
  2470
FNEG FRn {:  
    /* FNEG: push FRn, change sign of the stack top (FCHS), pop back into FRn.
     * Uses the dr helpers when sh4_x86.double_prec is set, the fr helpers
     * otherwise. */
nkeynes@671
  2471
    COUNT_INST(I_FNEG);
nkeynes@377
  2472
    check_fpuen();
nkeynes@901
  2473
    if( sh4_x86.double_prec ) {
nkeynes@901
  2474
        push_dr(FRn);
nkeynes@901
  2475
        FCHS_st0();
nkeynes@901
  2476
        pop_dr(FRn);
nkeynes@901
  2477
    } else {
nkeynes@901
  2478
        push_fr(FRn);
nkeynes@901
  2479
        FCHS_st0();
nkeynes@901
  2480
        pop_fr(FRn);
nkeynes@901
  2481
    }
nkeynes@377
  2482
:}
nkeynes@377
  2483
FSRRA FRn {:  
    /* FSRRA: reciprocal square root of FRn, computed as 1 / sqrt(FRn):
     * load 1.0, push FRn, FSQRT, divide-and-pop, pop into FRn.
     * Only emitted when sh4_x86.double_prec is zero. */
nkeynes@671
  2484
    COUNT_INST(I_FSRRA);
nkeynes@377
  2485
    check_fpuen();
nkeynes@901
  2486
    if( sh4_x86.double_prec == 0 ) {
nkeynes@901
  2487
        FLD1_st0();
nkeynes@901
  2488
        push_fr(FRn);
nkeynes@901
  2489
        FSQRT_st0();
nkeynes@901
  2490
        FDIVP_st(1);
nkeynes@901
  2491
        pop_fr(FRn);
nkeynes@901
  2492
    }
nkeynes@377
  2493
:}
nkeynes@377
  2494
FSQRT FRn {:  
    /* FSQRT: push FRn, apply FSQRT to the stack top, pop back into FRn.
     * Uses the dr helpers when sh4_x86.double_prec is set, the fr helpers
     * otherwise. */
nkeynes@671
  2495
    COUNT_INST(I_FSQRT);
nkeynes@377
  2496
    check_fpuen();
nkeynes@901
  2497
    if( sh4_x86.double_prec ) {
nkeynes@901
  2498
        push_dr(FRn);
nkeynes@901
  2499
        FSQRT_st0();
nkeynes@901
  2500
        pop_dr(FRn);
nkeynes@901
  2501
    } else {
nkeynes@901
  2502
        push_fr(FRn);
nkeynes@901
  2503
        FSQRT_st0();
nkeynes@901
  2504
        pop_fr(FRn);
nkeynes@901
  2505
    }
nkeynes@377
  2506
:}
nkeynes@377
  2507
FSUB FRm, FRn {:  
    /* FSUB: push FRn then FRm, subtract-and-pop (FSUBP), pop the result into
     * FRn. FRn is pushed first; the push order matters because the operation
     * is not commutative. Uses the dr helpers when sh4_x86.double_prec is
     * set, the fr helpers otherwise. */
nkeynes@671
  2508
    COUNT_INST(I_FSUB);
nkeynes@377
  2509
    check_fpuen();
nkeynes@901
  2510
    if( sh4_x86.double_prec ) {
nkeynes@901
  2511
        push_dr(FRn);
nkeynes@901
  2512
        push_dr(FRm);
nkeynes@901
  2513
        FSUBP_st(1);
nkeynes@901
  2514
        pop_dr(FRn);
nkeynes@901
  2515
    } else {
nkeynes@901
  2516
        push_fr(FRn);
nkeynes@901
  2517
        push_fr(FRm);
nkeynes@901
  2518
        FSUBP_st(1);
nkeynes@901
  2519
        pop_fr(FRn);
nkeynes@901
  2520
    }
nkeynes@377
  2521
:}
nkeynes@377
  2522
nkeynes@377
  2523
FCMP/EQ FRm, FRn {:  
    /* FCMP/EQ: push FRm then FRn (dr or fr helpers depending on double_prec),
     * compare-and-pop with FCOMIP, record the "equal" condition with
     * SETE_t(), then discard the remaining stack entry.
     * tstate is recorded as TSTATE_E. */
nkeynes@671
  2524
    COUNT_INST(I_FCMPEQ);
nkeynes@377
  2525
    check_fpuen();
nkeynes@901
  2526
    if( sh4_x86.double_prec ) {
nkeynes@901
  2527
        push_dr(FRm);
nkeynes@901
  2528
        push_dr(FRn);
nkeynes@901
  2529
    } else {
nkeynes@901
  2530
        push_fr(FRm);
nkeynes@901
  2531
        push_fr(FRn);
nkeynes@901
  2532
    }
nkeynes@377
  2533
    FCOMIP_st(1);
nkeynes@377
  2534
    SETE_t();
nkeynes@377
  2535
    FPOP_st();
nkeynes@901
  2536
    sh4_x86.tstate = TSTATE_E;
nkeynes@377
  2537
:}
nkeynes@377
  2538
FCMP/GT FRm, FRn {:  
    /* FCMP/GT: push FRm then FRn (dr or fr helpers depending on double_prec),
     * compare-and-pop with FCOMIP, record the "above" condition with
     * SETA_t(), then discard the remaining stack entry.
     * tstate is recorded as TSTATE_A. */
nkeynes@671
  2539
    COUNT_INST(I_FCMPGT);
nkeynes@377
  2540
    check_fpuen();
nkeynes@901
  2541
    if( sh4_x86.double_prec ) {
nkeynes@901
  2542
        push_dr(FRm);
nkeynes@901
  2543
        push_dr(FRn);
nkeynes@901
  2544
    } else {
nkeynes@901
  2545
        push_fr(FRm);
nkeynes@901
  2546
        push_fr(FRn);
nkeynes@901
  2547
    }
nkeynes@377
  2548
    FCOMIP_st(1);
nkeynes@377
  2549
    SETA_t();
nkeynes@377
  2550
    FPOP_st();
nkeynes@901
  2551
    sh4_x86.tstate = TSTATE_A;
nkeynes@377
  2552
:}
nkeynes@377
  2553
nkeynes@377
  2554
FSCA FPUL, FRn {:  
    /* FSCA: call sh4_fsca() with R_FPUL in EAX and, in EDX, the address of
     * the register pair starting at FRn with its low bit cleared (FRn&0x0E).
     * Only emitted when sh4_x86.double_prec is zero.
     * tstate is reset to TSTATE_NONE in either case. */
nkeynes@671
  2555
    COUNT_INST(I_FSCA);
nkeynes@377
  2556
    check_fpuen();
nkeynes@901
  2557
    if( sh4_x86.double_prec == 0 ) {
nkeynes@991
  2558
        LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
nkeynes@995
  2559
        MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
nkeynes@995
  2560
        CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
nkeynes@901
  2561
    }
nkeynes@417
  2562
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2563
:}
nkeynes@377
  2564
FIPR FVm, FVn {:  
    /* FIPR: four-element inner product of vectors FVm and FVn.
     * Only emitted when sh4_x86.double_prec is zero.
     * SSE3 path: load FVm into xmm4, multiply element-wise by FVn, reduce
     * with two horizontal adds, and store the low lane to fr[0][(FVn<<2)+2].
     * x87 path: accumulate the four element products on the FPU stack and
     * pop the sum to element (FVn<<2)+3.
     * NOTE(review): the two paths name different destination indices (+2 vs
     * +3); presumably push_fr/pop_fr remap the index while the SSE3 path
     * addresses fr[] directly - confirm against those macros. */
nkeynes@671
  2565
    COUNT_INST(I_FIPR);
nkeynes@377
  2566
    check_fpuen();
nkeynes@901
  2567
    if( sh4_x86.double_prec == 0 ) {
nkeynes@904
  2568
        if( sh4_x86.sse3_enabled ) {
nkeynes@991
  2569
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
nkeynes@991
  2570
            MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
nkeynes@903
  2571
            HADDPS_xmm_xmm( 4, 4 ); 
nkeynes@903
  2572
            HADDPS_xmm_xmm( 4, 4 );
nkeynes@991
  2573
            MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
nkeynes@903
  2574
        } else {
nkeynes@904
  2575
            push_fr( FVm<<2 );
nkeynes@903
  2576
            push_fr( FVn<<2 );
nkeynes@903
  2577
            FMULP_st(1);
nkeynes@903
  2578
            push_fr( (FVm<<2)+1);
nkeynes@903
  2579
            push_fr( (FVn<<2)+1);
nkeynes@903
  2580
            FMULP_st(1);
nkeynes@903
  2581
            FADDP_st(1);
nkeynes@903
  2582
            push_fr( (FVm<<2)+2);
nkeynes@903
  2583
            push_fr( (FVn<<2)+2);
nkeynes@903
  2584
            FMULP_st(1);
nkeynes@903
  2585
            FADDP_st(1);
nkeynes@903
  2586
            push_fr( (FVm<<2)+3);
nkeynes@903
  2587
            push_fr( (FVn<<2)+3);
nkeynes@903
  2588
            FMULP_st(1);
nkeynes@903
  2589
            FADDP_st(1);
nkeynes@903
  2590
            pop_fr( (FVn<<2)+3);
nkeynes@904
  2591
        }
nkeynes@901
  2592
    }
nkeynes@377
  2593
:}
nkeynes@377
  2594
FTRV XMTRX, FVn {:  
    /* FTRV: transform vector FVn by the 4x4 matrix held in fr[1][0..15].
     * Only emitted when sh4_x86.double_prec is zero.
     * SSE3 path: the four matrix rows of fr[1] are loaded into xmm0-xmm3,
     * each element of FVn is broadcast across one of xmm4-xmm7, the
     * broadcasts are multiplied by the rows, and the four products are
     * summed into xmm4, which is written back over FVn. The trailing
     * comments on the loads record the element order as stored in memory.
     * Fallback path: calls sh4_ftrv() with the address of FVn in EAX.
     * tstate is reset to TSTATE_NONE in either case. */
nkeynes@671
  2595
    COUNT_INST(I_FTRV);
nkeynes@377
  2596
    check_fpuen();
nkeynes@901
  2597
    if( sh4_x86.double_prec == 0 ) {
nkeynes@903
  2598
        if( sh4_x86.sse3_enabled ) {
nkeynes@991
  2599
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][0]), 1 ); // M1  M0  M3  M2
nkeynes@991
  2600
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][4]), 0 ); // M5  M4  M7  M6
nkeynes@991
  2601
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][8]), 3 ); // M9  M8  M11 M10
nkeynes@991
  2602
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[1][12]), 2 );// M13 M12 M15 M14
nkeynes@903
  2603
nkeynes@991
  2604
            MOVSLDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 ); // V1 V1 V3 V3
nkeynes@991
  2605
            MOVSHDUP_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 5 ); // V0 V0 V2 V2
nkeynes@991
  2606
            MOV_xmm_xmm( 4, 6 );
nkeynes@991
  2607
            MOV_xmm_xmm( 5, 7 );
nkeynes@903
  2608
            MOVLHPS_xmm_xmm( 4, 4 );  // V1 V1 V1 V1
nkeynes@903
  2609
            MOVHLPS_xmm_xmm( 6, 6 );  // V3 V3 V3 V3
nkeynes@903
  2610
            MOVLHPS_xmm_xmm( 5, 5 );  // V0 V0 V0 V0
nkeynes@903
  2611
            MOVHLPS_xmm_xmm( 7, 7 );  // V2 V2 V2 V2
nkeynes@903
  2612
            MULPS_xmm_xmm( 0, 4 );
nkeynes@903
  2613
            MULPS_xmm_xmm( 1, 5 );
nkeynes@903
  2614
            MULPS_xmm_xmm( 2, 6 );
nkeynes@903
  2615
            MULPS_xmm_xmm( 3, 7 );
nkeynes@903
  2616
            ADDPS_xmm_xmm( 5, 4 );
nkeynes@903
  2617
            ADDPS_xmm_xmm( 7, 6 );
nkeynes@903
  2618
            ADDPS_xmm_xmm( 6, 4 );
nkeynes@991
  2619
            MOVAPS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][FVn<<2]) );
nkeynes@903
  2620
        } else {
nkeynes@991
  2621
            LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FVn<<2]), REG_EAX );
nkeynes@995
  2622
            CALL1_ptr_r32( sh4_ftrv, REG_EAX );
nkeynes@903
  2623
        }
nkeynes@901
  2624
    }
nkeynes@417
  2625
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2626
:}
nkeynes@377
  2627
nkeynes@377
  2628
FRCHG {:  
    /* FRCHG: XOR the FPSCR_FR bit into R_FPSCR, then call
     * sh4_switch_fr_banks(). tstate is reset to TSTATE_NONE. */
nkeynes@671
  2629
    COUNT_INST(I_FRCHG);
nkeynes@377
  2630
    check_fpuen();
nkeynes@991
  2631
    XORL_imms_rbpdisp( FPSCR_FR, R_FPSCR );
nkeynes@995
  2632
    CALL_ptr( sh4_switch_fr_banks );
nkeynes@417
  2633
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2634
:}
nkeynes@377
  2635
FSCHG {:  
    /* FSCHG: XOR the FPSCR_SZ bit into both R_FPSCR and xlat_sh4_mode in the
     * emitted code, and mirror the toggle in the translator's own state
     * (double_size and sh4_mode) - presumably so the rest of the block is
     * translated for the new transfer size. tstate is reset to TSTATE_NONE. */
nkeynes@671
  2636
    COUNT_INST(I_FSCHG);
nkeynes@377
  2637
    check_fpuen();
nkeynes@991
  2638
    XORL_imms_rbpdisp( FPSCR_SZ, R_FPSCR);
nkeynes@991
  2639
    XORL_imms_rbpdisp( FPSCR_SZ, REG_OFFSET(xlat_sh4_mode) );
nkeynes@417
  2640
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@901
  2641
    sh4_x86.double_size = !sh4_x86.double_size;
nkeynes@1112
  2642
    sh4_x86.sh4_mode = sh4_x86.sh4_mode ^ FPSCR_SZ;
nkeynes@377
  2643
:}
nkeynes@359
  2644
nkeynes@359
  2645
/* Processor control instructions */
nkeynes@368
  2646
LDC Rm, SR {:
    /* LDC Rm, SR: after check_priv(), pass Rm to sh4_write_sr().
     * Handled by SLOTILLEGAL() when in a delay slot. Afterwards the
     * translator drops its cached assumptions (fpuen_checked, tstate,
     * sh4_mode) - presumably because the SR write can invalidate them;
     * confirm against sh4_write_sr. Returns 2. */
nkeynes@671
  2647
    COUNT_INST(I_LDCSR);
nkeynes@386
  2648
    if( sh4_x86.in_delay_slot ) {
nkeynes@386
  2649
	SLOTILLEGAL();
nkeynes@386
  2650
    } else {
nkeynes@386
  2651
	check_priv();
nkeynes@991
  2652
	load_reg( REG_EAX, Rm );
nkeynes@995
  2653
	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
nkeynes@386
  2654
	sh4_x86.fpuen_checked = FALSE;
nkeynes@417
  2655
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@1112
  2656
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
nkeynes@937
  2657
	return 2;
nkeynes@386
  2658
    }
nkeynes@368
  2659
:}
nkeynes@359
  2660
LDC Rm, GBR {: 
    /* LDC Rm, GBR: copy Rm to R_GBR through EAX. No check_priv() call and no
     * tstate change in this rule. */
nkeynes@671
  2661
    COUNT_INST(I_LDC);
nkeynes@991
  2662
    load_reg( REG_EAX, Rm );
nkeynes@995
  2663
    MOVL_r32_rbpdisp( REG_EAX, R_GBR );
nkeynes@359
  2664
:}
nkeynes@359
  2665
LDC Rm, VBR {:  
    /* LDC Rm, VBR: after check_priv(), copy Rm to R_VBR through EAX.
     * tstate is reset to TSTATE_NONE. */
nkeynes@671
  2666
    COUNT_INST(I_LDC);
nkeynes@386
  2667
    check_priv();
nkeynes@991
  2668
    load_reg( REG_EAX, Rm );
nkeynes@995
  2669
    MOVL_r32_rbpdisp( REG_EAX, R_VBR );
nkeynes@417
  2670
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  2671
:}
nkeynes@359
  2672
LDC Rm, SSR {:  
nkeynes@671
  2673
    COUNT_INST(I_LDC);
nkeynes@386
  2674
    check_priv();