Search
lxdream.org :: lxdream/src/sh4/sh4x86.in
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/sh4x86.in
changeset 1198:407659e01ef0
prev1197:904fba59a705
next1214:49152b3d8b75
author Nathan Keynes <nkeynes@lxdream.org>
date Fri Dec 16 10:08:45 2011 +1000 (7 years ago)
permissions -rw-r--r--
last change Add volatile qualifier to return-address frobbing - works around optimizer
bug in GCC versions after 4.2
file annotate diff log raw
nkeynes@359
     1
/**
nkeynes@586
     2
 * $Id$
nkeynes@359
     3
 * 
nkeynes@359
     4
 * SH4 => x86 translation. This version does no real optimization, it just
nkeynes@359
     5
 * outputs straight-line x86 code - it mainly exists to provide a baseline
nkeynes@359
     6
 * to test the optimizing versions against.
nkeynes@359
     7
 *
nkeynes@359
     8
 * Copyright (c) 2007 Nathan Keynes.
nkeynes@359
     9
 *
nkeynes@359
    10
 * This program is free software; you can redistribute it and/or modify
nkeynes@359
    11
 * it under the terms of the GNU General Public License as published by
nkeynes@359
    12
 * the Free Software Foundation; either version 2 of the License, or
nkeynes@359
    13
 * (at your option) any later version.
nkeynes@359
    14
 *
nkeynes@359
    15
 * This program is distributed in the hope that it will be useful,
nkeynes@359
    16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
nkeynes@359
    17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
nkeynes@359
    18
 * GNU General Public License for more details.
nkeynes@359
    19
 */
nkeynes@359
    20
nkeynes@368
    21
#include <assert.h>
nkeynes@388
    22
#include <math.h>
nkeynes@368
    23
nkeynes@380
    24
#ifndef NDEBUG
nkeynes@380
    25
#define DEBUG_JUMPS 1
nkeynes@380
    26
#endif
nkeynes@380
    27
nkeynes@905
    28
#include "lxdream.h"
nkeynes@368
    29
#include "sh4/sh4core.h"
nkeynes@1091
    30
#include "sh4/sh4dasm.h"
nkeynes@368
    31
#include "sh4/sh4trans.h"
nkeynes@671
    32
#include "sh4/sh4stat.h"
nkeynes@388
    33
#include "sh4/sh4mmio.h"
nkeynes@939
    34
#include "sh4/mmu.h"
nkeynes@991
    35
#include "xlat/xltcache.h"
nkeynes@991
    36
#include "xlat/x86/x86op.h"
nkeynes@1091
    37
#include "x86dasm/x86dasm.h"
nkeynes@368
    38
#include "clock.h"
nkeynes@368
    39
nkeynes@368
    40
#define DEFAULT_BACKPATCH_SIZE 4096
nkeynes@368
    41
nkeynes@991
    42
/* Offset of a reg relative to the sh4r structure */
nkeynes@991
    43
#define REG_OFFSET(reg)  (((char *)&sh4r.reg) - ((char *)&sh4r) - 128)
nkeynes@991
    44
nkeynes@995
    45
#define R_T      REG_OFFSET(t)
nkeynes@995
    46
#define R_Q      REG_OFFSET(q)
nkeynes@995
    47
#define R_S      REG_OFFSET(s)
nkeynes@995
    48
#define R_M      REG_OFFSET(m)
nkeynes@995
    49
#define R_SR     REG_OFFSET(sr)
nkeynes@995
    50
#define R_GBR    REG_OFFSET(gbr)
nkeynes@995
    51
#define R_SSR    REG_OFFSET(ssr)
nkeynes@995
    52
#define R_SPC    REG_OFFSET(spc)
nkeynes@995
    53
#define R_VBR    REG_OFFSET(vbr)
nkeynes@995
    54
/* MACH sits 4 bytes past the start of sh4r.mac (R_MACL). The expansion is
 * parenthesized so it stays a single operand inside larger expressions. */
#define R_MACH   (REG_OFFSET(mac)+4)
nkeynes@995
    55
#define R_MACL   REG_OFFSET(mac)
nkeynes@995
    56
#define R_PC     REG_OFFSET(pc)
nkeynes@991
    57
#define R_NEW_PC REG_OFFSET(new_pc)
nkeynes@995
    58
#define R_PR     REG_OFFSET(pr)
nkeynes@995
    59
#define R_SGR    REG_OFFSET(sgr)
nkeynes@995
    60
#define R_FPUL   REG_OFFSET(fpul)
nkeynes@995
    61
#define R_FPSCR  REG_OFFSET(fpscr)
nkeynes@995
    62
#define R_DBR    REG_OFFSET(dbr)
nkeynes@995
    63
#define R_R(rn)  REG_OFFSET(r[rn])
nkeynes@995
    64
#define R_FR(f)  REG_OFFSET(fr[0][(f)^1])
nkeynes@995
    65
#define R_XF(f)  REG_OFFSET(fr[1][(f)^1])
nkeynes@995
    66
#define R_DR(f)  REG_OFFSET(fr[(f)&1][(f)&0x0E])
nkeynes@995
    67
#define R_DRL(f) REG_OFFSET(fr[(f)&1][(f)|0x01])
nkeynes@995
    68
#define R_DRH(f) REG_OFFSET(fr[(f)&1][(f)&0x0E])
nkeynes@995
    69
nkeynes@995
    70
#define DELAY_NONE 0
nkeynes@995
    71
#define DELAY_PC 1
nkeynes@995
    72
#define DELAY_PC_PR 2
nkeynes@991
    73
nkeynes@1112
    74
#define SH4_MODE_UNKNOWN -1
nkeynes@1112
    75
nkeynes@586
    76
/**
 * One pending fixup in the block-wide backpatch list. Records are filled in
 * by sh4_x86_add_backpatch().
 */
struct backpatch_record {
nkeynes@604
    77
    uint32_t fixup_offset; /* (fixup address - start of block code) - relocation size, in bytes */
nkeynes@586
    78
    uint32_t fixup_icount; /* (fixup pc - block_start_pc) >> 1 */
nkeynes@596
    79
    int32_t exc_code; /* Exception code; -2 selects a pointer-sized relocation */
nkeynes@586
    80
};
nkeynes@586
    81
nkeynes@368
    82
/** 
nkeynes@368
    83
 * Struct to manage internal translation state. This state is not saved -
nkeynes@368
    84
 * it is only valid between calls to sh4_translate_begin_block() and
nkeynes@368
    85
 * sh4_translate_end_block()
nkeynes@368
    86
 */
nkeynes@368
    87
struct sh4_x86_state {
nkeynes@590
    88
    int in_delay_slot;
nkeynes@1112
    89
    uint8_t *code;
nkeynes@368
    90
    gboolean fpuen_checked; /* true if we've already checked fpu enabled. */
nkeynes@409
    91
    gboolean branch_taken; /* true if we branched unconditionally */
nkeynes@901
    92
    gboolean double_prec; /* true if FPU is in double-precision mode */
nkeynes@903
    93
    gboolean double_size; /* true if FPU is in double-size mode */
nkeynes@903
    94
    gboolean sse3_enabled; /* true if host supports SSE3 instructions */
nkeynes@408
    95
    uint32_t block_start_pc;
nkeynes@547
    96
    uint32_t stack_posn;   /* Trace stack height for alignment purposes */
nkeynes@1112
    97
    uint32_t sh4_mode;     /* Mirror of sh4r.xlat_sh4_mode */
nkeynes@417
    98
    int tstate;
nkeynes@368
    99
nkeynes@1125
   100
    /* mode settings */
nkeynes@586
   101
    gboolean tlb_on; /* True if tlb translation is active */
nkeynes@1125
   102
    struct mem_region_fn **priv_address_space;
nkeynes@1125
   103
    struct mem_region_fn **user_address_space;
nkeynes@586
   104
nkeynes@1125
   105
    /* Instrumentation */
nkeynes@1125
   106
    xlat_block_begin_callback_t begin_callback;
nkeynes@1125
   107
    xlat_block_end_callback_t end_callback;
nkeynes@1125
   108
    gboolean fastmem;
nkeynes@1182
   109
    gboolean profile_blocks;
nkeynes@1125
   110
    
nkeynes@368
   111
    /* Allocated memory for the (block-wide) back-patch list */
nkeynes@586
   112
    struct backpatch_record *backpatch_list;
nkeynes@368
   113
    uint32_t backpatch_posn;
nkeynes@368
   114
    uint32_t backpatch_size;
nkeynes@368
   115
};
nkeynes@368
   116
nkeynes@368
   117
static struct sh4_x86_state sh4_x86;
nkeynes@368
   118
nkeynes@388
   119
static uint32_t max_int = 0x7FFFFFFF;
nkeynes@388
   120
static uint32_t min_int = 0x80000000;
nkeynes@394
   121
static uint32_t save_fcw; /* save value for fpu control word */
nkeynes@394
   122
static uint32_t trunc_fcw = 0x0F7F; /* fcw value for truncation mode */
nkeynes@386
   123
nkeynes@1196
   124
static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc );
nkeynes@1196
   125
nkeynes@1091
   126
static struct x86_symbol x86_symbol_table[] = {
nkeynes@1091
   127
    { "sh4r+128", ((char *)&sh4r)+128 },
nkeynes@1091
   128
    { "sh4_cpu_period", &sh4_cpu_period },
nkeynes@1091
   129
    { "sh4_address_space", NULL },
nkeynes@1125
   130
    { "sh4_user_address_space", NULL },
nkeynes@1120
   131
    { "sh4_translate_breakpoint_hit", sh4_translate_breakpoint_hit },
nkeynes@1196
   132
    { "sh4_translate_get_code_and_backpatch", sh4_translate_get_code_and_backpatch },
nkeynes@1091
   133
    { "sh4_write_fpscr", sh4_write_fpscr },
nkeynes@1091
   134
    { "sh4_write_sr", sh4_write_sr },
nkeynes@1091
   135
    { "sh4_read_sr", sh4_read_sr },
nkeynes@1191
   136
    { "sh4_raise_exception", sh4_raise_exception },
nkeynes@1091
   137
    { "sh4_sleep", sh4_sleep },
nkeynes@1091
   138
    { "sh4_fsca", sh4_fsca },
nkeynes@1091
   139
    { "sh4_ftrv", sh4_ftrv },
nkeynes@1091
   140
    { "sh4_switch_fr_banks", sh4_switch_fr_banks },
nkeynes@1091
   141
    { "sh4_execute_instruction", sh4_execute_instruction },
nkeynes@1091
   142
    { "signsat48", signsat48 },
nkeynes@1091
   143
    { "xlat_get_code_by_vma", xlat_get_code_by_vma },
nkeynes@1091
   144
    { "xlat_get_code", xlat_get_code }
nkeynes@1091
   145
};
nkeynes@1091
   146
nkeynes@1091
   147
nkeynes@903
   148
gboolean is_sse3_supported()
nkeynes@903
   149
{
nkeynes@903
   150
    uint32_t features;
nkeynes@903
   151
    
nkeynes@903
   152
    __asm__ __volatile__(
nkeynes@903
   153
        "mov $0x01, %%eax\n\t"
nkeynes@908
   154
        "cpuid\n\t" : "=c" (features) : : "eax", "edx", "ebx");
nkeynes@903
   155
    return (features & 1) ? TRUE : FALSE;
nkeynes@903
   156
}
nkeynes@903
   157
nkeynes@1125
   158
void sh4_translate_set_address_space( struct mem_region_fn **priv, struct mem_region_fn **user )
nkeynes@1125
   159
{
nkeynes@1125
   160
    sh4_x86.priv_address_space = priv;
nkeynes@1125
   161
    sh4_x86.user_address_space = user;
nkeynes@1125
   162
    x86_symbol_table[2].ptr = priv;
nkeynes@1125
   163
    x86_symbol_table[3].ptr = user;
nkeynes@1125
   164
}
nkeynes@1125
   165
nkeynes@669
   166
void sh4_translate_init(void)
nkeynes@368
   167
{
nkeynes@368
   168
    sh4_x86.backpatch_list = malloc(DEFAULT_BACKPATCH_SIZE);
nkeynes@586
   169
    sh4_x86.backpatch_size = DEFAULT_BACKPATCH_SIZE / sizeof(struct backpatch_record);
nkeynes@1125
   170
    sh4_x86.begin_callback = NULL;
nkeynes@1125
   171
    sh4_x86.end_callback = NULL;
nkeynes@1125
   172
    sh4_translate_set_address_space( sh4_address_space, sh4_user_address_space );
nkeynes@1125
   173
    sh4_x86.fastmem = TRUE;
nkeynes@1182
   174
    sh4_x86.profile_blocks = FALSE;
nkeynes@903
   175
    sh4_x86.sse3_enabled = is_sse3_supported();
nkeynes@1091
   176
    x86_disasm_init();
nkeynes@1091
   177
    x86_set_symtab( x86_symbol_table, sizeof(x86_symbol_table)/sizeof(struct x86_symbol) );
nkeynes@368
   178
}
nkeynes@368
   179
nkeynes@1125
   180
/**
 * Register the instrumentation hooks for translated blocks. Either hook may
 * be NULL, in which case it is not used.
 *
 * @param begin hook called from the start of each block translated afterwards
 * @param end   hook used when leaving a block
 */
void sh4_translate_set_callbacks( xlat_block_begin_callback_t begin, xlat_block_end_callback_t end )
{
    sh4_x86.end_callback = end;
    sh4_x86.begin_callback = begin;
}
nkeynes@1125
   185
nkeynes@1125
   186
/**
 * Set the translator's fastmem flag (sh4_translate_init() defaults it to TRUE).
 */
void sh4_translate_set_fastmem( gboolean flag )
nkeynes@1125
   187
{
nkeynes@1125
   188
    sh4_x86.fastmem = flag;
nkeynes@1125
   189
}
nkeynes@1125
   190
nkeynes@1182
   191
/**
 * Enable or disable block profiling. While enabled,
 * sh4_translate_begin_block() emits code that adds 1 to the block's
 * 'active' field.
 */
void sh4_translate_set_profile_blocks( gboolean flag )
nkeynes@1182
   192
{
nkeynes@1182
   193
    sh4_x86.profile_blocks = flag;
nkeynes@1182
   194
}
nkeynes@1182
   195
nkeynes@1182
   196
gboolean sh4_translate_get_profile_blocks()
nkeynes@1182
   197
{
nkeynes@1182
   198
    return sh4_x86.profile_blocks;
nkeynes@1182
   199
}
nkeynes@1182
   200
nkeynes@1091
   201
/**
nkeynes@1091
   202
 * Disassemble the given translated code block, and it's source SH4 code block
nkeynes@1091
   203
 * side-by-side. The current native pc will be marked if non-null.
nkeynes@1091
   204
 */
nkeynes@1091
   205
void sh4_translate_disasm_block( FILE *out, void *code, sh4addr_t source_start, void *native_pc )
nkeynes@1091
   206
{
nkeynes@1091
   207
    char buf[256];
nkeynes@1091
   208
    char op[256];
nkeynes@1091
   209
nkeynes@1091
   210
    uintptr_t target_start = (uintptr_t)code, target_pc;
nkeynes@1091
   211
    uintptr_t target_end = target_start + xlat_get_code_size(code);
nkeynes@1091
   212
    uint32_t source_pc = source_start;
nkeynes@1091
   213
    uint32_t source_end = source_pc;
nkeynes@1091
   214
    xlat_recovery_record_t source_recov_table = XLAT_RECOVERY_TABLE(code);
nkeynes@1092
   215
    xlat_recovery_record_t source_recov_end = source_recov_table + XLAT_BLOCK_FOR_CODE(code)->recover_table_size - 1;
nkeynes@1091
   216
nkeynes@1091
   217
    for( target_pc = target_start; target_pc < target_end;  ) {
nkeynes@1091
   218
        uintptr_t pc2 = x86_disasm_instruction( target_pc, buf, sizeof(buf), op );
nkeynes@1092
   219
#if SIZEOF_VOID_P == 8
nkeynes@1092
   220
        fprintf( out, "%c%016lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
nkeynes@1092
   221
                      target_pc, op, buf );
nkeynes@1092
   222
#else
nkeynes@1112
   223
        fprintf( out, "%c%08lx: %-30s %-40s", (target_pc == (uintptr_t)native_pc ? '*' : ' '),
nkeynes@1092
   224
                      target_pc, op, buf );
nkeynes@1092
   225
#endif        
nkeynes@1091
   226
        if( source_recov_table < source_recov_end && 
nkeynes@1091
   227
            target_pc >= (target_start + source_recov_table->xlat_offset) ) {
nkeynes@1091
   228
            source_recov_table++;
nkeynes@1091
   229
            if( source_end < (source_start + (source_recov_table->sh4_icount)*2) )
nkeynes@1091
   230
                source_end = source_start + (source_recov_table->sh4_icount)*2;
nkeynes@1091
   231
        }
nkeynes@1091
   232
nkeynes@1091
   233
        if( source_pc < source_end ) {
nkeynes@1091
   234
            uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
nkeynes@1091
   235
            fprintf( out, " %08X: %s  %s\n", source_pc, op, buf );
nkeynes@1091
   236
            source_pc = source_pc2;
nkeynes@1091
   237
        } else {
nkeynes@1091
   238
            fprintf( out, "\n" );
nkeynes@1091
   239
        }
nkeynes@1091
   240
        
nkeynes@1091
   241
        target_pc = pc2;
nkeynes@1091
   242
    }
nkeynes@1091
   243
    
nkeynes@1091
   244
    while( source_pc < source_end ) {
nkeynes@1091
   245
        uint32_t source_pc2 = sh4_disasm_instruction( source_pc, buf, sizeof(buf), op );
nkeynes@1091
   246
        fprintf( out, "%*c %08X: %s  %s\n", 72,' ', source_pc, op, buf );
nkeynes@1091
   247
        source_pc = source_pc2;
nkeynes@1091
   248
    }
nkeynes@1091
   249
}
nkeynes@368
   250
nkeynes@586
   251
static void sh4_x86_add_backpatch( uint8_t *fixup_addr, uint32_t fixup_pc, uint32_t exc_code )
nkeynes@368
   252
{
nkeynes@991
   253
    int reloc_size = 4;
nkeynes@991
   254
    
nkeynes@991
   255
    if( exc_code == -2 ) {
nkeynes@991
   256
        reloc_size = sizeof(void *);
nkeynes@991
   257
    }
nkeynes@991
   258
    
nkeynes@368
   259
    if( sh4_x86.backpatch_posn == sh4_x86.backpatch_size ) {
nkeynes@368
   260
	sh4_x86.backpatch_size <<= 1;
nkeynes@586
   261
	sh4_x86.backpatch_list = realloc( sh4_x86.backpatch_list, 
nkeynes@586
   262
					  sh4_x86.backpatch_size * sizeof(struct backpatch_record));
nkeynes@368
   263
	assert( sh4_x86.backpatch_list != NULL );
nkeynes@368
   264
    }
nkeynes@586
   265
    if( sh4_x86.in_delay_slot ) {
nkeynes@586
   266
	fixup_pc -= 2;
nkeynes@586
   267
    }
nkeynes@991
   268
nkeynes@604
   269
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_offset = 
nkeynes@991
   270
	(((uint8_t *)fixup_addr) - ((uint8_t *)xlat_current_block->code)) - reloc_size;
nkeynes@586
   271
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].fixup_icount = (fixup_pc - sh4_x86.block_start_pc)>>1;
nkeynes@586
   272
    sh4_x86.backpatch_list[sh4_x86.backpatch_posn].exc_code = exc_code;
nkeynes@586
   273
    sh4_x86.backpatch_posn++;
nkeynes@368
   274
}
nkeynes@368
   275
nkeynes@991
   276
#define TSTATE_NONE -1
nkeynes@995
   277
#define TSTATE_O    X86_COND_O
nkeynes@995
   278
#define TSTATE_C    X86_COND_C
nkeynes@995
   279
#define TSTATE_E    X86_COND_E
nkeynes@995
   280
#define TSTATE_NE   X86_COND_NE
nkeynes@995
   281
#define TSTATE_G    X86_COND_G
nkeynes@995
   282
#define TSTATE_GE   X86_COND_GE
nkeynes@995
   283
#define TSTATE_A    X86_COND_A
nkeynes@995
   284
#define TSTATE_AE   X86_COND_AE
nkeynes@359
   285
nkeynes@991
   286
#define MARK_JMP8(x) uint8_t *_mark_jmp_##x = (xlat_output-1)
nkeynes@991
   287
#define JMP_TARGET(x) *_mark_jmp_##x += (xlat_output - _mark_jmp_##x)
nkeynes@368
   288
nkeynes@991
   289
/* Convenience instructions */
nkeynes@991
   290
#define LDC_t()          CMPB_imms_rbpdisp(1,R_T); CMC()
nkeynes@991
   291
#define SETE_t()         SETCCB_cc_rbpdisp(X86_COND_E,R_T)
nkeynes@991
   292
#define SETA_t()         SETCCB_cc_rbpdisp(X86_COND_A,R_T)
nkeynes@991
   293
#define SETAE_t()        SETCCB_cc_rbpdisp(X86_COND_AE,R_T)
nkeynes@991
   294
#define SETG_t()         SETCCB_cc_rbpdisp(X86_COND_G,R_T)
nkeynes@991
   295
#define SETGE_t()        SETCCB_cc_rbpdisp(X86_COND_GE,R_T)
nkeynes@991
   296
#define SETC_t()         SETCCB_cc_rbpdisp(X86_COND_C,R_T)
nkeynes@991
   297
#define SETO_t()         SETCCB_cc_rbpdisp(X86_COND_O,R_T)
nkeynes@991
   298
#define SETNE_t()        SETCCB_cc_rbpdisp(X86_COND_NE,R_T)
nkeynes@991
   299
#define SETC_r8(r1)      SETCCB_cc_r8(X86_COND_C, r1)
nkeynes@991
   300
#define JAE_label(label) JCC_cc_rel8(X86_COND_AE,-1); MARK_JMP8(label)
nkeynes@1112
   301
#define JBE_label(label) JCC_cc_rel8(X86_COND_BE,-1); MARK_JMP8(label)
nkeynes@991
   302
#define JE_label(label)  JCC_cc_rel8(X86_COND_E,-1); MARK_JMP8(label)
nkeynes@991
   303
#define JGE_label(label) JCC_cc_rel8(X86_COND_GE,-1); MARK_JMP8(label)
nkeynes@991
   304
#define JNA_label(label) JCC_cc_rel8(X86_COND_NA,-1); MARK_JMP8(label)
nkeynes@991
   305
#define JNE_label(label) JCC_cc_rel8(X86_COND_NE,-1); MARK_JMP8(label)
nkeynes@991
   306
#define JNO_label(label) JCC_cc_rel8(X86_COND_NO,-1); MARK_JMP8(label)
nkeynes@1197
   307
#define JP_label(label)  JCC_cc_rel8(X86_COND_P,-1); MARK_JMP8(label)
nkeynes@991
   308
#define JS_label(label)  JCC_cc_rel8(X86_COND_S,-1); MARK_JMP8(label)
nkeynes@991
   309
#define JMP_label(label) JMP_rel8(-1); MARK_JMP8(label)
nkeynes@991
   310
#define JNE_exc(exc)     JCC_cc_rel32(X86_COND_NE,0); sh4_x86_add_backpatch(xlat_output, pc, exc)
nkeynes@374
   311
nkeynes@1197
   312
#define LOAD_t() if( sh4_x86.tstate == TSTATE_NONE ) { \
nkeynes@1197
   313
	CMPL_imms_rbpdisp( 1, R_T ); sh4_x86.tstate = TSTATE_E; }     
nkeynes@1197
   314
nkeynes@991
   315
/** Branch if T is set (either in the current cflags, or in sh4r.t) */
nkeynes@1197
   316
#define JT_label(label) LOAD_t() \
nkeynes@991
   317
    JCC_cc_rel8(sh4_x86.tstate,-1); MARK_JMP8(label)
nkeynes@368
   318
nkeynes@991
   319
/** Branch if T is clear (either in the current cflags or in sh4r.t) */
nkeynes@1197
   320
#define JF_label(label) LOAD_t() \
nkeynes@991
   321
    JCC_cc_rel8(sh4_x86.tstate^1, -1); MARK_JMP8(label)
nkeynes@359
   322
nkeynes@939
   323
nkeynes@991
   324
#define load_reg(x86reg,sh4reg)     MOVL_rbpdisp_r32( REG_OFFSET(r[sh4reg]), x86reg )
nkeynes@991
   325
#define store_reg(x86reg,sh4reg)    MOVL_r32_rbpdisp( x86reg, REG_OFFSET(r[sh4reg]) )
nkeynes@374
   326
nkeynes@375
   327
/**
nkeynes@375
   328
 * Load an FR register (single-precision floating point) into an integer x86
nkeynes@375
   329
 * register (eg for register-to-register moves)
nkeynes@375
   330
 */
nkeynes@991
   331
#define load_fr(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[0][(frm)^1]), reg )
nkeynes@991
   332
#define load_xf(reg,frm)  MOVL_rbpdisp_r32( REG_OFFSET(fr[1][(frm)^1]), reg )
nkeynes@375
   333
nkeynes@375
   334
/**
nkeynes@669
   335
 * Load the low half of a DR register (DR or XD) into an integer x86 register 
nkeynes@669
   336
 */
nkeynes@991
   337
/* frm is parenthesized in the expansion (as in load_fr/load_xf) so that an
 * expression argument is masked as a whole. */
#define load_dr0(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[(frm)&1][(frm)|0x01]), reg )
#define load_dr1(reg,frm) MOVL_rbpdisp_r32( REG_OFFSET(fr[(frm)&1][(frm)&0x0E]), reg )
nkeynes@669
   339
nkeynes@669
   340
/**
nkeynes@669
   341
 * Store an FR register (single-precision floating point) from an integer x86
nkeynes@375
   342
 * register (eg for register-to-register moves)
nkeynes@375
   343
 */
nkeynes@991
   344
#define store_fr(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[0][(frm)^1]) )
nkeynes@991
   345
#define store_xf(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[1][(frm)^1]) )
nkeynes@375
   346
nkeynes@991
   347
/* frm is parenthesized in the expansion (as in store_fr/store_xf) so that an
 * expression argument is masked as a whole. */
#define store_dr0(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[(frm)&1][(frm)|0x01]) )
#define store_dr1(reg,frm) MOVL_r32_rbpdisp( reg, REG_OFFSET(fr[(frm)&1][(frm)&0x0E]) )
nkeynes@375
   349
nkeynes@374
   350
nkeynes@991
   351
#define push_fpul()  FLDF_rbpdisp(R_FPUL)
nkeynes@991
   352
#define pop_fpul()   FSTPF_rbpdisp(R_FPUL)
nkeynes@991
   353
#define push_fr(frm) FLDF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
nkeynes@991
   354
#define pop_fr(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[0][(frm)^1]) )
nkeynes@991
   355
#define push_xf(frm) FLDF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
nkeynes@991
   356
#define pop_xf(frm)  FSTPF_rbpdisp( REG_OFFSET(fr[1][(frm)^1]) )
nkeynes@991
   357
#define push_dr(frm) FLDD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
nkeynes@991
   358
#define pop_dr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[0][(frm)&0x0E]) )
nkeynes@991
   359
#define push_xdr(frm) FLDD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
nkeynes@991
   360
#define pop_xdr(frm)  FSTPD_rbpdisp( REG_OFFSET(fr[1][(frm)&0x0E]) )
nkeynes@377
   361
nkeynes@991
   362
#ifdef ENABLE_SH4STATS
nkeynes@995
   363
#define COUNT_INST(id) MOVL_imm32_r32( id, REG_EAX ); CALL1_ptr_r32(sh4_stats_add, REG_EAX); sh4_x86.tstate = TSTATE_NONE
nkeynes@991
   364
#else
nkeynes@991
   365
#define COUNT_INST(id)
nkeynes@991
   366
#endif
nkeynes@377
   367
nkeynes@374
   368
nkeynes@368
   369
/* Exception checks - Note that all exception checks will clobber EAX */
nkeynes@416
   370
nkeynes@416
   371
#define check_priv( ) \
nkeynes@1112
   372
    if( (sh4_x86.sh4_mode & SR_MD) == 0 ) { \
nkeynes@937
   373
        if( sh4_x86.in_delay_slot ) { \
nkeynes@1191
   374
            exit_block_exc(EXC_SLOT_ILLEGAL, (pc-2), 4 ); \
nkeynes@937
   375
        } else { \
nkeynes@1191
   376
            exit_block_exc(EXC_ILLEGAL, pc, 2); \
nkeynes@937
   377
        } \
nkeynes@956
   378
        sh4_x86.branch_taken = TRUE; \
nkeynes@937
   379
        sh4_x86.in_delay_slot = DELAY_NONE; \
nkeynes@937
   380
        return 2; \
nkeynes@937
   381
    }
nkeynes@416
   382
nkeynes@416
   383
#define check_fpuen( ) \
nkeynes@416
   384
    if( !sh4_x86.fpuen_checked ) {\
nkeynes@416
   385
	sh4_x86.fpuen_checked = TRUE;\
nkeynes@995
   386
	MOVL_rbpdisp_r32( R_SR, REG_EAX );\
nkeynes@991
   387
	ANDL_imms_r32( SR_FD, REG_EAX );\
nkeynes@416
   388
	if( sh4_x86.in_delay_slot ) {\
nkeynes@586
   389
	    JNE_exc(EXC_SLOT_FPU_DISABLED);\
nkeynes@416
   390
	} else {\
nkeynes@586
   391
	    JNE_exc(EXC_FPU_DISABLED);\
nkeynes@416
   392
	}\
nkeynes@875
   393
	sh4_x86.tstate = TSTATE_NONE; \
nkeynes@416
   394
    }
nkeynes@416
   395
nkeynes@586
   396
#define check_ralign16( x86reg ) \
nkeynes@991
   397
    TESTL_imms_r32( 0x00000001, x86reg ); \
nkeynes@586
   398
    JNE_exc(EXC_DATA_ADDR_READ)
nkeynes@416
   399
nkeynes@586
   400
#define check_walign16( x86reg ) \
nkeynes@991
   401
    TESTL_imms_r32( 0x00000001, x86reg ); \
nkeynes@586
   402
    JNE_exc(EXC_DATA_ADDR_WRITE);
nkeynes@368
   403
nkeynes@586
   404
#define check_ralign32( x86reg ) \
nkeynes@991
   405
    TESTL_imms_r32( 0x00000003, x86reg ); \
nkeynes@586
   406
    JNE_exc(EXC_DATA_ADDR_READ)
nkeynes@368
   407
nkeynes@586
   408
#define check_walign32( x86reg ) \
nkeynes@991
   409
    TESTL_imms_r32( 0x00000003, x86reg ); \
nkeynes@586
   410
    JNE_exc(EXC_DATA_ADDR_WRITE);
nkeynes@368
   411
nkeynes@732
   412
#define check_ralign64( x86reg ) \
nkeynes@991
   413
    TESTL_imms_r32( 0x00000007, x86reg ); \
nkeynes@732
   414
    JNE_exc(EXC_DATA_ADDR_READ)
nkeynes@732
   415
nkeynes@732
   416
#define check_walign64( x86reg ) \
nkeynes@991
   417
    TESTL_imms_r32( 0x00000007, x86reg ); \
nkeynes@732
   418
    JNE_exc(EXC_DATA_ADDR_WRITE);
nkeynes@732
   419
nkeynes@1125
   420
#define address_space() ((sh4_x86.sh4_mode&SR_MD) ? (uintptr_t)sh4_x86.priv_address_space : (uintptr_t)sh4_x86.user_address_space)
nkeynes@1004
   421
nkeynes@824
   422
#define UNDEF(ir)
nkeynes@939
   423
/* Note: For SR.MD == 1 && MMUCR.AT == 0, there are no memory exceptions, so 
nkeynes@939
   424
 * don't waste the cycles expecting them. Otherwise we need to save the exception pointer.
nkeynes@586
   425
 */
nkeynes@941
   426
#ifdef HAVE_FRAME_ADDRESS
nkeynes@995
   427
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   428
{
nkeynes@1004
   429
    decode_address(address_space(), addr_reg);
nkeynes@1112
   430
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
nkeynes@995
   431
        CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
nkeynes@995
   432
    } else {
nkeynes@995
   433
        if( addr_reg != REG_ARG1 ) {
nkeynes@995
   434
            MOVL_r32_r32( addr_reg, REG_ARG1 );
nkeynes@995
   435
        }
nkeynes@995
   436
        MOVP_immptr_rptr( 0, REG_ARG2 );
nkeynes@995
   437
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
nkeynes@995
   438
        CALL2_r32disp_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2);
nkeynes@995
   439
    }
nkeynes@995
   440
    if( value_reg != REG_RESULT1 ) { 
nkeynes@995
   441
        MOVL_r32_r32( REG_RESULT1, value_reg );
nkeynes@995
   442
    }
nkeynes@995
   443
}
nkeynes@995
   444
nkeynes@995
   445
static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   446
{
nkeynes@1004
   447
    decode_address(address_space(), addr_reg);
nkeynes@1112
   448
    if( !sh4_x86.tlb_on && (sh4_x86.sh4_mode & SR_MD) ) { 
nkeynes@995
   449
        CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
nkeynes@995
   450
    } else {
nkeynes@995
   451
        if( value_reg != REG_ARG2 ) {
nkeynes@995
   452
            MOVL_r32_r32( value_reg, REG_ARG2 );
nkeynes@995
   453
	}        
nkeynes@995
   454
        if( addr_reg != REG_ARG1 ) {
nkeynes@995
   455
            MOVL_r32_r32( addr_reg, REG_ARG1 );
nkeynes@995
   456
        }
nkeynes@995
   457
#if MAX_REG_ARG > 2        
nkeynes@995
   458
        MOVP_immptr_rptr( 0, REG_ARG3 );
nkeynes@995
   459
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
nkeynes@995
   460
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, REG_ARG3);
nkeynes@995
   461
#else
nkeynes@995
   462
        MOVL_imm32_rspdisp( 0, 0 );
nkeynes@995
   463
        sh4_x86_add_backpatch( xlat_output, pc, -2 );
nkeynes@995
   464
        CALL3_r32disp_r32_r32_r32(REG_ECX, offset, REG_ARG1, REG_ARG2, 0);
nkeynes@995
   465
#endif
nkeynes@995
   466
    }
nkeynes@995
   467
}
nkeynes@995
   468
#else
nkeynes@995
   469
static void call_read_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   470
{
nkeynes@1004
   471
    decode_address(address_space(), addr_reg);
nkeynes@995
   472
    CALL1_r32disp_r32(REG_ECX, offset, addr_reg);
nkeynes@995
   473
    if( value_reg != REG_RESULT1 ) {
nkeynes@995
   474
        MOVL_r32_r32( REG_RESULT1, value_reg );
nkeynes@995
   475
    }
nkeynes@995
   476
}     
nkeynes@995
   477
nkeynes@996
   478
static void call_write_func(int addr_reg, int value_reg, int offset, int pc)
nkeynes@995
   479
{
nkeynes@1004
   480
    decode_address(address_space(), addr_reg);
nkeynes@995
   481
    CALL2_r32disp_r32_r32(REG_ECX, offset, addr_reg, value_reg);
nkeynes@995
   482
}
nkeynes@941
   483
#endif
nkeynes@939
   484
                
nkeynes@995
   485
#define MEM_REGION_PTR(name) offsetof( struct mem_region_fn, name )
nkeynes@995
   486
#define MEM_READ_BYTE( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_byte), pc)
nkeynes@995
   487
#define MEM_READ_BYTE_FOR_WRITE( addr_reg, value_reg ) call_read_func( addr_reg, value_reg, MEM_REGION_PTR(read_byte_for_write), pc) 
nkeynes@995
   488
#define MEM_READ_WORD( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_word), pc)
nkeynes@995
   489
#define MEM_READ_LONG( addr_reg, value_reg ) call_read_func(addr_reg, value_reg, MEM_REGION_PTR(read_long), pc)
nkeynes@995
   490
#define MEM_WRITE_BYTE( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_byte), pc)
nkeynes@995
   491
#define MEM_WRITE_WORD( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_word), pc)
nkeynes@995
   492
#define MEM_WRITE_LONG( addr_reg, value_reg ) call_write_func(addr_reg, value_reg, MEM_REGION_PTR(write_long), pc)
nkeynes@995
   493
#define MEM_PREFETCH( addr_reg ) call_read_func(addr_reg, REG_RESULT1, MEM_REGION_PTR(prefetch), pc)
nkeynes@368
   494
nkeynes@1191
   495
#define SLOTILLEGAL() exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4); sh4_x86.in_delay_slot = DELAY_NONE; return 2;
nkeynes@539
   496
nkeynes@1182
   497
/** Offset of xlat_sh4_mode field relative to the code pointer */ 
nkeynes@1186
   498
#define XLAT_SH4_MODE_CODE_OFFSET  (int32_t)(offsetof(struct xlat_cache_block, xlat_sh4_mode) - offsetof(struct xlat_cache_block,code) )
nkeynes@1186
   499
#define XLAT_CHAIN_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, chain) - offsetof(struct xlat_cache_block,code) )
nkeynes@1186
   500
#define XLAT_ACTIVE_CODE_OFFSET (int32_t)(offsetof(struct xlat_cache_block, active) - offsetof(struct xlat_cache_block,code) )
nkeynes@1182
   501
nkeynes@901
   502
/**
 * Reset the per-block translation state and emit the opening code of a new
 * translated block: the prologue, then a call to the begin callback if one is
 * installed, then (when block profiling is on) an increment of the block's
 * 'active' field.
 *
 * @param pc SH4 address recorded as the start of the block
 */
void sh4_translate_begin_block( sh4addr_t pc ) 
{
    /* Where this block's code starts, captured before anything is emitted */
    sh4_x86.code = xlat_output;
    sh4_x86.block_start_pc = pc;

    /* Per-block bookkeeping starts from scratch */
    sh4_x86.backpatch_posn = 0;
    sh4_x86.in_delay_slot = FALSE;
    sh4_x86.branch_taken = FALSE;
    sh4_x86.fpuen_checked = FALSE;
    sh4_x86.tstate = TSTATE_NONE;

    /* Snapshot of the current CPU mode settings */
    sh4_x86.sh4_mode = sh4r.xlat_sh4_mode;
    sh4_x86.tlb_on = IS_TLB_ENABLED();
    sh4_x86.double_prec = sh4r.fpscr & FPSCR_PR;
    sh4_x86.double_size = sh4r.fpscr & FPSCR_SZ;

    emit_prologue();
    if( sh4_x86.begin_callback ) {
        CALL_ptr( sh4_x86.begin_callback );
    }
    if( sh4_x86.profile_blocks ) {
        /* EAX = address of this block's 'active' field; add 1 to it */
        MOVP_immptr_rptr( sh4_x86.code + XLAT_ACTIVE_CODE_OFFSET, REG_EAX );
        ADDL_imms_r32disp( 1, REG_EAX, 0 );
    }
}
nkeynes@901
   524
nkeynes@901
   525
nkeynes@593
   526
uint32_t sh4_translate_end_block_size()
nkeynes@593
   527
{
nkeynes@1196
   528
	uint32_t epilogue_size = EPILOGUE_SIZE;
nkeynes@1196
   529
	if( sh4_x86.end_callback ) {
nkeynes@1196
   530
	    epilogue_size += (CALL1_PTR_MIN_SIZE - 1);
nkeynes@1196
   531
	}
nkeynes@596
   532
    if( sh4_x86.backpatch_posn <= 3 ) {
nkeynes@1196
   533
        epilogue_size += (sh4_x86.backpatch_posn*(12+CALL1_PTR_MIN_SIZE));
nkeynes@596
   534
    } else {
nkeynes@1196
   535
        epilogue_size += (3*(12+CALL1_PTR_MIN_SIZE)) + (sh4_x86.backpatch_posn-3)*(15+CALL1_PTR_MIN_SIZE);
nkeynes@596
   536
    }
nkeynes@1196
   537
    return epilogue_size;
nkeynes@593
   538
}
nkeynes@593
   539
nkeynes@593
   540
nkeynes@590
   541
/**
nkeynes@590
   542
 * Embed a breakpoint into the generated code
nkeynes@590
   543
 */
nkeynes@586
   544
void sh4_translate_emit_breakpoint( sh4vma_t pc )
nkeynes@586
   545
{
nkeynes@995
   546
    /* Emit: load pc into EAX, then call sh4_translate_breakpoint_hit with it */
    MOVL_imm32_r32( pc, REG_EAX );
nkeynes@995
   547
    CALL1_ptr_r32( sh4_translate_breakpoint_hit, REG_EAX );
nkeynes@875
   548
    /* Drop any cached T condition so the next LOAD_t() re-tests sh4r.t */
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@586
   549
}
nkeynes@590
   550
nkeynes@601
   551
nkeynes@601
   552
#define UNTRANSLATABLE(pc) !IS_IN_ICACHE(pc)
nkeynes@601
   553
nkeynes@1112
   554
/**
nkeynes@1112
   555
 * Test if the loaded target code pointer in %eax is valid, and if so jump
nkeynes@1112
   556
 * directly into it, bypassing the normal exit.
nkeynes@1112
   557
 */
nkeynes@1112
   558
static void jump_next_block()
nkeynes@1112
   559
{
nkeynes@1149
   560
	uint8_t *ptr = xlat_output;
nkeynes@1112
   561
	TESTP_rptr_rptr(REG_EAX, REG_EAX);
nkeynes@1112
   562
	JE_label(nocode);
nkeynes@1112
   563
	if( sh4_x86.sh4_mode == SH4_MODE_UNKNOWN ) {
nkeynes@1112
   564
	    /* sr/fpscr was changed, possibly updated xlat_sh4_mode, so reload it */
nkeynes@1112
   565
	    MOVL_rbpdisp_r32( REG_OFFSET(xlat_sh4_mode), REG_ECX );
nkeynes@1112
   566
	    CMPL_r32_r32disp( REG_ECX, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
nkeynes@1112
   567
	} else {
nkeynes@1112
   568
	    CMPL_imms_r32disp( sh4_x86.sh4_mode, REG_EAX, XLAT_SH4_MODE_CODE_OFFSET );
nkeynes@1112
   569
	}
nkeynes@1112
   570
	JNE_label(wrongmode);
nkeynes@1112
   571
	LEAP_rptrdisp_rptr(REG_EAX, PROLOGUE_SIZE,REG_EAX);
nkeynes@1125
   572
	if( sh4_x86.end_callback ) {
nkeynes@1125
   573
	    /* Note this does leave the stack out of alignment, but doesn't matter
nkeynes@1125
   574
	     * for what we're currently using it for.
nkeynes@1125
   575
	     */
nkeynes@1125
   576
	    PUSH_r32(REG_EAX);
nkeynes@1125
   577
	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
nkeynes@1125
   578
	    JMP_rptr(REG_ECX);
nkeynes@1125
   579
	} else {
nkeynes@1125
   580
	    JMP_rptr(REG_EAX);
nkeynes@1125
   581
	}
nkeynes@1149
   582
	JMP_TARGET(wrongmode);
nkeynes@1176
   583
	MOVP_rptrdisp_rptr( REG_EAX, XLAT_CHAIN_CODE_OFFSET, REG_EAX );
nkeynes@1149
   584
	int rel = ptr - xlat_output;
nkeynes@1149
   585
    JMP_prerel(rel);
nkeynes@1149
   586
	JMP_TARGET(nocode); 
nkeynes@1112
   587
}
nkeynes@1112
   588
nkeynes@1186
   589
/**
nkeynes@1186
   590
 * 
nkeynes@1186
   591
 */
nkeynes@1186
   592
static void FASTCALL sh4_translate_get_code_and_backpatch( uint32_t pc )
nkeynes@1186
   593
{
nkeynes@1186
   594
    uint8_t *target = (uint8_t *)xlat_get_code_by_vma(pc);
nkeynes@1186
   595
    while( target != NULL && sh4r.xlat_sh4_mode != XLAT_BLOCK_MODE(target) ) {
nkeynes@1186
   596
        target = XLAT_BLOCK_CHAIN(target);
nkeynes@1186
   597
	}
nkeynes@1186
   598
    if( target == NULL ) {
nkeynes@1186
   599
        target = sh4_translate_basic_block( pc );
nkeynes@1186
   600
    }
nkeynes@1186
   601
    uint8_t *backpatch = ((uint8_t *)__builtin_return_address(0)) - (CALL1_PTR_MIN_SIZE);
nkeynes@1186
   602
    *backpatch = 0xE9;
nkeynes@1186
   603
    *(uint32_t *)(backpatch+1) = (uint32_t)(target-backpatch)+PROLOGUE_SIZE-5;
nkeynes@1186
   604
    *(void **)(backpatch+5) = XLAT_BLOCK_FOR_CODE(target)->use_list;
nkeynes@1186
   605
    XLAT_BLOCK_FOR_CODE(target)->use_list = backpatch; 
nkeynes@1186
   606
nkeynes@1198
   607
    uint8_t * volatile *retptr = ((uint8_t * volatile *)__builtin_frame_address(0))+1;
nkeynes@1186
   608
    assert( *retptr == ((uint8_t *)__builtin_return_address(0)) );
nkeynes@1186
   609
	*retptr = backpatch;
nkeynes@1186
   610
}
nkeynes@1186
   611
nkeynes@1186
   612
static void emit_translate_and_backpatch()
nkeynes@1186
   613
{
nkeynes@1186
   614
    /* NB: this is either 7 bytes (i386) or 12 bytes (x86-64) */
nkeynes@1186
   615
    CALL1_ptr_r32(sh4_translate_get_code_and_backpatch, REG_ARG1);
nkeynes@1186
   616
nkeynes@1186
   617
    /* When patched, the jmp instruction will be 5 bytes (either platform) -
nkeynes@1186
   618
     * we need to reserve sizeof(void*) bytes for the use-list
nkeynes@1186
   619
	 * pointer
nkeynes@1186
   620
	 */ 
nkeynes@1186
   621
    if( sizeof(void*) == 8 ) {
nkeynes@1186
   622
        NOP();
nkeynes@1186
   623
    } else {
nkeynes@1186
   624
        NOP2();
nkeynes@1186
   625
    }
nkeynes@1186
   626
}
nkeynes@1186
   627
nkeynes@1186
   628
/**
nkeynes@1186
   629
 * If we're jumping to a fixed address (or at least fixed relative to the
nkeynes@1186
   630
 * current PC, then we can do a direct branch. REG_ARG1 should contain
nkeynes@1186
   631
 * the PC at this point.
nkeynes@1186
   632
 */
nkeynes@1186
   633
static void jump_next_block_fixed_pc( sh4addr_t pc )
nkeynes@1186
   634
{
nkeynes@1186
   635
	if( IS_IN_ICACHE(pc) ) {
nkeynes@1194
   636
	    if( sh4_x86.sh4_mode != SH4_MODE_UNKNOWN && sh4_x86.end_callback == NULL ) {
nkeynes@1186
   637
	        /* Fixed address, in cache, and fixed SH4 mode - generate a call to the
nkeynes@1186
   638
	         * fetch-and-backpatch routine, which will replace the call with a branch */
nkeynes@1186
   639
           emit_translate_and_backpatch();	         
nkeynes@1186
   640
           return;
nkeynes@1186
   641
		} else {
nkeynes@1186
   642
            MOVP_moffptr_rax( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) );
nkeynes@1186
   643
            ANDP_imms_rptr( -4, REG_EAX );
nkeynes@1186
   644
        }
nkeynes@1186
   645
	} else if( sh4_x86.tlb_on ) {
nkeynes@1186
   646
        CALL1_ptr_r32(xlat_get_code_by_vma, REG_ARG1);
nkeynes@1186
   647
    } else {
nkeynes@1186
   648
        CALL1_ptr_r32(xlat_get_code, REG_ARG1);
nkeynes@1186
   649
    }
nkeynes@1186
   650
    jump_next_block();
nkeynes@1186
   651
nkeynes@1186
   652
nkeynes@1186
   653
}
nkeynes@1186
   654
nkeynes@1186
   655
void sh4_translate_unlink_block( void *use_list )
nkeynes@1186
   656
{
nkeynes@1186
   657
	uint8_t *tmp = xlat_output; /* In case something is active, which should never happen */
nkeynes@1186
   658
	void *next = use_list;
nkeynes@1186
   659
	while( next != NULL ) {
nkeynes@1186
   660
    	xlat_output = (uint8_t *)next;
nkeynes@1186
   661
 	    next = *(void **)(xlat_output+5);
nkeynes@1186
   662
 		emit_translate_and_backpatch();
nkeynes@1186
   663
 	}
nkeynes@1186
   664
 	xlat_output = tmp;
nkeynes@1186
   665
}
nkeynes@1186
   666
nkeynes@1186
   667
nkeynes@1186
   668
nkeynes@1125
   669
static void exit_block()
nkeynes@1125
   670
{
nkeynes@1125
   671
	emit_epilogue();
nkeynes@1125
   672
	if( sh4_x86.end_callback ) {
nkeynes@1125
   673
	    MOVP_immptr_rptr(sh4_x86.end_callback, REG_ECX);
nkeynes@1125
   674
	    JMP_rptr(REG_ECX);
nkeynes@1125
   675
	} else {
nkeynes@1125
   676
	    RET();
nkeynes@1125
   677
	}
nkeynes@1125
   678
}
nkeynes@1125
   679
nkeynes@590
   680
/**
nkeynes@995
   681
 * Exit the block with sh4r.pc already written
nkeynes@995
   682
 */
nkeynes@995
   683
void exit_block_pcset( sh4addr_t pc )
nkeynes@995
   684
{
nkeynes@995
   685
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   686
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   687
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@1112
   688
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   689
    JBE_label(exitloop);
nkeynes@995
   690
    MOVL_rbpdisp_r32( R_PC, REG_ARG1 );
nkeynes@995
   691
    if( sh4_x86.tlb_on ) {
nkeynes@995
   692
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
nkeynes@995
   693
    } else {
nkeynes@995
   694
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
nkeynes@995
   695
    }
nkeynes@1112
   696
    
nkeynes@1112
   697
    jump_next_block();
nkeynes@1112
   698
    JMP_TARGET(exitloop);
nkeynes@995
   699
    exit_block();
nkeynes@995
   700
}
nkeynes@995
   701
nkeynes@995
   702
/**
nkeynes@995
   703
 * Exit the block with sh4r.new_pc written with the target pc
nkeynes@995
   704
 */
nkeynes@995
   705
void exit_block_newpcset( sh4addr_t pc )
nkeynes@995
   706
{
nkeynes@995
   707
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   708
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   709
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@995
   710
    MOVL_rbpdisp_r32( R_NEW_PC, REG_ARG1 );
nkeynes@995
   711
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
nkeynes@1112
   712
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   713
    JBE_label(exitloop);
nkeynes@995
   714
    if( sh4_x86.tlb_on ) {
nkeynes@995
   715
        CALL1_ptr_r32(xlat_get_code_by_vma,REG_ARG1);
nkeynes@995
   716
    } else {
nkeynes@995
   717
        CALL1_ptr_r32(xlat_get_code,REG_ARG1);
nkeynes@995
   718
    }
nkeynes@1112
   719
	
nkeynes@1112
   720
	jump_next_block();
nkeynes@1112
   721
    JMP_TARGET(exitloop);
nkeynes@995
   722
    exit_block();
nkeynes@995
   723
}
nkeynes@995
   724
nkeynes@995
   725
nkeynes@995
   726
/**
nkeynes@995
   727
 * Exit the block to an absolute PC
nkeynes@995
   728
 */
nkeynes@995
   729
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
nkeynes@995
   730
{
nkeynes@1112
   731
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   732
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   733
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@1112
   734
nkeynes@1112
   735
    MOVL_imm32_r32( pc, REG_ARG1 );
nkeynes@1112
   736
    MOVL_r32_rbpdisp( REG_ARG1, R_PC );
nkeynes@1112
   737
    CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   738
    JBE_label(exitloop);
nkeynes@1186
   739
    jump_next_block_fixed_pc(pc);    
nkeynes@1112
   740
    JMP_TARGET(exitloop);
nkeynes@995
   741
    exit_block();
nkeynes@995
   742
}
nkeynes@995
   743
nkeynes@995
   744
/**
nkeynes@995
   745
 * Exit the block to a relative PC
nkeynes@995
   746
 */
nkeynes@995
   747
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
nkeynes@995
   748
{
nkeynes@1112
   749
    MOVL_imm32_r32( ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@1112
   750
    ADDL_rbpdisp_r32( REG_OFFSET(slice_cycle), REG_ECX );
nkeynes@1112
   751
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@1112
   752
nkeynes@1112
   753
	if( pc == sh4_x86.block_start_pc && sh4_x86.sh4_mode == sh4r.xlat_sh4_mode ) {
nkeynes@1112
   754
	    /* Special case for tight loops - the PC doesn't change, and
nkeynes@1112
   755
	     * we already know the target address. Just check events pending before
nkeynes@1112
   756
	     * looping.
nkeynes@1112
   757
	     */
nkeynes@1112
   758
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   759
        uint32_t backdisp = ((uintptr_t)(sh4_x86.code - xlat_output)) + PROLOGUE_SIZE;
nkeynes@1112
   760
        JCC_cc_prerel(X86_COND_A, backdisp);
nkeynes@1112
   761
	} else {
nkeynes@1112
   762
        MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ARG1 );
nkeynes@1112
   763
        ADDL_rbpdisp_r32( R_PC, REG_ARG1 );
nkeynes@1112
   764
        MOVL_r32_rbpdisp( REG_ARG1, R_PC );
nkeynes@1112
   765
        CMPL_r32_rbpdisp( REG_ECX, REG_OFFSET(event_pending) );
nkeynes@1112
   766
        JBE_label(exitloop2);
nkeynes@1186
   767
        
nkeynes@1186
   768
        jump_next_block_fixed_pc(pc);
nkeynes@1112
   769
        JMP_TARGET(exitloop2);
nkeynes@995
   770
    }
nkeynes@995
   771
    exit_block();
nkeynes@995
   772
}
nkeynes@995
   773
nkeynes@995
   774
/**
nkeynes@995
   775
 * Exit unconditionally with a general exception
nkeynes@995
   776
 */
nkeynes@1191
   777
void exit_block_exc( int code, sh4addr_t pc, int inst_adjust )
nkeynes@995
   778
{
nkeynes@995
   779
    MOVL_imm32_r32( pc - sh4_x86.block_start_pc, REG_ECX );
nkeynes@995
   780
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
nkeynes@1191
   781
    MOVL_imm32_r32( ((pc - sh4_x86.block_start_pc + inst_adjust)>>1)*sh4_cpu_period, REG_ECX );
nkeynes@995
   782
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );
nkeynes@995
   783
    MOVL_imm32_r32( code, REG_ARG1 );
nkeynes@995
   784
    CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
nkeynes@995
   785
    exit_block();
nkeynes@995
   786
}    
nkeynes@995
   787
nkeynes@995
   788
/**
nkeynes@590
   789
 * Embed a call to sh4_execute_instruction for situations that we
nkeynes@601
   790
 * can't translate (just page-crossing delay slots at the moment).
nkeynes@601
   791
 * Caller is responsible for setting new_pc before calling this function.
nkeynes@601
   792
 *
nkeynes@601
   793
 * Performs:
nkeynes@601
   794
 *   Set PC = endpc
nkeynes@601
   795
 *   Set sh4r.in_delay_slot = sh4_x86.in_delay_slot
nkeynes@601
   796
 *   Update slice_cycle for endpc+2 (single step doesn't update slice_cycle)
nkeynes@601
   797
 *   Call sh4_execute_instruction
nkeynes@601
   798
 *   Call xlat_get_code_by_vma / xlat_get_code as for normal exit
nkeynes@590
   799
 */
nkeynes@601
   800
void exit_block_emu( sh4vma_t endpc )
nkeynes@590
   801
{
nkeynes@995
   802
    MOVL_imm32_r32( endpc - sh4_x86.block_start_pc, REG_ECX );   // 5
nkeynes@991
   803
    ADDL_r32_rbpdisp( REG_ECX, R_PC );
nkeynes@586
   804
    
nkeynes@995
   805
    MOVL_imm32_r32( (((endpc - sh4_x86.block_start_pc)>>1)+1)*sh4_cpu_period, REG_ECX ); // 5
nkeynes@991
   806
    ADDL_r32_rbpdisp( REG_ECX, REG_OFFSET(slice_cycle) );     // 6
nkeynes@995
   807
    MOVL_imm32_r32( sh4_x86.in_delay_slot ? 1 : 0, REG_ECX );
nkeynes@995
   808
    MOVL_r32_rbpdisp( REG_ECX, REG_OFFSET(in_delay_slot) );
nkeynes@590
   809
nkeynes@1112
   810
    CALL_ptr( sh4_execute_instruction );
nkeynes@926
   811
    exit_block();
nkeynes@590
   812
} 
nkeynes@539
   813
nkeynes@359
   814
/**
nkeynes@995
   815
 * Write the block trailer (exception handling block)
nkeynes@995
   816
 */
nkeynes@995
   817
void sh4_translate_end_block( sh4addr_t pc ) {
nkeynes@995
   818
    if( sh4_x86.branch_taken == FALSE ) {
nkeynes@995
   819
        // Didn't exit unconditionally already, so write the termination here
nkeynes@995
   820
        exit_block_rel( pc, pc );
nkeynes@995
   821
    }
nkeynes@995
   822
    if( sh4_x86.backpatch_posn != 0 ) {
nkeynes@995
   823
        unsigned int i;
nkeynes@995
   824
        // Exception raised - cleanup and exit
nkeynes@995
   825
        uint8_t *end_ptr = xlat_output;
nkeynes@995
   826
        MOVL_r32_r32( REG_EDX, REG_ECX );
nkeynes@995
   827
        ADDL_r32_r32( REG_EDX, REG_ECX );
nkeynes@995
   828
        ADDL_r32_rbpdisp( REG_ECX, R_SPC );
nkeynes@995
   829
        MOVL_moffptr_eax( &sh4_cpu_period );
nkeynes@1191
   830
        INC_r32( REG_EDX );  /* Add 1 for the aborting instruction itself */ 
nkeynes@995
   831
        MULL_r32( REG_EDX );
nkeynes@995
   832
        ADDL_r32_rbpdisp( REG_EAX, REG_OFFSET(slice_cycle) );
nkeynes@995
   833
        exit_block();
nkeynes@995
   834
nkeynes@995
   835
        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
nkeynes@995
   836
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
nkeynes@995
   837
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
nkeynes@995
   838
                if( sh4_x86.backpatch_list[i].exc_code == -2 ) {
nkeynes@995
   839
                    *((uintptr_t *)fixup_addr) = (uintptr_t)xlat_output; 
nkeynes@995
   840
                } else {
nkeynes@995
   841
                    *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
nkeynes@995
   842
                }
nkeynes@995
   843
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
nkeynes@995
   844
                int rel = end_ptr - xlat_output;
nkeynes@995
   845
                JMP_prerel(rel);
nkeynes@995
   846
            } else {
nkeynes@995
   847
                *fixup_addr += xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
nkeynes@995
   848
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].exc_code, REG_ARG1 );
nkeynes@995
   849
                CALL1_ptr_r32( sh4_raise_exception, REG_ARG1 );
nkeynes@995
   850
                MOVL_imm32_r32( sh4_x86.backpatch_list[i].fixup_icount, REG_EDX );
nkeynes@995
   851
                int rel = end_ptr - xlat_output;
nkeynes@995
   852
                JMP_prerel(rel);
nkeynes@995
   853
            }
nkeynes@995
   854
        }
nkeynes@995
   855
    }
nkeynes@995
   856
}
nkeynes@539
   857
nkeynes@359
   858
/**
nkeynes@359
   859
 * Translate a single instruction. Delayed branches are handled specially
nkeynes@359
   860
 * by translating both branch and delayed instruction as a single unit.
nkeynes@359
   861
 * 
nkeynes@586
   862
 * The instruction MUST be in the icache (assert check)
nkeynes@359
   863
 *
nkeynes@359
   864
 * @return true if the instruction marks the end of a basic block
nkeynes@359
   865
 * (e.g. a branch or other block-terminating instruction)
nkeynes@359
   866
 */
nkeynes@590
   867
uint32_t sh4_translate_instruction( sh4vma_t pc )
nkeynes@359
   868
{
nkeynes@388
   869
    uint32_t ir;
nkeynes@586
   870
    /* Read instruction from icache */
nkeynes@586
   871
    assert( IS_IN_ICACHE(pc) );
nkeynes@586
   872
    ir = *(uint16_t *)GET_ICACHE_PTR(pc);
nkeynes@586
   873
    
nkeynes@586
   874
    if( !sh4_x86.in_delay_slot ) {
nkeynes@596
   875
	sh4_translate_add_recovery( (pc - sh4_x86.block_start_pc)>>1 );
nkeynes@388
   876
    }
nkeynes@1003
   877
    
nkeynes@1003
   878
    /* check for breakpoints at this pc */
nkeynes@1003
   879
    for( int i=0; i<sh4_breakpoint_count; i++ ) {
nkeynes@1003
   880
        if( sh4_breakpoints[i].address == pc ) {
nkeynes@1003
   881
            sh4_translate_emit_breakpoint(pc);
nkeynes@1003
   882
            break;
nkeynes@1003
   883
        }
nkeynes@571
   884
    }
nkeynes@359
   885
%%
nkeynes@359
   886
/* ALU operations */
nkeynes@359
   887
ADD Rm, Rn {:
nkeynes@671
   888
    COUNT_INST(I_ADD);
nkeynes@991
   889
    load_reg( REG_EAX, Rm );
nkeynes@991
   890
    load_reg( REG_ECX, Rn );
nkeynes@991
   891
    ADDL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
   892
    store_reg( REG_ECX, Rn );
nkeynes@417
   893
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   894
:}
nkeynes@359
   895
ADD #imm, Rn {:  
nkeynes@671
   896
    COUNT_INST(I_ADDI);
nkeynes@991
   897
    ADDL_imms_rbpdisp( imm, REG_OFFSET(r[Rn]) );
nkeynes@417
   898
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   899
:}
nkeynes@359
   900
ADDC Rm, Rn {:
nkeynes@671
   901
    COUNT_INST(I_ADDC);
nkeynes@417
   902
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@911
   903
        LDC_t();
nkeynes@417
   904
    }
nkeynes@991
   905
    load_reg( REG_EAX, Rm );
nkeynes@991
   906
    load_reg( REG_ECX, Rn );
nkeynes@991
   907
    ADCL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
   908
    store_reg( REG_ECX, Rn );
nkeynes@359
   909
    SETC_t();
nkeynes@417
   910
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
   911
:}
nkeynes@359
   912
ADDV Rm, Rn {:
nkeynes@671
   913
    COUNT_INST(I_ADDV);
nkeynes@991
   914
    load_reg( REG_EAX, Rm );
nkeynes@991
   915
    load_reg( REG_ECX, Rn );
nkeynes@991
   916
    ADDL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
   917
    store_reg( REG_ECX, Rn );
nkeynes@359
   918
    SETO_t();
nkeynes@417
   919
    sh4_x86.tstate = TSTATE_O;
nkeynes@359
   920
:}
nkeynes@359
   921
AND Rm, Rn {:
nkeynes@671
   922
    COUNT_INST(I_AND);
nkeynes@991
   923
    load_reg( REG_EAX, Rm );
nkeynes@991
   924
    load_reg( REG_ECX, Rn );
nkeynes@991
   925
    ANDL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
   926
    store_reg( REG_ECX, Rn );
nkeynes@417
   927
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   928
:}
nkeynes@359
   929
AND #imm, R0 {:  
nkeynes@671
   930
    COUNT_INST(I_ANDI);
nkeynes@991
   931
    load_reg( REG_EAX, 0 );
nkeynes@991
   932
    ANDL_imms_r32(imm, REG_EAX); 
nkeynes@991
   933
    store_reg( REG_EAX, 0 );
nkeynes@417
   934
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   935
:}
nkeynes@359
   936
AND.B #imm, @(R0, GBR) {: 
nkeynes@671
   937
    COUNT_INST(I_ANDB);
nkeynes@991
   938
    load_reg( REG_EAX, 0 );
nkeynes@991
   939
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
   940
    MOVL_r32_rspdisp(REG_EAX, 0);
nkeynes@991
   941
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
nkeynes@991
   942
    MOVL_rspdisp_r32(0, REG_EAX);
nkeynes@991
   943
    ANDL_imms_r32(imm, REG_EDX );
nkeynes@991
   944
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
   945
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
   946
:}
nkeynes@359
   947
CMP/EQ Rm, Rn {:  
nkeynes@671
   948
    COUNT_INST(I_CMPEQ);
nkeynes@991
   949
    load_reg( REG_EAX, Rm );
nkeynes@991
   950
    load_reg( REG_ECX, Rn );
nkeynes@991
   951
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   952
    SETE_t();
nkeynes@417
   953
    sh4_x86.tstate = TSTATE_E;
nkeynes@359
   954
:}
nkeynes@359
   955
CMP/EQ #imm, R0 {:  
nkeynes@671
   956
    COUNT_INST(I_CMPEQI);
nkeynes@991
   957
    load_reg( REG_EAX, 0 );
nkeynes@991
   958
    CMPL_imms_r32(imm, REG_EAX);
nkeynes@359
   959
    SETE_t();
nkeynes@417
   960
    sh4_x86.tstate = TSTATE_E;
nkeynes@359
   961
:}
nkeynes@359
   962
CMP/GE Rm, Rn {:  
nkeynes@671
   963
    COUNT_INST(I_CMPGE);
nkeynes@991
   964
    load_reg( REG_EAX, Rm );
nkeynes@991
   965
    load_reg( REG_ECX, Rn );
nkeynes@991
   966
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   967
    SETGE_t();
nkeynes@417
   968
    sh4_x86.tstate = TSTATE_GE;
nkeynes@359
   969
:}
nkeynes@359
   970
CMP/GT Rm, Rn {: 
nkeynes@671
   971
    COUNT_INST(I_CMPGT);
nkeynes@991
   972
    load_reg( REG_EAX, Rm );
nkeynes@991
   973
    load_reg( REG_ECX, Rn );
nkeynes@991
   974
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   975
    SETG_t();
nkeynes@417
   976
    sh4_x86.tstate = TSTATE_G;
nkeynes@359
   977
:}
nkeynes@359
   978
CMP/HI Rm, Rn {:  
nkeynes@671
   979
    COUNT_INST(I_CMPHI);
nkeynes@991
   980
    load_reg( REG_EAX, Rm );
nkeynes@991
   981
    load_reg( REG_ECX, Rn );
nkeynes@991
   982
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   983
    SETA_t();
nkeynes@417
   984
    sh4_x86.tstate = TSTATE_A;
nkeynes@359
   985
:}
nkeynes@359
   986
CMP/HS Rm, Rn {: 
nkeynes@671
   987
    COUNT_INST(I_CMPHS);
nkeynes@991
   988
    load_reg( REG_EAX, Rm );
nkeynes@991
   989
    load_reg( REG_ECX, Rn );
nkeynes@991
   990
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@359
   991
    SETAE_t();
nkeynes@417
   992
    sh4_x86.tstate = TSTATE_AE;
nkeynes@359
   993
 :}
nkeynes@359
   994
CMP/PL Rn {: 
nkeynes@671
   995
    COUNT_INST(I_CMPPL);
nkeynes@991
   996
    load_reg( REG_EAX, Rn );
nkeynes@991
   997
    CMPL_imms_r32( 0, REG_EAX );
nkeynes@359
   998
    SETG_t();
nkeynes@417
   999
    sh4_x86.tstate = TSTATE_G;
nkeynes@359
  1000
:}
nkeynes@359
  1001
CMP/PZ Rn {:  
nkeynes@671
  1002
    COUNT_INST(I_CMPPZ);
nkeynes@991
  1003
    load_reg( REG_EAX, Rn );
nkeynes@991
  1004
    CMPL_imms_r32( 0, REG_EAX );
nkeynes@359
  1005
    SETGE_t();
nkeynes@417
  1006
    sh4_x86.tstate = TSTATE_GE;
nkeynes@359
  1007
:}
nkeynes@361
  1008
CMP/STR Rm, Rn {:  
nkeynes@671
  1009
    COUNT_INST(I_CMPSTR);
nkeynes@991
  1010
    load_reg( REG_EAX, Rm );
nkeynes@991
  1011
    load_reg( REG_ECX, Rn );
nkeynes@991
  1012
    XORL_r32_r32( REG_ECX, REG_EAX );
nkeynes@991
  1013
    TESTB_r8_r8( REG_AL, REG_AL );
nkeynes@991
  1014
    JE_label(target1);
nkeynes@991
  1015
    TESTB_r8_r8( REG_AH, REG_AH );
nkeynes@991
  1016
    JE_label(target2);
nkeynes@991
  1017
    SHRL_imm_r32( 16, REG_EAX );
nkeynes@991
  1018
    TESTB_r8_r8( REG_AL, REG_AL );
nkeynes@991
  1019
    JE_label(target3);
nkeynes@991
  1020
    TESTB_r8_r8( REG_AH, REG_AH );
nkeynes@380
  1021
    JMP_TARGET(target1);
nkeynes@380
  1022
    JMP_TARGET(target2);
nkeynes@380
  1023
    JMP_TARGET(target3);
nkeynes@368
  1024
    SETE_t();
nkeynes@417
  1025
    sh4_x86.tstate = TSTATE_E;
nkeynes@361
  1026
:}
nkeynes@361
  1027
DIV0S Rm, Rn {:
nkeynes@671
  1028
    COUNT_INST(I_DIV0S);
nkeynes@991
  1029
    load_reg( REG_EAX, Rm );
nkeynes@991
  1030
    load_reg( REG_ECX, Rn );
nkeynes@991
  1031
    SHRL_imm_r32( 31, REG_EAX );
nkeynes@991
  1032
    SHRL_imm_r32( 31, REG_ECX );
nkeynes@995
  1033
    MOVL_r32_rbpdisp( REG_EAX, R_M );
nkeynes@995
  1034
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
nkeynes@991
  1035
    CMPL_r32_r32( REG_EAX, REG_ECX );
nkeynes@386
  1036
    SETNE_t();
nkeynes@417
  1037
    sh4_x86.tstate = TSTATE_NE;
nkeynes@361
  1038
:}
nkeynes@361
  1039
DIV0U {:  
nkeynes@671
  1040
    COUNT_INST(I_DIV0U);
nkeynes@991
  1041
    XORL_r32_r32( REG_EAX, REG_EAX );
nkeynes@995
  1042
    MOVL_r32_rbpdisp( REG_EAX, R_Q );
nkeynes@995
  1043
    MOVL_r32_rbpdisp( REG_EAX, R_M );
nkeynes@995
  1044
    MOVL_r32_rbpdisp( REG_EAX, R_T );
nkeynes@417
  1045
    sh4_x86.tstate = TSTATE_C; // works for DIV1
nkeynes@361
  1046
:}
nkeynes@386
  1047
DIV1 Rm, Rn {:
nkeynes@671
  1048
    COUNT_INST(I_DIV1);
nkeynes@995
  1049
    MOVL_rbpdisp_r32( R_M, REG_ECX );
nkeynes@991
  1050
    load_reg( REG_EAX, Rn );
nkeynes@417
  1051
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@417
  1052
	LDC_t();
nkeynes@417
  1053
    }
nkeynes@991
  1054
    RCLL_imm_r32( 1, REG_EAX );
nkeynes@991
  1055
    SETC_r8( REG_DL ); // Q'
nkeynes@991
  1056
    CMPL_rbpdisp_r32( R_Q, REG_ECX );
nkeynes@991
  1057
    JE_label(mqequal);
nkeynes@991
  1058
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
nkeynes@991
  1059
    JMP_label(end);
nkeynes@380
  1060
    JMP_TARGET(mqequal);
nkeynes@991
  1061
    SUBL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
nkeynes@386
  1062
    JMP_TARGET(end);
nkeynes@991
  1063
    store_reg( REG_EAX, Rn ); // Done with Rn now
nkeynes@991
  1064
    SETC_r8(REG_AL); // tmp1
nkeynes@991
  1065
    XORB_r8_r8( REG_DL, REG_AL ); // Q' = Q ^ tmp1
nkeynes@991
  1066
    XORB_r8_r8( REG_AL, REG_CL ); // Q'' = Q' ^ M
nkeynes@995
  1067
    MOVL_r32_rbpdisp( REG_ECX, R_Q );
nkeynes@991
  1068
    XORL_imms_r32( 1, REG_AL );   // T = !Q'
nkeynes@991
  1069
    MOVZXL_r8_r32( REG_AL, REG_EAX );
nkeynes@995
  1070
    MOVL_r32_rbpdisp( REG_EAX, R_T );
nkeynes@417
  1071
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@374
  1072
:}
nkeynes@361
  1073
DMULS.L Rm, Rn {:  
nkeynes@671
  1074
    COUNT_INST(I_DMULS);
nkeynes@991
  1075
    load_reg( REG_EAX, Rm );
nkeynes@991
  1076
    load_reg( REG_ECX, Rn );
nkeynes@991
  1077
    IMULL_r32(REG_ECX);
nkeynes@995
  1078
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
nkeynes@995
  1079
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@417
  1080
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1081
:}
nkeynes@361
  1082
DMULU.L Rm, Rn {:  
nkeynes@671
  1083
    COUNT_INST(I_DMULU);
nkeynes@991
  1084
    load_reg( REG_EAX, Rm );
nkeynes@991
  1085
    load_reg( REG_ECX, Rn );
nkeynes@991
  1086
    MULL_r32(REG_ECX);
nkeynes@995
  1087
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );
nkeynes@995
  1088
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );    
nkeynes@417
  1089
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1090
:}
nkeynes@359
  1091
DT Rn {:  
nkeynes@671
  1092
    COUNT_INST(I_DT);
nkeynes@991
  1093
    load_reg( REG_EAX, Rn );
nkeynes@991
  1094
    ADDL_imms_r32( -1, REG_EAX );
nkeynes@991
  1095
    store_reg( REG_EAX, Rn );
nkeynes@359
  1096
    SETE_t();
nkeynes@417
  1097
    sh4_x86.tstate = TSTATE_E;
nkeynes@359
  1098
:}
nkeynes@359
  1099
EXTS.B Rm, Rn {:  
nkeynes@671
  1100
    COUNT_INST(I_EXTSB);
nkeynes@991
  1101
    load_reg( REG_EAX, Rm );
nkeynes@991
  1102
    MOVSXL_r8_r32( REG_EAX, REG_EAX );
nkeynes@991
  1103
    store_reg( REG_EAX, Rn );
nkeynes@359
  1104
:}
nkeynes@361
  1105
EXTS.W Rm, Rn {:  
nkeynes@671
  1106
    COUNT_INST(I_EXTSW);
nkeynes@991
  1107
    load_reg( REG_EAX, Rm );
nkeynes@991
  1108
    MOVSXL_r16_r32( REG_EAX, REG_EAX );
nkeynes@991
  1109
    store_reg( REG_EAX, Rn );
nkeynes@361
  1110
:}
nkeynes@361
  1111
EXTU.B Rm, Rn {:  
nkeynes@671
  1112
    COUNT_INST(I_EXTUB);
nkeynes@991
  1113
    load_reg( REG_EAX, Rm );
nkeynes@991
  1114
    MOVZXL_r8_r32( REG_EAX, REG_EAX );
nkeynes@991
  1115
    store_reg( REG_EAX, Rn );
nkeynes@361
  1116
:}
nkeynes@361
  1117
EXTU.W Rm, Rn {:  
nkeynes@671
  1118
    COUNT_INST(I_EXTUW);
nkeynes@991
  1119
    load_reg( REG_EAX, Rm );
nkeynes@991
  1120
    MOVZXL_r16_r32( REG_EAX, REG_EAX );
nkeynes@991
  1121
    store_reg( REG_EAX, Rn );
nkeynes@361
  1122
:}
nkeynes@586
  1123
MAC.L @Rm+, @Rn+ {:
nkeynes@671
  1124
    COUNT_INST(I_MACL);
nkeynes@586
  1125
    if( Rm == Rn ) {
nkeynes@991
  1126
	load_reg( REG_EAX, Rm );
nkeynes@991
  1127
	check_ralign32( REG_EAX );
nkeynes@991
  1128
	MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1129
	MOVL_r32_rspdisp(REG_EAX, 0);
nkeynes@991
  1130
	load_reg( REG_EAX, Rm );
nkeynes@991
  1131
	LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  1132
	MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1133
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rn]) );
nkeynes@586
  1134
    } else {
nkeynes@991
  1135
	load_reg( REG_EAX, Rm );
nkeynes@991
  1136
	check_ralign32( REG_EAX );
nkeynes@991
  1137
	MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1138
	MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@991
  1139
	load_reg( REG_EAX, Rn );
nkeynes@991
  1140
	check_ralign32( REG_EAX );
nkeynes@991
  1141
	MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1142
	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
nkeynes@991
  1143
	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
nkeynes@586
  1144
    }
nkeynes@939
  1145
    
nkeynes@991
  1146
    IMULL_rspdisp( 0 );
nkeynes@991
  1147
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@991
  1148
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );
nkeynes@386
  1149
nkeynes@995
  1150
    MOVL_rbpdisp_r32( R_S, REG_ECX );
nkeynes@991
  1151
    TESTL_r32_r32(REG_ECX, REG_ECX);
nkeynes@991
  1152
    JE_label( nosat );
nkeynes@995
  1153
    CALL_ptr( signsat48 );
nkeynes@386
  1154
    JMP_TARGET( nosat );
nkeynes@417
  1155
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@386
  1156
:}
nkeynes@386
  1157
MAC.W @Rm+, @Rn+ {:  
nkeynes@671
  1158
    COUNT_INST(I_MACW);
nkeynes@586
  1159
    if( Rm == Rn ) {
nkeynes@991
  1160
	load_reg( REG_EAX, Rm );
nkeynes@991
  1161
	check_ralign16( REG_EAX );
nkeynes@991
  1162
	MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1163
        MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@991
  1164
	load_reg( REG_EAX, Rm );
nkeynes@991
  1165
	LEAL_r32disp_r32( REG_EAX, 2, REG_EAX );
nkeynes@991
  1166
	MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1167
	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rn]) );
nkeynes@586
  1168
	// Note translate twice in case of page boundaries. Maybe worth
nkeynes@586
  1169
	// adding a page-boundary check to skip the second translation
nkeynes@586
  1170
    } else {
nkeynes@1193
  1171
	load_reg( REG_EAX, Rn );
nkeynes@991
  1172
	check_ralign16( REG_EAX );
nkeynes@991
  1173
	MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1174
        MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@1193
  1175
	load_reg( REG_EAX, Rm );
nkeynes@991
  1176
	check_ralign16( REG_EAX );
nkeynes@991
  1177
	MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1178
	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rn]) );
nkeynes@991
  1179
	ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
nkeynes@586
  1180
    }
nkeynes@991
  1181
    IMULL_rspdisp( 0 );
nkeynes@995
  1182
    MOVL_rbpdisp_r32( R_S, REG_ECX );
nkeynes@991
  1183
    TESTL_r32_r32( REG_ECX, REG_ECX );
nkeynes@991
  1184
    JE_label( nosat );
nkeynes@386
  1185
nkeynes@991
  1186
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
nkeynes@991
  1187
    JNO_label( end );            // 2
nkeynes@995
  1188
    MOVL_imm32_r32( 1, REG_EDX );         // 5
nkeynes@995
  1189
    MOVL_r32_rbpdisp( REG_EDX, R_MACH );   // 6
nkeynes@991
  1190
    JS_label( positive );        // 2
nkeynes@995
  1191
    MOVL_imm32_r32( 0x80000000, REG_EAX );// 5
nkeynes@995
  1192
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
nkeynes@991
  1193
    JMP_label(end2);           // 2
nkeynes@386
  1194
nkeynes@386
  1195
    JMP_TARGET(positive);
nkeynes@995
  1196
    MOVL_imm32_r32( 0x7FFFFFFF, REG_EAX );// 5
nkeynes@995
  1197
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );   // 6
nkeynes@991
  1198
    JMP_label(end3);            // 2
nkeynes@386
  1199
nkeynes@386
  1200
    JMP_TARGET(nosat);
nkeynes@991
  1201
    ADDL_r32_rbpdisp( REG_EAX, R_MACL );  // 6
nkeynes@991
  1202
    ADCL_r32_rbpdisp( REG_EDX, R_MACH );  // 6
nkeynes@386
  1203
    JMP_TARGET(end);
nkeynes@386
  1204
    JMP_TARGET(end2);
nkeynes@386
  1205
    JMP_TARGET(end3);
nkeynes@417
  1206
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@386
  1207
:}
nkeynes@359
  1208
MOVT Rn {:  
nkeynes@671
  1209
    COUNT_INST(I_MOVT);
nkeynes@995
  1210
    MOVL_rbpdisp_r32( R_T, REG_EAX );
nkeynes@991
  1211
    store_reg( REG_EAX, Rn );
nkeynes@359
  1212
:}
nkeynes@361
  1213
MUL.L Rm, Rn {:  
nkeynes@671
  1214
    COUNT_INST(I_MULL);
nkeynes@991
  1215
    load_reg( REG_EAX, Rm );
nkeynes@991
  1216
    load_reg( REG_ECX, Rn );
nkeynes@991
  1217
    MULL_r32( REG_ECX );
nkeynes@995
  1218
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@417
  1219
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1220
:}
nkeynes@374
  1221
MULS.W Rm, Rn {:
nkeynes@671
  1222
    COUNT_INST(I_MULSW);
nkeynes@995
  1223
    MOVSXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
nkeynes@995
  1224
    MOVSXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
nkeynes@991
  1225
    MULL_r32( REG_ECX );
nkeynes@995
  1226
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@417
  1227
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1228
:}
nkeynes@374
  1229
MULU.W Rm, Rn {:  
nkeynes@671
  1230
    COUNT_INST(I_MULUW);
nkeynes@995
  1231
    MOVZXL_rbpdisp16_r32( R_R(Rm), REG_EAX );
nkeynes@995
  1232
    MOVZXL_rbpdisp16_r32( R_R(Rn), REG_ECX );
nkeynes@991
  1233
    MULL_r32( REG_ECX );
nkeynes@995
  1234
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@417
  1235
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@374
  1236
:}
nkeynes@359
  1237
NEG Rm, Rn {:
nkeynes@671
  1238
    COUNT_INST(I_NEG);
nkeynes@991
  1239
    load_reg( REG_EAX, Rm );
nkeynes@991
  1240
    NEGL_r32( REG_EAX );
nkeynes@991
  1241
    store_reg( REG_EAX, Rn );
nkeynes@417
  1242
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1243
:}
nkeynes@359
  1244
NEGC Rm, Rn {:  
    /* Negate with carry: emits x86 code computing Rn = 0 - Rm - carry and
     * then records the resulting carry through SETC_t(). */
nkeynes@671
  1245
    COUNT_INST(I_NEGC);
nkeynes@991
  1246
    load_reg( REG_EAX, Rm );
nkeynes@991
  1247
    /* Zero ECX to serve as the minuend. On x86, XOR also clears CF, so the
     * carry has to be (re)loaded afterwards - this is why LDC_t() below is
     * unconditional instead of being guarded by a tstate check as in the
     * SUBC / ROTCL / ROTCR templates. */
    XORL_r32_r32( REG_ECX, REG_ECX );
nkeynes@359
  1248
    /* NOTE(review): LDC_t() presumably loads the SH4 T bit into the x86
     * carry flag - confirm against its definition. */
    LDC_t();
nkeynes@991
  1249
    /* ECX = ECX - EAX - CF (the second operand is the destination, as in the
     * SUB Rm, Rn template). */
    SBBL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1250
    store_reg( REG_ECX, Rn );
nkeynes@359
  1251
    /* NOTE(review): SETC_t() presumably stores the borrow (CF) into T. */
    SETC_t();
nkeynes@417
  1252
    /* Marks the carry state as current, which lets a following
     * SUBC / ROTCL / ROTCR skip its own LDC_t(). */
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1253
:}
nkeynes@359
  1254
NOT Rm, Rn {:  
nkeynes@671
  1255
    COUNT_INST(I_NOT);
nkeynes@991
  1256
    load_reg( REG_EAX, Rm );
nkeynes@991
  1257
    NOTL_r32( REG_EAX );
nkeynes@991
  1258
    store_reg( REG_EAX, Rn );
nkeynes@417
  1259
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1260
:}
nkeynes@359
  1261
OR Rm, Rn {:  
nkeynes@671
  1262
    COUNT_INST(I_OR);
nkeynes@991
  1263
    load_reg( REG_EAX, Rm );
nkeynes@991
  1264
    load_reg( REG_ECX, Rn );
nkeynes@991
  1265
    ORL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1266
    store_reg( REG_ECX, Rn );
nkeynes@417
  1267
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1268
:}
nkeynes@359
  1269
OR #imm, R0 {:
nkeynes@671
  1270
    COUNT_INST(I_ORI);
nkeynes@991
  1271
    load_reg( REG_EAX, 0 );
nkeynes@991
  1272
    ORL_imms_r32(imm, REG_EAX);
nkeynes@991
  1273
    store_reg( REG_EAX, 0 );
nkeynes@417
  1274
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1275
:}
nkeynes@374
  1276
OR.B #imm, @(R0, GBR) {:  
nkeynes@671
  1277
    COUNT_INST(I_ORB);
nkeynes@991
  1278
    load_reg( REG_EAX, 0 );
nkeynes@991
  1279
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1280
    MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@991
  1281
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
nkeynes@991
  1282
    MOVL_rspdisp_r32( 0, REG_EAX );
nkeynes@991
  1283
    ORL_imms_r32(imm, REG_EDX );
nkeynes@991
  1284
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1285
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@374
  1286
:}
nkeynes@359
  1287
ROTCL Rn {:
nkeynes@671
  1288
    COUNT_INST(I_ROTCL);
nkeynes@991
  1289
    load_reg( REG_EAX, Rn );
nkeynes@417
  1290
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@417
  1291
	LDC_t();
nkeynes@417
  1292
    }
nkeynes@991
  1293
    RCLL_imm_r32( 1, REG_EAX );
nkeynes@991
  1294
    store_reg( REG_EAX, Rn );
nkeynes@359
  1295
    SETC_t();
nkeynes@417
  1296
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1297
:}
nkeynes@359
  1298
ROTCR Rn {:  
nkeynes@671
  1299
    COUNT_INST(I_ROTCR);
nkeynes@991
  1300
    load_reg( REG_EAX, Rn );
nkeynes@417
  1301
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@417
  1302
	LDC_t();
nkeynes@417
  1303
    }
nkeynes@991
  1304
    RCRL_imm_r32( 1, REG_EAX );
nkeynes@991
  1305
    store_reg( REG_EAX, Rn );
nkeynes@359
  1306
    SETC_t();
nkeynes@417
  1307
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1308
:}
nkeynes@359
  1309
ROTL Rn {:  
nkeynes@671
  1310
    COUNT_INST(I_ROTL);
nkeynes@991
  1311
    load_reg( REG_EAX, Rn );
nkeynes@991
  1312
    ROLL_imm_r32( 1, REG_EAX );
nkeynes@991
  1313
    store_reg( REG_EAX, Rn );
nkeynes@359
  1314
    SETC_t();
nkeynes@417
  1315
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1316
:}
nkeynes@359
  1317
ROTR Rn {:  
nkeynes@671
  1318
    COUNT_INST(I_ROTR);
nkeynes@991
  1319
    load_reg( REG_EAX, Rn );
nkeynes@991
  1320
    RORL_imm_r32( 1, REG_EAX );
nkeynes@991
  1321
    store_reg( REG_EAX, Rn );
nkeynes@359
  1322
    SETC_t();
nkeynes@417
  1323
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1324
:}
nkeynes@359
  1325
SHAD Rm, Rn {:
    /* Dynamic arithmetic shift of Rn by the signed count in Rm.
     * The emitted code has three paths:
     *   Rm >= 0                -> Rn <<= (Rm & 0x1F)
     *   Rm < 0, (-Rm)&0x1F != 0 -> Rn >>= ((-Rm) & 0x1F)   (arithmetic)
     *   Rm < 0, (-Rm)&0x1F == 0 -> Rn >>= 31               (arithmetic)
     * NOTE(review): the trailing "// N" numbers look like emitted instruction
     * byte counts - confirm before relying on them. */
nkeynes@671
  1326
    COUNT_INST(I_SHAD);
nkeynes@359
  1327
    /* Annoyingly enough, not directly convertible */
nkeynes@991
  1328
    load_reg( REG_EAX, Rn );
nkeynes@991
  1329
    load_reg( REG_ECX, Rm );
nkeynes@991
  1330
    CMPL_imms_r32( 0, REG_ECX );
nkeynes@991
  1331
    /* Non-negative count: go to the left-shift path. */
    JGE_label(doshl);
nkeynes@361
  1332
                    
nkeynes@991
  1333
    /* Negative count: turn it into a positive right-shift amount. */
    NEGL_r32( REG_ECX );      // 2
nkeynes@991
  1334
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
nkeynes@991
  1335
    /* The AND leaves ZF set when the masked count is zero; that case is
     * handled separately below rather than as a shift by 0. */
    JE_label(emptysar);     // 2
nkeynes@991
  1336
    SARL_cl_r32( REG_EAX );       // 2
nkeynes@991
  1337
    JMP_label(end);          // 2
nkeynes@386
  1338
nkeynes@386
  1339
    JMP_TARGET(emptysar);
nkeynes@991
  1340
    /* Masked count of zero with a negative Rm: shift right by 31 so the
     * result is the sign bit replicated across the register. */
    SARL_imm_r32(31, REG_EAX );  // 3
nkeynes@991
  1341
    JMP_label(end2);
nkeynes@382
  1342
nkeynes@380
  1343
    JMP_TARGET(doshl);
nkeynes@991
  1344
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
nkeynes@991
  1345
    SHLL_cl_r32( REG_EAX );       // 2
nkeynes@380
  1346
    /* Both right-shift paths rejoin here; each forward jump has its own label. */
    JMP_TARGET(end);
nkeynes@386
  1347
    JMP_TARGET(end2);
nkeynes@991
  1348
    store_reg( REG_EAX, Rn );
nkeynes@417
  1349
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1350
:}
nkeynes@359
  1351
SHLD Rm, Rn {:  
    /* Dynamic logical shift of Rn by the signed count in Rm.
     * The emitted code has three paths:
     *   Rm >= 0                -> Rn <<= (Rm & 0x1F)
     *   Rm < 0, (-Rm)&0x1F != 0 -> Rn >>= ((-Rm) & 0x1F)   (logical)
     *   Rm < 0, (-Rm)&0x1F == 0 -> Rn = 0
     * NOTE(review): the trailing "// N" numbers look like emitted instruction
     * byte counts - confirm before relying on them. */
nkeynes@671
  1352
    COUNT_INST(I_SHLD);
nkeynes@991
  1353
    load_reg( REG_EAX, Rn );
nkeynes@991
  1354
    load_reg( REG_ECX, Rm );
nkeynes@991
  1355
    CMPL_imms_r32( 0, REG_ECX );
nkeynes@991
  1356
    /* Non-negative count: go to the left-shift path. */
    JGE_label(doshl);
nkeynes@368
  1357
nkeynes@991
  1358
    /* Negative count: turn it into a positive right-shift amount. */
    NEGL_r32( REG_ECX );      // 2
nkeynes@991
  1359
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
nkeynes@991
  1360
    /* The AND leaves ZF set when the masked count is zero; that case is
     * handled separately below rather than as a shift by 0. */
    JE_label(emptyshr );
nkeynes@991
  1361
    SHRL_cl_r32( REG_EAX );       // 2
nkeynes@991
  1362
    JMP_label(end);          // 2
nkeynes@386
  1363
nkeynes@386
  1364
    JMP_TARGET(emptyshr);
nkeynes@991
  1365
    /* Masked count of zero with a negative Rm: the result is cleared to 0. */
    XORL_r32_r32( REG_EAX, REG_EAX );
nkeynes@991
  1366
    JMP_label(end2);
nkeynes@382
  1367
nkeynes@382
  1368
    JMP_TARGET(doshl);
nkeynes@991
  1369
    ANDB_imms_r8( 0x1F, REG_CL ); // 3
nkeynes@991
  1370
    SHLL_cl_r32( REG_EAX );       // 2
nkeynes@382
  1371
    /* Both right-shift paths rejoin here; each forward jump has its own label. */
    JMP_TARGET(end);
nkeynes@386
  1372
    JMP_TARGET(end2);
nkeynes@991
  1373
    store_reg( REG_EAX, Rn );
nkeynes@417
  1374
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1375
:}
nkeynes@359
  1376
SHAL Rn {: 
nkeynes@671
  1377
    COUNT_INST(I_SHAL);
nkeynes@991
  1378
    load_reg( REG_EAX, Rn );
nkeynes@991
  1379
    SHLL_imm_r32( 1, REG_EAX );
nkeynes@397
  1380
    SETC_t();
nkeynes@991
  1381
    store_reg( REG_EAX, Rn );
nkeynes@417
  1382
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1383
:}
nkeynes@359
  1384
SHAR Rn {:  
nkeynes@671
  1385
    COUNT_INST(I_SHAR);
nkeynes@991
  1386
    load_reg( REG_EAX, Rn );
nkeynes@991
  1387
    SARL_imm_r32( 1, REG_EAX );
nkeynes@397
  1388
    SETC_t();
nkeynes@991
  1389
    store_reg( REG_EAX, Rn );
nkeynes@417
  1390
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1391
:}
nkeynes@359
  1392
SHLL Rn {:  
nkeynes@671
  1393
    COUNT_INST(I_SHLL);
nkeynes@991
  1394
    load_reg( REG_EAX, Rn );
nkeynes@991
  1395
    SHLL_imm_r32( 1, REG_EAX );
nkeynes@397
  1396
    SETC_t();
nkeynes@991
  1397
    store_reg( REG_EAX, Rn );
nkeynes@417
  1398
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1399
:}
nkeynes@359
  1400
SHLL2 Rn {:
nkeynes@671
  1401
    COUNT_INST(I_SHLL);
nkeynes@991
  1402
    load_reg( REG_EAX, Rn );
nkeynes@991
  1403
    SHLL_imm_r32( 2, REG_EAX );
nkeynes@991
  1404
    store_reg( REG_EAX, Rn );
nkeynes@417
  1405
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1406
:}
nkeynes@359
  1407
SHLL8 Rn {:  
nkeynes@671
  1408
    COUNT_INST(I_SHLL);
nkeynes@991
  1409
    load_reg( REG_EAX, Rn );
nkeynes@991
  1410
    SHLL_imm_r32( 8, REG_EAX );
nkeynes@991
  1411
    store_reg( REG_EAX, Rn );
nkeynes@417
  1412
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1413
:}
nkeynes@359
  1414
SHLL16 Rn {:  
nkeynes@671
  1415
    COUNT_INST(I_SHLL);
nkeynes@991
  1416
    load_reg( REG_EAX, Rn );
nkeynes@991
  1417
    SHLL_imm_r32( 16, REG_EAX );
nkeynes@991
  1418
    store_reg( REG_EAX, Rn );
nkeynes@417
  1419
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1420
:}
nkeynes@359
  1421
SHLR Rn {:  
nkeynes@671
  1422
    COUNT_INST(I_SHLR);
nkeynes@991
  1423
    load_reg( REG_EAX, Rn );
nkeynes@991
  1424
    SHRL_imm_r32( 1, REG_EAX );
nkeynes@397
  1425
    SETC_t();
nkeynes@991
  1426
    store_reg( REG_EAX, Rn );
nkeynes@417
  1427
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1428
:}
nkeynes@359
  1429
SHLR2 Rn {:  
nkeynes@671
  1430
    COUNT_INST(I_SHLR);
nkeynes@991
  1431
    load_reg( REG_EAX, Rn );
nkeynes@991
  1432
    SHRL_imm_r32( 2, REG_EAX );
nkeynes@991
  1433
    store_reg( REG_EAX, Rn );
nkeynes@417
  1434
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1435
:}
nkeynes@359
  1436
SHLR8 Rn {:  
nkeynes@671
  1437
    COUNT_INST(I_SHLR);
nkeynes@991
  1438
    load_reg( REG_EAX, Rn );
nkeynes@991
  1439
    SHRL_imm_r32( 8, REG_EAX );
nkeynes@991
  1440
    store_reg( REG_EAX, Rn );
nkeynes@417
  1441
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1442
:}
nkeynes@359
  1443
SHLR16 Rn {:  
nkeynes@671
  1444
    COUNT_INST(I_SHLR);
nkeynes@991
  1445
    load_reg( REG_EAX, Rn );
nkeynes@991
  1446
    SHRL_imm_r32( 16, REG_EAX );
nkeynes@991
  1447
    store_reg( REG_EAX, Rn );
nkeynes@417
  1448
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1449
:}
nkeynes@359
  1450
SUB Rm, Rn {:  
nkeynes@671
  1451
    COUNT_INST(I_SUB);
nkeynes@991
  1452
    load_reg( REG_EAX, Rm );
nkeynes@991
  1453
    load_reg( REG_ECX, Rn );
nkeynes@991
  1454
    SUBL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1455
    store_reg( REG_ECX, Rn );
nkeynes@417
  1456
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1457
:}
nkeynes@359
  1458
SUBC Rm, Rn {:  
nkeynes@671
  1459
    COUNT_INST(I_SUBC);
nkeynes@991
  1460
    load_reg( REG_EAX, Rm );
nkeynes@991
  1461
    load_reg( REG_ECX, Rn );
nkeynes@417
  1462
    if( sh4_x86.tstate != TSTATE_C ) {
nkeynes@417
  1463
	LDC_t();
nkeynes@417
  1464
    }
nkeynes@991
  1465
    SBBL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1466
    store_reg( REG_ECX, Rn );
nkeynes@394
  1467
    SETC_t();
nkeynes@417
  1468
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  1469
:}
nkeynes@359
  1470
SUBV Rm, Rn {:  
nkeynes@671
  1471
    COUNT_INST(I_SUBV);
nkeynes@991
  1472
    load_reg( REG_EAX, Rm );
nkeynes@991
  1473
    load_reg( REG_ECX, Rn );
nkeynes@991
  1474
    SUBL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1475
    store_reg( REG_ECX, Rn );
nkeynes@359
  1476
    SETO_t();
nkeynes@417
  1477
    sh4_x86.tstate = TSTATE_O;
nkeynes@359
  1478
:}
nkeynes@359
  1479
SWAP.B Rm, Rn {:  
nkeynes@671
  1480
    COUNT_INST(I_SWAPB);
nkeynes@991
  1481
    load_reg( REG_EAX, Rm );
nkeynes@991
  1482
    XCHGB_r8_r8( REG_AL, REG_AH ); // NB: does not touch EFLAGS
nkeynes@991
  1483
    store_reg( REG_EAX, Rn );
nkeynes@359
  1484
:}
nkeynes@359
  1485
SWAP.W Rm, Rn {:  
    /* Swap the upper and lower 16-bit halves of Rm and store the result
     * in Rn. */
nkeynes@671
  1486
    /* Count under the SWAP.W statistic (distinct from SWAP.B's I_SWAPB). */
    COUNT_INST(I_SWAPW);
nkeynes@991
  1487
    load_reg( REG_EAX, Rm );
nkeynes@991
  1488
    MOVL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1489
    /* ECX = original low half moved into the top 16 bits. */
    SHLL_imm_r32( 16, REG_ECX );
nkeynes@991
  1490
    /* EAX = original high half moved into the bottom 16 bits. */
    SHRL_imm_r32( 16, REG_EAX );
nkeynes@991
  1491
    ORL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1492
    store_reg( REG_ECX, Rn );
nkeynes@417
  1493
    /* The shifts and OR overwrite the x86 flags, so no cached T state
     * survives this instruction. */
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1494
:}
nkeynes@361
  1495
TAS.B @Rn {:  
    /* Test-and-set: reads the byte at the address in Rn, sets T according to
     * whether that byte was zero, then writes the byte back with bit 7 set. */
nkeynes@671
  1496
    COUNT_INST(I_TASB);
nkeynes@991
  1497
    load_reg( REG_EAX, Rn );
nkeynes@991
  1498
    /* Keep a copy of the address in stack slot 0; it is reloaded into EAX
     * before the write-back below.
     * NOTE(review): presumably needed because the read sequence does not
     * preserve EAX - confirm against MEM_READ_BYTE_FOR_WRITE. */
    MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@991
  1499
    MEM_READ_BYTE_FOR_WRITE( REG_EAX, REG_EDX );
nkeynes@991
  1500
    /* ZF = (byte == 0), captured into T by SETE_t() before the OR below
     * changes the flags. */
    TESTB_r8_r8( REG_DL, REG_DL );
nkeynes@361
  1501
    SETE_t();
nkeynes@991
  1502
    ORB_imms_r8( 0x80, REG_DL );
nkeynes@991
  1503
    MOVL_rspdisp_r32( 0, REG_EAX );
nkeynes@991
  1504
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1505
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1506
:}
nkeynes@361
  1507
TST Rm, Rn {:  
nkeynes@671
  1508
    COUNT_INST(I_TST);
nkeynes@991
  1509
    load_reg( REG_EAX, Rm );
nkeynes@991
  1510
    load_reg( REG_ECX, Rn );
nkeynes@991
  1511
    TESTL_r32_r32( REG_EAX, REG_ECX );
nkeynes@361
  1512
    SETE_t();
nkeynes@417
  1513
    sh4_x86.tstate = TSTATE_E;
nkeynes@361
  1514
:}
nkeynes@368
  1515
TST #imm, R0 {:  
nkeynes@671
  1516
    COUNT_INST(I_TSTI);
nkeynes@991
  1517
    load_reg( REG_EAX, 0 );
nkeynes@991
  1518
    TESTL_imms_r32( imm, REG_EAX );
nkeynes@368
  1519
    SETE_t();
nkeynes@417
  1520
    sh4_x86.tstate = TSTATE_E;
nkeynes@368
  1521
:}
nkeynes@368
  1522
TST.B #imm, @(R0, GBR) {:  
nkeynes@671
  1523
    COUNT_INST(I_TSTB);
nkeynes@991
  1524
    load_reg( REG_EAX, 0);
nkeynes@991
  1525
    ADDL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1526
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1527
    TESTB_imms_r8( imm, REG_AL );
nkeynes@368
  1528
    SETE_t();
nkeynes@417
  1529
    sh4_x86.tstate = TSTATE_E;
nkeynes@368
  1530
:}
nkeynes@359
  1531
XOR Rm, Rn {:  
nkeynes@671
  1532
    COUNT_INST(I_XOR);
nkeynes@991
  1533
    load_reg( REG_EAX, Rm );
nkeynes@991
  1534
    load_reg( REG_ECX, Rn );
nkeynes@991
  1535
    XORL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1536
    store_reg( REG_ECX, Rn );
nkeynes@417
  1537
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1538
:}
nkeynes@359
  1539
XOR #imm, R0 {:  
nkeynes@671
  1540
    COUNT_INST(I_XORI);
nkeynes@991
  1541
    load_reg( REG_EAX, 0 );
nkeynes@991
  1542
    XORL_imms_r32( imm, REG_EAX );
nkeynes@991
  1543
    store_reg( REG_EAX, 0 );
nkeynes@417
  1544
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1545
:}
nkeynes@359
  1546
XOR.B #imm, @(R0, GBR) {:  
nkeynes@671
  1547
    COUNT_INST(I_XORB);
nkeynes@991
  1548
    load_reg( REG_EAX, 0 );
nkeynes@991
  1549
    ADDL_rbpdisp_r32( R_GBR, REG_EAX ); 
nkeynes@991
  1550
    MOVL_r32_rspdisp( REG_EAX, 0 );
nkeynes@991
  1551
    MEM_READ_BYTE_FOR_WRITE(REG_EAX, REG_EDX);
nkeynes@991
  1552
    MOVL_rspdisp_r32( 0, REG_EAX );
nkeynes@991
  1553
    XORL_imms_r32( imm, REG_EDX );
nkeynes@991
  1554
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1555
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1556
:}
nkeynes@361
  1557
XTRCT Rm, Rn {:
nkeynes@671
  1558
    COUNT_INST(I_XTRCT);
nkeynes@991
  1559
    load_reg( REG_EAX, Rm );
nkeynes@991
  1560
    load_reg( REG_ECX, Rn );
nkeynes@991
  1561
    SHLL_imm_r32( 16, REG_EAX );
nkeynes@991
  1562
    SHRL_imm_r32( 16, REG_ECX );
nkeynes@991
  1563
    ORL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1564
    store_reg( REG_ECX, Rn );
nkeynes@417
  1565
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1566
:}
nkeynes@359
  1567
nkeynes@359
  1568
/* Data move instructions */
nkeynes@359
  1569
MOV Rm, Rn {:  
nkeynes@671
  1570
    COUNT_INST(I_MOV);
nkeynes@991
  1571
    load_reg( REG_EAX, Rm );
nkeynes@991
  1572
    store_reg( REG_EAX, Rn );
nkeynes@359
  1573
:}
nkeynes@359
  1574
MOV #imm, Rn {:  
nkeynes@671
  1575
    COUNT_INST(I_MOVI);
nkeynes@995
  1576
    MOVL_imm32_r32( imm, REG_EAX );
nkeynes@991
  1577
    store_reg( REG_EAX, Rn );
nkeynes@359
  1578
:}
nkeynes@359
  1579
MOV.B Rm, @Rn {:  
nkeynes@671
  1580
    COUNT_INST(I_MOVB);
nkeynes@991
  1581
    load_reg( REG_EAX, Rn );
nkeynes@991
  1582
    load_reg( REG_EDX, Rm );
nkeynes@991
  1583
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1584
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1585
:}
nkeynes@359
  1586
MOV.B Rm, @-Rn {:  
nkeynes@671
  1587
    COUNT_INST(I_MOVB);
nkeynes@991
  1588
    load_reg( REG_EAX, Rn );
nkeynes@991
  1589
    LEAL_r32disp_r32( REG_EAX, -1, REG_EAX );
nkeynes@991
  1590
    load_reg( REG_EDX, Rm );
nkeynes@991
  1591
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@991
  1592
    ADDL_imms_rbpdisp( -1, REG_OFFSET(r[Rn]) );
nkeynes@417
  1593
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1594
:}
nkeynes@359
  1595
MOV.B Rm, @(R0, Rn) {:  
nkeynes@671
  1596
    COUNT_INST(I_MOVB);
nkeynes@991
  1597
    load_reg( REG_EAX, 0 );
nkeynes@991
  1598
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@991
  1599
    load_reg( REG_EDX, Rm );
nkeynes@991
  1600
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1601
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1602
:}
nkeynes@359
  1603
MOV.B R0, @(disp, GBR) {:  
nkeynes@671
  1604
    COUNT_INST(I_MOVB);
nkeynes@995
  1605
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1606
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1607
    load_reg( REG_EDX, 0 );
nkeynes@991
  1608
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1609
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1610
:}
nkeynes@359
  1611
MOV.B R0, @(disp, Rn) {:  
nkeynes@671
  1612
    COUNT_INST(I_MOVB);
nkeynes@991
  1613
    load_reg( REG_EAX, Rn );
nkeynes@991
  1614
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1615
    load_reg( REG_EDX, 0 );
nkeynes@991
  1616
    MEM_WRITE_BYTE( REG_EAX, REG_EDX );
nkeynes@417
  1617
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1618
:}
nkeynes@359
  1619
MOV.B @Rm, Rn {:  
nkeynes@671
  1620
    COUNT_INST(I_MOVB);
nkeynes@991
  1621
    load_reg( REG_EAX, Rm );
nkeynes@991
  1622
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1623
    store_reg( REG_EAX, Rn );
nkeynes@417
  1624
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1625
:}
nkeynes@359
  1626
MOV.B @Rm+, Rn {:  
    /* Post-increment byte load: reads the byte at the address in Rm into Rn
     * and advances Rm by 1. */
nkeynes@671
  1627
    COUNT_INST(I_MOVB);
nkeynes@991
  1628
    load_reg( REG_EAX, Rm );
nkeynes@991
  1629
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@939
  1630
    /* When Rm and Rn are the same register the increment is not emitted:
     * the store_reg() below overwrites that register with the loaded value
     * anyway. The register is only bumped after the read has been emitted. */
    if( Rm != Rn ) {
nkeynes@991
  1631
    	ADDL_imms_rbpdisp( 1, REG_OFFSET(r[Rm]) );
nkeynes@939
  1632
    }
nkeynes@991
  1633
    store_reg( REG_EAX, Rn );
nkeynes@417
  1634
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1635
:}
nkeynes@359
  1636
MOV.B @(R0, Rm), Rn {:  
nkeynes@671
  1637
    COUNT_INST(I_MOVB);
nkeynes@991
  1638
    load_reg( REG_EAX, 0 );
nkeynes@991
  1639
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
nkeynes@991
  1640
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1641
    store_reg( REG_EAX, Rn );
nkeynes@417
  1642
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1643
:}
nkeynes@359
  1644
MOV.B @(disp, GBR), R0 {:  
nkeynes@671
  1645
    COUNT_INST(I_MOVB);
nkeynes@995
  1646
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1647
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1648
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1649
    store_reg( REG_EAX, 0 );
nkeynes@417
  1650
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1651
:}
nkeynes@359
  1652
MOV.B @(disp, Rm), R0 {:  
nkeynes@671
  1653
    COUNT_INST(I_MOVB);
nkeynes@991
  1654
    load_reg( REG_EAX, Rm );
nkeynes@991
  1655
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1656
    MEM_READ_BYTE( REG_EAX, REG_EAX );
nkeynes@991
  1657
    store_reg( REG_EAX, 0 );
nkeynes@417
  1658
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  1659
:}
nkeynes@374
  1660
MOV.L Rm, @Rn {:
    /* 32-bit store of Rm to the address in Rn, with an inline fast path for
     * addresses whose top six bits match 0xE0000000 (the 64MB window
     * 0xE0000000-0xE3FFFFFF); everything else goes through MEM_WRITE_LONG. */
nkeynes@671
  1661
    COUNT_INST(I_MOVL);
nkeynes@991
  1662
    load_reg( REG_EAX, Rn );
nkeynes@991
  1663
    check_walign32(REG_EAX);
nkeynes@991
  1664
    /* Test the region bits on a copy in ECX so EAX keeps the full address. */
    MOVL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1665
    ANDL_imms_r32( 0xFC000000, REG_ECX );
nkeynes@991
  1666
    CMPL_imms_r32( 0xE0000000, REG_ECX );
nkeynes@991
  1667
    JNE_label( notsq );
nkeynes@991
  1668
    /* Fast path: reduce the address to a longword-aligned byte offset in
     * the range 0..0x3C and write Rm directly into the store_queue field.
     * NOTE(review): MOVL_r32_sib arguments look like (src, scale, base,
     * index, disp) with EBP as the base - confirm against its definition. */
    ANDL_imms_r32( 0x3C, REG_EAX );
nkeynes@991
  1669
    load_reg( REG_EDX, Rm );
nkeynes@991
  1670
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
nkeynes@991
  1671
    JMP_label(end);
nkeynes@930
  1672
    JMP_TARGET(notsq);
nkeynes@991
  1673
    /* General path: ordinary memory write of Rm to the address in EAX. */
    load_reg( REG_EDX, Rm );
nkeynes@991
  1674
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@930
  1675
    JMP_TARGET(end);
nkeynes@417
  1676
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1677
:}
nkeynes@361
  1678
MOV.L Rm, @-Rn {:  
nkeynes@671
  1679
    COUNT_INST(I_MOVL);
nkeynes@991
  1680
    load_reg( REG_EAX, Rn );
nkeynes@991
  1681
    ADDL_imms_r32( -4, REG_EAX );
nkeynes@991
  1682
    check_walign32( REG_EAX );
nkeynes@991
  1683
    load_reg( REG_EDX, Rm );
nkeynes@991
  1684
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  1685
    ADDL_imms_rbpdisp( -4, REG_OFFSET(r[Rn]) );
nkeynes@417
  1686
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1687
:}
nkeynes@361
  1688
MOV.L Rm, @(R0, Rn) {:  
nkeynes@671
  1689
    COUNT_INST(I_MOVL);
nkeynes@991
  1690
    load_reg( REG_EAX, 0 );
nkeynes@991
  1691
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@991
  1692
    check_walign32( REG_EAX );
nkeynes@991
  1693
    load_reg( REG_EDX, Rm );
nkeynes@991
  1694
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@417
  1695
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1696
:}
nkeynes@361
  1697
MOV.L R0, @(disp, GBR) {:  
nkeynes@671
  1698
    COUNT_INST(I_MOVL);
nkeynes@995
  1699
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1700
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1701
    check_walign32( REG_EAX );
nkeynes@991
  1702
    load_reg( REG_EDX, 0 );
nkeynes@991
  1703
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@417
  1704
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1705
:}
nkeynes@361
  1706
MOV.L Rm, @(disp, Rn) {:  
    /* 32-bit store of Rm to the address Rn + disp, with an inline fast path
     * for addresses whose top six bits match 0xE0000000 (the 64MB window
     * 0xE0000000-0xE3FFFFFF); everything else goes through MEM_WRITE_LONG. */
nkeynes@671
  1707
    COUNT_INST(I_MOVL);
nkeynes@991
  1708
    load_reg( REG_EAX, Rn );
nkeynes@991
  1709
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1710
    check_walign32( REG_EAX );
nkeynes@991
  1711
    /* Test the region bits on a copy in ECX so EAX keeps the full address. */
    MOVL_r32_r32( REG_EAX, REG_ECX );
nkeynes@991
  1712
    ANDL_imms_r32( 0xFC000000, REG_ECX );
nkeynes@991
  1713
    CMPL_imms_r32( 0xE0000000, REG_ECX );
nkeynes@991
  1714
    JNE_label( notsq );
nkeynes@991
  1715
    /* Fast path: reduce the address to a longword-aligned byte offset in
     * the range 0..0x3C and write Rm directly into the store_queue field.
     * NOTE(review): MOVL_r32_sib arguments look like (src, scale, base,
     * index, disp) with EBP as the base - confirm against its definition. */
    ANDL_imms_r32( 0x3C, REG_EAX );
nkeynes@991
  1716
    load_reg( REG_EDX, Rm );
nkeynes@991
  1717
    MOVL_r32_sib( REG_EDX, 0, REG_EBP, REG_EAX, REG_OFFSET(store_queue) );
nkeynes@991
  1718
    JMP_label(end);
nkeynes@930
  1719
    JMP_TARGET(notsq);
nkeynes@991
  1720
    /* General path: ordinary memory write of Rm to the address in EAX. */
    load_reg( REG_EDX, Rm );
nkeynes@991
  1721
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@930
  1722
    JMP_TARGET(end);
nkeynes@417
  1723
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1724
:}
nkeynes@361
  1725
MOV.L @Rm, Rn {:  
nkeynes@671
  1726
    COUNT_INST(I_MOVL);
nkeynes@991
  1727
    load_reg( REG_EAX, Rm );
nkeynes@991
  1728
    check_ralign32( REG_EAX );
nkeynes@991
  1729
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1730
    store_reg( REG_EAX, Rn );
nkeynes@417
  1731
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1732
:}
nkeynes@361
  1733
MOV.L @Rm+, Rn {:  
nkeynes@671
  1734
    COUNT_INST(I_MOVL);
nkeynes@991
  1735
    load_reg( REG_EAX, Rm );
nkeynes@991
  1736
    check_ralign32( REG_EAX );
nkeynes@991
  1737
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@939
  1738
    if( Rm != Rn ) {
nkeynes@991
  1739
    	ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
nkeynes@939
  1740
    }
nkeynes@991
  1741
    store_reg( REG_EAX, Rn );
nkeynes@417
  1742
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1743
:}
nkeynes@361
  1744
MOV.L @(R0, Rm), Rn {:  
nkeynes@671
  1745
    COUNT_INST(I_MOVL);
nkeynes@991
  1746
    load_reg( REG_EAX, 0 );
nkeynes@991
  1747
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
nkeynes@991
  1748
    check_ralign32( REG_EAX );
nkeynes@991
  1749
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1750
    store_reg( REG_EAX, Rn );
nkeynes@417
  1751
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1752
:}
nkeynes@361
  1753
MOV.L @(disp, GBR), R0 {:
nkeynes@671
  1754
    COUNT_INST(I_MOVL);
nkeynes@995
  1755
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1756
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1757
    check_ralign32( REG_EAX );
nkeynes@991
  1758
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1759
    store_reg( REG_EAX, 0 );
nkeynes@417
  1760
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1761
:}
nkeynes@361
  1762
MOV.L @(disp, PC), Rn {:  
nkeynes@671
  1763
    COUNT_INST(I_MOVLPC);
nkeynes@374
  1764
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1765
	SLOTILLEGAL();
nkeynes@374
  1766
    } else {
nkeynes@388
  1767
	uint32_t target = (pc & 0xFFFFFFFC) + disp + 4;
nkeynes@1125
  1768
	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
nkeynes@586
  1769
	    // If the target address is in the same page as the code, it's
nkeynes@586
  1770
	    // pretty safe to just ref it directly and circumvent the whole
nkeynes@586
  1771
	    // memory subsystem. (this is a big performance win)
nkeynes@586
  1772
nkeynes@586
  1773
	    // FIXME: There's a corner-case that's not handled here when
nkeynes@586
  1774
	    // the current code-page is in the ITLB but not in the UTLB.
nkeynes@586
  1775
	    // (should generate a TLB miss although need to test SH4 
nkeynes@586
  1776
	    // behaviour to confirm) Unlikely to be anyone depending on this
nkeynes@586
  1777
	    // behaviour though.
nkeynes@586
  1778
	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
nkeynes@991
  1779
	    MOVL_moffptr_eax( ptr );
nkeynes@388
  1780
	} else {
nkeynes@586
  1781
	    // Note: we use sh4r.pc for the calc as we could be running at a
nkeynes@586
  1782
	    // different virtual address than the translation was done with,
nkeynes@586
  1783
	    // but we can safely assume that the low bits are the same.
nkeynes@995
  1784
	    MOVL_imm32_r32( (pc-sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_EAX );
nkeynes@991
  1785
	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1786
	    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@586
  1787
	    sh4_x86.tstate = TSTATE_NONE;
nkeynes@388
  1788
	}
nkeynes@991
  1789
	store_reg( REG_EAX, Rn );
nkeynes@374
  1790
    }
nkeynes@361
  1791
:}
nkeynes@361
  1792
MOV.L @(disp, Rm), Rn {:  
nkeynes@671
  1793
    COUNT_INST(I_MOVL);
nkeynes@991
  1794
    load_reg( REG_EAX, Rm );
nkeynes@991
  1795
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1796
    check_ralign32( REG_EAX );
nkeynes@991
  1797
    MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  1798
    store_reg( REG_EAX, Rn );
nkeynes@417
  1799
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1800
:}
nkeynes@361
  1801
MOV.W Rm, @Rn {:  
nkeynes@671
  1802
    COUNT_INST(I_MOVW);
nkeynes@991
  1803
    load_reg( REG_EAX, Rn );
nkeynes@991
  1804
    check_walign16( REG_EAX );
nkeynes@991
  1805
    load_reg( REG_EDX, Rm );
nkeynes@991
  1806
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@417
  1807
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1808
:}
nkeynes@361
  1809
MOV.W Rm, @-Rn {:  
nkeynes@671
  1810
    COUNT_INST(I_MOVW);
nkeynes@991
  1811
    load_reg( REG_EAX, Rn );
nkeynes@991
  1812
    check_walign16( REG_EAX );
nkeynes@991
  1813
    LEAL_r32disp_r32( REG_EAX, -2, REG_EAX );
nkeynes@991
  1814
    load_reg( REG_EDX, Rm );
nkeynes@991
  1815
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@991
  1816
    ADDL_imms_rbpdisp( -2, REG_OFFSET(r[Rn]) );
nkeynes@417
  1817
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1818
:}
nkeynes@361
  1819
MOV.W Rm, @(R0, Rn) {:  
nkeynes@671
  1820
    COUNT_INST(I_MOVW);
nkeynes@991
  1821
    load_reg( REG_EAX, 0 );
nkeynes@991
  1822
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@991
  1823
    check_walign16( REG_EAX );
nkeynes@991
  1824
    load_reg( REG_EDX, Rm );
nkeynes@991
  1825
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@417
  1826
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1827
:}
nkeynes@361
  1828
MOV.W R0, @(disp, GBR) {:  
nkeynes@671
  1829
    COUNT_INST(I_MOVW);
nkeynes@995
  1830
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1831
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1832
    check_walign16( REG_EAX );
nkeynes@991
  1833
    load_reg( REG_EDX, 0 );
nkeynes@991
  1834
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@417
  1835
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1836
:}
nkeynes@361
  1837
MOV.W R0, @(disp, Rn) {:  
nkeynes@671
  1838
    COUNT_INST(I_MOVW);
nkeynes@991
  1839
    load_reg( REG_EAX, Rn );
nkeynes@991
  1840
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1841
    check_walign16( REG_EAX );
nkeynes@991
  1842
    load_reg( REG_EDX, 0 );
nkeynes@991
  1843
    MEM_WRITE_WORD( REG_EAX, REG_EDX );
nkeynes@417
  1844
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1845
:}
nkeynes@361
  1846
MOV.W @Rm, Rn {:  
nkeynes@671
  1847
    COUNT_INST(I_MOVW);
nkeynes@991
  1848
    load_reg( REG_EAX, Rm );
nkeynes@991
  1849
    check_ralign16( REG_EAX );
nkeynes@991
  1850
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1851
    store_reg( REG_EAX, Rn );
nkeynes@417
  1852
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1853
:}
nkeynes@361
  1854
MOV.W @Rm+, Rn {:  
nkeynes@671
  1855
    COUNT_INST(I_MOVW);
nkeynes@991
  1856
    load_reg( REG_EAX, Rm );
nkeynes@991
  1857
    check_ralign16( REG_EAX );
nkeynes@991
  1858
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@939
  1859
    if( Rm != Rn ) {
nkeynes@991
  1860
        ADDL_imms_rbpdisp( 2, REG_OFFSET(r[Rm]) );
nkeynes@939
  1861
    }
nkeynes@991
  1862
    store_reg( REG_EAX, Rn );
nkeynes@417
  1863
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1864
:}
nkeynes@361
  1865
MOV.W @(R0, Rm), Rn {:  
nkeynes@671
  1866
    COUNT_INST(I_MOVW);
nkeynes@991
  1867
    load_reg( REG_EAX, 0 );
nkeynes@991
  1868
    ADDL_rbpdisp_r32( REG_OFFSET(r[Rm]), REG_EAX );
nkeynes@991
  1869
    check_ralign16( REG_EAX );
nkeynes@991
  1870
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1871
    store_reg( REG_EAX, Rn );
nkeynes@417
  1872
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1873
:}
nkeynes@361
  1874
MOV.W @(disp, GBR), R0 {:  
nkeynes@671
  1875
    COUNT_INST(I_MOVW);
nkeynes@995
  1876
    MOVL_rbpdisp_r32( R_GBR, REG_EAX );
nkeynes@991
  1877
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1878
    check_ralign16( REG_EAX );
nkeynes@991
  1879
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1880
    store_reg( REG_EAX, 0 );
nkeynes@417
  1881
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1882
:}
nkeynes@361
  1883
MOV.W @(disp, PC), Rn {:  
    /* PC-relative 16-bit load into Rn. Rejected via SLOTILLEGAL() when
     * translated in a delay slot; otherwise either reads the operand straight
     * from the host pointer for the code page (fast path) or performs a
     * normal memory read at a runtime-computed address (slow path). */
nkeynes@671
  1884
    COUNT_INST(I_MOVW);
nkeynes@374
  1885
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1886
	SLOTILLEGAL();
nkeynes@374
  1887
    } else {
nkeynes@586
  1888
	// See comments for MOV.L @(disp, PC), Rn
nkeynes@586
  1889
	// Unlike the MOV.L form, pc is used as-is here (no & 0xFFFFFFFC masking).
	uint32_t target = pc + disp + 4;
nkeynes@1125
  1890
	if( sh4_x86.fastmem && IS_IN_ICACHE(target) ) {
nkeynes@586
  1891
	    sh4ptr_t ptr = GET_ICACHE_PTR(target);
nkeynes@991
  1892
	    // NOTE(review): MOVL_moffptr_eax looks like a 32-bit load, i.e. it
	    // would read 2 bytes past the 16-bit operand before the low half is
	    // sign-extended below - confirm this cannot run off the end of the
	    // cached page.
	    MOVL_moffptr_eax( ptr );
nkeynes@991
  1893
	    MOVSXL_r16_r32( REG_EAX, REG_EAX );
nkeynes@586
  1894
	} else {
nkeynes@995
  1895
	    // Address = runtime R_PC value + offset of the operand relative to
	    // the start of the block being translated.
	    MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4, REG_EAX );
nkeynes@991
  1896
	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1897
	    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@586
  1898
	    // Only this path resets tstate; the fast path above leaves it as is.
	    sh4_x86.tstate = TSTATE_NONE;
nkeynes@586
  1899
	}
nkeynes@991
  1900
	store_reg( REG_EAX, Rn );
nkeynes@374
  1901
    }
nkeynes@361
  1902
:}
nkeynes@361
  1903
MOV.W @(disp, Rm), R0 {:  
nkeynes@671
  1904
    /* Emits a word read from the address r[Rm] + disp (built in EAX and
     * checked with check_ralign16); the value read is stored to register 0. */
    COUNT_INST(I_MOVW);
nkeynes@991
  1905
    load_reg( REG_EAX, Rm );
nkeynes@991
  1906
    ADDL_imms_r32( disp, REG_EAX );
nkeynes@991
  1907
    check_ralign16( REG_EAX );
nkeynes@991
  1908
    MEM_READ_WORD( REG_EAX, REG_EAX );
nkeynes@991
  1909
    store_reg( REG_EAX, 0 );
nkeynes@417
  1910
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1911
:}
nkeynes@361
  1912
MOVA @(disp, PC), R0 {:  
nkeynes@671
  1913
    /* Stores a PC-relative address into register 0. Rejected via
     * SLOTILLEGAL() when translated in a delay slot. */
    COUNT_INST(I_MOVA);
nkeynes@374
  1914
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1915
	SLOTILLEGAL();
nkeynes@374
  1916
    } else {
nkeynes@995
  1917
	/* Block-relative offset of (pc with its low two bits cleared) + disp + 4;
	 * the runtime R_PC is added to it by the next emitted instruction. */
	MOVL_imm32_r32( (pc - sh4_x86.block_start_pc) + disp + 4 - (pc&0x03), REG_ECX );
nkeynes@991
  1918
	ADDL_rbpdisp_r32( R_PC, REG_ECX );
nkeynes@991
  1919
	store_reg( REG_ECX, 0 );
nkeynes@586
  1920
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@374
  1921
    }
nkeynes@361
  1922
:}
nkeynes@361
  1923
MOVCA.L R0, @Rn {:  
nkeynes@671
  1924
    /* Emitted as a long write of register 0 (in EDX) to the address held in
     * Rn (in EAX), after check_walign32; no cache-specific handling is
     * visible in this rule. */
    COUNT_INST(I_MOVCA);
nkeynes@991
  1925
    load_reg( REG_EAX, Rn );
nkeynes@991
  1926
    check_walign32( REG_EAX );
nkeynes@991
  1927
    load_reg( REG_EDX, 0 );
nkeynes@991
  1928
    MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@417
  1929
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@361
  1930
:}
nkeynes@359
  1931
nkeynes@359
  1932
/* Control transfer instructions */
nkeynes@374
  1933
BF disp {:
nkeynes@671
  1934
    /* Conditional branch without delay slot. Rejected via SLOTILLEGAL() when
     * itself in a delay slot; otherwise returns 2. */
    COUNT_INST(I_BF);
nkeynes@374
  1935
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1936
	SLOTILLEGAL();
nkeynes@374
  1937
    } else {
nkeynes@586
  1938
	sh4vma_t target = disp + pc + 4;
nkeynes@991
  1939
	/* JT_label skips the block exit below (presumably when T is set,
	 * i.e. when BF does not branch - confirm against the macro). */
	JT_label( nottaken );
nkeynes@586
  1940
	exit_block_rel(target, pc+2 );
nkeynes@380
  1941
	JMP_TARGET(nottaken);
nkeynes@408
  1942
	return 2;
nkeynes@374
  1943
    }
nkeynes@374
  1944
:}
nkeynes@374
  1945
BF/S disp {:
nkeynes@671
  1946
    /* Conditional branch with delay slot. Rejected via SLOTILLEGAL() when
     * itself in a delay slot. Returns 2 when the delay-slot instruction at
     * pc+2 is UNTRANSLATABLE (block exits to the emulator), otherwise 4
     * after translating the delay slot on both the taken and not-taken paths. */
    COUNT_INST(I_BFS);
nkeynes@374
  1947
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1948
	SLOTILLEGAL();
nkeynes@374
  1949
    } else {
nkeynes@590
  1950
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@601
  1951
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@995
  1952
	    /* Build the new PC in EAX: block-relative pc+4, plus disp unless
	     * JT_label jumps over the add, plus the runtime R_PC. */
	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@991
  1953
	    JT_label(nottaken);
nkeynes@991
  1954
	    ADDL_imms_r32( disp, REG_EAX );
nkeynes@601
  1955
	    JMP_TARGET(nottaken);
nkeynes@991
  1956
	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@995
  1957
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  1958
	    exit_block_emu(pc+2);
nkeynes@601
  1959
	    sh4_x86.branch_taken = TRUE;
nkeynes@601
  1960
	    return 2;
nkeynes@601
  1961
	} else {
nkeynes@1197
  1962
	    LOAD_t();
nkeynes@601
  1963
	    sh4vma_t target = disp + pc + 4;
nkeynes@991
  1964
	    /* Emit the conditional jump with a zero rel32 placeholder and
	     * remember where that 32-bit displacement sits so it can be
	     * patched once the taken path has been emitted. */
	    JCC_cc_rel32(sh4_x86.tstate,0);
nkeynes@991
  1965
	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
nkeynes@879
  1966
	    int save_tstate = sh4_x86.tstate;
nkeynes@601
  1967
	    sh4_translate_instruction(pc+2);
nkeynes@1091
  1968
            sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
nkeynes@601
  1969
	    exit_block_rel( target, pc+4 );
nkeynes@601
  1970
	    
nkeynes@601
  1971
	    // not taken
nkeynes@601
  1972
	    /* Displacement is measured from the end of the 4-byte rel32 field
	     * to the current output position. */
	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
nkeynes@879
  1973
	    /* Restore the tstate seen at the jump before translating the
	     * delay slot a second time for the fall-through path. */
	    sh4_x86.tstate = save_tstate;
nkeynes@601
  1974
	    sh4_translate_instruction(pc+2);
nkeynes@601
  1975
	    return 4;
nkeynes@417
  1976
	}
nkeynes@374
  1977
    }
nkeynes@374
  1978
:}
nkeynes@374
  1979
BRA disp {:  
nkeynes@671
  1980
    /* Unconditional branch with delay slot. Rejected via SLOTILLEGAL() when
     * itself in a delay slot. Returns 2 if the delay-slot instruction at pc+2
     * is UNTRANSLATABLE, otherwise 4. */
    COUNT_INST(I_BRA);
nkeynes@374
  1981
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  1982
	SLOTILLEGAL();
nkeynes@374
  1983
    } else {
nkeynes@590
  1984
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  1985
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  1986
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@995
  1987
	    /* R_NEW_PC = runtime R_PC + block-relative offset of the target,
	     * then leave the block through exit_block_emu. */
	    MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  1988
	    ADDL_imms_r32( pc + disp + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  1989
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  1990
	    exit_block_emu(pc+2);
nkeynes@601
  1991
	    return 2;
nkeynes@601
  1992
	} else {
nkeynes@601
  1993
	    /* Translate the delay slot inline, then exit to the target. */
	    sh4_translate_instruction( pc + 2 );
nkeynes@601
  1994
	    exit_block_rel( disp + pc + 4, pc+4 );
nkeynes@601
  1995
	    return 4;
nkeynes@601
  1996
	}
nkeynes@374
  1997
    }
nkeynes@374
  1998
:}
nkeynes@374
  1999
BRAF Rn {:  
nkeynes@671
  2000
    /* Register-relative branch with delay slot. Rejected via SLOTILLEGAL()
     * when itself in a delay slot. Returns 2 if the delay-slot instruction
     * is UNTRANSLATABLE, otherwise 4. */
    COUNT_INST(I_BRAF);
nkeynes@374
  2001
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2002
	SLOTILLEGAL();
nkeynes@374
  2003
    } else {
nkeynes@995
  2004
	/* R_NEW_PC = runtime R_PC + block-relative (pc + 4) + r[Rn]; written
	 * before the delay slot is translated. */
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  2005
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@991
  2006
	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@995
  2007
	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@590
  2008
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@417
  2009
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@409
  2010
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2011
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2012
	    exit_block_emu(pc+2);
nkeynes@601
  2013
	    return 2;
nkeynes@601
  2014
	} else {
nkeynes@601
  2015
	    sh4_translate_instruction( pc + 2 );
nkeynes@974
  2016
	    exit_block_newpcset(pc+4);
nkeynes@601
  2017
	    return 4;
nkeynes@601
  2018
	}
nkeynes@374
  2019
    }
nkeynes@374
  2020
:}
nkeynes@374
  2021
BSR disp {:  
nkeynes@671
  2022
    /* Branch to subroutine with delay slot. Rejected via SLOTILLEGAL() when
     * itself in a delay slot. Returns 2 if the delay-slot instruction is
     * UNTRANSLATABLE, otherwise 4. */
    COUNT_INST(I_BSR);
nkeynes@374
  2023
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2024
	SLOTILLEGAL();
nkeynes@374
  2025
    } else {
nkeynes@995
  2026
	/* R_PR = runtime R_PC + block-relative (pc + 4). */
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  2027
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  2028
	MOVL_r32_rbpdisp( REG_EAX, R_PR );
nkeynes@590
  2029
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2030
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2031
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@601
  2032
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@991
  2033
	    /* EAX still holds the value written to R_PR; adding disp gives
	     * the branch target stored to R_NEW_PC. */
	    ADDL_imms_r32( disp, REG_EAX );
nkeynes@995
  2034
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  2035
	    exit_block_emu(pc+2);
nkeynes@601
  2036
	    return 2;
nkeynes@601
  2037
	} else {
nkeynes@601
  2038
	    sh4_translate_instruction( pc + 2 );
nkeynes@601
  2039
	    exit_block_rel( disp + pc + 4, pc+4 );
nkeynes@601
  2040
	    return 4;
nkeynes@601
  2041
	}
nkeynes@374
  2042
    }
nkeynes@374
  2043
:}
nkeynes@374
  2044
BSRF Rn {:  
nkeynes@671
  2045
    /* Register-relative branch to subroutine with delay slot. Rejected via
     * SLOTILLEGAL() when itself in a delay slot. Returns 2 if the delay-slot
     * instruction is UNTRANSLATABLE, otherwise 4. */
    COUNT_INST(I_BSRF);
nkeynes@374
  2046
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2047
	SLOTILLEGAL();
nkeynes@374
  2048
    } else {
nkeynes@995
  2049
	/* R_PR = runtime R_PC + block-relative (pc + 4);
	 * R_NEW_PC = that value + r[Rn]. */
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  2050
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  2051
	MOVL_r32_rbpdisp( REG_EAX, R_PR );
nkeynes@991
  2052
	ADDL_rbpdisp_r32( REG_OFFSET(r[Rn]), REG_EAX );
nkeynes@995
  2053
	MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@590
  2054
nkeynes@601
  2055
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@417
  2056
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@409
  2057
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2058
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2059
	    exit_block_emu(pc+2);
nkeynes@601
  2060
	    return 2;
nkeynes@601
  2061
	} else {
nkeynes@601
  2062
	    sh4_translate_instruction( pc + 2 );
nkeynes@974
  2063
	    exit_block_newpcset(pc+4);
nkeynes@601
  2064
	    return 4;
nkeynes@601
  2065
	}
nkeynes@374
  2066
    }
nkeynes@374
  2067
:}
nkeynes@374
  2068
BT disp {:
nkeynes@671
  2069
    /* Conditional branch without delay slot; same shape as BF but using
     * JF_label. Rejected via SLOTILLEGAL() when itself in a delay slot;
     * otherwise returns 2. */
    COUNT_INST(I_BT);
nkeynes@374
  2070
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2071
	SLOTILLEGAL();
nkeynes@374
  2072
    } else {
nkeynes@586
  2073
	sh4vma_t target = disp + pc + 4;
nkeynes@991
  2074
	/* JF_label skips the block exit below (presumably when T is clear,
	 * i.e. when BT does not branch - confirm against the macro). */
	JF_label( nottaken );
nkeynes@586
  2075
	exit_block_rel(target, pc+2 );
nkeynes@380
  2076
	JMP_TARGET(nottaken);
nkeynes@408
  2077
	return 2;
nkeynes@374
  2078
    }
nkeynes@374
  2079
:}
nkeynes@374
  2080
BT/S disp {:
nkeynes@671
  2081
    /* Conditional branch with delay slot; mirrors BF/S with the opposite
     * condition (JF_label, and tstate^1 on the patched jump). Rejected via
     * SLOTILLEGAL() when itself in a delay slot. Returns 2 when the
     * delay-slot instruction is UNTRANSLATABLE, otherwise 4. */
    COUNT_INST(I_BTS);
nkeynes@374
  2082
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2083
	SLOTILLEGAL();
nkeynes@374
  2084
    } else {
nkeynes@590
  2085
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@601
  2086
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@995
  2087
	    /* Build the new PC in EAX: block-relative pc+4, plus disp unless
	     * JF_label jumps over the add, plus the runtime R_PC. */
	    MOVL_imm32_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@991
  2088
	    JF_label(nottaken);
nkeynes@991
  2089
	    ADDL_imms_r32( disp, REG_EAX );
nkeynes@601
  2090
	    JMP_TARGET(nottaken);
nkeynes@991
  2091
	    ADDL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@995
  2092
	    MOVL_r32_rbpdisp( REG_EAX, R_NEW_PC );
nkeynes@601
  2093
	    exit_block_emu(pc+2);
nkeynes@601
  2094
	    sh4_x86.branch_taken = TRUE;
nkeynes@601
  2095
	    return 2;
nkeynes@601
  2096
	} else {
nkeynes@1197
  2097
		LOAD_t();
nkeynes@991
  2098
	    /* Jump with the inverted condition and a zero rel32 placeholder;
	     * patch points at that 32-bit displacement for later fix-up. */
	    JCC_cc_rel32(sh4_x86.tstate^1,0);
nkeynes@991
  2099
	    uint32_t *patch = ((uint32_t *)xlat_output)-1;
nkeynes@991
  2100
nkeynes@879
  2101
	    int save_tstate = sh4_x86.tstate;
nkeynes@601
  2102
	    sh4_translate_instruction(pc+2);
nkeynes@1091
  2103
            sh4_x86.in_delay_slot = DELAY_PC; /* Cleared by sh4_translate_instruction */
nkeynes@601
  2104
	    exit_block_rel( disp + pc + 4, pc+4 );
nkeynes@601
  2105
	    // not taken
nkeynes@601
  2106
	    /* Displacement is measured from the end of the 4-byte rel32 field
	     * to the current output position. */
	    *patch = (xlat_output - ((uint8_t *)patch)) - 4;
nkeynes@879
  2107
	    sh4_x86.tstate = save_tstate;
nkeynes@601
  2108
	    sh4_translate_instruction(pc+2);
nkeynes@601
  2109
	    return 4;
nkeynes@417
  2110
	}
nkeynes@374
  2111
    }
nkeynes@374
  2112
:}
nkeynes@374
  2113
JMP @Rn {:  
nkeynes@671
  2114
    /* Indirect jump with delay slot: R_NEW_PC is set from Rn before the
     * delay slot is handled. Rejected via SLOTILLEGAL() when itself in a
     * delay slot. Returns 2 if the delay-slot instruction is UNTRANSLATABLE,
     * otherwise 4. */
    COUNT_INST(I_JMP);
nkeynes@374
  2115
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2116
	SLOTILLEGAL();
nkeynes@374
  2117
    } else {
nkeynes@991
  2118
	load_reg( REG_ECX, Rn );
nkeynes@995
  2119
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@590
  2120
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2121
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2122
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2123
	    exit_block_emu(pc+2);
nkeynes@601
  2124
	    return 2;
nkeynes@601
  2125
	} else {
nkeynes@601
  2126
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2127
	    exit_block_newpcset(pc+4);
nkeynes@601
  2128
	    return 4;
nkeynes@601
  2129
	}
nkeynes@374
  2130
    }
nkeynes@374
  2131
:}
nkeynes@374
  2132
JSR @Rn {:  
nkeynes@671
  2133
    /* Indirect subroutine call with delay slot. Rejected via SLOTILLEGAL()
     * when itself in a delay slot. Returns 2 if the delay-slot instruction
     * is UNTRANSLATABLE, otherwise 4. */
    COUNT_INST(I_JSR);
nkeynes@374
  2134
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2135
	SLOTILLEGAL();
nkeynes@374
  2136
    } else {
nkeynes@995
  2137
	/* R_PR = runtime R_PC + block-relative (pc + 4); R_NEW_PC = r[Rn]. */
	MOVL_rbpdisp_r32( R_PC, REG_EAX );
nkeynes@991
  2138
	ADDL_imms_r32( pc + 4 - sh4_x86.block_start_pc, REG_EAX );
nkeynes@995
  2139
	MOVL_r32_rbpdisp( REG_EAX, R_PR );
nkeynes@991
  2140
	load_reg( REG_ECX, Rn );
nkeynes@995
  2141
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@601
  2142
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2143
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2144
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@601
  2145
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2146
	    exit_block_emu(pc+2);
nkeynes@601
  2147
	    return 2;
nkeynes@601
  2148
	} else {
nkeynes@601
  2149
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2150
	    exit_block_newpcset(pc+4);
nkeynes@601
  2151
	    return 4;
nkeynes@601
  2152
	}
nkeynes@374
  2153
    }
nkeynes@374
  2154
:}
nkeynes@374
  2155
RTE {:  
nkeynes@671
  2156
    /* Return from exception, with delay slot. Rejected via SLOTILLEGAL()
     * when itself in a delay slot; otherwise guarded by check_priv().
     * Returns 2 if the delay-slot instruction is UNTRANSLATABLE, else 4. */
    COUNT_INST(I_RTE);
nkeynes@374
  2157
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2158
	SLOTILLEGAL();
nkeynes@374
  2159
    } else {
nkeynes@408
  2160
	check_priv();
nkeynes@995
  2161
	/* R_NEW_PC = SPC, then sh4_write_sr is called with SSR. Note the SR
	 * write is emitted before the delay-slot instruction is translated. */
	MOVL_rbpdisp_r32( R_SPC, REG_ECX );
nkeynes@995
  2162
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@995
  2163
	MOVL_rbpdisp_r32( R_SSR, REG_EAX );
nkeynes@995
  2164
	CALL1_ptr_r32( sh4_write_sr, REG_EAX );
nkeynes@590
  2165
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@377
  2166
	/* Translator state that can no longer be assumed after the SR write
	 * is reset: FPU-enable check, tstate and sh4_mode. */
	sh4_x86.fpuen_checked = FALSE;
nkeynes@417
  2167
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@409
  2168
	sh4_x86.branch_taken = TRUE;
nkeynes@1112
  2169
    sh4_x86.sh4_mode = SH4_MODE_UNKNOWN;
nkeynes@601
  2170
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2171
	    exit_block_emu(pc+2);
nkeynes@601
  2172
	    return 2;
nkeynes@601
  2173
	} else {
nkeynes@601
  2174
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2175
	    exit_block_newpcset(pc+4);
nkeynes@601
  2176
	    return 4;
nkeynes@601
  2177
	}
nkeynes@374
  2178
    }
nkeynes@374
  2179
:}
nkeynes@374
  2180
RTS {:  
nkeynes@671
  2181
    /* Return from subroutine, with delay slot: R_NEW_PC is set from R_PR
     * before the delay slot is handled. Rejected via SLOTILLEGAL() when
     * itself in a delay slot. Returns 2 if the delay-slot instruction is
     * UNTRANSLATABLE, otherwise 4. */
    COUNT_INST(I_RTS);
nkeynes@374
  2182
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2183
	SLOTILLEGAL();
nkeynes@374
  2184
    } else {
nkeynes@995
  2185
	MOVL_rbpdisp_r32( R_PR, REG_ECX );
nkeynes@995
  2186
	MOVL_r32_rbpdisp( REG_ECX, R_NEW_PC );
nkeynes@590
  2187
	sh4_x86.in_delay_slot = DELAY_PC;
nkeynes@409
  2188
	sh4_x86.branch_taken = TRUE;
nkeynes@601
  2189
	if( UNTRANSLATABLE(pc+2) ) {
nkeynes@601
  2190
	    exit_block_emu(pc+2);
nkeynes@601
  2191
	    return 2;
nkeynes@601
  2192
	} else {
nkeynes@601
  2193
	    sh4_translate_instruction(pc+2);
nkeynes@974
  2194
	    exit_block_newpcset(pc+4);
nkeynes@601
  2195
	    return 4;
nkeynes@601
  2196
	}
nkeynes@374
  2197
    }
nkeynes@374
  2198
:}
nkeynes@374
  2199
TRAPA #imm {:  
nkeynes@671
  2200
    /* Software trap: advances R_PC past this instruction, calls
     * sh4_raise_trap(imm) and exits the block. Rejected via SLOTILLEGAL()
     * when in a delay slot; otherwise returns 2. */
    COUNT_INST(I_TRAPA);
nkeynes@374
  2201
    if( sh4_x86.in_delay_slot ) {
nkeynes@374
  2202
	SLOTILLEGAL();
nkeynes@374
  2203
    } else {
nkeynes@995
  2204
	/* R_PC += block-relative offset of the instruction after TRAPA. */
	MOVL_imm32_r32( pc+2 - sh4_x86.block_start_pc, REG_ECX );   // 5
nkeynes@991
  2205
	ADDL_r32_rbpdisp( REG_ECX, R_PC );
nkeynes@995
  2206
	MOVL_imm32_r32( imm, REG_EAX );
nkeynes@995
  2207
	CALL1_ptr_r32( sh4_raise_trap, REG_EAX );
nkeynes@417
  2208
	sh4_x86.tstate = TSTATE_NONE;
nkeynes@974
  2209
	exit_block_pcset(pc+2);
nkeynes@409
  2210
	sh4_x86.branch_taken = TRUE;
nkeynes@408
  2211
	return 2;
nkeynes@374
  2212
    }
nkeynes@374
  2213
:}
nkeynes@374
  2214
UNDEF {:  
nkeynes@671
  2215
    /* Undefined opcode: exits the block with an exception. In a delay slot
     * the exception is EXC_SLOT_ILLEGAL, reported against pc-2; otherwise
     * EXC_ILLEGAL against pc. Only the non-delay-slot branch returns
     * explicitly here. */
    COUNT_INST(I_UNDEF);
nkeynes@374
  2216
    if( sh4_x86.in_delay_slot ) {
nkeynes@1191
  2217
	exit_block_exc(EXC_SLOT_ILLEGAL, pc-2, 4);    
nkeynes@374
  2218
    } else {
nkeynes@1191
  2219
	exit_block_exc(EXC_ILLEGAL, pc, 2);    
nkeynes@408
  2220
	return 2;
nkeynes@374
  2221
    }
nkeynes@368
  2222
:}
nkeynes@374
  2223
nkeynes@374
  2224
CLRMAC {:  
nkeynes@671
  2225
    /* Zeroes EAX and stores it to both R_MACL and R_MACH. */
    COUNT_INST(I_CLRMAC);
nkeynes@991
  2226
    XORL_r32_r32(REG_EAX, REG_EAX);
nkeynes@995
  2227
    MOVL_r32_rbpdisp( REG_EAX, R_MACL );
nkeynes@995
  2228
    MOVL_r32_rbpdisp( REG_EAX, R_MACH );
nkeynes@417
  2229
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@368
  2230
:}
nkeynes@374
  2231
CLRS {:
nkeynes@671
  2232
    /* Clears the x86 carry flag, then stores the carry condition as a byte
     * to R_S (so R_S receives 0). */
    COUNT_INST(I_CLRS);
nkeynes@374
  2233
    CLC();
nkeynes@991
  2234
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
nkeynes@872
  2235
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@368
  2236
:}
nkeynes@374
  2237
CLRT {:  
nkeynes@671
  2238
    /* Clears the x86 carry flag and records it as T via SETC_t(); tstate
     * is set to TSTATE_C since the carry condition now matches T. */
    COUNT_INST(I_CLRT);
nkeynes@374
  2239
    CLC();
nkeynes@374
  2240
    SETC_t();
nkeynes@417
  2241
    sh4_x86.tstate = TSTATE_C;
nkeynes@359
  2242
:}
nkeynes@374
  2243
SETS {:  
nkeynes@671
  2244
    /* Sets the x86 carry flag, then stores the carry condition as a byte
     * to R_S (so R_S receives 1). */
    COUNT_INST(I_SETS);
nkeynes@374
  2245
    STC();
nkeynes@991
  2246
    SETCCB_cc_rbpdisp(X86_COND_C, R_S);
nkeynes@872
  2247
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@359
  2248
:}
nkeynes@374
  2249
SETT {:  
nkeynes@671
  2250
    /* Sets the x86 carry flag and records it as T via SETC_t(); tstate is
     * set to TSTATE_C since the carry condition now matches T. */
    COUNT_INST(I_SETT);
nkeynes@374
  2251
    STC();
nkeynes@374
  2252
    SETC_t();
nkeynes@417
  2253
    sh4_x86.tstate = TSTATE_C;
nkeynes@374
  2254
:}
nkeynes@359
  2255
nkeynes@375
  2256
/* Floating point moves */
nkeynes@375
  2257
FMOV FRm, FRn {:  
nkeynes@671
  2258
    /* FP register-to-register move after check_fpuen(). With
     * sh4_x86.double_size set, both halves (dr0/dr1) are copied through
     * EAX/ECX; otherwise a single fr value is copied through EAX. */
    COUNT_INST(I_FMOV1);
nkeynes@377
  2259
    check_fpuen();
nkeynes@901
  2260
    if( sh4_x86.double_size ) {
nkeynes@991
  2261
        load_dr0( REG_EAX, FRm );
nkeynes@991
  2262
        load_dr1( REG_ECX, FRm );
nkeynes@991
  2263
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2264
        store_dr1( REG_ECX, FRn );
nkeynes@901
  2265
    } else {
nkeynes@991
  2266
        load_fr( REG_EAX, FRm ); // SZ=0 branch
nkeynes@991
  2267
        store_fr( REG_EAX, FRn );
nkeynes@901
  2268
    }
nkeynes@375
  2269
:}
nkeynes@416
  2270
FMOV FRm, @Rn {: 
nkeynes@671
  2271
    /* FP store to the address in Rn. With double_size set the value is
     * written as two longs (dr0 at Rn, dr1 at Rn+4) after check_walign64;
     * otherwise one long after check_walign32. */
    COUNT_INST(I_FMOV2);
nkeynes@586
  2272
    check_fpuen();
nkeynes@991
  2273
    load_reg( REG_EAX, Rn );
nkeynes@901
  2274
    if( sh4_x86.double_size ) {
nkeynes@991
  2275
        check_walign64( REG_EAX );
nkeynes@991
  2276
        load_dr0( REG_EDX, FRm );
nkeynes@991
  2277
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2278
        /* Rn is reloaded for the second half rather than reusing EAX. */
        load_reg( REG_EAX, Rn );
nkeynes@991
  2279
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2280
        load_dr1( REG_EDX, FRm );
nkeynes@991
  2281
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@901
  2282
    } else {
nkeynes@991
  2283
        check_walign32( REG_EAX );
nkeynes@991
  2284
        load_fr( REG_EDX, FRm );
nkeynes@991
  2285
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@901
  2286
    }
nkeynes@417
  2287
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@375
  2288
:}
nkeynes@375
  2289
FMOV @Rm, FRn {:  
nkeynes@671
  2290
    /* FP load from the address in Rm. With double_size set the value is
     * read as two longs (Rm into dr0, Rm+4 into dr1) after check_ralign64;
     * otherwise one long after check_ralign32. */
    COUNT_INST(I_FMOV5);
nkeynes@586
  2291
    check_fpuen();
nkeynes@991
  2292
    load_reg( REG_EAX, Rm );
nkeynes@901
  2293
    if( sh4_x86.double_size ) {
nkeynes@991
  2294
        check_ralign64( REG_EAX );
nkeynes@991
  2295
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2296
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2297
        /* EAX now holds the data just read, so Rm is reloaded for the
         * second half. */
        load_reg( REG_EAX, Rm );
nkeynes@991
  2298
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2299
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2300
        store_dr1( REG_EAX, FRn );
nkeynes@901
  2301
    } else {
nkeynes@991
  2302
        check_ralign32( REG_EAX );
nkeynes@991
  2303
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2304
        store_fr( REG_EAX, FRn );
nkeynes@901
  2305
    }
nkeynes@417
  2306
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@375
  2307
:}
nkeynes@377
  2308
FMOV FRm, @-Rn {:  
nkeynes@671
  2309
    /* FP store with pre-decrement. The writes go to Rn-8/Rn-4 (double_size)
     * or Rn-4 (single); the in-memory r[Rn] is only decremented after the
     * last MEM_WRITE_LONG has been emitted. */
    COUNT_INST(I_FMOV3);
nkeynes@586
  2310
    check_fpuen();
nkeynes@991
  2311
    load_reg( REG_EAX, Rn );
nkeynes@901
  2312
    if( sh4_x86.double_size ) {
nkeynes@991
  2313
        /* Alignment is checked on the undecremented address. */
        check_walign64( REG_EAX );
nkeynes@991
  2314
        LEAL_r32disp_r32( REG_EAX, -8, REG_EAX );
nkeynes@991
  2315
        load_dr0( REG_EDX, FRm );
nkeynes@991
  2316
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2317
        load_reg( REG_EAX, Rn );
nkeynes@991
  2318
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
nkeynes@991
  2319
        load_dr1( REG_EDX, FRm );
nkeynes@991
  2320
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2321
        ADDL_imms_rbpdisp(-8,REG_OFFSET(r[Rn]));
nkeynes@901
  2322
    } else {
nkeynes@991
  2323
        check_walign32( REG_EAX );
nkeynes@991
  2324
        LEAL_r32disp_r32( REG_EAX, -4, REG_EAX );
nkeynes@991
  2325
        load_fr( REG_EDX, FRm );
nkeynes@991
  2326
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2327
        ADDL_imms_rbpdisp(-4,REG_OFFSET(r[Rn]));
nkeynes@901
  2328
    }
nkeynes@417
  2329
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2330
:}
nkeynes@416
  2331
FMOV @Rm+, FRn {:
nkeynes@671
  2332
    /* FP load with post-increment. Reads one long (single) or two longs at
     * Rm and Rm+4 (double_size); r[Rm] is advanced by 4 or 8 only after the
     * last read has been emitted. */
    COUNT_INST(I_FMOV6);
nkeynes@586
  2333
    check_fpuen();
nkeynes@991
  2334
    load_reg( REG_EAX, Rm );
nkeynes@901
  2335
    if( sh4_x86.double_size ) {
nkeynes@991
  2336
        check_ralign64( REG_EAX );
nkeynes@991
  2337
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2338
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2339
        load_reg( REG_EAX, Rm );
nkeynes@991
  2340
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2341
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2342
        store_dr1( REG_EAX, FRn );
nkeynes@991
  2343
        ADDL_imms_rbpdisp( 8, REG_OFFSET(r[Rm]) );
nkeynes@901
  2344
    } else {
nkeynes@991
  2345
        check_ralign32( REG_EAX );
nkeynes@991
  2346
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2347
        store_fr( REG_EAX, FRn );
nkeynes@991
  2348
        ADDL_imms_rbpdisp( 4, REG_OFFSET(r[Rm]) );
nkeynes@901
  2349
    }
nkeynes@417
  2350
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2351
:}
nkeynes@377
  2352
FMOV FRm, @(R0, Rn) {:  
nkeynes@671
  2353
    /* FP store to the address r[Rn] + r[0]. With double_size set, two longs
     * are written (dr0 at the address, dr1 at address+4) after
     * check_walign64; otherwise one long after check_walign32. */
    COUNT_INST(I_FMOV4);
nkeynes@586
  2354
    check_fpuen();
nkeynes@991
  2355
    load_reg( REG_EAX, Rn );
nkeynes@991
  2356
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@901
  2357
    if( sh4_x86.double_size ) {
nkeynes@991
  2358
        check_walign64( REG_EAX );
nkeynes@991
  2359
        load_dr0( REG_EDX, FRm );
nkeynes@991
  2360
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@991
  2361
        /* The address is recomputed from r[Rn] + r[0] for the second half. */
        load_reg( REG_EAX, Rn );
nkeynes@991
  2362
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@991
  2363
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2364
        load_dr1( REG_EDX, FRm );
nkeynes@991
  2365
        MEM_WRITE_LONG( REG_EAX, REG_EDX );
nkeynes@901
  2366
    } else {
nkeynes@991
  2367
        check_walign32( REG_EAX );
nkeynes@991
  2368
        load_fr( REG_EDX, FRm );
nkeynes@991
  2369
        MEM_WRITE_LONG( REG_EAX, REG_EDX ); // 12
nkeynes@901
  2370
    }
nkeynes@417
  2371
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2372
:}
nkeynes@377
  2373
FMOV @(R0, Rm), FRn {:  
nkeynes@671
  2374
    /* FP load from the address r[Rm] + r[0]. With double_size set, two longs
     * are read (into dr0 from the address, dr1 from address+4) after
     * check_ralign64; otherwise one long after check_ralign32. */
    COUNT_INST(I_FMOV7);
nkeynes@586
  2375
    check_fpuen();
nkeynes@991
  2376
    load_reg( REG_EAX, Rm );
nkeynes@991
  2377
    ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@901
  2378
    if( sh4_x86.double_size ) {
nkeynes@991
  2379
        check_ralign64( REG_EAX );
nkeynes@991
  2380
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2381
        store_dr0( REG_EAX, FRn );
nkeynes@991
  2382
        /* EAX was overwritten by the read, so the address is recomputed. */
        load_reg( REG_EAX, Rm );
nkeynes@991
  2383
        ADDL_rbpdisp_r32( REG_OFFSET(r[0]), REG_EAX );
nkeynes@991
  2384
        LEAL_r32disp_r32( REG_EAX, 4, REG_EAX );
nkeynes@991
  2385
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2386
        store_dr1( REG_EAX, FRn );
nkeynes@901
  2387
    } else {
nkeynes@991
  2388
        check_ralign32( REG_EAX );
nkeynes@991
  2389
        MEM_READ_LONG( REG_EAX, REG_EAX );
nkeynes@991
  2390
        store_fr( REG_EAX, FRn );
nkeynes@901
  2391
    }
nkeynes@417
  2392
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2393
:}
nkeynes@377
  2394
FLDI0 FRn {:  /* IFF PR=0 */
nkeynes@671
  2395
    /* Stores the all-zero bit pattern (single-precision 0.0) to FRn; nothing
     * is emitted beyond check_fpuen() when sh4_x86.double_prec is set. */
    COUNT_INST(I_FLDI0);
nkeynes@377
  2396
    check_fpuen();
nkeynes@901
  2397
    if( sh4_x86.double_prec == 0 ) {
nkeynes@991
  2398
        XORL_r32_r32( REG_EAX, REG_EAX );
nkeynes@991
  2399
        store_fr( REG_EAX, FRn );
nkeynes@901
  2400
    }
nkeynes@417
  2401
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2402
:}
nkeynes@377
  2403
FLDI1 FRn {:  /* IFF PR=0 */
nkeynes@671
  2404
    /* Stores 0x3F800000 (the IEEE-754 single-precision encoding of 1.0) to
     * FRn; nothing is emitted beyond check_fpuen() when double_prec is set. */
    COUNT_INST(I_FLDI1);
nkeynes@377
  2405
    check_fpuen();
nkeynes@901
  2406
    if( sh4_x86.double_prec == 0 ) {
nkeynes@995
  2407
        MOVL_imm32_r32( 0x3F800000, REG_EAX );
nkeynes@991
  2408
        store_fr( REG_EAX, FRn );
nkeynes@901
  2409
    }
nkeynes@377
  2410
:}
nkeynes@377
  2411
nkeynes@377
  2412
FLOAT FPUL, FRn {:  
nkeynes@671
  2413
    /* Integer-to-float conversion: FILD pushes R_FPUL onto the x87 stack,
     * which is then popped into FRn as a double (pop_dr) or single (pop_fr)
     * according to sh4_x86.double_prec. */
    COUNT_INST(I_FLOAT);
nkeynes@377
  2414
    check_fpuen();
nkeynes@991
  2415
    FILD_rbpdisp(R_FPUL);
nkeynes@901
  2416
    if( sh4_x86.double_prec ) {
nkeynes@901
  2417
        pop_dr( FRn );
nkeynes@901
  2418
    } else {
nkeynes@901
  2419
        pop_fr( FRn );
nkeynes@901
  2420
    }
nkeynes@377
  2421
:}
nkeynes@377
  2422
FTRC FRm, FPUL {:  
nkeynes@671
  2423
    /* Float-to-integer conversion into R_FPUL with saturation. The operand
     * is pushed on the x87 stack and compared against min_int and max_int;
     * if any of the three range/unordered jumps fires, FPUL receives the
     * limit ECX currently points at, otherwise the value is converted with
     * FISTP under the trunc_fcw control word. */
    COUNT_INST(I_FTRC);
nkeynes@377
  2424
    check_fpuen();
nkeynes@901
  2425
    if( sh4_x86.double_prec ) {
nkeynes@901
  2426
        push_dr( FRm );
nkeynes@901
  2427
    } else {
nkeynes@901
  2428
        push_fr( FRm );
nkeynes@901
  2429
    }
nkeynes@1197
  2430
    /* Lower bound: ECX = &min_int while the sat/sat2 jumps are taken. */
    MOVP_immptr_rptr( &min_int, REG_ECX );
nkeynes@1197
  2431
    FILD_r32disp( REG_ECX, 0 );
nkeynes@1197
  2432
    FCOMIP_st(1);              
nkeynes@1197
  2433
    JAE_label( sat );     
nkeynes@1197
  2434
    /* Parity set after FCOMIP indicates an unordered compare (NaN operand). */
    JP_label( sat2 );       
nkeynes@995
  2435
    /* Upper bound: ECX = &max_int while the sat3 jump is taken. */
    MOVP_immptr_rptr( &max_int, REG_ECX );
nkeynes@991
  2436
    FILD_r32disp( REG_ECX, 0 );
nkeynes@388
  2437
    FCOMIP_st(1);
nkeynes@1197
  2438
    JNA_label( sat3 );
nkeynes@995
  2439
    /* In range: save the current x87 control word to save_fcw, switch to
     * trunc_fcw for the FISTP, then restore the saved control word. */
    MOVP_immptr_rptr( &save_fcw, REG_EAX );
nkeynes@991
  2440
    FNSTCW_r32disp( REG_EAX, 0 );
nkeynes@995
  2441
    MOVP_immptr_rptr( &trunc_fcw, REG_EDX );
nkeynes@991
  2442
    FLDCW_r32disp( REG_EDX, 0 );
nkeynes@995
  2443
    FISTP_rbpdisp(R_FPUL);             
nkeynes@991
  2444
    FLDCW_r32disp( REG_EAX, 0 );
nkeynes@995
  2445
    JMP_label(end);             
nkeynes@388
  2446
nkeynes@388
  2447
    /* Saturation path shared by all three jumps: store the int that ECX
     * points at to FPUL and pop the unconverted operand off the x87 stack. */
    JMP_TARGET(sat);
nkeynes@388
  2448
    JMP_TARGET(sat2);
nkeynes@1197
  2449
    JMP_TARGET(sat3);
nkeynes@991
  2450
    MOVL_r32disp_r32( REG_ECX, 0, REG_ECX ); // 2
nkeynes@995
  2451
    MOVL_r32_rbpdisp( REG_ECX, R_FPUL );
nkeynes@388
  2452
    FPOP_st();
nkeynes@388
  2453
    JMP_TARGET(end);
nkeynes@417
  2454
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2455
:}
nkeynes@377
  2456
FLDS FRm, FPUL {:  
nkeynes@671
  2457
    /* Copies FRm to R_FPUL through EAX as a raw 32-bit move (no x87
     * conversion is emitted). */
    COUNT_INST(I_FLDS);
nkeynes@377
  2458
    check_fpuen();
nkeynes@991
  2459
    load_fr( REG_EAX, FRm );
nkeynes@995
  2460
    MOVL_r32_rbpdisp( REG_EAX, R_FPUL );
nkeynes@377
  2461
:}
nkeynes@377
  2462
FSTS FPUL, FRn {:  
nkeynes@671
  2463
    /* Copies R_FPUL to FRn through EAX as a raw 32-bit move (no x87
     * conversion is emitted). */
    COUNT_INST(I_FSTS);
nkeynes@377
  2464
    check_fpuen();
nkeynes@995
  2465
    MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
nkeynes@991
  2466
    store_fr( REG_EAX, FRn );
nkeynes@377
  2467
:}
nkeynes@377
  2468
FCNVDS FRm, FPUL {:  
nkeynes@671
  2469
    /* Double-to-single conversion via the x87 stack: push_dr(FRm) then
     * pop_fpul(). Only emitted when sh4_x86.double_prec is set; otherwise
     * nothing beyond check_fpuen() is generated. */
    COUNT_INST(I_FCNVDS);
nkeynes@377
  2470
    check_fpuen();
nkeynes@901
  2471
    if( sh4_x86.double_prec ) {
nkeynes@901
  2472
        push_dr( FRm );
nkeynes@901
  2473
        pop_fpul();
nkeynes@901
  2474
    }
nkeynes@377
  2475
:}
nkeynes@377
  2476
FCNVSD FPUL, FRn {:  
nkeynes@671
  2477
    /* Single-to-double conversion via the x87 stack: push_fpul() then
     * pop_dr(FRn). Only emitted when sh4_x86.double_prec is set; otherwise
     * nothing beyond check_fpuen() is generated. */
    COUNT_INST(I_FCNVSD);
nkeynes@377
  2478
    check_fpuen();
nkeynes@901
  2479
    if( sh4_x86.double_prec ) {
nkeynes@901
  2480
        push_fpul();
nkeynes@901
  2481
        pop_dr( FRn );
nkeynes@901
  2482
    }
nkeynes@377
  2483
:}
nkeynes@375
  2484
nkeynes@359
  2485
/* Floating point instructions */
nkeynes@374
  2486
FABS FRn {:  
nkeynes@671
  2487
    /* Absolute value in place: FRn is pushed on the x87 stack, FABS_st0()
     * is applied, and the result is popped back to FRn, using the dr or fr
     * push/pop pair according to sh4_x86.double_prec. */
    COUNT_INST(I_FABS);
nkeynes@377
  2488
    check_fpuen();
nkeynes@901
  2489
    if( sh4_x86.double_prec ) {
nkeynes@901
  2490
        push_dr(FRn);
nkeynes@901
  2491
        FABS_st0();
nkeynes@901
  2492
        pop_dr(FRn);
nkeynes@901
  2493
    } else {
nkeynes@901
  2494
        push_fr(FRn);
nkeynes@901
  2495
        FABS_st0();
nkeynes@901
  2496
        pop_fr(FRn);
nkeynes@901
  2497
    }
nkeynes@374
  2498
:}
nkeynes@377
  2499
FADD FRm, FRn {:  
nkeynes@671
  2500
    /* FRn = FRn + FRm via the x87 stack: both operands are pushed,
     * FADDP_st(1) combines them, and the result is popped to FRn. The dr or
     * fr push/pop helpers are chosen by sh4_x86.double_prec. */
    COUNT_INST(I_FADD);
nkeynes@377
  2501
    check_fpuen();
nkeynes@901
  2502
    if( sh4_x86.double_prec ) {
nkeynes@901
  2503
        push_dr(FRm);
nkeynes@901
  2504
        push_dr(FRn);
nkeynes@901
  2505
        FADDP_st(1);
nkeynes@901
  2506
        pop_dr(FRn);
nkeynes@901
  2507
    } else {
nkeynes@901
  2508
        push_fr(FRm);
nkeynes@901
  2509
        push_fr(FRn);
nkeynes@901
  2510
        FADDP_st(1);
nkeynes@901
  2511
        pop_fr(FRn);
nkeynes@901
  2512
    }
nkeynes@375
  2513
:}
nkeynes@377
  2514
FDIV FRm, FRn {:  
nkeynes@671
  2515
    /* FRn = FRn / FRm via the x87 stack. Push order matters here: FRn is
     * pushed first so that it sits in st(1) (the dividend, assuming
     * FDIVP_st(1) emits the x87 FDIVP st(1),st(0) form) when FDIVP_st(1)
     * executes. */
    COUNT_INST(I_FDIV);
nkeynes@377
  2516
    check_fpuen();
nkeynes@901
  2517
    if( sh4_x86.double_prec ) {
nkeynes@901
  2518
        push_dr(FRn);
nkeynes@901
  2519
        push_dr(FRm);
nkeynes@901
  2520
        FDIVP_st(1);
nkeynes@901
  2521
        pop_dr(FRn);
nkeynes@901
  2522
    } else {
nkeynes@901
  2523
        push_fr(FRn);
nkeynes@901
  2524
        push_fr(FRm);
nkeynes@901
  2525
        FDIVP_st(1);
nkeynes@901
  2526
        pop_fr(FRn);
nkeynes@901
  2527
    }
nkeynes@375
  2528
:}
nkeynes@375
  2529
FMAC FR0, FRm, FRn {:  
nkeynes@671
  2530
    /* FRn = (FR0 * FRm) + FRn, emitted as a separate x87 multiply
     * (FMULP_st) and add (FADDP_st) rather than a fused operation. The dr
     * or fr push/pop helpers are chosen by sh4_x86.double_prec. */
    COUNT_INST(I_FMAC);
nkeynes@377
  2531
    check_fpuen();
nkeynes@901
  2532
    if( sh4_x86.double_prec ) {
nkeynes@901
  2533
        push_dr( 0 );
nkeynes@901
  2534
        push_dr( FRm );
nkeynes@901
  2535
        FMULP_st(1);
nkeynes@901
  2536
        push_dr( FRn );
nkeynes@901
  2537
        FADDP_st(1);
nkeynes@901
  2538
        pop_dr( FRn );
nkeynes@901
  2539
    } else {
nkeynes@901
  2540
        push_fr( 0 );
nkeynes@901
  2541
        push_fr( FRm );
nkeynes@901
  2542
        FMULP_st(1);
nkeynes@901
  2543
        push_fr( FRn );
nkeynes@901
  2544
        FADDP_st(1);
nkeynes@901
  2545
        pop_fr( FRn );
nkeynes@901
  2546
    }
nkeynes@375
  2547
:}
nkeynes@375
  2548
nkeynes@377
  2549
FMUL FRm, FRn {:  
nkeynes@671
  2550
    /* FRn = FRn * FRm via the x87 stack: both operands are pushed,
     * FMULP_st(1) combines them, and the result is popped to FRn. The dr or
     * fr push/pop helpers are chosen by sh4_x86.double_prec. */
    COUNT_INST(I_FMUL);
nkeynes@377
  2551
    check_fpuen();
nkeynes@901
  2552
    if( sh4_x86.double_prec ) {
nkeynes@901
  2553
        push_dr(FRm);
nkeynes@901
  2554
        push_dr(FRn);
nkeynes@901
  2555
        FMULP_st(1);
nkeynes@901
  2556
        pop_dr(FRn);
nkeynes@901
  2557
    } else {
nkeynes@901
  2558
        push_fr(FRm);
nkeynes@901
  2559
        push_fr(FRn);
nkeynes@901
  2560
        FMULP_st(1);
nkeynes@901
  2561
        pop_fr(FRn);
nkeynes@901
  2562
    }
nkeynes@377
  2563
:}
nkeynes@377
  2564
FNEG FRn {:  
nkeynes@671
  2565
    /* Negation in place: FRn is pushed on the x87 stack, FCHS_st0() flips
     * its sign, and the result is popped back to FRn, using the dr or fr
     * push/pop pair according to sh4_x86.double_prec. */
    COUNT_INST(I_FNEG);
nkeynes@377
  2566
    check_fpuen();
nkeynes@901
  2567
    if( sh4_x86.double_prec ) {
nkeynes@901
  2568
        push_dr(FRn);
nkeynes@901
  2569
        FCHS_st0();
nkeynes@901
  2570
        pop_dr(FRn);
nkeynes@901
  2571
    } else {
nkeynes@901
  2572
        push_fr(FRn);
nkeynes@901
  2573
        FCHS_st0();
nkeynes@901
  2574
        pop_fr(FRn);
nkeynes@901
  2575
    }
nkeynes@377
  2576
:}
nkeynes@377
  2577
FSRRA FRn {:  
nkeynes@671
  2578
    /* Reciprocal square root, FRn = 1 / sqrt(FRn), computed with full x87
     * FSQRT and FDIVP. Only emitted when sh4_x86.double_prec == 0. */
    COUNT_INST(I_FSRRA);
nkeynes@377
  2579
    check_fpuen();
nkeynes@901
  2580
    if( sh4_x86.double_prec == 0 ) {
nkeynes@901
  2581
        /* 1.0 is pushed first so it ends up in st(1) as the dividend. */
        FLD1_st0();
nkeynes@901
  2582
        push_fr(FRn);
nkeynes@901
  2583
        FSQRT_st0();
nkeynes@901
  2584
        FDIVP_st(1);
nkeynes@901
  2585
        pop_fr(FRn);
nkeynes@901
  2586
    }
nkeynes@377
  2587
:}
nkeynes@377
  2588
FSQRT FRn {:  
nkeynes@671
  2589
    /* Square root in place: FRn is pushed on the x87 stack, FSQRT_st0() is
     * applied, and the result is popped back to FRn, using the dr or fr
     * push/pop pair according to sh4_x86.double_prec. */
    COUNT_INST(I_FSQRT);
nkeynes@377
  2590
    check_fpuen();
nkeynes@901
  2591
    if( sh4_x86.double_prec ) {
nkeynes@901
  2592
        push_dr(FRn);
nkeynes@901
  2593
        FSQRT_st0();
nkeynes@901
  2594
        pop_dr(FRn);
nkeynes@901
  2595
    } else {
nkeynes@901
  2596
        push_fr(FRn);
nkeynes@901
  2597
        FSQRT_st0();
nkeynes@901
  2598
        pop_fr(FRn);
nkeynes@901
  2599
    }
nkeynes@377
  2600
:}
nkeynes@377
  2601
FSUB FRm, FRn {:  
nkeynes@671
  2602
    /* FRn = FRn - FRm via the x87 stack. Push order matters here: FRn is
     * pushed first so that it sits in st(1) (the minuend, assuming
     * FSUBP_st(1) emits the x87 FSUBP st(1),st(0) form) when FSUBP_st(1)
     * executes. */
    COUNT_INST(I_FSUB);
nkeynes@377
  2603
    check_fpuen();
nkeynes@901
  2604
    if( sh4_x86.double_prec ) {
nkeynes@901
  2605
        push_dr(FRn);
nkeynes@901
  2606
        push_dr(FRm);
nkeynes@901
  2607
        FSUBP_st(1);
nkeynes@901
  2608
        pop_dr(FRn);
nkeynes@901
  2609
    } else {
nkeynes@901
  2610
        push_fr(FRn);
nkeynes@901
  2611
        push_fr(FRm);
nkeynes@901
  2612
        FSUBP_st(1);
nkeynes@901
  2613
        pop_fr(FRn);
nkeynes@901
  2614
    }
nkeynes@377
  2615
:}
nkeynes@377
  2616
nkeynes@377
  2617
FCMP/EQ FRm, FRn {:  
nkeynes@671
  2618
    /* Floating-point equality compare into R_T. R_T is written as 1 only
     * when the compare reports both the E condition and no parity, so an
     * unordered result (which FCOMIP flags with ZF and PF both set) yields
     * 0. */
    COUNT_INST(I_FCMPEQ);
nkeynes@377
  2619
    check_fpuen();
nkeynes@901
  2620
    if( sh4_x86.double_prec ) {
nkeynes@901
  2621
        push_dr(FRm);
nkeynes@901
  2622
        push_dr(FRn);
nkeynes@901
  2623
    } else {
nkeynes@901
  2624
        push_fr(FRm);
nkeynes@901
  2625
        push_fr(FRn);
nkeynes@901
  2626
    }
nkeynes@1197
  2627
    /* Both scratch registers are zeroed before the compare, since XOR
     * itself modifies the x86 flags. */
    XORL_r32_r32(REG_EAX, REG_EAX);
nkeynes@1197
  2628
    XORL_r32_r32(REG_EDX, REG_EDX);
nkeynes@377
  2629
    FCOMIP_st(1);
nkeynes@1197
  2630
    /* DL = 1 when parity is clear; EAX takes EDX only on the E condition. */
    SETCCB_cc_r8(X86_COND_NP, REG_DL);
nkeynes@1197
  2631
    CMOVCCL_cc_r32_r32(X86_COND_E, REG_EDX, REG_EAX);
nkeynes@1197
  2632
    MOVL_r32_rbpdisp(REG_EAX, R_T);
nkeynes@377
  2633
    /* FCOMIP popped one operand; this pops the other. */
    FPOP_st();
nkeynes@1197
  2634
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2635
:}
nkeynes@377
  2636
FCMP/GT FRm, FRn {:  
nkeynes@671
  2637
    /* Translation action for SH4 "FCMP/GT FRm, FRn": generates x86 code that
     * compares FRn with FRm and sets T from the unsigned "above" condition
     * of the compare result. */
    COUNT_INST(I_FCMPGT);
nkeynes@377
  2638
    /* NOTE(review): presumably emits the FPU-disabled check; helper is not
     * visible here - confirm. */
    check_fpuen();
nkeynes@901
  2639
    if( sh4_x86.double_prec ) {
nkeynes@901
  2640
        /* FRm first, FRn second: FRn is the stack top and FRm is st(1) at
         * the time of the compare. */
        push_dr(FRm);
nkeynes@901
  2641
        push_dr(FRn);
nkeynes@901
  2642
    } else {
nkeynes@901
  2643
        push_fr(FRm);
nkeynes@901
  2644
        push_fr(FRn);
nkeynes@901
  2645
    }
nkeynes@377
  2646
    /* Compare stack top with st(1) and pop once. On x86 an unordered result
     * sets CF and ZF, so the "above" condition is false for NaN operands. */
    FCOMIP_st(1);
nkeynes@377
  2647
    /* NOTE(review): presumably stores the "above" condition into T; the
     * macro body is outside this view - confirm. */
    SETA_t();
nkeynes@377
  2648
    /* Discard the remaining operand left on the FP stack. */
    FPOP_st();
nkeynes@901
  2649
    /* Record that T currently corresponds to the "above" condition.
     * NOTE(review): this assumes FPOP_st() leaves EFLAGS untouched so the
     * cached condition is still valid - confirm. */
    sh4_x86.tstate = TSTATE_A;
nkeynes@377
  2650
:}
nkeynes@377
  2651
nkeynes@377
  2652
FSCA FPUL, FRn {:  
nkeynes@671
  2653
    /* Translation action for SH4 "FSCA FPUL, FRn": generates a call to the
     * sh4_fsca helper with the FPUL value and the address of the destination
     * register pair. Code is only generated when sh4_x86.double_prec is 0;
     * in double-precision mode nothing is emitted for the operation itself. */
    COUNT_INST(I_FSCA);
nkeynes@377
  2654
    /* NOTE(review): presumably emits the FPU-disabled check; helper is not
     * visible here - confirm. */
    check_fpuen();
nkeynes@901
  2655
    if( sh4_x86.double_prec == 0 ) {
nkeynes@991
  2656
        /* EDX = address of the destination pair. Masking with 0x0E clears
         * bit 0 so the index always refers to an even-numbered register. */
        LEAP_rbpdisp_rptr( REG_OFFSET(fr[0][FRn&0x0E]), REG_EDX );
nkeynes@995
  2657
        /* EAX = current FPUL value (first helper argument). */
        MOVL_rbpdisp_r32( R_FPUL, REG_EAX );
nkeynes@995
  2658
        /* NOTE(review): sh4_fsca is defined elsewhere; presumably it writes
         * the sine/cosine results through the pointer in EDX - confirm. */
        CALL2_ptr_r32_r32( sh4_fsca, REG_EAX, REG_EDX );
nkeynes@901
  2659
    }
nkeynes@417
  2660
    /* Reset unconditionally, including when no call was emitted.
     * NOTE(review): presumably because the helper call does not preserve
     * EFLAGS, so no cached T condition may be assumed - confirm. */
    sh4_x86.tstate = TSTATE_NONE;
nkeynes@377
  2661
:}
nkeynes@377
  2662
FIPR FVm, FVn {:  
nkeynes@671
  2663
    COUNT_INST(I_FIPR);
nkeynes@377
  2664
    check_fpuen();
nkeynes@901
  2665
    if( sh4_x86.double_prec == 0 ) {
nkeynes@904
  2666
        if( sh4_x86.sse3_enabled ) {
nkeynes@991
  2667
            MOVAPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVm<<2]), 4 );
nkeynes@991
  2668
            MULPS_rbpdisp_xmm( REG_OFFSET(fr[0][FVn<<2]), 4 );
nkeynes@903
  2669
            HADDPS_xmm_xmm( 4, 4 ); 
nkeynes@903
  2670
            HADDPS_xmm_xmm( 4, 4 );
nkeynes@991
  2671
            MOVSS_xmm_rbpdisp( 4, REG_OFFSET(fr[0][(FVn<<2)+2]) );
nkeynes@903
  2672
        } else {
nkeynes@904
  2673
            push_fr( FVm<<2 );