Search
lxdream.org :: lxdream/src/sh4/ia32abi.h
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/ia32abi.h
changeset 926:68f3e0fe02f1
prev907:5ecafd8d7923
next927:17b6b9e245d8
author nkeynes
date Sun Dec 14 07:50:48 2008 +0000 (13 years ago)
permissions -rw-r--r--
last change Setup a 'proper' stackframe in translated blocks. This doesn't affect performance noticeably,
but does ensure that
a) The stack is aligned correctly on OS X with no extra effort, and
b) We can't mess up the stack and crash that way anymore.
Replace all PUSH/POP instructions (outside of prologue/epilogue) with ESP-rel moves to stack
local variables.
Finally merge ia32mac and ia32abi together, since they're pretty much the same now anyway (and
thereby simplifying maintenance a good deal)
file annotate diff log raw
nkeynes@539
     1
/**
nkeynes@586
     2
 * $Id$
nkeynes@539
     3
 * 
nkeynes@926
     4
 * Provides the implementation for the ia32 ABI variant 
nkeynes@926
     5
 * (eg prologue, epilogue, and calling conventions). Stack frame is
nkeynes@926
     6
 * aligned on 16-byte boundaries for the benefit of OS X (which 
nkeynes@926
     7
 * requires it).
nkeynes@539
     8
 *
nkeynes@539
     9
 * Copyright (c) 2007 Nathan Keynes.
nkeynes@539
    10
 *
nkeynes@539
    11
 * This program is free software; you can redistribute it and/or modify
nkeynes@539
    12
 * it under the terms of the GNU General Public License as published by
nkeynes@539
    13
 * the Free Software Foundation; either version 2 of the License, or
nkeynes@539
    14
 * (at your option) any later version.
nkeynes@539
    15
 *
nkeynes@539
    16
 * This program is distributed in the hope that it will be useful,
nkeynes@539
    17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
nkeynes@539
    18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
nkeynes@539
    19
 * GNU General Public License for more details.
nkeynes@539
    20
 */
nkeynes@539
    21
nkeynes@926
    22
#ifndef lxdream_ia32mac_H
nkeynes@926
    23
#define lxdream_ia32mac_H 1
nkeynes@539
    24
nkeynes@539
    25
/* Hand the address `ptr`, cast to uint32_t, to load_imm32 for register `reg`.
 * The argument is parenthesized so that an expression argument (for example
 * `base + 1`) is cast as a whole rather than only its first operand.
 * The trailing semicolon is kept so existing call sites expand as before. */
#define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)(ptr) );
nkeynes@539
    26
nkeynes@539
    27
/**
 * Emit a call to the function at ptr. No argument registers or stack slots
 * are set up here; the caller arranges whatever the callee expects.
 *
 * NOTE(review): the previous comment stated that EAX is clobbered in order to
 * make an indirect call. Whether CALL_ptr still does that cannot be seen from
 * this file - confirm before relying on EAX surviving up to the call.
 */
static inline void call_func0( void *ptr )
{
    CALL_ptr( ptr );
}
nkeynes@539
    35
nkeynes@905
    36
#ifdef HAVE_FASTCALL
nkeynes@905
    37
static inline void call_func1( void *ptr, int arg1 )
nkeynes@905
    38
{
nkeynes@905
    39
    if( arg1 != R_EAX ) {
nkeynes@905
    40
        MOV_r32_r32( arg1, R_EAX );
nkeynes@905
    41
    }
nkeynes@926
    42
    CALL_ptr(ptr);
nkeynes@905
    43
}
nkeynes@905
    44
nkeynes@905
    45
static inline void call_func2( void *ptr, int arg1, int arg2 )
nkeynes@905
    46
{
nkeynes@905
    47
    if( arg2 != R_EDX ) {
nkeynes@905
    48
        MOV_r32_r32( arg2, R_EDX );
nkeynes@905
    49
    }
nkeynes@905
    50
    if( arg1 != R_EAX ) {
nkeynes@905
    51
        MOV_r32_r32( arg1, R_EAX );
nkeynes@905
    52
    }
nkeynes@926
    53
    CALL_ptr(ptr);
nkeynes@905
    54
}
nkeynes@905
    55
nkeynes@905
    56
/**
nkeynes@905
    57
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
nkeynes@905
    58
 * the second in arg2b
nkeynes@905
    59
 */
nkeynes@905
    60
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@905
    61
{
nkeynes@926
    62
    MOV_r32_esp8(addr, 0);
nkeynes@926
    63
    MOV_r32_esp8(arg2b, 4);
nkeynes@905
    64
    call_func2(sh4_write_long, addr, arg2a);
nkeynes@926
    65
    MOV_esp8_r32(0, R_EAX);
nkeynes@926
    66
    MOV_esp8_r32(4, R_EDX);
nkeynes@905
    67
    ADD_imm8s_r32(4, R_EAX);
nkeynes@905
    68
    call_func0(sh4_write_long);
nkeynes@905
    69
}
nkeynes@905
    70
nkeynes@905
    71
/**
nkeynes@905
    72
 * Read a double (64-bit) value from memory, writing the first word into arg2a
nkeynes@905
    73
 * and the second into arg2b. The addr must not be in EAX
nkeynes@905
    74
 */
nkeynes@905
    75
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@905
    76
{
nkeynes@926
    77
    MOV_r32_esp8(addr, 0);
nkeynes@905
    78
    call_func1(sh4_read_long, addr);
nkeynes@926
    79
    MOV_r32_esp8(R_EAX, 4);
nkeynes@926
    80
    MOV_esp8_r32(0, R_EAX);
nkeynes@905
    81
    ADD_imm8s_r32(4, R_EAX);
nkeynes@905
    82
    call_func0(sh4_read_long);
nkeynes@905
    83
    if( arg2b != R_EAX ) {
nkeynes@905
    84
        MOV_r32_r32(R_EAX, arg2b);
nkeynes@905
    85
    }
nkeynes@926
    86
    MOV_esp8_r32(4, arg2a);
nkeynes@905
    87
}
nkeynes@905
    88
#else
nkeynes@539
    89
static inline void call_func1( void *ptr, int arg1 )
nkeynes@539
    90
{
nkeynes@926
    91
    SUB_imm8s_r32( 12, R_ESP );
nkeynes@539
    92
    PUSH_r32(arg1);
nkeynes@926
    93
    CALL_ptr(ptr);
nkeynes@926
    94
    ADD_imm8s_r32( 16, R_ESP );
nkeynes@539
    95
}
nkeynes@539
    96
nkeynes@539
    97
static inline void call_func2( void *ptr, int arg1, int arg2 )
nkeynes@539
    98
{
nkeynes@926
    99
    SUB_imm8s_r32( 8, R_ESP );
nkeynes@539
   100
    PUSH_r32(arg2);
nkeynes@539
   101
    PUSH_r32(arg1);
nkeynes@926
   102
    CALL_ptr(ptr);
nkeynes@926
   103
    ADD_imm8s_r32( 16, R_ESP );
nkeynes@539
   104
}
nkeynes@539
   105
nkeynes@539
   106
/**
nkeynes@539
   107
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
nkeynes@539
   108
 * the second in arg2b
nkeynes@539
   109
 */
nkeynes@539
   110
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@539
   111
{
nkeynes@926
   112
    SUB_imm8s_r32( 8, R_ESP );
nkeynes@539
   113
    PUSH_r32(arg2b);
nkeynes@926
   114
    LEA_r32disp8_r32( addr, 4, arg2b );
nkeynes@926
   115
    PUSH_r32(arg2b);
nkeynes@926
   116
    SUB_imm8s_r32( 8, R_ESP );
nkeynes@539
   117
    PUSH_r32(arg2a);
nkeynes@539
   118
    PUSH_r32(addr);
nkeynes@926
   119
    CALL_ptr(sh4_write_long);
nkeynes@926
   120
    ADD_imm8s_r32( 16, R_ESP );
nkeynes@926
   121
    CALL_ptr(sh4_write_long);
nkeynes@926
   122
    ADD_imm8s_r32( 16, R_ESP );
nkeynes@539
   123
}
nkeynes@539
   124
nkeynes@539
   125
/**
nkeynes@539
   126
 * Read a double (64-bit) value from memory, writing the first word into arg2a
nkeynes@539
   127
 * and the second into arg2b. The addr must not be in EAX
nkeynes@539
   128
 */
nkeynes@539
   129
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@539
   130
{
nkeynes@926
   131
    SUB_imm8s_r32( 12, R_ESP );
nkeynes@539
   132
    PUSH_r32(addr);
nkeynes@926
   133
    CALL_ptr(sh4_read_long);
nkeynes@926
   134
    MOV_r32_esp8(R_EAX, 4);
nkeynes@926
   135
    ADD_imm8s_esp8(4, 0);
nkeynes@926
   136
    CALL_ptr(sh4_read_long);
nkeynes@926
   137
    if( arg2b != R_EAX ) {
nkeynes@926
   138
        MOV_r32_r32( R_EAX, arg2b );
nkeynes@926
   139
    }
nkeynes@926
   140
    MOV_esp8_r32( 4, arg2a );
nkeynes@926
   141
    ADD_imm8s_r32( 16, R_ESP );
nkeynes@539
   142
}
nkeynes@926
   143
nkeynes@905
   144
#endif
nkeynes@539
   145
nkeynes@539
   146
/**
nkeynes@539
   147
 * Emit the 'start of block' assembly. Sets up the stack frame and save
nkeynes@539
   148
 * SI/DI as required
nkeynes@926
   149
 * Allocates 8 bytes for local variables, which also has the convenient
nkeynes@926
   150
 * side-effect of aligning the stack.
nkeynes@539
   151
 */
nkeynes@901
   152
void enter_block( ) 
nkeynes@539
   153
{
nkeynes@539
   154
    PUSH_r32(R_EBP);
nkeynes@669
   155
    load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
nkeynes@926
   156
    SUB_imm8s_r32( 8, R_ESP ); 
nkeynes@926
   157
}
nkeynes@736
   158
nkeynes@926
   159
static inline void exit_block( )
nkeynes@926
   160
{
nkeynes@926
   161
    ADD_imm8s_r32( 8, R_ESP );
nkeynes@926
   162
    POP_r32(R_EBP);
nkeynes@926
   163
    RET();
nkeynes@539
   164
}
nkeynes@539
   165
nkeynes@539
   166
/**
nkeynes@926
   167
 * Exit the block with sh4r.new_pc written with the target pc
nkeynes@539
   168
 */
nkeynes@586
   169
void exit_block_pcset( sh4addr_t pc )
nkeynes@539
   170
{
nkeynes@539
   171
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@539
   172
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
nkeynes@590
   173
    load_spreg( R_EAX, R_PC );
nkeynes@590
   174
    if( sh4_x86.tlb_on ) {
nkeynes@736
   175
        call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@590
   176
    } else {
nkeynes@736
   177
        call_func1(xlat_get_code,R_EAX);
nkeynes@926
   178
    }
nkeynes@926
   179
    exit_block();
nkeynes@590
   180
}
nkeynes@590
   181
nkeynes@590
   182
/**
nkeynes@590
   183
 * Exit the block with sh4r.new_pc written with the target pc
nkeynes@590
   184
 */
nkeynes@590
   185
void exit_block_newpcset( sh4addr_t pc )
nkeynes@590
   186
{
nkeynes@590
   187
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@590
   188
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
nkeynes@590
   189
    load_spreg( R_EAX, R_NEW_PC );
nkeynes@590
   190
    store_spreg( R_EAX, R_PC );
nkeynes@586
   191
    if( sh4_x86.tlb_on ) {
nkeynes@736
   192
        call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@586
   193
    } else {
nkeynes@736
   194
        call_func1(xlat_get_code,R_EAX);
nkeynes@926
   195
    }
nkeynes@926
   196
    exit_block();
nkeynes@539
   197
}
nkeynes@539
   198
nkeynes@586
   199
nkeynes@539
   200
/**
nkeynes@539
   201
 * Exit the block to an absolute PC
nkeynes@539
   202
 */
nkeynes@926
   203
void exit_block_abs( sh4addr_t pc, sh4addr_t endpc )
nkeynes@539
   204
{
nkeynes@539
   205
    load_imm32( R_ECX, pc );                            // 5
nkeynes@539
   206
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
nkeynes@586
   207
    if( IS_IN_ICACHE(pc) ) {
nkeynes@736
   208
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
nkeynes@926
   209
        AND_imm8s_r32( 0xFC, R_EAX ); // 3
nkeynes@586
   210
    } else if( sh4_x86.tlb_on ) {
nkeynes@736
   211
        call_func1(xlat_get_code_by_vma,R_ECX);
nkeynes@586
   212
    } else {
nkeynes@736
   213
        call_func1(xlat_get_code,R_ECX);
nkeynes@586
   214
    }
nkeynes@586
   215
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@586
   216
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
nkeynes@926
   217
    exit_block();
nkeynes@586
   218
}
nkeynes@586
   219
nkeynes@586
   220
/**
nkeynes@586
   221
 * Exit the block to a relative PC
nkeynes@586
   222
 */
nkeynes@586
   223
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
nkeynes@586
   224
{
nkeynes@586
   225
    load_imm32( R_ECX, pc - sh4_x86.block_start_pc );   // 5
nkeynes@586
   226
    ADD_sh4r_r32( R_PC, R_ECX );
nkeynes@586
   227
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
nkeynes@586
   228
    if( IS_IN_ICACHE(pc) ) {
nkeynes@736
   229
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
nkeynes@926
   230
        AND_imm8s_r32( 0xFC, R_EAX ); // 3
nkeynes@586
   231
    } else if( sh4_x86.tlb_on ) {
nkeynes@736
   232
        call_func1(xlat_get_code_by_vma,R_ECX);
nkeynes@586
   233
    } else {
nkeynes@736
   234
        call_func1(xlat_get_code,R_ECX);
nkeynes@586
   235
    }
nkeynes@539
   236
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@539
   237
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
nkeynes@926
   238
    exit_block();
nkeynes@539
   239
}
nkeynes@539
   240
nkeynes@539
   241
/**
nkeynes@539
   242
 * Write the block trailer (exception handling block)
nkeynes@539
   243
 */
nkeynes@539
   244
void sh4_translate_end_block( sh4addr_t pc ) {
nkeynes@539
   245
    if( sh4_x86.branch_taken == FALSE ) {
nkeynes@736
   246
        // Didn't exit unconditionally already, so write the termination here
nkeynes@736
   247
        exit_block_rel( pc, pc );
nkeynes@539
   248
    }
nkeynes@539
   249
    if( sh4_x86.backpatch_posn != 0 ) {
nkeynes@736
   250
        unsigned int i;
nkeynes@736
   251
        // Raise exception
nkeynes@736
   252
        uint8_t *end_ptr = xlat_output;
nkeynes@736
   253
        MOV_r32_r32( R_EDX, R_ECX );
nkeynes@736
   254
        ADD_r32_r32( R_EDX, R_ECX );
nkeynes@736
   255
        ADD_r32_sh4r( R_ECX, R_PC );
nkeynes@736
   256
        MOV_moff32_EAX( &sh4_cpu_period );
nkeynes@736
   257
        MUL_r32( R_EDX );
nkeynes@736
   258
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
nkeynes@539
   259
nkeynes@926
   260
        POP_r32(R_EAX);
nkeynes@907
   261
        call_func1( sh4_raise_exception, R_EAX );
nkeynes@736
   262
        load_spreg( R_EAX, R_PC );
nkeynes@736
   263
        if( sh4_x86.tlb_on ) {
nkeynes@736
   264
            call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@736
   265
        } else {
nkeynes@736
   266
            call_func1(xlat_get_code,R_EAX);
nkeynes@736
   267
        }
nkeynes@926
   268
        exit_block();
nkeynes@539
   269
nkeynes@736
   270
        // Exception already raised - just cleanup
nkeynes@736
   271
        uint8_t *preexc_ptr = xlat_output;
nkeynes@736
   272
        MOV_r32_r32( R_EDX, R_ECX );
nkeynes@736
   273
        ADD_r32_r32( R_EDX, R_ECX );
nkeynes@736
   274
        ADD_r32_sh4r( R_ECX, R_SPC );
nkeynes@736
   275
        MOV_moff32_EAX( &sh4_cpu_period );
nkeynes@736
   276
        MUL_r32( R_EDX );
nkeynes@736
   277
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
nkeynes@736
   278
        load_spreg( R_EAX, R_PC );
nkeynes@736
   279
        if( sh4_x86.tlb_on ) {
nkeynes@736
   280
            call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@736
   281
        } else {
nkeynes@736
   282
            call_func1(xlat_get_code,R_EAX);
nkeynes@736
   283
        }
nkeynes@926
   284
        exit_block();
nkeynes@586
   285
nkeynes@736
   286
        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
nkeynes@736
   287
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
nkeynes@736
   288
            *fixup_addr = xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
nkeynes@736
   289
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
nkeynes@736
   290
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
nkeynes@736
   291
                int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;
nkeynes@736
   292
                if( stack_adj > 0 ) { 
nkeynes@736
   293
                    ADD_imm8s_r32( stack_adj, R_ESP );
nkeynes@736
   294
                }
nkeynes@736
   295
                int rel = preexc_ptr - xlat_output;
nkeynes@736
   296
                JMP_rel(rel);
nkeynes@736
   297
            } else {
nkeynes@736
   298
                PUSH_imm32( sh4_x86.backpatch_list[i].exc_code );
nkeynes@736
   299
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
nkeynes@736
   300
                int rel = end_ptr - xlat_output;
nkeynes@736
   301
                JMP_rel(rel);
nkeynes@736
   302
            }
nkeynes@736
   303
        }
nkeynes@539
   304
    }
nkeynes@539
   305
}
nkeynes@539
   306
nkeynes@926
   307
nkeynes@899
   308
/**
nkeynes@899
   309
 * The unwind methods only work if we compiled with DWARF2 frame information
nkeynes@899
   310
 * (ie -fexceptions), otherwise we have to use the direct frame scan.
nkeynes@899
   311
 */
nkeynes@899
   312
#ifdef HAVE_EXCEPTIONS
nkeynes@899
   313
#include <unwind.h>
nkeynes@899
   314
nkeynes@899
   315
/* State shared between xlat_get_native_pc() and its xlat_check_frame()
 * callback while walking the stack. */
struct UnwindInfo {
    uintptr_t block_start;  /* first address of the code range searched for */
    uintptr_t block_end;    /* one past the last address of that range */
    void *pc;               /* matching frame IP, or NULL if none was found */
};
nkeynes@899
   320
nkeynes@899
   321
_Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
nkeynes@899
   322
{
nkeynes@926
   323
    struct UnwindInfo *info = arg;
nkeynes@926
   324
    void *pc = (void *)_Unwind_GetIP(context);
nkeynes@926
   325
    if( ((uintptr_t)pc) >= info->block_start && ((uintptr_t)pc) < info->block_end ) {
nkeynes@926
   326
        info->pc = pc;
nkeynes@899
   327
        return _URC_NORMAL_STOP;
nkeynes@899
   328
    }
nkeynes@899
   329
nkeynes@899
   330
    return _URC_NO_REASON;
nkeynes@899
   331
}
nkeynes@899
   332
nkeynes@906
   333
void *xlat_get_native_pc( void *code, uint32_t code_size )
nkeynes@899
   334
{
nkeynes@899
   335
    struct _Unwind_Exception exc;
nkeynes@899
   336
    struct UnwindInfo info;
nkeynes@899
   337
nkeynes@926
   338
    info.pc = NULL;
nkeynes@926
   339
    info.block_start = (uintptr_t)code;
nkeynes@926
   340
    info.block_end = info.block_start + code_size;
nkeynes@899
   341
    void *result = NULL;
nkeynes@899
   342
    _Unwind_Backtrace( xlat_check_frame, &info );
nkeynes@926
   343
    return info.pc;
nkeynes@899
   344
}
nkeynes@899
   345
#else 
nkeynes@906
   346
void *xlat_get_native_pc( void *code, uint32_t code_size )
nkeynes@586
   347
{
nkeynes@586
   348
    void *result = NULL;
nkeynes@586
   349
    asm(
nkeynes@736
   350
        "mov %%ebp, %%eax\n\t"
nkeynes@736
   351
        "mov $0x8, %%ecx\n\t"
nkeynes@736
   352
        "mov %1, %%edx\n"
nkeynes@736
   353
        "frame_loop: test %%eax, %%eax\n\t"
nkeynes@736
   354
        "je frame_not_found\n\t"
nkeynes@736
   355
        "cmp (%%eax), %%edx\n\t"
nkeynes@736
   356
        "je frame_found\n\t"
nkeynes@736
   357
        "sub $0x1, %%ecx\n\t"
nkeynes@736
   358
        "je frame_not_found\n\t"
nkeynes@736
   359
        "movl (%%eax), %%eax\n\t"
nkeynes@736
   360
        "jmp frame_loop\n"
nkeynes@736
   361
        "frame_found: movl 0x4(%%eax), %0\n"
nkeynes@736
   362
        "frame_not_found:"
nkeynes@736
   363
        : "=r" (result)
nkeynes@736
   364
        : "r" (((uint8_t *)&sh4r) + 128 )
nkeynes@736
   365
        : "eax", "ecx", "edx" );
nkeynes@586
   366
    return result;
nkeynes@586
   367
}
nkeynes@899
   368
#endif
nkeynes@586
   369
nkeynes@926
   370
#endif /* !lxdream_ia32mac_H */
nkeynes@539
   371
nkeynes@539
   372
.