Search
lxdream.org :: lxdream/src/sh4/ia32abi.h
lxdream 0.9.1
released Jun 29
Download Now
filename src/sh4/ia32abi.h
changeset 905:4c17ebd9ef5e
prev901:32c5cf5e206f
next906:268ea359f884
author nkeynes
date Wed Oct 29 23:51:58 2008 +0000 (13 years ago)
permissions -rw-r--r--
last change Use regparam calling conventions for all functions called from translated code,
along with a few other high-use functions. Can probably extend this to all functions,
but as it is this is a nice performance boost
file annotate diff log raw
nkeynes@539
     1
/**
nkeynes@586
     2
 * $Id$
nkeynes@539
     3
 * 
nkeynes@539
     4
 * Provides the implementation for the ia32 ABI (eg prologue, epilogue, and
nkeynes@539
     5
 * calling conventions)
nkeynes@539
     6
 *
nkeynes@539
     7
 * Copyright (c) 2007 Nathan Keynes.
nkeynes@539
     8
 *
nkeynes@539
     9
 * This program is free software; you can redistribute it and/or modify
nkeynes@539
    10
 * it under the terms of the GNU General Public License as published by
nkeynes@539
    11
 * the Free Software Foundation; either version 2 of the License, or
nkeynes@539
    12
 * (at your option) any later version.
nkeynes@539
    13
 *
nkeynes@539
    14
 * This program is distributed in the hope that it will be useful,
nkeynes@539
    15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
nkeynes@539
    16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
nkeynes@539
    17
 * GNU General Public License for more details.
nkeynes@539
    18
 */
nkeynes@539
    19
nkeynes@736
    20
#ifndef lxdream_ia32abi_H
nkeynes@736
    21
#define lxdream_ia32abi_H 1
nkeynes@539
    22
nkeynes@539
    23
/**
 * Pass the address ptr, narrowed to a 32-bit immediate, to load_imm32 for
 * register reg. The argument is parenthesised so that an expression argument
 * (e.g. base + offset) is evaluated in full before the cast is applied.
 *
 * NOTE: the expansion still ends in a semicolon, as it always has, so that
 * existing call sites are unaffected. Do not use this macro as the unbraced
 * body of an if/else.
 */
#define load_ptr( reg, ptr ) load_imm32( reg, (uint32_t)(ptr) );
nkeynes@539
    24
nkeynes@539
    25
/**
nkeynes@539
    26
 * Note: clobbers EAX to make the indirect call - this isn't usually
nkeynes@539
    27
 * a problem since the callee will usually clobber it anyway.
nkeynes@539
    28
 */
nkeynes@539
    29
static inline void call_func0( void *ptr )
nkeynes@539
    30
{
nkeynes@539
    31
    load_imm32(R_EAX, (uint32_t)ptr);
nkeynes@539
    32
    CALL_r32(R_EAX);
nkeynes@539
    33
}
nkeynes@539
    34
nkeynes@905
    35
#ifdef HAVE_FASTCALL
nkeynes@905
    36
static inline void call_func1( void *ptr, int arg1 )
nkeynes@905
    37
{
nkeynes@905
    38
    if( arg1 != R_EAX ) {
nkeynes@905
    39
        MOV_r32_r32( arg1, R_EAX );
nkeynes@905
    40
    }
nkeynes@905
    41
    load_imm32(R_ECX, (uint32_t)ptr);
nkeynes@905
    42
    CALL_r32(R_ECX);
nkeynes@905
    43
}
nkeynes@905
    44
nkeynes@905
    45
static inline void call_func2( void *ptr, int arg1, int arg2 )
nkeynes@905
    46
{
nkeynes@905
    47
    if( arg2 != R_EDX ) {
nkeynes@905
    48
        MOV_r32_r32( arg2, R_EDX );
nkeynes@905
    49
    }
nkeynes@905
    50
    if( arg1 != R_EAX ) {
nkeynes@905
    51
        MOV_r32_r32( arg1, R_EAX );
nkeynes@905
    52
    }
nkeynes@905
    53
    load_imm32(R_ECX, (uint32_t)ptr);
nkeynes@905
    54
    CALL_r32(R_ECX);
nkeynes@905
    55
}
nkeynes@905
    56
nkeynes@905
    57
/**
nkeynes@905
    58
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
nkeynes@905
    59
 * the second in arg2b
nkeynes@905
    60
 */
nkeynes@905
    61
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@905
    62
{
nkeynes@905
    63
    PUSH_r32(arg2b);
nkeynes@905
    64
    PUSH_r32(addr);
nkeynes@905
    65
    call_func2(sh4_write_long, addr, arg2a);
nkeynes@905
    66
    POP_r32(R_EAX);
nkeynes@905
    67
    POP_r32(R_EDX);
nkeynes@905
    68
    ADD_imm8s_r32(4, R_EAX);
nkeynes@905
    69
    call_func0(sh4_write_long);
nkeynes@905
    70
}
nkeynes@905
    71
nkeynes@905
    72
/**
nkeynes@905
    73
 * Read a double (64-bit) value from memory, writing the first word into arg2a
nkeynes@905
    74
 * and the second into arg2b. The addr must not be in EAX
nkeynes@905
    75
 */
nkeynes@905
    76
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@905
    77
{
nkeynes@905
    78
    PUSH_r32(addr);
nkeynes@905
    79
    call_func1(sh4_read_long, addr);
nkeynes@905
    80
    POP_r32(R_ECX);
nkeynes@905
    81
    PUSH_r32(R_EAX);
nkeynes@905
    82
    MOV_r32_r32(R_ECX, R_EAX);
nkeynes@905
    83
    ADD_imm8s_r32(4, R_EAX);
nkeynes@905
    84
    call_func0(sh4_read_long);
nkeynes@905
    85
    if( arg2b != R_EAX ) {
nkeynes@905
    86
        MOV_r32_r32(R_EAX, arg2b);
nkeynes@905
    87
    }
nkeynes@905
    88
    POP_r32(arg2a);
nkeynes@905
    89
}
nkeynes@905
    90
#else
nkeynes@539
    91
static inline void call_func1( void *ptr, int arg1 )
nkeynes@539
    92
{
nkeynes@539
    93
    PUSH_r32(arg1);
nkeynes@539
    94
    call_func0(ptr);
nkeynes@539
    95
    ADD_imm8s_r32( 4, R_ESP );
nkeynes@539
    96
}
nkeynes@539
    97
nkeynes@539
    98
static inline void call_func2( void *ptr, int arg1, int arg2 )
nkeynes@539
    99
{
nkeynes@539
   100
    PUSH_r32(arg2);
nkeynes@539
   101
    PUSH_r32(arg1);
nkeynes@539
   102
    call_func0(ptr);
nkeynes@539
   103
    ADD_imm8s_r32( 8, R_ESP );
nkeynes@539
   104
}
nkeynes@539
   105
nkeynes@539
   106
/**
nkeynes@539
   107
 * Write a double (64-bit) value into memory, with the first word in arg2a, and
nkeynes@539
   108
 * the second in arg2b
nkeynes@539
   109
 */
nkeynes@539
   110
static inline void MEM_WRITE_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@539
   111
{
nkeynes@539
   112
    ADD_imm8s_r32( 4, addr );
nkeynes@539
   113
    PUSH_r32(arg2b);
nkeynes@539
   114
    PUSH_r32(addr);
nkeynes@539
   115
    ADD_imm8s_r32( -4, addr );
nkeynes@539
   116
    PUSH_r32(arg2a);
nkeynes@539
   117
    PUSH_r32(addr);
nkeynes@539
   118
    call_func0(sh4_write_long);
nkeynes@539
   119
    ADD_imm8s_r32( 8, R_ESP );
nkeynes@539
   120
    call_func0(sh4_write_long);
nkeynes@539
   121
    ADD_imm8s_r32( 8, R_ESP );
nkeynes@539
   122
}
nkeynes@539
   123
nkeynes@539
   124
/**
nkeynes@539
   125
 * Read a double (64-bit) value from memory, writing the first word into arg2a
nkeynes@539
   126
 * and the second into arg2b. The addr must not be in EAX
nkeynes@539
   127
 */
nkeynes@539
   128
static inline void MEM_READ_DOUBLE( int addr, int arg2a, int arg2b )
nkeynes@539
   129
{
nkeynes@539
   130
    PUSH_r32(addr);
nkeynes@539
   131
    call_func0(sh4_read_long);
nkeynes@586
   132
    POP_r32(R_ECX);
nkeynes@539
   133
    PUSH_r32(R_EAX);
nkeynes@586
   134
    ADD_imm8s_r32( 4, R_ECX );
nkeynes@586
   135
    PUSH_r32(R_ECX);
nkeynes@539
   136
    call_func0(sh4_read_long);
nkeynes@539
   137
    ADD_imm8s_r32( 4, R_ESP );
nkeynes@539
   138
    MOV_r32_r32( R_EAX, arg2b );
nkeynes@539
   139
    POP_r32(arg2a);
nkeynes@539
   140
}
nkeynes@905
   141
#endif
nkeynes@539
   142
nkeynes@539
   143
/**
nkeynes@539
   144
 * Emit the 'start of block' assembly. Sets up the stack frame and save
nkeynes@539
   145
 * SI/DI as required
nkeynes@539
   146
 */
nkeynes@901
   147
void enter_block( ) 
nkeynes@539
   148
{
nkeynes@539
   149
    PUSH_r32(R_EBP);
nkeynes@539
   150
    /* mov &sh4r, ebp */
nkeynes@669
   151
    load_ptr( R_EBP, ((uint8_t *)&sh4r) + 128 );
nkeynes@736
   152
nkeynes@539
   153
#ifdef STACK_ALIGN
nkeynes@736
   154
    sh4_x86.stack_posn = 8;
nkeynes@539
   155
#endif
nkeynes@539
   156
}
nkeynes@539
   157
nkeynes@539
   158
/**
nkeynes@539
   159
 * Exit the block with sh4r.pc already written
nkeynes@539
   160
 */
nkeynes@586
   161
void exit_block_pcset( sh4addr_t pc )
nkeynes@539
   162
{
nkeynes@539
   163
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@539
   164
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
nkeynes@590
   165
    load_spreg( R_EAX, R_PC );
nkeynes@590
   166
    if( sh4_x86.tlb_on ) {
nkeynes@736
   167
        call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@590
   168
    } else {
nkeynes@736
   169
        call_func1(xlat_get_code,R_EAX);
nkeynes@590
   170
    } 
nkeynes@590
   171
    POP_r32(R_EBP);
nkeynes@590
   172
    RET();
nkeynes@590
   173
}
nkeynes@590
   174
nkeynes@590
   175
/**
nkeynes@590
   176
 * Exit the block with sh4r.new_pc written with the target pc
nkeynes@590
   177
 */
nkeynes@590
   178
void exit_block_newpcset( sh4addr_t pc )
nkeynes@590
   179
{
nkeynes@590
   180
    load_imm32( R_ECX, ((pc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@590
   181
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );    // 6
nkeynes@590
   182
    load_spreg( R_EAX, R_NEW_PC );
nkeynes@590
   183
    store_spreg( R_EAX, R_PC );
nkeynes@586
   184
    if( sh4_x86.tlb_on ) {
nkeynes@736
   185
        call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@586
   186
    } else {
nkeynes@736
   187
        call_func1(xlat_get_code,R_EAX);
nkeynes@586
   188
    } 
nkeynes@539
   189
    POP_r32(R_EBP);
nkeynes@539
   190
    RET();
nkeynes@539
   191
}
nkeynes@539
   192
nkeynes@586
   193
#define EXIT_BLOCK_SIZE(pc)  (24 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
nkeynes@586
   194
nkeynes@586
   195
nkeynes@539
   196
/**
nkeynes@539
   197
 * Exit the block to an absolute PC
nkeynes@539
   198
 */
nkeynes@539
   199
void exit_block( sh4addr_t pc, sh4addr_t endpc )
nkeynes@539
   200
{
nkeynes@539
   201
    load_imm32( R_ECX, pc );                            // 5
nkeynes@539
   202
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
nkeynes@586
   203
    if( IS_IN_ICACHE(pc) ) {
nkeynes@736
   204
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
nkeynes@586
   205
    } else if( sh4_x86.tlb_on ) {
nkeynes@736
   206
        call_func1(xlat_get_code_by_vma,R_ECX);
nkeynes@586
   207
    } else {
nkeynes@736
   208
        call_func1(xlat_get_code,R_ECX);
nkeynes@586
   209
    }
nkeynes@586
   210
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
nkeynes@586
   211
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@586
   212
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
nkeynes@586
   213
    POP_r32(R_EBP);
nkeynes@586
   214
    RET();
nkeynes@586
   215
}
nkeynes@586
   216
nkeynes@586
   217
#define EXIT_BLOCK_REL_SIZE(pc)  (27 + (IS_IN_ICACHE(pc)?5:CALL_FUNC1_SIZE))
nkeynes@586
   218
nkeynes@586
   219
/**
nkeynes@586
   220
 * Exit the block to a relative PC
nkeynes@586
   221
 */
nkeynes@586
   222
void exit_block_rel( sh4addr_t pc, sh4addr_t endpc )
nkeynes@586
   223
{
nkeynes@586
   224
    load_imm32( R_ECX, pc - sh4_x86.block_start_pc );   // 5
nkeynes@586
   225
    ADD_sh4r_r32( R_PC, R_ECX );
nkeynes@586
   226
    store_spreg( R_ECX, REG_OFFSET(pc) );               // 3
nkeynes@586
   227
    if( IS_IN_ICACHE(pc) ) {
nkeynes@736
   228
        MOV_moff32_EAX( xlat_get_lut_entry(GET_ICACHE_PHYS(pc)) ); // 5
nkeynes@586
   229
    } else if( sh4_x86.tlb_on ) {
nkeynes@736
   230
        call_func1(xlat_get_code_by_vma,R_ECX);
nkeynes@586
   231
    } else {
nkeynes@736
   232
        call_func1(xlat_get_code,R_ECX);
nkeynes@586
   233
    }
nkeynes@539
   234
    AND_imm8s_r32( 0xFC, R_EAX ); // 3
nkeynes@539
   235
    load_imm32( R_ECX, ((endpc - sh4_x86.block_start_pc)>>1)*sh4_cpu_period ); // 5
nkeynes@539
   236
    ADD_r32_sh4r( R_ECX, REG_OFFSET(slice_cycle) );     // 6
nkeynes@539
   237
    POP_r32(R_EBP);
nkeynes@539
   238
    RET();
nkeynes@539
   239
}
nkeynes@539
   240
nkeynes@539
   241
/**
nkeynes@539
   242
 * Write the block trailer (exception handling block)
nkeynes@539
   243
 */
nkeynes@539
   244
void sh4_translate_end_block( sh4addr_t pc ) {
nkeynes@539
   245
    if( sh4_x86.branch_taken == FALSE ) {
nkeynes@736
   246
        // Didn't exit unconditionally already, so write the termination here
nkeynes@736
   247
        exit_block_rel( pc, pc );
nkeynes@539
   248
    }
nkeynes@539
   249
    if( sh4_x86.backpatch_posn != 0 ) {
nkeynes@736
   250
        unsigned int i;
nkeynes@736
   251
        // Raise exception
nkeynes@736
   252
        uint8_t *end_ptr = xlat_output;
nkeynes@736
   253
        MOV_r32_r32( R_EDX, R_ECX );
nkeynes@736
   254
        ADD_r32_r32( R_EDX, R_ECX );
nkeynes@736
   255
        ADD_r32_sh4r( R_ECX, R_PC );
nkeynes@736
   256
        MOV_moff32_EAX( &sh4_cpu_period );
nkeynes@736
   257
        MUL_r32( R_EDX );
nkeynes@736
   258
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
nkeynes@539
   259
nkeynes@736
   260
        call_func0( sh4_raise_exception );
nkeynes@736
   261
        ADD_imm8s_r32( 4, R_ESP );
nkeynes@736
   262
        load_spreg( R_EAX, R_PC );
nkeynes@736
   263
        if( sh4_x86.tlb_on ) {
nkeynes@736
   264
            call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@736
   265
        } else {
nkeynes@736
   266
            call_func1(xlat_get_code,R_EAX);
nkeynes@736
   267
        }
nkeynes@736
   268
        POP_r32(R_EBP);
nkeynes@736
   269
        RET();
nkeynes@539
   270
nkeynes@736
   271
        // Exception already raised - just cleanup
nkeynes@736
   272
        uint8_t *preexc_ptr = xlat_output;
nkeynes@736
   273
        MOV_r32_r32( R_EDX, R_ECX );
nkeynes@736
   274
        ADD_r32_r32( R_EDX, R_ECX );
nkeynes@736
   275
        ADD_r32_sh4r( R_ECX, R_SPC );
nkeynes@736
   276
        MOV_moff32_EAX( &sh4_cpu_period );
nkeynes@736
   277
        MUL_r32( R_EDX );
nkeynes@736
   278
        ADD_r32_sh4r( R_EAX, REG_OFFSET(slice_cycle) );
nkeynes@736
   279
        load_spreg( R_EAX, R_PC );
nkeynes@736
   280
        if( sh4_x86.tlb_on ) {
nkeynes@736
   281
            call_func1(xlat_get_code_by_vma,R_EAX);
nkeynes@736
   282
        } else {
nkeynes@736
   283
            call_func1(xlat_get_code,R_EAX);
nkeynes@736
   284
        }
nkeynes@736
   285
        POP_r32(R_EBP);
nkeynes@736
   286
        RET();
nkeynes@586
   287
nkeynes@736
   288
        for( i=0; i< sh4_x86.backpatch_posn; i++ ) {
nkeynes@736
   289
            uint32_t *fixup_addr = (uint32_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset];
nkeynes@736
   290
            *fixup_addr = xlat_output - (uint8_t *)&xlat_current_block->code[sh4_x86.backpatch_list[i].fixup_offset] - 4;
nkeynes@736
   291
            if( sh4_x86.backpatch_list[i].exc_code < 0 ) {
nkeynes@736
   292
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
nkeynes@736
   293
                int stack_adj = -1 - sh4_x86.backpatch_list[i].exc_code;
nkeynes@736
   294
                if( stack_adj > 0 ) { 
nkeynes@736
   295
                    ADD_imm8s_r32( stack_adj, R_ESP );
nkeynes@736
   296
                }
nkeynes@736
   297
                int rel = preexc_ptr - xlat_output;
nkeynes@736
   298
                JMP_rel(rel);
nkeynes@736
   299
            } else {
nkeynes@736
   300
                PUSH_imm32( sh4_x86.backpatch_list[i].exc_code );
nkeynes@736
   301
                load_imm32( R_EDX, sh4_x86.backpatch_list[i].fixup_icount );
nkeynes@736
   302
                int rel = end_ptr - xlat_output;
nkeynes@736
   303
                JMP_rel(rel);
nkeynes@736
   304
            }
nkeynes@736
   305
        }
nkeynes@539
   306
    }
nkeynes@539
   307
}
nkeynes@539
   308
nkeynes@899
   309
/**
nkeynes@899
   310
 * The unwind methods only work if we compiled with DWARF2 frame information
nkeynes@899
   311
 * (ie -fexceptions), otherwise we have to use the direct frame scan.
nkeynes@899
   312
 */
nkeynes@899
   313
#ifdef HAVE_EXCEPTIONS
nkeynes@899
   314
#include <unwind.h>
nkeynes@899
   315
nkeynes@899
   316
/**
 * Accumulator passed to xlat_check_frame() while walking the stack.
 */
struct UnwindInfo {
    int have_result;    /* non-zero once a matching frame has been recorded */
    void *pc;           /* instruction pointer of the last matching frame */
};
nkeynes@899
   320
nkeynes@899
   321
_Unwind_Reason_Code xlat_check_frame( struct _Unwind_Context *context, void *arg )
nkeynes@899
   322
{
nkeynes@899
   323
    void *ebp = (void *)_Unwind_GetGR(context, 5);
nkeynes@899
   324
    void *expect = (((uint8_t *)&sh4r) + 128 );
nkeynes@899
   325
	struct UnwindInfo *info = arg;
nkeynes@899
   326
    if( ebp == expect ) { 
nkeynes@899
   327
        info->have_result = 1;
nkeynes@899
   328
        info->pc = (void *)_Unwind_GetIP(context);
nkeynes@899
   329
    } else if( info->have_result ) {
nkeynes@899
   330
        return _URC_NORMAL_STOP;
nkeynes@899
   331
    }
nkeynes@899
   332
nkeynes@899
   333
    return _URC_NO_REASON;
nkeynes@899
   334
}
nkeynes@899
   335
nkeynes@899
   336
void *xlat_get_native_pc()
nkeynes@899
   337
{
nkeynes@899
   338
    struct _Unwind_Exception exc;
nkeynes@899
   339
    struct UnwindInfo info;
nkeynes@899
   340
nkeynes@899
   341
    info.have_result = 0;
nkeynes@899
   342
    void *result = NULL;
nkeynes@899
   343
    _Unwind_Backtrace( xlat_check_frame, &info );
nkeynes@899
   344
    if( info.have_result )
nkeynes@899
   345
    	return info.pc;
nkeynes@899
   346
    return NULL;
nkeynes@899
   347
}
nkeynes@899
   348
#else 
nkeynes@586
   349
void *xlat_get_native_pc()
nkeynes@586
   350
{
nkeynes@586
   351
    void *result = NULL;
nkeynes@586
   352
    asm(
nkeynes@736
   353
        "mov %%ebp, %%eax\n\t"
nkeynes@736
   354
        "mov $0x8, %%ecx\n\t"
nkeynes@736
   355
        "mov %1, %%edx\n"
nkeynes@736
   356
        "frame_loop: test %%eax, %%eax\n\t"
nkeynes@736
   357
        "je frame_not_found\n\t"
nkeynes@736
   358
        "cmp (%%eax), %%edx\n\t"
nkeynes@736
   359
        "je frame_found\n\t"
nkeynes@736
   360
        "sub $0x1, %%ecx\n\t"
nkeynes@736
   361
        "je frame_not_found\n\t"
nkeynes@736
   362
        "movl (%%eax), %%eax\n\t"
nkeynes@736
   363
        "jmp frame_loop\n"
nkeynes@736
   364
        "frame_found: movl 0x4(%%eax), %0\n"
nkeynes@736
   365
        "frame_not_found:"
nkeynes@736
   366
        : "=r" (result)
nkeynes@736
   367
        : "r" (((uint8_t *)&sh4r) + 128 )
nkeynes@736
   368
        : "eax", "ecx", "edx" );
nkeynes@586
   369
    return result;
nkeynes@586
   370
}
nkeynes@899
   371
#endif
nkeynes@586
   372
nkeynes@736
   373
#endif /* !lxdream_ia32abi_H */
nkeynes@539
   374
nkeynes@539
   375
.