filename | src/xlat/xirsup.c |
changeset | 1011:fdd58619b760 |
prev | 1006:3a169c224c12 |
author | nkeynes |
date | Sun Apr 12 07:24:45 2009 +0000 (15 years ago) |
branch | xlat-refactor |
permissions | -rw-r--r-- |
last change | Restructure operand types - rename to forms to avoid conflict for actual data types temporary operands are now a first class form remove explicit types for immediates - now implied by opcode Initial work on promote-source-reg pass |
file | annotate | diff | log | raw |
nkeynes@1006 | 1 | /** |
nkeynes@1006 | 2 | * $Id: xirsup.c 931 2008-10-31 02:57:59Z nkeynes $ |
nkeynes@1006 | 3 | * |
nkeynes@1006 | 4 | * XIR support functions and transformations for the convenience of other |
nkeynes@1006 | 5 | * passes/targets. |
nkeynes@1006 | 6 | * |
nkeynes@1006 | 7 | * Copyright (c) 2009 Nathan Keynes. |
nkeynes@1006 | 8 | * |
nkeynes@1006 | 9 | * This program is free software; you can redistribute it and/or modify |
nkeynes@1006 | 10 | * it under the terms of the GNU General Public License as published by |
nkeynes@1006 | 11 | * the Free Software Foundation; either version 2 of the License, or |
nkeynes@1006 | 12 | * (at your option) any later version. |
nkeynes@1006 | 13 | * |
nkeynes@1006 | 14 | * This program is distributed in the hope that it will be useful, |
nkeynes@1006 | 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
nkeynes@1006 | 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
nkeynes@1006 | 17 | * GNU General Public License for more details. |
nkeynes@1006 | 18 | */ |
nkeynes@1006 | 19 | |
nkeynes@1006 | 20 | #include "xlat/xir.h" |
nkeynes@1006 | 21 | |
nkeynes@1006 | 22 | /**************************** Shuffle ****************************/ |
nkeynes@1006 | 23 | /** |
nkeynes@1006 | 24 | * Shuffle is a high-level instruction that rearranges bytes in an operand |
nkeynes@1006 | 25 | * according to an immediate pattern. This can be encoded directly on x86 |
nkeynes@1006 | 26 | * using SSE/MMX registers, otherwise it needs to be lowered first. |
nkeynes@1006 | 27 | */ |
nkeynes@1006 | 28 | |
/**
 * Apply a shuffle directly to a 32-bit immediate operand, and return the
 * result.
 *
 * The shuffle pattern is read as four 4-bit selectors. The selector in nibble
 * i (0 = least significant) chooses the source byte that is placed in byte i
 * of the result: 1 selects the most significant byte of the operand and 4 the
 * least significant. Any other selector value leaves that result byte as
 * zero. Hence 0x1234 is the identity and 0x4321 is a full byte swap.
 *
 * @param shuffle shuffle pattern (only the low 16 bits are examined)
 * @param operand value whose bytes are to be rearranged
 * @return the shuffled value
 */
uint32_t xir_shuffle_imm32( uint32_t shuffle, uint32_t operand )
{
    uint32_t result = 0;
    int i;

    for( i=0; i<4; i++ ) {
        uint32_t sel = (shuffle >> (i<<2)) & 0x0F;
        if( sel >= 1 && sel <= 4 ) {
            /* Keep the byte unsigned: a byte >= 0x80 shifted left by 24 in a
             * signed int would overflow. */
            uint32_t byte = (operand >> ((4-sel)<<3)) & 0xFF;
            result |= byte << (i<<3);
        }
    }
    return result;
}
nkeynes@1006 | 47 | |
nkeynes@1006 | 48 | /** |
nkeynes@1006 | 49 | * Apply a shuffle transitively to the operation (which must also be a shuffle). |
nkeynes@1006 | 50 | * For example, given the sequence |
nkeynes@1006 | 51 | * op1: shuffle 0x1243, r12 |
nkeynes@1006 | 52 | * op2: shuffle 0x3412, r12 |
nkeynes@1006 | 53 | * xir_trans_shuffle( 0x1243, op2 ) can be used to replace op2 wih |
nkeynes@1006 | 54 | * shuffle 0x4312, r12 |
nkeynes@1006 | 55 | */ |
nkeynes@1006 | 56 | void xir_shuffle_op( uint32_t shuffle, xir_op_t it ) |
nkeynes@1006 | 57 | { |
nkeynes@1006 | 58 | int i=0,j; |
nkeynes@1006 | 59 | uint32_t in1 = shuffle; |
nkeynes@1006 | 60 | uint32_t in2 = it->operand[0].value.i; |
nkeynes@1006 | 61 | uint32_t result = 0; |
nkeynes@1006 | 62 | for( i=0; i<4; i++ ) { |
nkeynes@1006 | 63 | j = (in2 & 0x0F)-1; |
nkeynes@1006 | 64 | in2 >>= 4; |
nkeynes@1006 | 65 | if( j >= 0 && j < 4 ) { |
nkeynes@1006 | 66 | j = (in1 >> ((3-j)<<2)) & 0x0F; |
nkeynes@1006 | 67 | result |= (j << (i<<2)); |
nkeynes@1006 | 68 | } |
nkeynes@1006 | 69 | } |
nkeynes@1006 | 70 | it->operand[0].value.i = result; |
nkeynes@1006 | 71 | } |
nkeynes@1006 | 72 | |
nkeynes@1006 | 73 | /** |
nkeynes@1006 | 74 | * Return the cost of lowering the specified shuffle as the number of instructions |
nkeynes@1006 | 75 | * involved. |
nkeynes@1006 | 76 | */ |
nkeynes@1006 | 77 | int xir_shuffle_lower_size( xir_op_t it ) |
nkeynes@1006 | 78 | { |
nkeynes@1006 | 79 | int mask_for_shift[7] = {0,0,0,0,0,0,0}; /* -3 .. 0 .. +3 */ |
nkeynes@1006 | 80 | int arg = it->operand[0].value.i, i; |
nkeynes@1006 | 81 | int icount=0, found = 0; |
nkeynes@1006 | 82 | |
nkeynes@1006 | 83 | if( arg == 0x1234 ) { |
nkeynes@1006 | 84 | return 0; |
nkeynes@1006 | 85 | } |
nkeynes@1006 | 86 | |
nkeynes@1006 | 87 | /* Figure out the shift (in bytes) for each sub-byte and construct the mask/shift array */ |
nkeynes@1006 | 88 | for( i=0; i<4; i++ ) { |
nkeynes@1006 | 89 | int val = (arg&0x0F); |
nkeynes@1006 | 90 | if( val >= 1 && val <= 4 ) { |
nkeynes@1006 | 91 | int shift = val - (4-i); |
nkeynes@1006 | 92 | mask_for_shift[shift+3] |= ( (0xFF) << (i<<3) ); |
nkeynes@1006 | 93 | } |
nkeynes@1006 | 94 | arg >>= 4; |
nkeynes@1006 | 95 | } |
nkeynes@1006 | 96 | |
nkeynes@1006 | 97 | for( i=-3; i<4; i++ ) { |
nkeynes@1006 | 98 | if( mask_for_shift[i+3] != 0 ) { |
nkeynes@1006 | 99 | uint32_t maxmask = 0xFFFFFFFF; |
nkeynes@1006 | 100 | if( i < 0 ) { |
nkeynes@1006 | 101 | icount++; |
nkeynes@1006 | 102 | maxmask >>= ((-i)<<3); |
nkeynes@1006 | 103 | } else if( i > 0 ) { |
nkeynes@1006 | 104 | icount++; |
nkeynes@1006 | 105 | maxmask <<= (i<<3); |
nkeynes@1006 | 106 | } |
nkeynes@1006 | 107 | if( mask_for_shift[i+3] != maxmask ) { |
nkeynes@1006 | 108 | icount++; |
nkeynes@1006 | 109 | } |
nkeynes@1006 | 110 | if( found != 0 ) { |
nkeynes@1006 | 111 | icount += 2; |
nkeynes@1006 | 112 | } |
nkeynes@1006 | 113 | found++; |
nkeynes@1006 | 114 | } |
nkeynes@1006 | 115 | } |
nkeynes@1006 | 116 | return icount; |
nkeynes@1006 | 117 | } |
nkeynes@1006 | 118 | |
nkeynes@1006 | 119 | /** |
nkeynes@1006 | 120 | * Transform a shuffle instruction into an equivalent sequence of shifts, and |
nkeynes@1006 | 121 | * logical operations. |
nkeynes@1006 | 122 | */ |
nkeynes@1006 | 123 | xir_op_t xir_shuffle_lower( xir_basic_block_t xbb, xir_op_t it, int tmp1, int tmp2 ) |
nkeynes@1006 | 124 | { |
nkeynes@1006 | 125 | int mask_for_shift[7] = {0,0,0,0,0,0,0}; /* -3 .. 0 .. +3 */ |
nkeynes@1006 | 126 | int arg = it->operand[0].value.i, i, first=3, last=-3; |
nkeynes@1006 | 127 | |
nkeynes@1006 | 128 | if( arg == 0x1234 ) { /* Identity - NOP */ |
nkeynes@1006 | 129 | it->opcode = OP_NOP; |
nkeynes@1011 | 130 | it->operand[0].form = NO_OPERAND; |
nkeynes@1011 | 131 | it->operand[1].form = NO_OPERAND; |
nkeynes@1006 | 132 | return it; |
nkeynes@1006 | 133 | } |
nkeynes@1006 | 134 | |
nkeynes@1006 | 135 | |
nkeynes@1006 | 136 | /* Figure out the shift (in bytes) for each sub-byte and construct the mask/shift array */ |
nkeynes@1006 | 137 | for( i=0; i<4; i++ ) { |
nkeynes@1006 | 138 | int val = (arg&0x0F); |
nkeynes@1006 | 139 | if( val >= 1 && val <= 4 ) { |
nkeynes@1006 | 140 | int shift = val - (4-i); |
nkeynes@1006 | 141 | mask_for_shift[shift+3] |= ( (0xFF) << (i<<3) ); |
nkeynes@1006 | 142 | if( shift > last ) { |
nkeynes@1006 | 143 | last = shift; |
nkeynes@1006 | 144 | } |
nkeynes@1006 | 145 | if( shift < first ) { |
nkeynes@1006 | 146 | first = shift; |
nkeynes@1006 | 147 | } |
nkeynes@1006 | 148 | } |
nkeynes@1006 | 149 | arg >>= 4; |
nkeynes@1006 | 150 | } |
nkeynes@1006 | 151 | |
nkeynes@1011 | 152 | int shifterform = it->operand[1].form, shifterval = it->operand[1].value.i; |
nkeynes@1006 | 153 | xir_op_t seq = xbb->ir_ptr; |
nkeynes@1006 | 154 | |
nkeynes@1006 | 155 | for( i=first; i<=last; i++ ) { |
nkeynes@1006 | 156 | if( mask_for_shift[i+3] != 0 ) { |
nkeynes@1006 | 157 | uint32_t maxmask = 0xFFFFFFFF; |
nkeynes@1006 | 158 | if( first != i ) { |
nkeynes@1011 | 159 | shifterform = TEMP_OPERAND; |
nkeynes@1006 | 160 | if( last == i ) { |
nkeynes@1006 | 161 | shifterval = tmp1; |
nkeynes@1006 | 162 | } else { |
nkeynes@1006 | 163 | shifterval = tmp2; |
nkeynes@1011 | 164 | xir_append_op2( xbb, OP_MOV, TEMP_OPERAND, tmp1, shifterform, shifterval ); |
nkeynes@1006 | 165 | } |
nkeynes@1006 | 166 | } |
nkeynes@1006 | 167 | if( i < 0 ) { |
nkeynes@1011 | 168 | xir_append_op2( xbb, OP_SLR, IMMEDIATE_OPERAND, (-i)<<3, shifterform, shifterval ); |
nkeynes@1006 | 169 | maxmask >>= ((-i)<<3); |
nkeynes@1006 | 170 | } else if( i > 0 ) { |
nkeynes@1011 | 171 | xir_append_op2( xbb, OP_SLL, IMMEDIATE_OPERAND, i<<3, shifterform, shifterval ); |
nkeynes@1006 | 172 | maxmask <<= (i<<3); |
nkeynes@1006 | 173 | } |
nkeynes@1006 | 174 | if( mask_for_shift[i+3] != maxmask ) { |
nkeynes@1011 | 175 | xir_append_op2( xbb, OP_AND, IMMEDIATE_OPERAND, mask_for_shift[i+3], shifterform, shifterval ); |
nkeynes@1006 | 176 | } |
nkeynes@1006 | 177 | if( first != i ) { |
nkeynes@1011 | 178 | xir_append_op2( xbb, OP_OR, shifterform, shifterval, it->operand[1].form, it->operand[1].value.i ); |
nkeynes@1006 | 179 | } |
nkeynes@1006 | 180 | } |
nkeynes@1006 | 181 | } |
nkeynes@1006 | 182 | |
nkeynes@1006 | 183 | /* Replace original shuffle with either a temp move or a nop */ |
nkeynes@1006 | 184 | if( first != last ) { |
nkeynes@1006 | 185 | it->opcode = OP_MOV; |
nkeynes@1011 | 186 | it->operand[0].form = it->operand[1].form; |
nkeynes@1006 | 187 | it->operand[0].value.i = it->operand[1].value.i; |
nkeynes@1011 | 188 | it->operand[1].form = TEMP_OPERAND; |
nkeynes@1006 | 189 | it->operand[1].value.i = tmp1; |
nkeynes@1006 | 190 | } else { |
nkeynes@1006 | 191 | it->opcode = OP_NOP; |
nkeynes@1011 | 192 | it->operand[0].form = NO_OPERAND; |
nkeynes@1011 | 193 | it->operand[1].form = NO_OPERAND; |
nkeynes@1006 | 194 | } |
nkeynes@1006 | 195 | |
nkeynes@1006 | 196 | /* Finally insert the new sequence after the original op */ |
nkeynes@1006 | 197 | if( xbb->ir_ptr != seq ) { |
nkeynes@1006 | 198 | xir_op_t last = xbb->ir_ptr-1; |
nkeynes@1006 | 199 | last->next = it->next; |
nkeynes@1006 | 200 | it->next = seq; |
nkeynes@1006 | 201 | seq->prev = it; |
nkeynes@1006 | 202 | if( last->next != 0 ) { |
nkeynes@1006 | 203 | last->next->prev = last; |
nkeynes@1006 | 204 | } |
nkeynes@1006 | 205 | return last; |
nkeynes@1006 | 206 | } else { |
nkeynes@1006 | 207 | return it; |
nkeynes@1006 | 208 | } |
nkeynes@1006 | 209 | } |
.