| /* Multiply table generator for tile. |
| Copyright (C) 2011-2021 Free Software Foundation, Inc. |
| Contributed by Walter Lee (walt@tilera.com) |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published |
| by the Free Software Foundation; either version 3, or (at your |
| option) any later version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
| License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* This program creates a table used to compile multiply by constant |
| efficiently. |
| |
| This program should be compiled by a c++ compiler. If it's |
| compiled with -DTILEPRO, it generates the multiply table for |
| TILEPro; otherwise it generates the multiply table for TILE-Gx. |
| Running the program produces the table in stdout. |
| |
| The program works by generating every possible combination of up to |
| MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left |
| shift) and computing the multiplier computed by those instructions. |
| For example, |
| |
| s2a r2,r1,r1 |
| s2a r3,r2,r2 |
| |
| multiplies r1 by 25. |
| |
| There are usually multiple instruction sequences to multiply by a |
| given constant. This program keeps only the cheapest one. |
| "Cheapest" is defined first by the minimum theoretical schedule |
| length, and if those are equal then by the number of instructions, |
| and if those are equal then by which instructions we "prefer" |
| (e.g. because we think one might take infinitesimally less power |
| than another, or simply because we arbitrarily pick one to be more |
| canonical). |
| |
| Once this program has determined the best instruction sequence for |
| each multiplier, it emits them in a table sorted by the multiplier |
| so the user can binary-search it to look for a match. The table is |
| pruned based on various criteria to keep its sizes reasonable. */ |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <string.h> |
| #define __STDC_LIMIT_MACROS |
| #include <stdint.h> |
| |
| #include <map> |
| |
| #ifdef TILEPRO |
| |
| /* The string representing the architecture. */ |
| #define ARCH "tilepro" |
| |
| /* The type for the multiplication. */ |
| typedef int MUL_TYPE; |
| |
| #else |
| |
| /* The string representing the architecture. */ |
| #define ARCH "tilegx" |
| |
| /* The type for the multiplication. */ |
| typedef long long MUL_TYPE; |
| |
| #endif |
| |
| /* Longest instruction sequence this will produce. With the current |
| stupid algorithm runtime grows very quickly with this number. */ |
| #define MAX_INSTRUCTIONS 4 |
| |
| /* Maximum number of subexpressions in the expression DAG being |
| generated. This is the same as the number of instructions, except |
| that zero and the original register we'd like to multiply by a |
| constant are also thrown into the mix. */ |
| #define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS) |
| |
| #define MIN(x, y) ((x) <= (y) ? (x) : (y)) |
| #define MAX(x, y) ((x) >= (y) ? (x) : (y)) |
| |
| /* For this program a unary op is one which has only one nonconstant |
| operand. So shift left by 5 is considered unary. */ |
| typedef MUL_TYPE (*unary_op_func) (MUL_TYPE); |
| typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE); |
| |
| /* This describes an operation like 'add two registers' or 'left-shift |
| by 7'. |
| |
| We call something a 'unary' operator if it only takes in one |
| register as input, even though it might have an implicit second |
| constant operand. Currently this is used for left-shift by |
| constant. */ |
| class Operator |
| { |
| public: |
| /* Construct for a binary operator. */ |
| Operator (const char *pattern, const char *name, binary_op_func func, |
| int cost) |
| : m_pattern (pattern), m_name (name), m_top_index (-1), |
| m_unary_func (0), m_binary_func (func), m_cost (cost), |
| m_rhs_if_unary (0) |
| { |
| } |
| |
| /* Construct for a unary operator. */ |
| Operator (const char *pattern, const char *name, unary_op_func func, |
| int rhs_if_unary, int cost) |
| : m_pattern (pattern), m_name (name), m_top_index (-1), |
| m_unary_func (func), m_binary_func (0), m_cost (cost), |
| m_rhs_if_unary (rhs_if_unary) |
| { |
| } |
| |
| bool is_unary () const |
| { |
| return m_binary_func == NULL; |
| } |
| |
| /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3. */ |
| const char *m_pattern; |
| |
| /* Name of the opcode for this operation, e.g. add. */ |
| const char *m_name; |
| |
| /* We don't have enough bits in our output representation to store |
| the original insn_code value, so we store a compressed form |
| instead. These values are decoded back into insn_code via the |
| machine-generated multiply_insn_seq_decode_opcode lookup |
| table. */ |
| int m_top_index; |
| |
| /* Unary operator to apply, or NULL if this is a binary operator. */ |
| unary_op_func m_unary_func; |
| |
| /* Binary operator to apply, or NULL if this is a unary operator. */ |
| binary_op_func m_binary_func; |
| |
| /* Function of how expensive we consider this operator. Higher is |
| worse. */ |
| int m_cost; |
| |
| /* the RHS value to write into the C file if unary; used for shift |
| count. */ |
| int m_rhs_if_unary; |
| }; |
| |
| |
| /* An entry in an expression DAG. */ |
| class Expr |
| { |
| public: |
| Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0), |
| m_critical_path_length (0) |
| { |
| } |
| |
| /* The operator being applied to the operands. */ |
| const Operator *m_op; |
| |
| /* The index of the left-hand operand in the array of subexpressions |
| already computed. */ |
| int m_lhs; |
| |
| /* For binary ops ,this is the index of the left-hand operand in the |
| array of subexpressions already computed. For unary ops, it is |
| specific to the op (e.g. it might hold a constant shift |
| count). */ |
| int m_rhs; |
| |
| /* The multiplier produced by this expression tree. For example, for |
| the tree ((x << 5) + x), the value would be 33. */ |
| MUL_TYPE m_produced_val; |
| |
| /* How far is this expression from the root, i.e. how many cycles |
| minimum will it take to compute this? */ |
| int m_critical_path_length; |
| }; |
| |
| |
| /* Make function pointers for the various linear operators we can |
| apply to compute a multiplicative value. */ |
| |
| static MUL_TYPE |
| add (MUL_TYPE n1, MUL_TYPE n2) |
| { |
| return n1 + n2; |
| } |
| |
| static MUL_TYPE |
| sub (MUL_TYPE n1, MUL_TYPE n2) |
| { |
| return n1 - n2; |
| } |
| |
| static MUL_TYPE |
| s1a (MUL_TYPE n1, MUL_TYPE n2) |
| { |
| return n1 * 2 + n2; |
| } |
| |
| static MUL_TYPE |
| s2a (MUL_TYPE n1, MUL_TYPE n2) |
| { |
| return n1 * 4 + n2; |
| } |
| |
| static MUL_TYPE |
| s3a (MUL_TYPE n1, MUL_TYPE n2) |
| { |
| return n1 * 8 + n2; |
| } |
| |
| #define SHIFT(count) \ |
| static MUL_TYPE \ |
| shift##count(MUL_TYPE n) \ |
| { \ |
| return n << (count); \ |
| } |
| |
| SHIFT (1); |
| SHIFT (2); |
| SHIFT (3); |
| SHIFT (4); |
| SHIFT (5); |
| SHIFT (6); |
| SHIFT (7); |
| SHIFT (8); |
| SHIFT (9); |
| SHIFT (10); |
| SHIFT (11); |
| SHIFT (12); |
| SHIFT (13); |
| SHIFT (14); |
| SHIFT (15); |
| SHIFT (16); |
| SHIFT (17); |
| SHIFT (18); |
| SHIFT (19); |
| SHIFT (20); |
| SHIFT (21); |
| SHIFT (22); |
| SHIFT (23); |
| SHIFT (24); |
| SHIFT (25); |
| SHIFT (26); |
| SHIFT (27); |
| SHIFT (28); |
| SHIFT (29); |
| SHIFT (30); |
| SHIFT (31); |
| #ifndef TILEPRO |
| SHIFT (32); |
| SHIFT (33); |
| SHIFT (34); |
| SHIFT (35); |
| SHIFT (36); |
| SHIFT (37); |
| SHIFT (38); |
| SHIFT (39); |
| SHIFT (40); |
| SHIFT (41); |
| SHIFT (42); |
| SHIFT (43); |
| SHIFT (44); |
| SHIFT (45); |
| SHIFT (46); |
| SHIFT (47); |
| SHIFT (48); |
| SHIFT (49); |
| SHIFT (50); |
| SHIFT (51); |
| SHIFT (52); |
| SHIFT (53); |
| SHIFT (54); |
| SHIFT (55); |
| SHIFT (56); |
| SHIFT (57); |
| SHIFT (58); |
| SHIFT (59); |
| SHIFT (60); |
| SHIFT (61); |
| SHIFT (62); |
| SHIFT (63); |
| #endif |
| |
| #ifdef TILEPRO |
| static Operator ops[] = { |
| Operator ("CODE_FOR_addsi3", "add", add, 1040), |
| Operator ("CODE_FOR_subsi3", "sub", sub, 1041), |
| Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042), |
| Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043), |
| Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044), |
| /* Note: shl by 1 is not necessary, since adding a value to itself |
| produces the same result. But the architecture team thinks |
| left-shift may use slightly less power, so we might as well |
| prefer it. */ |
| Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030), |
| Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031) |
| }; |
| #else |
| static Operator ops[] = { |
| Operator ("CODE_FOR_adddi3", "add", add, 1070), |
| Operator ("CODE_FOR_subdi3", "sub", sub, 1071), |
| Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072), |
| Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073), |
| Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074), |
| // Note: shl by 1 is not necessary, since adding a value to itself |
| // produces the same result. But the architecture team thinks left-shift |
| // may use slightly less power, so we might as well prefer it. |
| Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001), |
| Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002), |
| Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003), |
| Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004), |
| Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005), |
| Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006), |
| Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007), |
| Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008), |
| Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009), |
| Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010), |
| Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011), |
| Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012), |
| Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013), |
| Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014), |
| Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015), |
| Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016), |
| Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017), |
| Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018), |
| Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019), |
| Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020), |
| Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021), |
| Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022), |
| Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023), |
| Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024), |
| Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025), |
| Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026), |
| Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027), |
| Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028), |
| Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029), |
| Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030), |
| Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031), |
| Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032), |
| Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033), |
| Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034), |
| Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035), |
| Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036), |
| Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037), |
| Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038), |
| Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039), |
| Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040), |
| Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041), |
| Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042), |
| Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043), |
| Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044), |
| Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045), |
| Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046), |
| Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047), |
| Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048), |
| Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049), |
| Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050), |
| Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051), |
| Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052), |
| Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053), |
| Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054), |
| Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055), |
| Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056), |
| Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057), |
| Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058), |
| Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059), |
| Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060), |
| Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061), |
| Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062), |
| Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063) |
| }; |
| #endif |
| |
| /* An ExpressionTree is an expression DAG. */ |
| class ExpressionTree |
| { |
| public: |
| ExpressionTree () : m_num_vals (2) |
| { |
| m_exprs[0].m_produced_val = 0; |
| m_exprs[1].m_produced_val = 1; |
| } |
| |
| void copy_technique_from (ExpressionTree * other) |
| { |
| /* TODO: write this; other can compute the same products with less |
| cost. We update this ExpressionTree object because some int is |
| already mapped to it. */ |
| } |
| |
| int m_num_vals; |
| Expr m_exprs[MAX_SUBEXPRS]; |
| |
| int cost () const |
| { |
| int cost = 0; |
| for (int j = 2; j < m_num_vals; j++) |
| cost += m_exprs[j].m_op->m_cost; |
| return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000; |
| } |
| }; |
| |
| |
| typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap; |
| |
| |
| static void |
| find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution) |
| { |
| /* Don't look more if we can't add any new values to the expression |
| tree. */ |
| const int num_vals = s.m_num_vals; |
| if (num_vals == MAX_SUBEXPRS) |
| return; |
| |
| /* Grow array to make room for new values added as we loop. */ |
| s.m_num_vals = num_vals + 1; |
| |
| const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op; |
| const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1; |
| |
| for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++) |
| { |
| const Operator *const op = &ops[f]; |
| |
| for (int i = 0; i < num_vals; i++) |
| { |
| /* Only allow zero as the first operand to sub, otherwise |
| it is useless. */ |
| if (i == 0 && op->m_binary_func != sub) |
| continue; |
| |
| /* Unary ops don't actually use the second operand, so as a |
| big hack we trick it into only looping once in the inner |
| loop. */ |
| const int j_end = op->is_unary () ? 2 : num_vals; |
| |
| /* We never allow zero as the second operand, as it is |
| always useless. */ |
| for (int j = 1; j < j_end; j++) |
| { |
| /* Does this expression use the immediately previous |
| expression? */ |
| const bool uses_prev_value = |
| (i == num_vals - 1 |
| || (!op->is_unary () && j == num_vals - 1)); |
| |
| if (!uses_prev_value) |
| { |
| /* For search efficiency, prune redundant |
| instruction orderings. |
| |
| This op does not take the immediately preceding |
| value as input, which means we could have done it |
| in the previous slot. If its opcode is less than |
| the previous instruction's, we'll declare this |
| instruction order non-canonical and not pursue |
| this branch of the search. */ |
| if (op->m_top_index < prev_top_index) |
| continue; |
| } |
| |
| MUL_TYPE n; |
| if (op->is_unary ()) |
| { |
| n = op->m_unary_func (s.m_exprs[i].m_produced_val); |
| } |
| else |
| { |
| n = op->m_binary_func (s.m_exprs[i].m_produced_val, |
| s.m_exprs[j].m_produced_val); |
| } |
| |
| bool duplicate = false; |
| for (int k = 0; k < num_vals; k++) |
| if (n == s.m_exprs[j].m_produced_val) |
| { |
| duplicate = true; |
| break; |
| } |
| |
| if (duplicate) |
| continue; |
| |
| /* See if we found the best solution for n. */ |
| Expr *e = &s.m_exprs[num_vals]; |
| e->m_op = op; |
| e->m_lhs = i; |
| e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j; |
| e->m_produced_val = n; |
| e->m_critical_path_length = |
| 1 + MAX (s.m_exprs[i].m_critical_path_length, |
| s.m_exprs[j].m_critical_path_length); |
| |
| ExpressionTreeMap::iterator best (best_solution.find (n)); |
| if (best == best_solution.end () |
| || (*best).second->cost () > s.cost ()) |
| best_solution[n] = new ExpressionTree (s); |
| |
| /* Recurse and find more. */ |
| find_sequences (s, best_solution); |
| } |
| } |
| } |
| |
| /* Restore old size. */ |
| s.m_num_vals = num_vals; |
| } |
| |
| |
| /* For each insn_code used by an operator, choose a compact number so |
| it takes less space in the output data structure. This prints out a |
| lookup table used to map the compactified number back to an |
| insn_code. */ |
| static void |
| create_insn_code_compression_table () |
| { |
| int next_index = 1; |
| |
| /* Entry 0 must hold CODE_FOR_nothing to mean "end of array". */ |
| printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n" |
| " CODE_FOR_nothing /* must be first */ ", ARCH); |
| |
| for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++) |
| { |
| Operator *op = &ops[i]; |
| int index = -1; |
| |
| /* See if some previous Operator was using the same insn_code. |
| If so, reuse its table entry. */ |
| for (size_t j = 0; j < i; j++) |
| { |
| Operator *old = &ops[j]; |
| if (strcmp (old->m_pattern, op->m_pattern) == 0) |
| { |
| index = old->m_top_index; |
| break; |
| } |
| } |
| |
| if (index == -1) |
| { |
| /* We need to make a new entry in the table. */ |
| printf (",\n %s", op->m_pattern); |
| index = next_index++; |
| } |
| |
| op->m_top_index = index; |
| } |
| |
| printf ("\n};\n\n"); |
| } |
| |
| |
| /* These are constants we've seen in code, that we want to keep table |
| entries for. */ |
| static int multiply_constants_used[] = { |
| -11796480, |
| -27439, |
| -25148, |
| -22820, |
| -21709, |
| -20995, |
| -20284, |
| -20239, |
| -19595, |
| -19447, |
| -19183, |
| -19165, |
| -18730, |
| -17828, |
| -17799, |
| -17237, |
| -17036, |
| -16549, |
| -16423, |
| -16294, |
| -16244, |
| -16069, |
| -15137, |
| -15083, |
| -15038, |
| -14924, |
| -14905, |
| -14752, |
| -14731, |
| -14529, |
| -14273, |
| -14090, |
| -14084, |
| -14043, |
| -13850, |
| -13802, |
| -13631, |
| -13455, |
| -13275, |
| -12879, |
| -12700, |
| -12534, |
| -12399, |
| -12131, |
| -12112, |
| -12054, |
| -12019, |
| -11759, |
| -11585, |
| -11467, |
| -11395, |
| -11295, |
| -11248, |
| -11148, |
| -11116, |
| -11086, |
| -11059, |
| -11018, |
| -10811, |
| -10538, |
| -10258, |
| -10217, |
| -10033, |
| -9766, |
| -9754, |
| -9534, |
| -9527, |
| -9467, |
| -9262, |
| -9232, |
| -9222, |
| -9198, |
| -9191, |
| -9113, |
| -8825, |
| -8756, |
| -8697, |
| -8693, |
| -8565, |
| -8342, |
| -8208, |
| -8200, |
| -8170, |
| -8102, |
| -7770, |
| -7678, |
| -7562, |
| -7376, |
| -7373, |
| -7221, |
| -7121, |
| -6835, |
| -6810, |
| -6626, |
| -6581, |
| -6461, |
| -6278, |
| -6263, |
| -6163, |
| -6029, |
| -5816, |
| -5540, |
| -5461, |
| -5384, |
| -5329, |
| -4985, |
| -4926, |
| -4815, |
| -4788, |
| -4758, |
| -4433, |
| -4229, |
| -4209, |
| -4176, |
| -4104, |
| -4095, |
| -4078, |
| -3941, |
| -3818, |
| -3600, |
| -3474, |
| -3314, |
| -3264, |
| -3196, |
| -3072, |
| -2912, |
| -2836, |
| -2773, |
| -2269, |
| -2184, |
| -2100, |
| -1730, |
| -1512, |
| -1500, |
| -1396, |
| -1344, |
| -1312, |
| -1297, |
| -1059, |
| -1058, |
| 1027, |
| 1049, |
| 1059, |
| 1100, |
| 1104, |
| 1108, |
| 1136, |
| 1200, |
| 1204, |
| 1242, |
| 1292, |
| 1304, |
| 1312, |
| 1320, |
| 1336, |
| 1344, |
| 1348, |
| 1360, |
| 1364, |
| 1395, |
| 1448, |
| 1460, |
| 1461, |
| 1472, |
| 1488, |
| 1500, |
| 1512, |
| 1568, |
| 1576, |
| 1649, |
| 1664, |
| 1684, |
| 1696, |
| 1744, |
| 1812, |
| 1822, |
| 1884, |
| 1963, |
| 1978, |
| 2000, |
| 2012, |
| 2014, |
| 2037, |
| 2039, |
| 2100, |
| 2139, |
| 2144, |
| 2184, |
| 2237, |
| 2260, |
| 2320, |
| 2408, |
| 2446, |
| 2447, |
| 2499, |
| 2531, |
| 2578, |
| 2592, |
| 2611, |
| 2633, |
| 2704, |
| 2730, |
| 2773, |
| 2880, |
| 2896, |
| 2998, |
| 3000, |
| 3001, |
| 3021, |
| 3079, |
| 3112, |
| 3150, |
| 3179, |
| 3192, |
| 3240, |
| 3264, |
| 3271, |
| 3283, |
| 3328, |
| 3363, |
| 3367, |
| 3453, |
| 3529, |
| 3570, |
| 3580, |
| 3600, |
| 3624, |
| 3707, |
| 3783, |
| 3826, |
| 3897, |
| 3941, |
| 3962, |
| 3989, |
| 4000, |
| 4025, |
| 4073, |
| 4093, |
| 4099, |
| 4108, |
| 4184, |
| 4209, |
| 4369, |
| 4376, |
| 4416, |
| 4433, |
| 4434, |
| 4482, |
| 4582, |
| 4712, |
| 4717, |
| 4813, |
| 4815, |
| 4864, |
| 5000, |
| 5027, |
| 5040, |
| 5091, |
| 5195, |
| 5243, |
| 5260, |
| 5285, |
| 5329, |
| 5331, |
| 5350, |
| 5361, |
| 5387, |
| 5461, |
| 5492, |
| 5529, |
| 5573, |
| 5793, |
| 5819, |
| 5915, |
| 5946, |
| 5992, |
| 6000, |
| 6164, |
| 6205, |
| 6262, |
| 6263, |
| 6269, |
| 6270, |
| 6387, |
| 6400, |
| 6406, |
| 6476, |
| 6541, |
| 6565, |
| 6568, |
| 6626, |
| 6656, |
| 6732, |
| 6810, |
| 6817, |
| 6859, |
| 7040, |
| 7053, |
| 7141, |
| 7169, |
| 7221, |
| 7223, |
| 7274, |
| 7282, |
| 7350, |
| 7369, |
| 7373, |
| 7442, |
| 7447, |
| 7471, |
| 7518, |
| 7542, |
| 7566, |
| 7587, |
| 7663, |
| 7678, |
| 7682, |
| 7748, |
| 7752, |
| 7791, |
| 8000, |
| 8026, |
| 8048, |
| 8170, |
| 8203, |
| 8204, |
| 8290, |
| 8368, |
| 8520, |
| 8640, |
| 8666, |
| 8672, |
| 8697, |
| 8716, |
| 8728, |
| 8756, |
| 8820, |
| 8875, |
| 8918, |
| 8956, |
| 9058, |
| 9154, |
| 9175, |
| 9191, |
| 9217, |
| 9262, |
| 9321, |
| 9373, |
| 9434, |
| 9465, |
| 9514, |
| 9534, |
| 9633, |
| 9746, |
| 9810, |
| 9850, |
| 9947, |
| 9973, |
| 10000, |
| 10009, |
| 10033, |
| 10055, |
| 10217, |
| 10248, |
| 10298, |
| 10310, |
| 10323, |
| 10368, |
| 10438, |
| 10456, |
| 10486, |
| 10538, |
| 10664, |
| 10695, |
| 10700, |
| 10703, |
| 10832, |
| 10887, |
| 10935, |
| 10958, |
| 11018, |
| 11059, |
| 11061, |
| 11086, |
| 11116, |
| 11148, |
| 11190, |
| 11249, |
| 11314, |
| 11332, |
| 11363, |
| 11409, |
| 11415, |
| 11443, |
| 11467, |
| 11512, |
| 11522, |
| 11529, |
| 11585, |
| 11759, |
| 11768, |
| 11795, |
| 11893, |
| 11997, |
| 12131, |
| 12299, |
| 12536, |
| 12543, |
| 12893, |
| 12945, |
| 12998, |
| 13109, |
| 13213, |
| 13685, |
| 13930, |
| 14023, |
| 14024, |
| 14271, |
| 14564, |
| 14647, |
| 15326, |
| 15850, |
| 15855, |
| 15929, |
| 16000, |
| 16154, |
| 16496, |
| 16807, |
| 16819, |
| 16984, |
| 17203, |
| 17223, |
| 17333, |
| 17760, |
| 17799, |
| 17837, |
| 18029, |
| 18068, |
| 18336, |
| 18515, |
| 19595, |
| 20017, |
| 20131, |
| 20862, |
| 20995, |
| 21709, |
| 22554, |
| 25000, |
| 25172, |
| 25600, |
| 25733, |
| 27439, |
| 38470, |
| 46802, |
| 50000, |
| 11796480, |
| 16843009, |
| 23592960, |
| }; |
| |
| |
| const int num_mult_constants_used = |
| (int)(sizeof multiply_constants_used |
| / sizeof multiply_constants_used[0]); |
| |
| |
| #define XSIZE (sizeof multiply_constants_used / \ |
| sizeof multiply_constants_used[0] + 32) / 32 |
| unsigned multiply_constants_avail[XSIZE]; |
| #undef XSIZE |
| |
| |
| /* bsearch helper function. */ |
| static int |
| compare_constants (const void *key, const void *t) |
| { |
| return (*(int*)key) - *((int*)t); |
| } |
| |
| |
| static int * |
| find_mult_constants_used (int multiplier) |
| { |
| return (int *) bsearch (&multiplier, multiply_constants_used, |
| num_mult_constants_used, |
| sizeof multiply_constants_used[0], |
| compare_constants); |
| } |
| |
| |
| int num_ops (ExpressionTree *s) |
| { |
| int n = 0; |
| for (int i = 0; i < s->m_num_vals; i++) |
| { |
| Expr *e = &s->m_exprs[i]; |
| if (e->m_op != NULL) |
| n++; |
| } |
| return n; |
| } |
| |
| |
| #ifdef TILEPRO |
| bool |
| tilepro_emit (int multiplier, int num_ops) |
| { |
| int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier; |
| |
| /* Keep constants in range [-1024, 1024]. */ |
| if (abs_multiplier <= 1024) |
| return true; |
| |
| /* Keep constants near powers of two. */ |
| int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier)); |
| int next_pow2 = prev_pow2 << 1; |
| |
| if ((abs_multiplier - prev_pow2 <= 10) |
| || (next_pow2 - abs_multiplier <= 10)) |
| return true; |
| |
| /* Keep constants near powers of ten. */ |
| { |
| long long j = 1; |
| long long prev_pow10; |
| long long next_pow10; |
| |
| while (((j * 10) < abs_multiplier) |
| && (j < (j * 10))) |
| j = j * 10; |
| |
| prev_pow10 = j; |
| next_pow10 = j * 10; |
| |
| if ((abs_multiplier - prev_pow10 <= 10) |
| || (next_pow10 - abs_multiplier <= 10)) |
| return true; |
| } |
| |
| /* Keep short sequences that have two or fewer ops. */ |
| if (num_ops <= 2) |
| return true; |
| |
| /* Keep constants that are mostly 0's or mostly 1's. */ |
| if (__builtin_popcount (multiplier) <= 2 || |
| __builtin_popcount (multiplier) >= 30) |
| return true; |
| |
| /* Keep constants seen in actual code. */ |
| if ((find_mult_constants_used (multiplier))) |
| return true; |
| |
| return false; |
| } |
| #else |
| bool |
| tilegx_emit (long long multiplier, int num_ops) |
| { |
| long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier; |
| |
| /* Keep constants in range [-1024, 1024]. */ |
| if (abs_multiplier <= 1024) |
| return true; |
| |
| /* Otherwise exclude sequences with four ops. */ |
| if (num_ops > 3) |
| return false; |
| |
| /* Keep constants near powers of two. */ |
| { |
| unsigned long long prev_pow2 = |
| 1LL << (63 - __builtin_clzll (abs_multiplier)); |
| unsigned long long next_pow2 = prev_pow2 << 1; |
| |
| /* handle overflow case. */ |
| if (next_pow2 == 0) |
| { |
| if (prev_pow2 - abs_multiplier <= 10) |
| return true; |
| } |
| else if ((abs_multiplier - prev_pow2 <= 10) |
| || (next_pow2 - abs_multiplier <= 10)) |
| return true; |
| } |
| |
| /* Keep constants near powers of ten. */ |
| { |
| long long j = 1; |
| long long prev_pow10; |
| long long next_pow10; |
| |
| while (((j * 10) < abs_multiplier) |
| && (j < (INTMAX_MAX / 10))) |
| j = j * 10; |
| |
| prev_pow10 = j; |
| next_pow10 = (j > (INTMAX_MAX / 10)) ? 0 : j * 10; |
| |
| if ((abs_multiplier - prev_pow10 <= 100) |
| || (next_pow10 |
| && (next_pow10 - abs_multiplier <= 100))) |
| return true; |
| } |
| |
| if (num_ops <= 2) |
| return true; |
| |
| /* Keep constants that are mostly 0's or mostly 1's. */ |
| if (__builtin_popcountll (multiplier) <= 2 || |
| __builtin_popcountll (multiplier) >= 62) |
| return true; |
| |
| /* Keep constants seen in actual code. */ |
| if (find_mult_constants_used (multiplier)) |
| return true; |
| |
| return false; |
| } |
| #endif |
| |
| |
| int |
| main () |
| { |
| ExpressionTreeMap best_solution; |
| ExpressionTree s; |
| |
| #ifdef TILEPRO |
| printf ("/* Constant multiply table for TILEPro.\n"); |
| #else |
| printf ("/* Constant multiply table for TILE-Gx.\n"); |
| #endif |
| printf (" Copyright (C) 2011-2021 Free Software Foundation, Inc.\n"); |
| printf (" Contributed by Walter Lee (walt@tilera.com)\n"); |
| printf ("\n"); |
| printf (" This file is part of GCC.\n"); |
| printf ("\n"); |
| printf (" GCC is free software; you can redistribute it and/or modify it\n"); |
| printf (" under the terms of the GNU General Public License as published\n"); |
| printf (" by the Free Software Foundation; either version 3, or (at your\n"); |
| printf (" option) any later version.\n"); |
| printf ("\n"); |
| printf (" GCC is distributed in the hope that it will be useful, but WITHOUT\n"); |
| printf (" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n"); |
| printf (" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n"); |
| printf (" License for more details.\n"); |
| printf ("\n"); |
| printf (" You should have received a copy of the GNU General Public License\n"); |
| printf (" along with GCC; see the file COPYING3. If not see\n"); |
| printf (" <http://www.gnu.org/licenses/>. */\n"); |
| printf ("\n"); |
| printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n"); |
| printf (" Make any required changes there. */\n"); |
| printf ("\n"); |
| printf ("#define IN_TARGET_CODE 1\n"); |
| printf ("\n"); |
| printf ("#include \"config.h\"\n"); |
| printf ("#include \"system.h\"\n"); |
| printf ("#include \"coretypes.h\"\n"); |
| printf ("#include \"backend.h\"\n"); |
| printf ("#include \"rtl.h\"\n"); |
| printf ("#include \"expmed.h\"\n"); |
| printf ("#include \"%s-multiply.h\"\n\n", ARCH); |
| create_insn_code_compression_table (); |
| |
| /* Try all combinations of operators and see what constants we |
| produce. For each possible constant, record the most efficient |
| way to generate it. */ |
| find_sequences (s, best_solution); |
| |
| printf ("const struct %s_multiply_insn_seq " |
| "%s_multiply_insn_seq_table[] = {\n", |
| ARCH, ARCH); |
| |
| const char *comma_separator = ""; |
| |
| ExpressionTreeMap::iterator i (best_solution.begin ()); |
| for (; i != best_solution.end (); ++i) |
| { |
| ExpressionTree *s = (*i).second; |
| const MUL_TYPE n = (*i).first; |
| |
| if (n == 0 || n == 1) |
| { |
| /* Both of these require zero operations, so these entries |
| are bogus and should never be used. */ |
| continue; |
| } |
| |
| /* Prune the list of entries to keep the table to a reasonable |
| size. */ |
| #ifdef TILEPRO |
| if (!tilepro_emit (n, num_ops (s))) |
| continue; |
| #else |
| if (!tilegx_emit (n, num_ops (s))) |
| continue; |
| #endif |
| |
| printf ("%s", comma_separator); |
| |
| #ifdef TILEPRO |
| const MUL_TYPE int_min = INT32_MIN; |
| #else |
| const MUL_TYPE int_min = INT64_MIN; |
| #endif |
| if (n == int_min) |
| { |
| /* Handle C's woeful lack of unary negation. Without this, |
| printing out INT_MIN in decimal will yield an unsigned |
| int which could generate a compiler warning. */ |
| #ifdef TILEPRO |
| printf (" {%d - 1 /* 0x%x */ ,\n {", n + 1, |
| (unsigned) n); |
| #else |
| printf (" {%lldll - 1 /* 0x%llx */ ,\n {", n + 1, |
| (unsigned MUL_TYPE) n); |
| #endif |
| } |
| else |
| { |
| #ifdef TILEPRO |
| printf (" {%d /* 0x%x */ ,\n {", n, (unsigned) n); |
| #else |
| printf (" {%lldll /* 0x%llx */ ,\n {", n, (unsigned MUL_TYPE) n); |
| #endif |
| } |
| |
| bool first = true; |
| for (int j = 0; j < s->m_num_vals; j++) |
| { |
| Expr *e = &s->m_exprs[j]; |
| |
| const Operator *op = e->m_op; |
| if (op == NULL) |
| continue; |
| |
| char buf[1024]; |
| snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s", |
| first ? "" : " ", |
| op->m_top_index, |
| e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ","); |
| |
| char opnd0[10]; |
| if (e->m_lhs) |
| snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs); |
| else |
| snprintf (opnd0, sizeof opnd0, "zero"); |
| |
| printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n", |
| buf, op->m_name, j, opnd0, |
| op->is_unary () ? "" : "r", e->m_rhs); |
| |
| first = false; |
| } |
| printf (" }"); |
| comma_separator = ",\n"; |
| } |
| |
| printf ("\n};\n\n"); |
| printf ("const int %s_multiply_insn_seq_table_size =\n" |
| " (int) (sizeof %s_multiply_insn_seq_table\n" |
| " / sizeof %s_multiply_insn_seq_table[0]);\n", |
| ARCH, ARCH, ARCH); |
| |
| return EXIT_SUCCESS; |
| } |