| /* Subroutines for the gcc driver. |
| Copyright (C) 2006-2015 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| |
| const char *host_detect_local_cpu (int argc, const char **argv); |
| |
| #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__)) |
| #include "cpuid.h" |
| |
/* Parameters of a single cache level, as gathered from CPUID.  */
struct cache_desc
{
  unsigned int sizekb;  /* Cache size, in kilobytes.  */
  unsigned int assoc;   /* Associativity of the cache.  */
  unsigned int line;    /* Cache line size.  */
};
| |
| /* Returns command line parameters that describe size and |
| cache line size of the processor caches. */ |
| |
| static char * |
| describe_cache (struct cache_desc level1, struct cache_desc level2) |
| { |
| char size[100], line[100], size2[100]; |
| |
| /* At the moment, gcc does not use the information |
| about the associativity of the cache. */ |
| |
| snprintf (size, sizeof (size), |
| "--param l1-cache-size=%u ", level1.sizekb); |
| snprintf (line, sizeof (line), |
| "--param l1-cache-line-size=%u ", level1.line); |
| |
| snprintf (size2, sizeof (size2), |
| "--param l2-cache-size=%u ", level2.sizekb); |
| |
| return concat (size, line, size2, NULL); |
| } |
| |
| /* Detect L2 cache parameters using CPUID extended function 0x80000006. */ |
| |
| static void |
| detect_l2_cache (struct cache_desc *level2) |
| { |
| unsigned eax, ebx, ecx, edx; |
| unsigned assoc; |
| |
| __cpuid (0x80000006, eax, ebx, ecx, edx); |
| |
| level2->sizekb = (ecx >> 16) & 0xffff; |
| level2->line = ecx & 0xff; |
| |
| assoc = (ecx >> 12) & 0xf; |
| if (assoc == 6) |
| assoc = 8; |
| else if (assoc == 8) |
| assoc = 16; |
| else if (assoc >= 0xa && assoc <= 0xc) |
| assoc = 32 + (assoc - 0xa) * 16; |
| else if (assoc >= 0xd && assoc <= 0xe) |
| assoc = 96 + (assoc - 0xd) * 32; |
| |
| level2->assoc = assoc; |
| } |
| |
| /* Returns the description of caches for an AMD processor. */ |
| |
| static const char * |
| detect_caches_amd (unsigned max_ext_level) |
| { |
| unsigned eax, ebx, ecx, edx; |
| |
| struct cache_desc level1, level2 = {0, 0, 0}; |
| |
| if (max_ext_level < 0x80000005) |
| return ""; |
| |
| __cpuid (0x80000005, eax, ebx, ecx, edx); |
| |
| level1.sizekb = (ecx >> 24) & 0xff; |
| level1.assoc = (ecx >> 16) & 0xff; |
| level1.line = ecx & 0xff; |
| |
| if (max_ext_level >= 0x80000006) |
| detect_l2_cache (&level2); |
| |
| return describe_cache (level1, level2); |
| } |
| |
| /* Decodes the size, the associativity and the cache line size of |
| L1/L2 caches of an Intel processor. Values are based on |
| "Intel Processor Identification and the CPUID Instruction" |
| [Application Note 485], revision -032, December 2007. */ |
| |
| static void |
| decode_caches_intel (unsigned reg, bool xeon_mp, |
| struct cache_desc *level1, struct cache_desc *level2) |
| { |
| int i; |
| |
| for (i = 24; i >= 0; i -= 8) |
| switch ((reg >> i) & 0xff) |
| { |
| case 0x0a: |
| level1->sizekb = 8; level1->assoc = 2; level1->line = 32; |
| break; |
| case 0x0c: |
| level1->sizekb = 16; level1->assoc = 4; level1->line = 32; |
| break; |
| case 0x0d: |
| level1->sizekb = 16; level1->assoc = 4; level1->line = 64; |
| break; |
| case 0x0e: |
| level1->sizekb = 24; level1->assoc = 6; level1->line = 64; |
| break; |
| case 0x21: |
| level2->sizekb = 256; level2->assoc = 8; level2->line = 64; |
| break; |
| case 0x24: |
| level2->sizekb = 1024; level2->assoc = 16; level2->line = 64; |
| break; |
| case 0x2c: |
| level1->sizekb = 32; level1->assoc = 8; level1->line = 64; |
| break; |
| case 0x39: |
| level2->sizekb = 128; level2->assoc = 4; level2->line = 64; |
| break; |
| case 0x3a: |
| level2->sizekb = 192; level2->assoc = 6; level2->line = 64; |
| break; |
| case 0x3b: |
| level2->sizekb = 128; level2->assoc = 2; level2->line = 64; |
| break; |
| case 0x3c: |
| level2->sizekb = 256; level2->assoc = 4; level2->line = 64; |
| break; |
| case 0x3d: |
| level2->sizekb = 384; level2->assoc = 6; level2->line = 64; |
| break; |
| case 0x3e: |
| level2->sizekb = 512; level2->assoc = 4; level2->line = 64; |
| break; |
| case 0x41: |
| level2->sizekb = 128; level2->assoc = 4; level2->line = 32; |
| break; |
| case 0x42: |
| level2->sizekb = 256; level2->assoc = 4; level2->line = 32; |
| break; |
| case 0x43: |
| level2->sizekb = 512; level2->assoc = 4; level2->line = 32; |
| break; |
| case 0x44: |
| level2->sizekb = 1024; level2->assoc = 4; level2->line = 32; |
| break; |
| case 0x45: |
| level2->sizekb = 2048; level2->assoc = 4; level2->line = 32; |
| break; |
| case 0x48: |
| level2->sizekb = 3072; level2->assoc = 12; level2->line = 64; |
| break; |
| case 0x49: |
| if (xeon_mp) |
| break; |
| level2->sizekb = 4096; level2->assoc = 16; level2->line = 64; |
| break; |
| case 0x4e: |
| level2->sizekb = 6144; level2->assoc = 24; level2->line = 64; |
| break; |
| case 0x60: |
| level1->sizekb = 16; level1->assoc = 8; level1->line = 64; |
| break; |
| case 0x66: |
| level1->sizekb = 8; level1->assoc = 4; level1->line = 64; |
| break; |
| case 0x67: |
| level1->sizekb = 16; level1->assoc = 4; level1->line = 64; |
| break; |
| case 0x68: |
| level1->sizekb = 32; level1->assoc = 4; level1->line = 64; |
| break; |
| case 0x78: |
| level2->sizekb = 1024; level2->assoc = 4; level2->line = 64; |
| break; |
| case 0x79: |
| level2->sizekb = 128; level2->assoc = 8; level2->line = 64; |
| break; |
| case 0x7a: |
| level2->sizekb = 256; level2->assoc = 8; level2->line = 64; |
| break; |
| case 0x7b: |
| level2->sizekb = 512; level2->assoc = 8; level2->line = 64; |
| break; |
| case 0x7c: |
| level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; |
| break; |
| case 0x7d: |
| level2->sizekb = 2048; level2->assoc = 8; level2->line = 64; |
| break; |
| case 0x7f: |
| level2->sizekb = 512; level2->assoc = 2; level2->line = 64; |
| break; |
| case 0x80: |
| level2->sizekb = 512; level2->assoc = 8; level2->line = 64; |
| break; |
| case 0x82: |
| level2->sizekb = 256; level2->assoc = 8; level2->line = 32; |
| break; |
| case 0x83: |
| level2->sizekb = 512; level2->assoc = 8; level2->line = 32; |
| break; |
| case 0x84: |
| level2->sizekb = 1024; level2->assoc = 8; level2->line = 32; |
| break; |
| case 0x85: |
| level2->sizekb = 2048; level2->assoc = 8; level2->line = 32; |
| break; |
| case 0x86: |
| level2->sizekb = 512; level2->assoc = 4; level2->line = 64; |
| break; |
| case 0x87: |
| level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; |
| |
| default: |
| break; |
| } |
| } |
| |
/* Detect cache parameters using CPUID function 2.  The low four bits
   of EAX from the first query give the number of decoding passes; every
   pass after the first one re-executes CPUID function 2 beforehand.
   XEON_MP is forwarded to decode_caches_intel, which fills *LEVEL1 and
   *LEVEL2.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int passes, pass, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* Extract the repeat count and strip it from the first register so
     that it is not decoded as a descriptor.  */
  passes = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (pass = 0; pass < passes; pass++)
    {
      if (pass > 0)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

      /* Only non-zero registers with bit 31 clear are decoded.  */
      for (r = 0; r < 4; r++)
        {
          if (regs[r] == 0 || ((regs[r] >> 31) & 1) != 0)
            continue;
          decode_caches_intel (regs[r], xeon_mp, level1, level2);
        }
    }
}
| |
| /* Detect cache parameters using CPUID function 4. This |
| method doesn't require hardcoded tables. */ |
| |
/* Cache type codes, compared against the low five bits of EAX returned
   by CPUID function 4.  */
enum cache_type
{
  CACHE_END = 0,      /* No more caches; stops the enumeration.  */
  CACHE_DATA = 1,     /* Data cache.  */
  CACHE_INST = 2,     /* Instruction cache.  */
  CACHE_UNIFIED = 3   /* Unified cache.  */
};
| |
| static void |
| detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2, |
| struct cache_desc *level3) |
| { |
| struct cache_desc *cache; |
| |
| unsigned eax, ebx, ecx, edx; |
| int count; |
| |
| for (count = 0;; count++) |
| { |
| __cpuid_count(4, count, eax, ebx, ecx, edx); |
| switch (eax & 0x1f) |
| { |
| case CACHE_END: |
| return; |
| case CACHE_DATA: |
| case CACHE_UNIFIED: |
| { |
| switch ((eax >> 5) & 0x07) |
| { |
| case 1: |
| cache = level1; |
| break; |
| case 2: |
| cache = level2; |
| break; |
| case 3: |
| cache = level3; |
| break; |
| default: |
| cache = NULL; |
| } |
| |
| if (cache) |
| { |
| unsigned sets = ecx + 1; |
| unsigned part = ((ebx >> 12) & 0x03ff) + 1; |
| |
| cache->assoc = ((ebx >> 22) & 0x03ff) + 1; |
| cache->line = (ebx & 0x0fff) + 1; |
| |
| cache->sizekb = (cache->assoc * part |
| * cache->line * sets) / 1024; |
| } |
| } |
| default: |
| break; |
| } |
| } |
| } |
| |
| /* Returns the description of caches for an Intel processor. */ |
| |
| static const char * |
| detect_caches_intel (bool xeon_mp, unsigned max_level, |
| unsigned max_ext_level, unsigned *l2sizekb) |
| { |
| struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0}; |
| |
| if (max_level >= 4) |
| detect_caches_cpuid4 (&level1, &level2, &level3); |
| else if (max_level >= 2) |
| detect_caches_cpuid2 (xeon_mp, &level1, &level2); |
| else |
| return ""; |
| |
| if (level1.sizekb == 0) |
| return ""; |
| |
| /* Let the L3 replace the L2. This assumes inclusive caches |
| and single threaded program for now. */ |
| if (level3.sizekb) |
| level2 = level3; |
| |
| /* Intel CPUs are equipped with AMD style L2 cache info. Try this |
| method if other methods fail to provide L2 cache parameters. */ |
| if (level2.sizekb == 0 && max_ext_level >= 0x80000006) |
| detect_l2_cache (&level2); |
| |
| *l2sizekb = level2.sizekb; |
| |
| return describe_cache (level1, level2); |
| } |
| |
| /* This will be called by the spec parser in gcc.c when it sees |
| a %:local_cpu_detect(args) construct. Currently it will be called |
| with either "arch" or "tune" as argument depending on if -march=native |
| or -mtune=native is to be substituted. |
| |
| It returns a string containing new command line parameters to be |
| put at the place of the above two options, depending on what CPU |
| this is executed. E.g. "-march=k8" on an AMD64 machine |
| for -march=native. |
| |
| ARGC and ARGV are set depending on the actual arguments given |
| in the spec. */ |
| |
| const char *host_detect_local_cpu (int argc, const char **argv) |
| { |
| enum processor_type processor = PROCESSOR_I386; |
| const char *cpu = "i386"; |
| |
| const char *cache = ""; |
| const char *options = ""; |
| |
| unsigned int eax, ebx, ecx, edx; |
| |
| unsigned int max_level, ext_level; |
| |
| unsigned int vendor; |
| unsigned int model, family; |
| |
| unsigned int has_sse3, has_ssse3, has_cmpxchg16b; |
| unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; |
| |
| /* Extended features */ |
| unsigned int has_lahf_lm = 0, has_sse4a = 0; |
| unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; |
| unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; |
| unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0; |
| unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; |
| unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0; |
| unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0; |
| unsigned int has_hle = 0, has_rtm = 0; |
| unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; |
| unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; |
| unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0; |
| unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0; |
| unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0; |
| unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0; |
| unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0; |
| unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0; |
| unsigned int has_mwaitx = 0; |
| |
| bool arch; |
| |
| unsigned int l2sizekb = 0; |
| |
| if (argc < 1) |
| return NULL; |
| |
| arch = !strcmp (argv[0], "arch"); |
| |
| if (!arch && strcmp (argv[0], "tune")) |
| return NULL; |
| |
| max_level = __get_cpuid_max (0, &vendor); |
| if (max_level < 1) |
| goto done; |
| |
| __cpuid (1, eax, ebx, ecx, edx); |
| |
| model = (eax >> 4) & 0x0f; |
| family = (eax >> 8) & 0x0f; |
| if (vendor == signature_INTEL_ebx |
| || vendor == signature_AMD_ebx) |
| { |
| unsigned int extended_model, extended_family; |
| |
| extended_model = (eax >> 12) & 0xf0; |
| extended_family = (eax >> 20) & 0xff; |
| if (family == 0x0f) |
| { |
| family += extended_family; |
| model += extended_model; |
| } |
| else if (family == 0x06) |
| model += extended_model; |
| } |
| |
| has_sse3 = ecx & bit_SSE3; |
| has_ssse3 = ecx & bit_SSSE3; |
| has_sse4_1 = ecx & bit_SSE4_1; |
| has_sse4_2 = ecx & bit_SSE4_2; |
| has_avx = ecx & bit_AVX; |
| has_osxsave = ecx & bit_OSXSAVE; |
| has_cmpxchg16b = ecx & bit_CMPXCHG16B; |
| has_movbe = ecx & bit_MOVBE; |
| has_popcnt = ecx & bit_POPCNT; |
| has_aes = ecx & bit_AES; |
| has_pclmul = ecx & bit_PCLMUL; |
| has_fma = ecx & bit_FMA; |
| has_f16c = ecx & bit_F16C; |
| has_rdrnd = ecx & bit_RDRND; |
| has_xsave = ecx & bit_XSAVE; |
| |
| has_cmpxchg8b = edx & bit_CMPXCHG8B; |
| has_cmov = edx & bit_CMOV; |
| has_mmx = edx & bit_MMX; |
| has_fxsr = edx & bit_FXSAVE; |
| has_sse = edx & bit_SSE; |
| has_sse2 = edx & bit_SSE2; |
| |
| if (max_level >= 7) |
| { |
| __cpuid_count (7, 0, eax, ebx, ecx, edx); |
| |
| has_bmi = ebx & bit_BMI; |
| has_hle = ebx & bit_HLE; |
| has_rtm = ebx & bit_RTM; |
| has_avx2 = ebx & bit_AVX2; |
| has_bmi2 = ebx & bit_BMI2; |
| has_fsgsbase = ebx & bit_FSGSBASE; |
| has_rdseed = ebx & bit_RDSEED; |
| has_adx = ebx & bit_ADX; |
| has_avx512f = ebx & bit_AVX512F; |
| has_avx512er = ebx & bit_AVX512ER; |
| has_avx512pf = ebx & bit_AVX512PF; |
| has_avx512cd = ebx & bit_AVX512CD; |
| has_sha = ebx & bit_SHA; |
| has_clflushopt = ebx & bit_CLFLUSHOPT; |
| has_clwb = ebx & bit_CLWB; |
| has_avx512dq = ebx & bit_AVX512DQ; |
| has_avx512bw = ebx & bit_AVX512BW; |
| has_avx512vl = ebx & bit_AVX512VL; |
| has_avx512vl = ebx & bit_AVX512IFMA; |
| |
| has_prefetchwt1 = ecx & bit_PREFETCHWT1; |
| has_avx512vl = ecx & bit_AVX512VBMI; |
| } |
| |
| if (max_level >= 13) |
| { |
| __cpuid_count (13, 1, eax, ebx, ecx, edx); |
| |
| has_xsaveopt = eax & bit_XSAVEOPT; |
| has_xsavec = eax & bit_XSAVEC; |
| has_xsaves = eax & bit_XSAVES; |
| } |
| |
| /* Check cpuid level of extended features. */ |
| __cpuid (0x80000000, ext_level, ebx, ecx, edx); |
| |
| if (ext_level > 0x80000000) |
| { |
| __cpuid (0x80000001, eax, ebx, ecx, edx); |
| |
| has_lahf_lm = ecx & bit_LAHF_LM; |
| has_sse4a = ecx & bit_SSE4a; |
| has_abm = ecx & bit_ABM; |
| has_lwp = ecx & bit_LWP; |
| has_fma4 = ecx & bit_FMA4; |
| has_xop = ecx & bit_XOP; |
| has_tbm = ecx & bit_TBM; |
| has_lzcnt = ecx & bit_LZCNT; |
| has_prfchw = ecx & bit_PRFCHW; |
| |
| has_longmode = edx & bit_LM; |
| has_3dnowp = edx & bit_3DNOWP; |
| has_3dnow = edx & bit_3DNOW; |
| has_mwaitx = ecx & bit_MWAITX; |
| } |
| |
| /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ |
| #define XCR_XFEATURE_ENABLED_MASK 0x0 |
| #define XSTATE_FP 0x1 |
| #define XSTATE_SSE 0x2 |
| #define XSTATE_YMM 0x4 |
| #define XSTATE_OPMASK 0x20 |
| #define XSTATE_ZMM 0x40 |
| #define XSTATE_HI_ZMM 0x80 |
| if (has_osxsave) |
| asm (".byte 0x0f; .byte 0x01; .byte 0xd0" |
| : "=a" (eax), "=d" (edx) |
| : "c" (XCR_XFEATURE_ENABLED_MASK)); |
| |
| /* Check if SSE and YMM states are supported. */ |
| if (!has_osxsave |
| || (eax & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) |
| { |
| has_avx = 0; |
| has_avx2 = 0; |
| has_fma = 0; |
| has_fma4 = 0; |
| has_f16c = 0; |
| has_xop = 0; |
| has_xsave = 0; |
| has_xsaveopt = 0; |
| has_xsaves = 0; |
| has_xsavec = 0; |
| } |
| |
| if (!has_osxsave |
| || (eax & |
| (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)) |
| != (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)) |
| { |
| has_avx512f = 0; |
| has_avx512er = 0; |
| has_avx512pf = 0; |
| has_avx512cd = 0; |
| has_avx512dq = 0; |
| has_avx512bw = 0; |
| has_avx512vl = 0; |
| } |
| |
| if (!arch) |
| { |
| if (vendor == signature_AMD_ebx |
| || vendor == signature_CENTAUR_ebx |
| || vendor == signature_CYRIX_ebx |
| || vendor == signature_NSC_ebx) |
| cache = detect_caches_amd (ext_level); |
| else if (vendor == signature_INTEL_ebx) |
| { |
| bool xeon_mp = (family == 15 && model == 6); |
| cache = detect_caches_intel (xeon_mp, max_level, |
| ext_level, &l2sizekb); |
| } |
| } |
| |
| if (vendor == signature_AMD_ebx) |
| { |
| unsigned int name; |
| |
| /* Detect geode processor by its processor signature. */ |
| if (ext_level > 0x80000001) |
| __cpuid (0x80000002, name, ebx, ecx, edx); |
| else |
| name = 0; |
| |
| if (name == signature_NSC_ebx) |
| processor = PROCESSOR_GEODE; |
| else if (has_movbe && family == 22) |
| processor = PROCESSOR_BTVER2; |
| else if (has_avx2) |
| processor = PROCESSOR_BDVER4; |
| else if (has_xsaveopt) |
| processor = PROCESSOR_BDVER3; |
| else if (has_bmi) |
| processor = PROCESSOR_BDVER2; |
| else if (has_xop) |
| processor = PROCESSOR_BDVER1; |
| else if (has_sse4a && has_ssse3) |
| processor = PROCESSOR_BTVER1; |
| else if (has_sse4a) |
| processor = PROCESSOR_AMDFAM10; |
| else if (has_sse2 || has_longmode) |
| processor = PROCESSOR_K8; |
| else if (has_3dnowp && family == 6) |
| processor = PROCESSOR_ATHLON; |
| else if (has_mmx) |
| processor = PROCESSOR_K6; |
| else |
| processor = PROCESSOR_PENTIUM; |
| } |
| else if (vendor == signature_CENTAUR_ebx) |
| { |
| processor = PROCESSOR_GENERIC; |
| |
| switch (family) |
| { |
| default: |
| /* We have no idea. */ |
| break; |
| |
| case 5: |
| if (has_3dnow || has_mmx) |
| processor = PROCESSOR_I486; |
| break; |
| |
| case 6: |
| if (model > 9 || has_longmode) |
| /* Use the default detection procedure. */ |
| ; |
| else if (model == 9) |
| processor = PROCESSOR_PENTIUMPRO; |
| else if (model >= 6) |
| processor = PROCESSOR_I486; |
| } |
| } |
| else |
| { |
| switch (family) |
| { |
| case 4: |
| processor = PROCESSOR_I486; |
| break; |
| case 5: |
| processor = PROCESSOR_PENTIUM; |
| break; |
| case 6: |
| processor = PROCESSOR_PENTIUMPRO; |
| break; |
| case 15: |
| processor = PROCESSOR_PENTIUM4; |
| break; |
| default: |
| /* We have no idea. */ |
| processor = PROCESSOR_GENERIC; |
| } |
| } |
| |
| switch (processor) |
| { |
| case PROCESSOR_I386: |
| /* Default. */ |
| break; |
| case PROCESSOR_I486: |
| if (arch && vendor == signature_CENTAUR_ebx) |
| { |
| if (model >= 6) |
| cpu = "c3"; |
| else if (has_3dnow) |
| cpu = "winchip2"; |
| else |
| /* Assume WinChip C6. */ |
| cpu = "winchip-c6"; |
| } |
| else |
| cpu = "i486"; |
| break; |
| case PROCESSOR_PENTIUM: |
| if (arch && has_mmx) |
| cpu = "pentium-mmx"; |
| else |
| cpu = "pentium"; |
| break; |
| case PROCESSOR_PENTIUMPRO: |
| switch (model) |
| { |
| case 0x1c: |
| case 0x26: |
| /* Bonnell. */ |
| cpu = "bonnell"; |
| break; |
| case 0x37: |
| case 0x4a: |
| case 0x4d: |
| case 0x5a: |
| case 0x5d: |
| /* Silvermont. */ |
| cpu = "silvermont"; |
| break; |
| case 0x0f: |
| /* Merom. */ |
| case 0x17: |
| case 0x1d: |
| /* Penryn. */ |
| cpu = "core2"; |
| break; |
| case 0x1a: |
| case 0x1e: |
| case 0x1f: |
| case 0x2e: |
| /* Nehalem. */ |
| cpu = "nehalem"; |
| break; |
| case 0x25: |
| case 0x2c: |
| case 0x2f: |
| /* Westmere. */ |
| cpu = "westmere"; |
| break; |
| case 0x2a: |
| case 0x2d: |
| /* Sandy Bridge. */ |
| cpu = "sandybridge"; |
| break; |
| case 0x3a: |
| case 0x3e: |
| /* Ivy Bridge. */ |
| cpu = "ivybridge"; |
| break; |
| case 0x3c: |
| case 0x3f: |
| case 0x45: |
| case 0x46: |
| /* Haswell. */ |
| cpu = "haswell"; |
| break; |
| case 0x3d: |
| case 0x4f: |
| case 0x56: |
| /* Broadwell. */ |
| cpu = "broadwell"; |
| break; |
| case 0x57: |
| /* Knights Landing. */ |
| cpu = "knl"; |
| break; |
| default: |
| if (arch) |
| { |
| /* This is unknown family 0x6 CPU. */ |
| /* Assume Knights Landing. */ |
| if (has_avx512f) |
| cpu = "knl"; |
| /* Assume Broadwell. */ |
| else if (has_adx) |
| cpu = "broadwell"; |
| else if (has_avx2) |
| /* Assume Haswell. */ |
| cpu = "haswell"; |
| else if (has_avx) |
| /* Assume Sandy Bridge. */ |
| cpu = "sandybridge"; |
| else if (has_sse4_2) |
| { |
| if (has_movbe) |
| /* Assume Silvermont. */ |
| cpu = "silvermont"; |
| else |
| /* Assume Nehalem. */ |
| cpu = "nehalem"; |
| } |
| else if (has_ssse3) |
| { |
| if (has_movbe) |
| /* Assume Bonnell. */ |
| cpu = "bonnell"; |
| else |
| /* Assume Core 2. */ |
| cpu = "core2"; |
| } |
| else if (has_longmode) |
| /* Perhaps some emulator? Assume x86-64, otherwise gcc |
| -march=native would be unusable for 64-bit compilations, |
| as all the CPUs below are 32-bit only. */ |
| cpu = "x86-64"; |
| else if (has_sse3) |
| /* It is Core Duo. */ |
| cpu = "pentium-m"; |
| else if (has_sse2) |
| /* It is Pentium M. */ |
| cpu = "pentium-m"; |
| else if (has_sse) |
| { |
| if (vendor == signature_CENTAUR_ebx) |
| cpu = "c3-2"; |
| else |
| /* It is Pentium III. */ |
| cpu = "pentium3"; |
| } |
| else if (has_mmx) |
| /* It is Pentium II. */ |
| cpu = "pentium2"; |
| else |
| /* Default to Pentium Pro. */ |
| cpu = "pentiumpro"; |
| } |
| else |
| /* For -mtune, we default to -mtune=generic. */ |
| cpu = "generic"; |
| break; |
| } |
| break; |
| case PROCESSOR_PENTIUM4: |
| if (has_sse3) |
| { |
| if (has_longmode) |
| cpu = "nocona"; |
| else |
| cpu = "prescott"; |
| } |
| else |
| cpu = "pentium4"; |
| break; |
| case PROCESSOR_GEODE: |
| cpu = "geode"; |
| break; |
| case PROCESSOR_K6: |
| if (arch && has_3dnow) |
| cpu = "k6-3"; |
| else |
| cpu = "k6"; |
| break; |
| case PROCESSOR_ATHLON: |
| if (arch && has_sse) |
| cpu = "athlon-4"; |
| else |
| cpu = "athlon"; |
| break; |
| case PROCESSOR_K8: |
| if (arch && has_sse3) |
| cpu = "k8-sse3"; |
| else |
| cpu = "k8"; |
| break; |
| case PROCESSOR_AMDFAM10: |
| cpu = "amdfam10"; |
| break; |
| case PROCESSOR_BDVER1: |
| cpu = "bdver1"; |
| break; |
| case PROCESSOR_BDVER2: |
| cpu = "bdver2"; |
| break; |
| case PROCESSOR_BDVER3: |
| cpu = "bdver3"; |
| break; |
| case PROCESSOR_BDVER4: |
| cpu = "bdver4"; |
| break; |
| case PROCESSOR_BTVER1: |
| cpu = "btver1"; |
| break; |
| case PROCESSOR_BTVER2: |
| cpu = "btver2"; |
| break; |
| |
| default: |
| /* Use something reasonable. */ |
| if (arch) |
| { |
| if (has_ssse3) |
| cpu = "core2"; |
| else if (has_sse3) |
| { |
| if (has_longmode) |
| cpu = "nocona"; |
| else |
| cpu = "prescott"; |
| } |
| else if (has_longmode) |
| /* Perhaps some emulator? Assume x86-64, otherwise gcc |
| -march=native would be unusable for 64-bit compilations, |
| as all the CPUs below are 32-bit only. */ |
| cpu = "x86-64"; |
| else if (has_sse2) |
| cpu = "pentium4"; |
| else if (has_cmov) |
| cpu = "pentiumpro"; |
| else if (has_mmx) |
| cpu = "pentium-mmx"; |
| else if (has_cmpxchg8b) |
| cpu = "pentium"; |
| } |
| else |
| cpu = "generic"; |
| } |
| |
| if (arch) |
| { |
| const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx"; |
| const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow"; |
| const char *sse = has_sse ? " -msse" : " -mno-sse"; |
| const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2"; |
| const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3"; |
| const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3"; |
| const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a"; |
| const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16"; |
| const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf"; |
| const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe"; |
| const char *aes = has_aes ? " -maes" : " -mno-aes"; |
| const char *sha = has_sha ? " -msha" : " -mno-sha"; |
| const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul"; |
| const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt"; |
| const char *abm = has_abm ? " -mabm" : " -mno-abm"; |
| const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp"; |
| const char *fma = has_fma ? " -mfma" : " -mno-fma"; |
| const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4"; |
| const char *xop = has_xop ? " -mxop" : " -mno-xop"; |
| const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi"; |
| const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2"; |
| const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm"; |
| const char *avx = has_avx ? " -mavx" : " -mno-avx"; |
| const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2"; |
| const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2"; |
| const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1"; |
| const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt"; |
| const char *hle = has_hle ? " -mhle" : " -mno-hle"; |
| const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm"; |
| const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd"; |
| const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c"; |
| const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase"; |
| const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed"; |
| const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw"; |
| const char *adx = has_adx ? " -madx" : " -mno-adx"; |
| const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr"; |
| const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave"; |
| const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt"; |
| const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f"; |
| const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er"; |
| const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd"; |
| const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf"; |
| const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1"; |
| const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt"; |
| const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec"; |
| const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves"; |
| const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq"; |
| const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw"; |
| const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl"; |
| const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma"; |
| const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi"; |
| const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb"; |
| const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx"; |
| |
| options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, |
| sse4a, cx16, sahf, movbe, aes, sha, pclmul, |
| popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, |
| tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, |
| hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, |
| fxsr, xsave, xsaveopt, avx512f, avx512er, |
| avx512cd, avx512pf, prefetchwt1, clflushopt, |
| xsavec, xsaves, avx512dq, avx512bw, avx512vl, |
| avx512ifma, avx512vbmi, clwb, mwaitx, NULL); |
| } |
| |
| done: |
| return concat (cache, "-m", argv[0], "=", cpu, options, NULL); |
| } |
| #else |
| |
/* Fallback used when the CPUID-based implementation is compiled out.
   If we are compiling with GCC where the %EBX register is fixed, the
   driver just ignores the "native" target of -march and -mtune and
   leaves it to the newly built compiler to generate code for its
   default target.  Both arguments are ignored and NULL is always
   returned.  */

const char *host_detect_local_cpu (int argc, const char **argv)
{
  (void) argc;
  (void) argv;
  return NULL;
}
| #endif /* __GNUC__ */ |