5dce7964ef
* guix/cpu.scm (gcc-architecture->micro-architecture-level): Sort gcc-architectures which have AVX512F support into x86-64-v4. Change-Id: I8af0ceb692eefec7433e1fd5149379244da799c4
318 lines
13 KiB
Scheme
318 lines
13 KiB
Scheme
;;; GNU Guix --- Functional package management for GNU
|
|
;;; Copyright © 2021 Ludovic Courtès <ludo@gnu.org>
|
|
;;; Copyright © 2022-2024 Efraim Flashner <efraim@flashner.co.il>
|
|
;;;
|
|
;;; This file is part of GNU Guix.
|
|
;;;
|
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
|
;;; under the terms of the GNU General Public License as published by
|
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
|
;;; your option) any later version.
|
|
;;;
|
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;;; GNU General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
(define-module (guix cpu)
|
|
#:use-module (guix sets)
|
|
#:use-module (guix memoization)
|
|
#:use-module (srfi srfi-1)
|
|
#:use-module (srfi srfi-9)
|
|
#:use-module (ice-9 match)
|
|
#:use-module (ice-9 rdelim)
|
|
#:export (current-cpu
|
|
cpu?
|
|
cpu-architecture
|
|
cpu-vendor
|
|
cpu-family
|
|
cpu-model
|
|
cpu-flags
|
|
|
|
cpu->gcc-architecture
|
|
gcc-architecture->micro-architecture-level))
|
|
|
|
;;; Commentary:
|
|
;;;
|
|
;;; This module provides tools to determine the micro-architecture supported
|
|
;;; by the CPU and to map it to a name known to GCC's '-march'.
|
|
;;;
|
|
;;; Code:
|
|
|
|
;; CPU description.
|
|
(define-record-type <cpu>
|
|
(cpu architecture vendor family model flags)
|
|
cpu?
|
|
(architecture cpu-architecture) ;string, from 'uname'
|
|
(vendor cpu-vendor) ;string
|
|
(family cpu-family) ;integer
|
|
(model cpu-model) ;integer
|
|
(flags cpu-flags)) ;set of strings
|
|
|
|
(define current-cpu
|
|
(mlambda ()
|
|
"Return a <cpu> record representing the host CPU."
|
|
(define (prefix? prefix)
|
|
(lambda (str)
|
|
(string-prefix? prefix str)))
|
|
|
|
(call-with-input-file "/proc/cpuinfo"
|
|
(lambda (port)
|
|
(let loop ((vendor #f)
|
|
(family #f)
|
|
(model #f)
|
|
(flags (set)))
|
|
(match (read-line port)
|
|
((? eof-object?)
|
|
(cpu (utsname:machine (uname))
|
|
vendor family model flags))
|
|
;; vendor for x86_64 and i686
|
|
((? (prefix? "vendor_id") str)
|
|
(match (string-tokenize str)
|
|
(("vendor_id" ":" vendor)
|
|
(loop vendor family model flags))))
|
|
;; vendor for aarch64 and armhf
|
|
((? (prefix? "CPU implementer") str)
|
|
(match (string-tokenize str)
|
|
(("CPU" "implementer" ":" vendor)
|
|
(loop vendor family model flags))))
|
|
;; family for x86_64 and i686
|
|
((? (prefix? "cpu family") str)
|
|
(match (string-tokenize str)
|
|
(("cpu" "family" ":" family)
|
|
(loop vendor (string->number family) model flags))))
|
|
;; model for x86_64 and i686
|
|
((? (prefix? "model") str)
|
|
(match (string-tokenize str)
|
|
(("model" ":" model)
|
|
(loop vendor family (string->number model) flags))
|
|
(_
|
|
(loop vendor family model flags))))
|
|
;; model for aarch64 and armhf
|
|
((? (prefix? "CPU part") str)
|
|
(match (string-tokenize str)
|
|
(("CPU" "part" ":" model)
|
|
(loop vendor family (string->number (string-drop model 2) 16) flags))))
|
|
;; flags for x86_64 and i686
|
|
((? (prefix? "flags") str)
|
|
(match (string-tokenize str)
|
|
(("flags" ":" flags ...)
|
|
(loop vendor family model (list->set flags)))))
|
|
;; flags for aarch64 and armhf
|
|
((? (prefix? "Features") str)
|
|
(match (string-tokenize str)
|
|
(("Features" ":" flags ...)
|
|
(loop vendor family model (list->set flags)))))
|
|
(_
|
|
(loop vendor family model flags))))))))
|
|
|
|
(define (cpu->gcc-architecture cpu)
|
|
"Return the architecture name, suitable for GCC's '-march' flag, that
|
|
corresponds to CPU, a record as returned by 'current-cpu'."
|
|
(match (cpu-architecture cpu)
|
|
("x86_64"
|
|
;; Transcribed from GCC's 'host_detect_local_cpu' in driver-i386.cc.
|
|
(letrec-syntax ((if-flags (syntax-rules (=>)
|
|
((_)
|
|
#f)
|
|
((_ (flags ... => name) rest ...)
|
|
(if (every (lambda (flag)
|
|
(set-contains? (cpu-flags cpu)
|
|
flag))
|
|
'(flags ...))
|
|
name
|
|
(if-flags rest ...))))))
|
|
|
|
(or (and (equal? "GenuineIntel" (cpu-vendor cpu))
|
|
(= 6 (cpu-family cpu)) ;the "Pentium Pro" family
|
|
(if-flags ("avx512f" "amx_complex" => "graniterapids-d")
|
|
("avx512f" "amx_fp16" => "graniterapids")
|
|
("avx512f" "avx512vp2intersect" => "tigerlake")
|
|
("avx512f" "tsxldtrk" => "sapphirerapids")
|
|
("avx512f" "avx512bf16" => "cooperlake")
|
|
("avx512f" "wbnoinvd" => "icelake-server")
|
|
("avx512f" "avx512bitalg" => "icelake-client")
|
|
("avx512f" "avx512vbmi" => "cannonlake")
|
|
("avx512f" "avx5124vnniw" => "knm")
|
|
("avx512f" "avx512er" => "knl")
|
|
("avx512f" => "skylake-avx512")
|
|
("avx" "prefetchi" => "pantherlake")
|
|
("avx" "user_msr" => "clearwaterforest")
|
|
("avx" "sm3" => "arrowlake-s")
|
|
("avx" "avxvnniint8" => "sierraforest")
|
|
("avx" "serialize" => "alderlake")
|
|
("avx" "clflushopt" => "skylake")
|
|
("avx" "adx" => "broadwell")
|
|
("avx" "avx2" => "haswell")
|
|
("avx" => "sandybridge")
|
|
("sse4_2" "gfni" => "tremont")
|
|
("sse4_2" "sgx" => "goldmont-plus")
|
|
("sse4_2" "xsave" => "goldmont")
|
|
("sse4_2" "movbe" => "silvermont")
|
|
("sse4_2" => "nehalem")
|
|
("ssse3" "movbe" => "bonnell")
|
|
("ssse3" => "core2")
|
|
("longmode" => "x86-64")
|
|
("lm" => "x86-64")))
|
|
|
|
(and (equal? "AuthenticAMD" (cpu-vendor cpu))
|
|
(or (and (= 22 (cpu-family cpu))
|
|
(if-flags ("movbe" => "btver2")))
|
|
(and (= 6 (cpu-family cpu))
|
|
(if-flags ("3dnowp" => "athalon")
|
|
("longmode" "sse3" => "k8-sse3")
|
|
("lm" "sse3" => "k8-sse3")
|
|
("longmode" => "k8")
|
|
("lm" => "k8")))
|
|
(if-flags ("avx512f" => "znver4")
|
|
("vaes" => "znver3")
|
|
("clwb" => "znver2")
|
|
("clzero" => "znver1")
|
|
("avx2" => "bdver4")
|
|
("xsaveopt" => "bdver3")
|
|
("bmi" => "bdver2")
|
|
("xop" => "bdver1")
|
|
("sse4a" "has_ssse3" => "btver1")
|
|
("sse4a" => "amdfam10")
|
|
("sse2" "sse3" => "k8-sse3")
|
|
("longmode" "sse3" => "k8-sse3")
|
|
("lm" "sse3" => "k8-sse3")
|
|
("sse2" => "k8")
|
|
("longmode" => "k8")
|
|
("lm" => "k8")
|
|
("mmx" "3dnow" => "k6-3")
|
|
("mmx" => "k6")
|
|
(_ => "pentium"))))
|
|
|
|
;; Fallback case for non-Intel processors or for processors not
|
|
;; recognized above.
|
|
(if (and (= 7 (cpu-family cpu))
|
|
(= #x3b (cpu-model cpu)))
|
|
"lujiazui"
|
|
(cpu->micro-architecture-level cpu))
|
|
(if (and (= 7 (cpu-family cpu))
|
|
(>= #x5b (cpu-model cpu)))
|
|
"yongfeng"
|
|
(cpu->micro-architecture-level cpu))
|
|
|
|
;; TODO: Recognize CENTAUR/CYRIX/NSC?
|
|
|
|
"x86_64")))
|
|
("aarch64"
|
|
;; Transcribed from GCC's list of aarch64 processors in aarch64-cores.def
|
|
;; What to do with big.LITTLE cores?
|
|
(match (cpu-vendor cpu)
|
|
("0x41"
|
|
(match (cpu-model cpu)
|
|
((or #xd02 #xd04 #xd03 #xd07 #xd08 #xd09)
|
|
"armv8-a")
|
|
((or #xd05 #xd0a #xd0b #xd0e #xd0d #xd41 #xd42 #xd4b #xd06 #xd43 #xd44
|
|
#xd4c #xd0c #xd4a)
|
|
"armv8.2-a")
|
|
(#xd40
|
|
"armv8.4-a")
|
|
(#xd15
|
|
"armv8-r")
|
|
((or #xd46 #xd47 #xd4d #xd48 #xd4e #xd49 #xd4f)
|
|
"armv9-a")))
|
|
("0x42"
|
|
"armv8.1-a")
|
|
("0x43"
|
|
(match (cpu-model cpu)
|
|
((or #x0a0 #x0a1 #x0a2 #x0a3)
|
|
"armv8-a")
|
|
(#x0af
|
|
"armv8.1-a")
|
|
((or #x0b0 #x0b1 #x0b2 #x0b3 #x0b4 #x0b5)
|
|
"armv8.2-a")
|
|
(#x0b8
|
|
"armv8.3-a")))
|
|
("0x46"
|
|
"armv8.2-a")
|
|
("0x48"
|
|
"armv8.2-a")
|
|
("0x50"
|
|
"armv8-a")
|
|
("0x51"
|
|
(match (cpu-model cpu)
|
|
(#xC00
|
|
"armv8-a")
|
|
(#x516
|
|
"armv8.1-a")
|
|
(#xC01
|
|
"armv8.4-a")))
|
|
("0x53"
|
|
"armv8-a")
|
|
("0x68"
|
|
"armv8-a")
|
|
("0xC0"
|
|
"armv8.6-a")
|
|
("0xC00"
|
|
"armv8-a")
|
|
(_
|
|
"armv8-a"))
|
|
"armv8-a")
|
|
(architecture
|
|
;; TODO: More architectures
|
|
architecture)))
|
|
|
|
(define (cpu->micro-architecture-level cpu)
|
|
"Return a micro-architecture name, suitable for generalized optimizations that
|
|
correspond roughly to CPU, a record as returned by 'current-cpu'."
|
|
(match (cpu-architecture cpu)
|
|
("x86_64"
|
|
(or (letrec-syntax ((if-flags (syntax-rules (=>)
|
|
((_)
|
|
#f)
|
|
((_ (flags ... => name) rest ...)
|
|
(if (every (lambda (flag)
|
|
(set-contains? (cpu-flags cpu)
|
|
flag))
|
|
'(flags ...))
|
|
name
|
|
(if-flags rest ...))))))
|
|
|
|
(if-flags
|
|
;; https://gitlab.com/x86-psABIs/x86-64-ABI/-/blob/master/x86-64-ABI/low-level-sys-info.tex
|
|
;; v4: AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
|
|
;; v3: AVX, AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE, OSXSAVE
|
|
;; v2: CMPXCHG16B, LAHF, SAHF, POPCNT, SSE3, SSE4.1, SSE4.2, SSSE3
|
|
("avx512f" "avx512bw" "abx512cd" "abx512dq" "avx512vl"
|
|
"avx" "avx2" "bmi1" "bmi2" "f16c" "fma" "movbe"
|
|
"popcnt" "sse3" "sse4_1" "sse4_2" "ssse3" => "x86_64-v4")
|
|
("avx" "avx2" "bmi1" "bmi2" "f16c" "fma" "movbe"
|
|
"popcnt" "sse3" "sse4_1" "sse4_2" "ssse3" => "x86_64-v3")
|
|
("popcnt" "sse3" "sse4_1" "sse4_2" "ssse3" => "x86_64-v2")
|
|
(_ => "x86_64-v1")))
|
|
"x86_64-v1"))
|
|
(architecture
|
|
;; TODO: More architectures
|
|
architecture)))
|
|
|
|
(define (gcc-architecture->micro-architecture-level gcc-architecture)
|
|
"Return a matching psABI micro-architecture, allowing optimizations for x86_64
|
|
CPUs for compilers which don't allow for more focused optimizing."
|
|
;; Matching gcc-architectures isn't an easy task, with the rule-of-thumb being
|
|
;; AVX512F+ for x86_64-v4, AVX+ for x86_64-v3.
|
|
;; https://gitlab.com/x86-psABIs/x86-64-ABI/-/blob/master/x86-64-ABI/low-level-sys-info.tex
|
|
(match gcc-architecture
|
|
((or "graniterapids-d" "graniterapids" "tigerlake" "sapphirerapids"
|
|
"cooperlake" "icelake-server" "icelake-client" "cannonlake" "knm"
|
|
"knl" "skylake-avx512"
|
|
"znver4")
|
|
"x86_64-v4")
|
|
((or "pantherlake" "clearwaterforest" "arrowlake-s" "sierraforest"
|
|
"alderlake" "skylake" "broadwell" "haswell"
|
|
"znver3" "znver2" "znver1" "bdver4")
|
|
"x86_64-v3")
|
|
((or "sandybridge" "tremont" "goldmont-plus" "goldmont" "silvermont"
|
|
"nehalem" "bonnell" "core2"
|
|
"btver2" "athalon" "k8-sse3" "k8" "bdver3" "bdver2" "bdver1" "btver1"
|
|
"amdfam10"
|
|
"lujiazui" "yongfeng" "x86-64")
|
|
"x86_64-v1")
|
|
(_ gcc-architecture)))
|