diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0ec355aa3c..c1de17e164 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -272,12 +272,12 @@ jobs: name: Intrinsic Test runs-on: ubuntu-latest strategy: + fail-fast: false matrix: target: - aarch64-unknown-linux-gnu - aarch64_be-unknown-linux-gnu - armv7-unknown-linux-gnueabihf - - arm-unknown-linux-gnueabihf - x86_64-unknown-linux-gnu profile: [dev, release] include: diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 2768c521eb..27cb80053b 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -1,17 +1,19 @@ FROM ubuntu:25.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ - g++ \ ca-certificates \ libc6-dev \ gcc-aarch64-linux-gnu \ - g++-aarch64-linux-gnu \ libc6-dev-arm64-cross \ qemu-user \ make \ file \ - clang \ - lld + xz-utils \ + wget + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.1-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \ diff --git a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile index f85c6a2592..889471590b 100644 --- a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile @@ -2,17 +2,15 @@ FROM ubuntu:25.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ - g++ \ ca-certificates \ libc6-dev \ libc6-dev-arm64-cross \ qemu-user \ make \ file \ - clang \ curl \ xz-utils \ - lld + wget ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu" @@ -21,6 +19,10 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin RUN tar -xvf "${TOOLCHAIN}.tar.xz" RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.1-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}" ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc" diff --git a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index 6d4ff24828..23e4d5a341 100644 --- a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -7,7 +7,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libc6-dev-armhf-cross \ qemu-user \ make \ - file + file \ + clang \ + lld ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \ OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 602249c0ec..b4ff6fb49d 100644 --- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -1,17 +1,19 @@ FROM ubuntu:24.04 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ - g++ \ ca-certificates \ libc6-dev \ gcc-arm-linux-gnueabihf \ - g++-arm-linux-gnueabihf \ libc6-dev-armhf-cross \ qemu-user \ make \ file \ - clang \ - lld + wget + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.1-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \ OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index a357449d51..e8f3933bfc 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -6,15 +6,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ make \ ca-certificates \ wget \ - xz-utils \ - clang \ - libstdc++-14-dev \ - build-essential \ - lld + xz-utils RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-10.5.0-2026-01-13-lin.tar.xz -O sde.tar.xz RUN mkdir intel-sde RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.1-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \ -rtm-mode full -tsx --" diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index 89104e2672..68c48ba27f 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -5,127 +5,56 @@ set -ex : "${TARGET?The TARGET environment variable must be set.}" export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir" -export HOST_RUSTFLAGS="${RUSTFLAGS}" export PROFILE="${PROFILE:="release"}" -case ${TARGET} in - # On 32-bit use a static relocation model which avoids some extra - # instructions when dealing with static data, notably allowing some - # instruction assertion checks to pass below the 20 instruction limit. If - # this is the default, dynamic, then too many instructions are generated - # when we assert the instruction for a function and it causes tests to fail. - i686-* | i586-*) - export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static" - ;; - # Some x86_64 targets enable by default more features beyond SSE2, - # which cause some instruction assertion checks to fail. - x86_64-*) - export RUSTFLAGS="${RUSTFLAGS} -C target-feature=-sse3" - ;; - #Unoptimized build uses fast-isel which breaks with msa - mips-* | mipsel-*) - export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false" - ;; - armv7-*eabihf | thumbv7-*eabihf) - export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon" - ;; - # Some of our test dependencies use the deprecated `gcc` crates which - # doesn't detect RISC-V compilers automatically, so do it manually here. - riscv*) - export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc" - ;; -esac - echo "RUSTFLAGS=${RUSTFLAGS}" -echo "OBJDUMP=${OBJDUMP}" echo "PROFILE=${PROFILE}" INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml" -# Test targets compiled with extra features. +export CC="/llvm/bin/clang" + case ${TARGET} in - # Setup aarch64 & armv7 specific variables, the runner, along with some - # tests to skip - aarch64-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" + aarch64_be*) + export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/include/ --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt ;; - aarch64_be-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}" - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" + aarch64*) + export CFLAGS="-I/usr/aarch64-linux-gnu/include/" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt ;; - armv7-unknown-linux-gnueabihf*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/" + armv7*) + export CFLAGS="-I/usr/arm-linux-gnueabihf/include/" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}" - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" ;; - x86_64-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/" - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" + x86_64*) + export CFLAGS="-I/usr/include/x86_64-linux-gnu/" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=20}" ;; *) ;; esac -# Arm specific case "${TARGET}" in - aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ - cargo run "${INTRINSIC_TEST}" --release \ - --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ - --runner "${TEST_RUNNER}" \ - --cppcompiler "${TEST_CXX_COMPILER}" \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" \ - --profile "${PROFILE}" \ - --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" - ;; - - aarch64_be-unknown-linux-gnu*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ - cargo run "${INTRINSIC_TEST}" --release \ - --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ - --runner "${TEST_RUNNER}" \ - --cppcompiler "${TEST_CXX_COMPILER}" \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" \ - --profile "${PROFILE}" \ - --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \ - --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" \ - --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" - ;; - x86_64-unknown-linux-gnu*) - # `CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER` is not necessary for `intrinsic-test` - # because the binary needs to run directly on the host. - # Hence the use of `env -u`. env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \ - RUST_LOG=warn RUST_BACKTRACE=1 \ cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ - --runner "${TEST_RUNNER}" \ --skip "${TEST_SKIP_INTRINSICS}" \ - --cppcompiler "${TEST_CXX_COMPILER}" \ - --target "${TARGET}" \ - --profile "${PROFILE}" \ - --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" + --target "${TARGET}" + + echo "${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" ;; - *) + *) + cargo run "${INTRINSIC_TEST}" --release \ + --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ + --skip "${TEST_SKIP_INTRINSICS}" \ + --target "${TARGET}" ;; esac + +cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}" --no-fail-fast diff --git a/crates/intrinsic-test/missing_x86.txt b/crates/intrinsic-test/missing_x86.txt index f88a125bfd..22b2cd0a43 100644 --- a/crates/intrinsic-test/missing_x86.txt +++ b/crates/intrinsic-test/missing_x86.txt @@ -52,8 +52,19 @@ _mm_sm3rnds2_epi32 _xgetbv # top bits are undefined, unclear how to test these +_mm256_castph128_ph256 +_mm256_castps128_ps256 +_mm256_castpd128_pd256 _mm256_castsi128_si256 + +_mm512_castph128_ph512 +_mm512_castps128_ps512 +_mm512_castpd128_pd512 _mm512_castsi128_si512 + +_mm512_castph256_ph512 +_mm512_castps256_ps512 +_mm512_castpd256_pd512 _mm512_castsi256_si512 # Clang bug @@ -65,3 +76,7 @@ _mm512_mask_reduce_min_pd _mm512_mask_reduce_min_ps _mm_extract_epi16 _mm_extract_epi8 + +# TODO: fix +_mm_movemask_epi8 +_mm_movemask_pd diff --git a/crates/intrinsic-test/src/arm/compile.rs b/crates/intrinsic-test/src/arm/compile.rs deleted file mode 100644 index 7da35f9a11..0000000000 --- a/crates/intrinsic-test/src/arm/compile.rs +++ /dev/null @@ -1,51 +0,0 @@ -use crate::common::cli::ProcessedCli; -use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation}; - -pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { - let cpp_compiler = config.cpp_compiler.as_ref()?; - - // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations - let mut command = CompilationCommandBuilder::new() - .add_arch_flags(["armv8.6-a", "crypto", "crc", "dotprod", "fp16"]) - .set_compiler(cpp_compiler) - .set_target(&config.target) - .set_opt_level("2") - .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref()) - .set_project_root("c_programs") - .add_extra_flags(["-ffp-contract=off", "-Wno-narrowing"]); - - if !config.target.contains("v7") { - command = command.add_arch_flags(["faminmax", "lut", "sha3"]); - } - - if !cpp_compiler.contains("clang") { - command = command.add_extra_flag("-flax-vector-conversions"); - } - - let mut cpp_compiler = command.into_cpp_compilation(); - - if config.target.contains("aarch64_be") { - let Some(ref cxx_toolchain_dir) = config.cxx_toolchain_dir else { - panic!( - "target `{}` must specify `cxx_toolchain_dir`", - config.target - ) - }; - - cpp_compiler.command_mut().args([ - &format!("--sysroot={cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc"), - "--include-directory", - &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1"), - "--include-directory", - &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1/aarch64_be-none-linux-gnu"), - "-L", - &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"), - "-L", - &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/lib"), - "-B", - &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"), - ]); - } - - Some(cpp_compiler) -} diff --git a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs index 85cb21c2d6..6ccf885d25 100644 --- a/crates/intrinsic-test/src/arm/config.rs +++ b/crates/intrinsic-test/src/arm/config.rs @@ -3,51 +3,6 @@ pub const NOTICE: &str = "\ // test are derived from a JSON specification, published under the same license as the // `intrinsic-test` crate.\n"; -pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#" -#ifdef __aarch64__ -std::ostream& operator<<(std::ostream& os, poly128_t value); -#endif - -std::ostream& operator<<(std::ostream& os, float16_t value); -std::ostream& operator<<(std::ostream& os, uint8_t value); - -// T1 is the `To` type, T2 is the `From` type -template T1 cast(T2 x) { - static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same"); - T1 ret{}; - memcpy(&ret, &x, sizeof(T1)); - return ret; -} -"#; - -pub const PLATFORM_C_DEFINITIONS: &str = r#" -#ifdef __aarch64__ -std::ostream& operator<<(std::ostream& os, poly128_t value) { - std::stringstream temp; - do { - int n = value % 10; - value /= 10; - temp << n; - } while (value != 0); - std::string tempstr(temp.str()); - std::string res(tempstr.rbegin(), tempstr.rend()); - os << res; - return os; -} - -#endif - -std::ostream& operator<<(std::ostream& os, float16_t value) { - os << static_cast(value); - return os; -} - -std::ostream& operator<<(std::ostream& os, uint8_t value) { - os << (unsigned int) value; - return os; -} -"#; - pub const PLATFORM_RUST_DEFINITIONS: &str = ""; pub const PLATFORM_RUST_CFGS: &str = r#" @@ -58,7 +13,6 @@ pub const PLATFORM_RUST_CFGS: &str = r#" #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_aarch64_jscvt))] #![feature(fmt_helpers_for_derive)] #![feature(stdarch_neon_f16)] diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 99c8da854c..9bf6c95ffd 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -1,5 +1,4 @@ mod argument; -mod compile; mod config; mod intrinsic; mod json_parser; @@ -7,7 +6,6 @@ mod types; use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; -use crate::common::compile_c::CppCompilation; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::ArmIntrinsicType; @@ -15,16 +13,11 @@ use json_parser::get_neon_intrinsics; pub struct ArmArchitectureTest { intrinsics: Vec>, - cli_options: ProcessedCli, } impl SupportedArchitectureTest for ArmArchitectureTest { type IntrinsicImpl = ArmIntrinsicType; - fn cli_options(&self) -> &ProcessedCli { - &self.cli_options - } - fn intrinsics(&self) -> &[Intrinsic] { &self.intrinsics } @@ -32,18 +25,16 @@ impl SupportedArchitectureTest for ArmArchitectureTest { const NOTICE: &str = config::NOTICE; const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; - const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS; - const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS; const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - fn cpp_compilation(&self) -> Option { - compile::build_cpp_compilation(&self.cli_options) + fn arch_flags(&self) -> Vec<&str> { + vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"] } fn create(cli_options: ProcessedCli) -> Self { - let a32 = cli_options.target.contains("v7"); + let a32 = cli_options.target.starts_with("armv7"); let mut intrinsics = get_neon_intrinsics(&cli_options.filename, &cli_options.target) .expect("Error parsing input file"); @@ -68,9 +59,6 @@ impl SupportedArchitectureTest for ArmArchitectureTest { .take(sample_size) .collect::>(); - Self { - intrinsics, - cli_options, - } + Self { intrinsics } } } diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 4be8d1e48b..94e9035465 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -1,6 +1,4 @@ use super::intrinsic::ArmIntrinsicType; -use crate::common::cli::Language; -use crate::common::indentation::Indentation; use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; impl IntrinsicTypeDefinition for ArmIntrinsicType { @@ -9,8 +7,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { let prefix = self.kind.c_prefix(); let const_prefix = if self.constant { "const " } else { "" }; - if let (Some(bit_len), simd_len, vec_len) = (self.bit_len, self.simd_len, self.vec_len) { - match (simd_len, vec_len) { + if let Some(bit_len) = self.bit_len { + match (self.simd_len, self.vec_len) { (None, None) => format!("{const_prefix}{prefix}{bit_len}_t"), (Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"), (Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"), @@ -21,19 +19,24 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { } } - fn c_single_vector_type(&self) -> String { - if let (Some(bit_len), Some(simd_len)) = (self.bit_len, self.simd_len) { - format!( - "{prefix}{bit_len}x{simd_len}_t", - prefix = self.kind.c_prefix() - ) + fn rust_type(&self) -> String { + let rust_prefix = self.kind.rust_prefix(); + let c_prefix = self.kind.c_prefix(); + + if let Some(bit_len) = self.bit_len { + match (self.simd_len, self.vec_len) { + (None, None) => format!("{rust_prefix}{bit_len}"), + (Some(simd), None) => format!("{c_prefix}{bit_len}x{simd}_t"), + (Some(simd), Some(vec)) => format!("{c_prefix}{bit_len}x{simd}x{vec}_t"), + (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case + } } else { - unreachable!("Shouldn't be called on this type") + todo!("{self:#?}") } } /// Determines the load function for this type. - fn get_load_function(&self, language: Language) -> String { + fn get_load_function(&self) -> String { if let IntrinsicType { kind: k, bit_len: Some(bl), @@ -48,16 +51,13 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { "" }; - let choose_workaround = language == Language::C && self.target.contains("v7"); format!( "vld{len}{quad}_{type}{size}", type = match k { TypeKind::Int(Sign::Unsigned) => "u", TypeKind::Int(Sign::Signed) => "s", TypeKind::Float => "f", - // The ACLE doesn't support 64-bit polynomial loads on Armv7 - // if armv7 and bl == 64, use "s", else "p" - TypeKind::Poly => if choose_workaround && *bl == 64 {"s"} else {"p"}, + TypeKind::Poly => "p", x => todo!("get_load_function TypeKind: {x:#?}"), }, size = bl, @@ -68,97 +68,6 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { todo!("get_load_function IntrinsicType: {self:#?}") } } - - /// Determines the get lane function for this type. - fn get_lane_function(&self) -> String { - if let IntrinsicType { - kind: k, - bit_len: Some(bl), - simd_len, - .. - } = &self.data - { - let quad = if (simd_len.unwrap_or(1) * bl) > 64 { - "q" - } else { - "" - }; - format!( - "vget{quad}_lane_{type}{size}", - type = match k { - TypeKind::Int(Sign::Unsigned) => "u", - TypeKind::Int(Sign::Signed) => "s", - TypeKind::Float => "f", - TypeKind::Poly => "p", - x => todo!("get_load_function TypeKind: {x:#?}"), - }, - size = bl, - quad = quad, - ) - } else { - todo!("get_lane_function IntrinsicType: {self:#?}") - } - } - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_c(&self, indentation: Indentation, additional: &str) -> String { - let lanes = if self.num_vectors() > 1 { - (0..self.num_vectors()) - .map(|vector| { - format!( - r#""{ty}(" << {lanes} << ")""#, - ty = self.c_single_vector_type(), - lanes = (0..self.num_lanes()) - .map(move |idx| -> std::string::String { - let lane_fn = self.get_lane_function(); - let final_cast = self.generate_final_type_cast(); - format!( - "{final_cast}{lane_fn}(__return_value.val[{vector}], {idx})" - ) - }) - .collect::>() - .join(r#" << ", " << "#) - ) - }) - .collect::>() - .join(r#" << ", " << "#) - } else if self.num_lanes() > 1 { - (0..self.num_lanes()) - .map(|idx| -> std::string::String { - let lane_fn = self.get_lane_function(); - let final_cast = self.generate_final_type_cast(); - format!("{final_cast}{lane_fn}(__return_value, {idx})") - }) - .collect::>() - .join(r#" << ", " << "#) - } else { - format!( - "{promote}cast<{cast}>(__return_value)", - cast = match self.kind() { - TypeKind::Float if self.inner_size() == 16 => "float16_t".to_string(), - TypeKind::Float if self.inner_size() == 32 => "float".to_string(), - TypeKind::Float if self.inner_size() == 64 => "double".to_string(), - TypeKind::Int(Sign::Signed) => format!("int{}_t", self.inner_size()), - TypeKind::Int(Sign::Unsigned) => format!("uint{}_t", self.inner_size()), - TypeKind::Poly => format!("poly{}_t", self.inner_size()), - ty => todo!("print_result_c - Unknown type: {ty:#?}"), - }, - promote = self.generate_final_type_cast(), - ) - }; - - format!( - r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, - ty = if self.is_simd() { - format!("{}(", self.c_type()) - } else { - String::from("") - }, - close = if self.is_simd() { ")" } else { "" }, - ) - } } impl ArmIntrinsicType { diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index 385cf32d3b..8a760883a4 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -1,4 +1,5 @@ -use super::cli::Language; +use itertools::Itertools; + use super::constraint::Constraint; use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; @@ -73,10 +74,6 @@ where format!("{}_vals", self.name.to_lowercase()) } } - - fn as_call_param_c(&self) -> String { - self.ty.as_call_param_c(&self.generate_name()) - } } #[derive(Debug, PartialEq, Clone)] @@ -88,13 +85,40 @@ impl ArgumentList where T: IntrinsicTypeDefinition, { - /// Converts the argument list into the call parameters for a C function call. - /// e.g. this would generate something like `a, &b, c` - pub fn as_call_param_c(&self) -> String { + pub fn as_non_imm_arglist_c(&self) -> String { self.iter() - .map(|arg| arg.as_call_param_c()) - .collect::>() - .join(", ") + .filter(|arg| !arg.has_constraint()) + .format_with("", |arg, fmt| { + fmt(&format_args!(", const {}* {}", arg.to_c_type(), arg.name)) + }) + .to_string() + } + + pub fn as_non_imm_arglist_rust(&self) -> String { + self.iter() + .filter(|arg| !arg.has_constraint()) + .format_with("", |arg, fmt| { + fmt(&format_args!( + ", {}: *const {}", + arg.name, + arg.ty.rust_type() + )) + }) + .to_string() + } + + pub fn as_call_params_c(&self, imm_args: &[i64]) -> String { + let mut imm_args = imm_args.iter(); + self.iter() + .format_with(", ", |arg, fmt| { + if arg.has_constraint() { + fmt(&imm_args.next().unwrap()) + } else { + fmt(&"*")?; + fmt(&arg.name) + } + }) + .to_string() } /// Converts the argument list into the call parameters for a Rust function. @@ -103,32 +127,14 @@ where self.iter() .filter(|a| !a.has_constraint()) .map(|arg| arg.generate_name() + " as _") - .collect::>() .join(", ") } - /// Creates a line for each argument that initializes an array for C from which `loads` argument - /// values can be loaded as a sliding window. - /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2. - pub fn gen_arglists_c( - &self, - w: &mut impl std::io::Write, - indentation: Indentation, - loads: u32, - ) -> std::io::Result<()> { - for arg in self.iter().filter(|&arg| !arg.has_constraint()) { - // Setting the variables on an aligned boundary to make it easier to pick - // functions (of a specific architecture) that would help load the values. - writeln!( - w, - "{indentation}alignas(64) const {ty} {name}_vals[] = {values};", - ty = arg.ty.c_scalar_type(), - name = arg.generate_name(), - values = arg.ty.populate_random(indentation, loads, &Language::C) - )? - } - - Ok(()) + pub fn as_c_call_param_rust(&self) -> String { + self.iter() + .filter(|a| !a.has_constraint()) + .map(|arg| format!(", &raw const {} as _", arg.generate_name())) + .join("") } /// Creates a line for each argument that initializes an array for Rust from which `loads` argument @@ -164,33 +170,10 @@ where name = arg.rust_vals_array_name(), ty = arg.ty.rust_scalar_type(), load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, - values = arg.ty.populate_random(indentation, loads, &Language::Rust) + values = arg.ty.populate_random(indentation, loads) ) } - /// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at - /// an offset `i` using a load intrinsic, in C. - /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);` - /// - /// ARM-specific - pub fn load_values_c(&self, indentation: Indentation) -> String { - self.iter() - .filter(|&arg| !arg.has_constraint()) - .map(|arg| { - format!( - "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i]));\n", - ty = arg.to_c_type(), - name = arg.generate_name(), - load = if arg.is_simd() { - arg.ty.get_load_function(Language::C) - } else { - "*".to_string() - } - ) - }) - .collect() - } - /// Creates a line for each argument that initializes the argument from array `[ARG]_VALS` at /// an offset `i` using a load intrinsic, in Rust. /// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));` @@ -199,7 +182,7 @@ where .filter(|&arg| !arg.has_constraint()) .map(|arg| { let load = if arg.is_simd() { - arg.ty.get_load_function(Language::Rust) + arg.ty.get_load_function() } else { "*".to_string() }; diff --git a/crates/intrinsic-test/src/common/cli.rs b/crates/intrinsic-test/src/common/cli.rs index bed8259de8..f407b5ceb7 100644 --- a/crates/intrinsic-test/src/common/cli.rs +++ b/crates/intrinsic-test/src/common/cli.rs @@ -1,12 +1,6 @@ use itertools::Itertools; use std::path::PathBuf; -#[derive(Debug, PartialEq)] -pub enum Language { - Rust, - C, -} - /// Intrinsic test tool #[derive(clap::Parser)] #[command( @@ -17,41 +11,13 @@ pub struct Cli { /// The input file containing the intrinsics pub input: PathBuf, - /// The rust toolchain to use for building the rust code - #[arg(long)] - pub toolchain: Option, - - /// The C++ compiler to use for compiling the c++ code - #[arg(long, default_value_t = String::from("clang++"))] - pub cppcompiler: String, - - /// Run the C programs under emulation with this command - #[arg(long)] - pub runner: Option, - /// Filename for a list of intrinsics to skip (one per line) #[arg(long)] pub skip: Option, - /// Regenerate test programs, but don't build or run them - #[arg(long)] - pub generate_only: bool, - /// Pass a target the test suite - #[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))] - pub target: String, - - /// Pass a profile (release, dev) - #[arg(long, default_value_t = String::from("release"))] - pub profile: String, - - /// Set the linker - #[arg(long)] - pub linker: Option, - - /// Set the sysroot for the C++ compiler #[arg(long)] - pub cxx_toolchain_dir: Option, + pub target: String, #[arg(long, default_value_t = 100u8)] pub sample_percentage: u8, @@ -59,13 +25,7 @@ pub struct Cli { pub struct ProcessedCli { pub filename: PathBuf, - pub toolchain: Option, - pub cpp_compiler: Option, - pub runner: String, pub target: String, - pub profile: String, - pub linker: Option, - pub cxx_toolchain_dir: Option, pub skip: Vec, pub sample_percentage: u8, } @@ -73,11 +33,7 @@ pub struct ProcessedCli { impl ProcessedCli { pub fn new(cli_options: Cli) -> Self { let filename = cli_options.input; - let runner = cli_options.runner.unwrap_or_default(); let target = cli_options.target; - let profile = cli_options.profile; - let linker = cli_options.linker; - let cxx_toolchain_dir = cli_options.cxx_toolchain_dir; let sample_percentage = cli_options.sample_percentage; let skip = if let Some(filename) = cli_options.skip { @@ -91,27 +47,8 @@ impl ProcessedCli { Default::default() }; - let (toolchain, cpp_compiler) = if cli_options.generate_only { - (None, None) - } else { - ( - Some( - cli_options - .toolchain - .map_or_else(String::new, |t| format!("+{t}")), - ), - Some(cli_options.cppcompiler), - ) - }; - Self { - toolchain, - cpp_compiler, - runner, target, - profile, - linker, - cxx_toolchain_dir, skip, filename, sample_percentage, diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs deleted file mode 100644 index c22d7fd4ec..0000000000 --- a/crates/intrinsic-test/src/common/compare.rs +++ /dev/null @@ -1,142 +0,0 @@ -use itertools::Itertools; -use rayon::prelude::*; -use std::{collections::HashMap, process::Command}; - -pub const INTRINSIC_DELIMITER: &str = "############"; -fn runner_command(runner: &str) -> Command { - let mut it = runner.split_whitespace(); - let mut cmd = Command::new(it.next().unwrap()); - cmd.args(it); - - cmd -} - -pub fn compare_outputs( - intrinsic_name_list: &Vec, - runner: &str, - target: &str, - profile: &str, -) -> bool { - let profile_dir = match profile { - "dev" => "debug", - _ => "release", - }; - - let (c, rust) = rayon::join( - || { - runner_command(runner) - .arg("./intrinsic-test-programs") - .current_dir("c_programs") - .output() - }, - || { - runner_command(runner) - .arg(format!( - "./target/{target}/{profile_dir}/intrinsic-test-programs" - )) - .current_dir("rust_programs") - .output() - }, - ); - let (c, rust) = match (c, rust) { - (Ok(c), Ok(rust)) => (c, rust), - failure => panic!("Failed to run: {failure:#?}"), - }; - - if !c.status.success() { - error!( - "Failed to run C program.\nstdout: {stdout}\nstderr: {stderr}", - stdout = std::str::from_utf8(&c.stdout).unwrap_or(""), - stderr = std::str::from_utf8(&c.stderr).unwrap_or(""), - ); - } - - if !rust.status.success() { - error!( - "Failed to run Rust program.\nstdout: {stdout}\nstderr: {stderr}", - stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""), - stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""), - ); - } - - info!("Completed running C++ and Rust test binaries"); - let c = std::str::from_utf8(&c.stdout) - .unwrap() - .to_lowercase() - .replace("-nan", "nan"); - let rust = std::str::from_utf8(&rust.stdout) - .unwrap() - .to_lowercase() - .replace("-nan", "nan"); - - let c_output_map = c - .split(INTRINSIC_DELIMITER) - .filter_map(|output| output.trim().split_once("\n")) - .collect::>(); - let rust_output_map = rust - .split(INTRINSIC_DELIMITER) - .filter_map(|output| output.trim().split_once("\n")) - .collect::>(); - - let intrinsics = c_output_map - .keys() - .chain(rust_output_map.keys()) - .unique() - .collect_vec(); - - info!("Comparing outputs"); - let intrinsics_diff_count = intrinsics - .par_iter() - .filter_map(|&&intrinsic| { - let c_output = c_output_map.get(intrinsic).unwrap(); - let rust_output = rust_output_map.get(intrinsic).unwrap(); - if rust_output.eq(c_output) { - None - } else { - let diff = diff::lines(c_output, rust_output); - let diffs = diff - .into_iter() - .filter_map(|diff| match diff { - diff::Result::Left(_) | diff::Result::Right(_) => Some(diff), - diff::Result::Both(_, _) => None, - }) - .collect_vec(); - if diffs.len() > 0 { - Some((intrinsic, diffs)) - } else { - None - } - } - }) - .inspect(|(intrinsic, diffs)| { - use std::io::Write; - - let stdout = std::io::stdout(); - let mut out = stdout.lock(); - - writeln!(out, "Difference for intrinsic: {intrinsic}").unwrap(); - diffs.into_iter().for_each(|diff| match diff { - diff::Result::Left(c) => { - writeln!(out, "C: {c}").unwrap(); - } - diff::Result::Right(rust) => { - writeln!(out, "Rust: {rust}").unwrap(); - } - _ => (), - }); - writeln!( - out, - "****************************************************************" - ) - .unwrap(); - }) - .count(); - - println!( - "{} differences found (tested {} intrinsics)", - intrinsics_diff_count, - intrinsic_name_list.len() - ); - - intrinsics_diff_count == 0 -} diff --git a/crates/intrinsic-test/src/common/compile_c.rs b/crates/intrinsic-test/src/common/compile_c.rs deleted file mode 100644 index fa78b332a7..0000000000 --- a/crates/intrinsic-test/src/common/compile_c.rs +++ /dev/null @@ -1,136 +0,0 @@ -#[derive(Clone)] -pub struct CompilationCommandBuilder { - compiler: String, - target: Option, - cxx_toolchain_dir: Option, - arch_flags: Vec, - optimization: String, - project_root: Option, - extra_flags: Vec, -} - -impl CompilationCommandBuilder { - pub fn new() -> Self { - Self { - compiler: String::new(), - target: None, - cxx_toolchain_dir: None, - arch_flags: Vec::new(), - optimization: "2".to_string(), - project_root: None, - extra_flags: Vec::new(), - } - } - - pub fn set_compiler(mut self, compiler: &str) -> Self { - self.compiler = compiler.to_string(); - self - } - - pub fn set_target(mut self, target: &str) -> Self { - self.target = Some(target.to_string()); - self - } - - pub fn set_cxx_toolchain_dir(mut self, path: Option<&str>) -> Self { - self.cxx_toolchain_dir = path.map(|p| p.to_string()); - self - } - - pub fn add_arch_flags<'a>(mut self, flags: impl IntoIterator) -> Self { - self.arch_flags - .extend(flags.into_iter().map(|s| s.to_owned())); - - self - } - - pub fn set_opt_level(mut self, optimization: &str) -> Self { - self.optimization = optimization.to_string(); - self - } - - /// Sets the root path of all the generated test files. - pub fn set_project_root(mut self, path: &str) -> Self { - self.project_root = Some(path.to_string()); - self - } - - pub fn add_extra_flags<'a>(mut self, flags: impl IntoIterator) -> Self { - self.extra_flags - .extend(flags.into_iter().map(|s| s.to_owned())); - - self - } - - pub fn add_extra_flag(self, flag: &str) -> Self { - self.add_extra_flags([flag]) - } -} - -impl CompilationCommandBuilder { - pub fn into_cpp_compilation(self) -> CppCompilation { - let mut cpp_compiler = std::process::Command::new(self.compiler); - - if let Some(project_root) = self.project_root { - cpp_compiler.current_dir(project_root); - } - - let flags = std::env::var("CPPFLAGS").unwrap_or("".into()); - cpp_compiler.args(flags.split_whitespace()); - - cpp_compiler.arg(format!("-march={}", self.arch_flags.join("+"))); - - cpp_compiler.arg(format!("-O{}", self.optimization)); - - cpp_compiler.args(self.extra_flags); - - if let Some(target) = &self.target { - cpp_compiler.arg(format!("--target={target}")); - } - - CppCompilation(cpp_compiler) - } -} - -pub struct CppCompilation(std::process::Command); - -fn clone_command(command: &std::process::Command) -> std::process::Command { - let mut cmd = std::process::Command::new(command.get_program()); - if let Some(current_dir) = command.get_current_dir() { - cmd.current_dir(current_dir); - } - cmd.args(command.get_args()); - - for (key, val) in command.get_envs() { - cmd.env(key, val.unwrap_or_default()); - } - - cmd -} - -impl CppCompilation { - pub fn command_mut(&mut self) -> &mut std::process::Command { - &mut self.0 - } - - pub fn compile_object_file( - &self, - input: &str, - output: &str, - ) -> std::io::Result { - let mut cmd = clone_command(&self.0); - cmd.args([input, "-v", "-c", "-o", output]); - cmd.output() - } - - pub fn link_executable( - &self, - inputs: impl Iterator, - output: &str, - ) -> std::io::Result { - let mut cmd = clone_command(&self.0); - cmd.args(inputs); - cmd.args(["-o", output]); - cmd.output() - } -} diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index a95b4c36b7..bdf6f68d58 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -1,166 +1,42 @@ +use itertools::Itertools; + use crate::common::intrinsic::Intrinsic; -use super::argument::Argument; -use super::compare::INTRINSIC_DELIMITER; -use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; -// The number of times each intrinsic will be called. -const PASSES: u32 = 20; -const COMMON_HEADERS: [&str; 7] = [ - "iostream", - "string", - "cstring", - "iomanip", - "sstream", - "type_traits", - "cassert", -]; - -pub fn generate_c_test_loop( - w: &mut impl std::io::Write, - intrinsic: &Intrinsic, - indentation: Indentation, - additional: &str, - passes: u32, -) -> std::io::Result<()> { - let body_indentation = indentation.nested(); - writeln!( - w, - "{indentation}for (int i=0; i<{passes}; i++) {{\n\ - {loaded_args}\ - {body_indentation}auto __return_value = {intrinsic_call}({args});\n\ - {print_result}\n\ - {indentation}}}", - loaded_args = intrinsic.arguments.load_values_c(body_indentation), - intrinsic_call = intrinsic.name, - args = intrinsic.arguments.as_call_param_c(), - print_result = intrinsic - .results - .print_result_c(body_indentation, additional) - ) -} - -pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>( - w: &mut impl std::io::Write, - intrinsic: &Intrinsic, - indentation: Indentation, - constraints: &mut (impl Iterator> + Clone), - name: String, -) -> std::io::Result<()> { - let Some(current) = constraints.next() else { - return generate_c_test_loop(w, intrinsic, indentation, &name, PASSES); - }; - - let body_indentation = indentation.nested(); - for i in current.constraint.iter().flat_map(|c| c.iter()) { - let ty = current.ty.c_type(); - - writeln!(w, "{indentation}{{")?; - - // TODO: Move to actually specifying the enum value - // instead of typecasting integers, for better clarity - // of generated code. - writeln!( - w, - "{body_indentation}const {ty} {} = ({ty}){i};", - current.generate_name() - )?; - - generate_c_constraint_blocks( - w, - intrinsic, - body_indentation, - &mut constraints.clone(), - format!("{name}-{i}"), - )?; - - writeln!(w, "{indentation}}}")?; - } - - Ok(()) -} - -// Compiles C test programs using specified compiler -pub fn create_c_test_function( - w: &mut impl std::io::Write, - intrinsic: &Intrinsic, -) -> std::io::Result<()> { - let indentation = Indentation::default(); - - writeln!(w, "int run_{}() {{", intrinsic.name)?; - - // Define the arrays of arguments. - let arguments = &intrinsic.arguments; - arguments.gen_arglists_c(w, indentation.nested(), PASSES)?; - - generate_c_constraint_blocks( - w, - intrinsic, - indentation.nested(), - &mut arguments.iter().rev().filter(|&i| i.has_constraint()), - Default::default(), - )?; - - writeln!(w, " return 0;")?; - writeln!(w, "}}")?; - - Ok(()) -} - -pub fn write_mod_cpp( +pub fn write_wrapper_c( w: &mut impl std::io::Write, notice: &str, platform_headers: &[&str], - forward_declarations: &str, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!(w, "{notice}")?; - for header in COMMON_HEADERS.iter().chain(platform_headers.iter()) { - writeln!(w, "#include <{header}>")?; - } - - writeln!(w, "{}", forward_declarations)?; - - for intrinsic in intrinsics { - create_c_test_function(w, intrinsic)?; - } - - Ok(()) -} + writeln!(w, "#include ")?; + writeln!(w, "#include ")?; -pub fn write_main_cpp<'a>( - w: &mut impl std::io::Write, - arch_specific_definitions: &str, - arch_specific_headers: &[&str], - intrinsics: impl Iterator + Clone, -) -> std::io::Result<()> { - for header in COMMON_HEADERS.iter().chain(arch_specific_headers.iter()) { + for header in platform_headers { writeln!(w, "#include <{header}>")?; } - // NOTE: It's assumed that this value contains the required `ifdef`s. - writeln!(w, "{arch_specific_definitions }")?; - - for intrinsic in intrinsics.clone() { - writeln!(w, "extern int run_{intrinsic}(void);")?; - } - - writeln!(w, "int main(int argc, char **argv) {{")?; - for intrinsic in intrinsics { - writeln!( - w, - " std::cout << \"{INTRINSIC_DELIMITER}\" << std::endl;" - )?; - writeln!(w, " std::cout << \"{intrinsic}\" << std::endl;")?; - writeln!(w, " run_{intrinsic}();\n")?; + intrinsic.iter_specializations(|imm_values| { + writeln!( + w, + " +void {name}_wrapper{imm_arglist}({return_ty}* __dst{arglist}) {{ + *__dst = {name}({params}); +}}", + return_ty = intrinsic.results.c_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_c(), + params = intrinsic.arguments.as_call_params_c(&imm_values) + ) + })?; } - writeln!(w, " return 0;")?; - - writeln!(w, "}}")?; - Ok(()) } diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 82b97701bb..041d92c0b2 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -1,23 +1,53 @@ use itertools::Itertools; -use std::process::Command; -use super::compare::INTRINSIC_DELIMITER; use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; use crate::common::argument::ArgumentList; use crate::common::intrinsic::Intrinsic; +use crate::common::intrinsic_helpers::TypeKind; // The number of times each intrinsic will be called. pub(crate) const PASSES: u32 = 20; +const COMMON_RUST_DEFINITIONS: &str = r#" +macro_rules! make_nice { + ($($wrapper:ident ($inner:ty)),*) => {$( + #[derive(Debug, Copy, Clone)] + #[repr(transparent)] + pub struct $wrapper($inner); + + impl PartialEq for $wrapper { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 || (self.0.is_nan() && other.0.is_nan()) + } + } + + impl Eq for $wrapper {} + )*} +} + +make_nice!(NiceF16(f16), NiceF32(f32), NiceF64(f64)); +"#; + macro_rules! concatln { ($($lines:expr),* $(,)?) => { concat!($( $lines, "\n" ),*) }; } -fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { - writeln!( +pub fn write_bin_cargo_toml( + w: &mut impl std::io::Write, + module_count: usize, +) -> std::io::Result<()> { + write!(w, concatln!("[workspace]", "members = ["))?; + for i in 0..module_count { + writeln!(w, " \"mod_{i}\",")?; + } + writeln!(w, "]") +} + +pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { + write!( w, concatln!( "[package]", @@ -26,6 +56,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io:: "authors = [{authors}]", "license = \"{license}\"", "edition = \"2018\"", + "", + "[dependencies]", + "core_arch = {{ path = \"../../crates/core_arch\" }}", + "", + "[build-dependencies]", + "cc = \"1\"" ), name = name, version = env!("CARGO_PKG_VERSION"), @@ -36,72 +72,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io:: ) } -pub fn write_bin_cargo_toml( - w: &mut impl std::io::Write, - module_count: usize, -) -> std::io::Result<()> { - write_cargo_toml_header(w, "intrinsic-test-programs")?; - - writeln!(w, "[dependencies]")?; - writeln!(w, "core_arch = {{ path = \"../crates/core_arch\" }}")?; - - for i in 0..module_count { - writeln!(w, "mod_{i} = {{ path = \"mod_{i}/\" }}")?; - } - - Ok(()) -} - -pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { - write_cargo_toml_header(w, name)?; - - writeln!(w, "[dependencies]")?; - writeln!(w, "core_arch = {{ path = \"../../crates/core_arch\" }}")?; - - Ok(()) -} - -pub fn write_main_rs<'a>( - w: &mut impl std::io::Write, - chunk_count: usize, - cfg: &str, - definitions: &str, - intrinsics: impl Iterator + Clone, -) -> std::io::Result<()> { - writeln!(w, "#![feature(simd_ffi)]")?; - writeln!(w, "#![feature(f16)]")?; - writeln!(w, "#![allow(unused)]")?; - - // Cargo will spam the logs if these warnings are not silenced. - writeln!(w, "#![allow(non_upper_case_globals)]")?; - writeln!(w, "#![allow(non_camel_case_types)]")?; - writeln!(w, "#![allow(non_snake_case)]")?; - - writeln!(w, "{cfg}")?; - writeln!(w, "{definitions}")?; - - for module in 0..chunk_count { - writeln!(w, "use mod_{module}::*;")?; - } - - writeln!(w, "fn main() {{")?; - - for binary in intrinsics { - writeln!(w, " println!(\"{INTRINSIC_DELIMITER}\");")?; - writeln!(w, " println!(\"{binary}\");")?; - writeln!(w, " run_{binary}();\n")?; - } - - writeln!(w, "}}")?; - - Ok(()) -} - pub fn write_lib_rs( w: &mut impl std::io::Write, notice: &str, cfg: &str, definitions: &str, + i: usize, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!(w, "{notice}")?; @@ -109,6 +85,7 @@ pub fn write_lib_rs( writeln!(w, "#![feature(simd_ffi)]")?; writeln!(w, "#![feature(f16)]")?; writeln!(w, "#![allow(unused)]")?; + writeln!(w, "#![allow(inline_always_mismatching_target_features)]")?; // Cargo will spam the logs if these warnings are not silenced. writeln!(w, "#![allow(non_upper_case_globals)]")?; @@ -117,6 +94,8 @@ pub fn write_lib_rs( writeln!(w, "{cfg}")?; + writeln!(w, "{}", COMMON_RUST_DEFINITIONS)?; + writeln!(w, "{definitions}")?; let mut seen = std::collections::HashSet::new(); @@ -133,190 +112,193 @@ pub fn write_lib_rs( } } + write_bindings_rust(w, i, intrinsics)?; + for intrinsic in intrinsics { - crate::common::gen_rust::create_rust_test_module(w, intrinsic)?; + create_rust_test(w, intrinsic)?; } Ok(()) } -pub fn compile_rust_programs( - toolchain: Option<&str>, - target: &str, - profile: &str, - linker: Option<&str>, -) -> bool { - /* If there has been a linker explicitly set from the command line then - * we want to set it via setting it in the RUSTFLAGS*/ - - // This is done because `toolchain` is None when - // the --generate-only flag is passed - if toolchain.is_none() { - return true; - } - - trace!("Building cargo command"); - - let mut cargo_command = Command::new("cargo"); - cargo_command.current_dir("rust_programs"); - - // Do not use the target directory of the workspace please. - cargo_command.env("CARGO_TARGET_DIR", "target"); - - if toolchain.is_some_and(|val| !val.is_empty()) { - cargo_command.arg(toolchain.unwrap()); - } - cargo_command.args(["build", "--target", target, "--profile", profile]); - - let mut rust_flags = "-Cdebuginfo=0".to_string(); - if let Some(linker) = linker { - rust_flags.push_str(" -C linker="); - rust_flags.push_str(linker); - rust_flags.push_str(" -C link-args=-static"); - - cargo_command.env("CPPFLAGS", "-fuse-ld=lld"); - } - - cargo_command.env("RUSTFLAGS", rust_flags); - - trace!("running cargo"); - - if log::log_enabled!(log::Level::Trace) { - cargo_command.stdout(std::process::Stdio::inherit()); - cargo_command.stderr(std::process::Stdio::inherit()); - } - - let output = cargo_command.output(); - trace!("cargo is done"); - - if let Ok(output) = output { - if output.status.success() { - true - } else { - error!( - "Failed to compile code for rust intrinsics\n\nstdout:\n{}\n\nstderr:\n{}", - std::str::from_utf8(&output.stdout).unwrap_or(""), - std::str::from_utf8(&output.stderr).unwrap_or("") - ); - false - } - } else { - error!("Command failed: {output:#?}"); - false - } -} - -pub fn generate_rust_test_loop( +fn generate_rust_test_loop( w: &mut impl std::io::Write, intrinsic: &Intrinsic, - indentation: Indentation, - specializations: &[Vec], passes: u32, ) -> std::io::Result<()> { let intrinsic_name = &intrinsic.name; // Each function (and each specialization) has its own type. Erase that type with a cast. - let mut coerce = String::from("unsafe fn("); + let mut coerce = String::from("fn("); + let mut c_coerce = String::from("fn(_, "); for _ in intrinsic.arguments.iter().filter(|a| !a.has_constraint()) { coerce += "_, "; + c_coerce += "_, "; } coerce += ") -> _"; - - match specializations { - [] => { - writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?; - } - [const_args] if const_args.is_empty() => { - writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?; - } - _ => { - writeln!(w, " let specializations = [")?; - - for specialization in specializations { - let mut specialization: Vec<_> = - specialization.iter().map(|d| d.to_string()).collect(); - - let const_args = specialization.join(","); - - // The identifier is reversed. - specialization.reverse(); - let id = specialization.join("-"); - - writeln!( - w, - " (\"-{id}\", {intrinsic_name}::<{const_args}> as {coerce})," - )?; - } - - writeln!(w, " ];")?; - } + c_coerce += ")"; + + if intrinsic + .arguments + .iter() + .filter(|arg| arg.has_constraint()) + .count() + == 0 + { + writeln!( + w, + " let specializations = [(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)];" + )?; + } else { + writeln!(w, " let specializations = [")?; + + intrinsic.iter_specializations(|imm_values| { + writeln!( + w, + " (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {c_coerce}),", + const_args = imm_values.iter().join(","), + c_const_args = imm_values.iter().join("_"), + ) + })?; + + writeln!(w, " ];")?; } + let (cast_prefix, cast_suffix) = if intrinsic.results.is_simd() { + ( + format!( + "std::mem::transmute::<_, [{}; {}]>(", + intrinsic.results.rust_scalar_type().replace("f", "NiceF"), + intrinsic.results.num_lanes() * intrinsic.results.num_vectors() + ), + ")", + ) + } else if intrinsic.results.kind == TypeKind::Float { + ( + match intrinsic.results.inner_size() { + 16 => format!("NiceF16("), + 32 => format!("NiceF32("), + 64 => format!("NiceF64("), + _ => unimplemented!(), + }, + ")", + ) + } else { + ("".to_string(), "") + }; + write!( w, concatln!( - " for (id, f) in specializations {{", + " for (id, rust, c) in specializations {{", " for i in 0..{passes} {{", " unsafe {{", "{loaded_args}", - " let __return_value = f({args});", - " println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});", + " let __rust_return_value = rust({rust_args});", + "", + " let mut __c_return_value = std::mem::MaybeUninit::uninit();", + " c(__c_return_value.as_mut_ptr(){c_args});", + " let __c_return_value = __c_return_value.assume_init();", + "", + " assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, \"{{id}}\");", " }}", " }}", " }}", ), - loaded_args = intrinsic.arguments.load_values_rust(indentation.nest_by(4)), - args = intrinsic.arguments.as_call_param_rust(), - return_value = intrinsic.results.print_result_rust(), + loaded_args = intrinsic + .arguments + .load_values_rust(Indentation::default().nest_by(4)), + rust_args = intrinsic.arguments.as_call_param_rust(), + c_args = intrinsic.arguments.as_c_call_param_rust(), passes = passes, + cast_prefix = cast_prefix, + cast_suffix = cast_suffix, ) } -/// Generate the specializations (unique sequences of const-generic arguments) for this intrinsic. -fn generate_rust_specializations( - constraints: &mut impl Iterator>, -) -> Vec> { - let mut specializations = vec![vec![]]; - - for constraint in constraints { - specializations = constraint - .flat_map(|right| { - specializations.iter().map(move |left| { - let mut left = left.clone(); - left.push(i32::try_from(right).unwrap()); - left - }) - }) - .collect(); - } - - specializations -} - -// Top-level function to create complete test program -pub fn create_rust_test_module( +fn create_rust_test( w: &mut impl std::io::Write, intrinsic: &Intrinsic, ) -> std::io::Result<()> { trace!("generating `{}`", intrinsic.name); - let indentation = Indentation::default(); - writeln!(w, "pub fn run_{}() {{", intrinsic.name)?; + write!( + w, + concatln!("#[test]", "fn test_{intrinsic_name}() {{"), + intrinsic_name = intrinsic.name, + )?; // Define the arrays of arguments. let arguments = &intrinsic.arguments; - arguments.gen_arglists_rust(w, indentation.nested(), PASSES)?; - - // Define any const generics as `const` items, then generate the actual test loop. - let specializations = generate_rust_specializations( - &mut arguments - .iter() - .filter_map(|i| i.constraint.as_ref().map(|v| v.iter())), - ); + arguments.gen_arglists_rust(w, Indentation::default().nested(), PASSES)?; - generate_rust_test_loop(w, intrinsic, indentation, &specializations, PASSES)?; + generate_rust_test_loop(w, intrinsic, PASSES)?; writeln!(w, "}}")?; Ok(()) } + +pub fn write_bindings_rust( + w: &mut impl std::io::Write, + i: usize, + intrinsics: &[Intrinsic], +) -> std::io::Result<()> { + writeln!( + w, + concatln!( + "#[allow(improper_ctypes)]", + "#[link(name = \"wrapper_{i}\")]", + "unsafe extern \"C\" {{" + ), + i = i + )?; + + for intrinsic in intrinsics { + intrinsic.iter_specializations(|imm_values| { + writeln!( + w, + " fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", + return_ty = intrinsic.results.rust_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_rust(), + ) + })?; + } + + writeln!(w, "}}") +} + +pub fn write_build_rs( + w: &mut impl std::io::Write, + i: usize, + arch_flags: &[&str], +) -> std::io::Result<()> { + const COMMON_FLAGS: &[&str] = &["-ffp-contract=off", "-Wno-narrowing"]; + + write!( + w, + concatln!( + "fn main() {{", + " cc::Build::new()", + " .file(\"../../c_programs/wrapper_{i}.c\")", + " .opt_level(2)", + " .flags(&[", + ), + i = i + )?; + + let indentation = Indentation::default().nest_by(2); + for flag in COMMON_FLAGS.iter().chain(arch_flags) { + writeln!(w, "{indentation}\"{flag}\",")?; + } + + write!( + w, + concatln!(" ])", " .compile(\"wrapper_{i}\");", "}}"), + i = i + ) +} diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index 81f6d6d8b5..76e5959153 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,3 +1,5 @@ +use crate::common::constraint::Constraint; + use super::argument::ArgumentList; use super::intrinsic_helpers::IntrinsicTypeDefinition; @@ -16,3 +18,36 @@ pub struct Intrinsic { /// Any architecture-specific tags. pub arch_tags: Vec, } + +fn recurse_specializations<'a, E>( + constraints: &mut (impl Iterator + Clone), + imm_values: &mut Vec, + f: &mut impl FnMut(&[i64]) -> Result<(), E>, +) -> Result<(), E> { + if let Some(current) = constraints.next() { + for i in current.iter() { + imm_values.push(i); + recurse_specializations(&mut constraints.clone(), imm_values, f)?; + imm_values.pop(); + } + Ok(()) + } else { + f(&imm_values) + } +} + +impl Intrinsic { + pub fn iter_specializations( + &self, + mut f: impl FnMut(&[i64]) -> Result<(), E>, + ) -> Result<(), E> { + recurse_specializations( + &mut self + .arguments + .iter() + .filter_map(|arg| arg.constraint.as_ref()), + &mut Vec::new(), + &mut f, + ) + } +} diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index a14d7ef05f..f6732eea8f 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -5,7 +5,6 @@ use std::str::FromStr; use itertools::Itertools as _; -use super::cli::Language; use super::indentation::Indentation; use super::values::value_for_array; @@ -94,6 +93,7 @@ impl TypeKind { Self::Poly => "u", Self::Char(Sign::Unsigned) => "u", Self::Char(Sign::Signed) => "i", + Self::Mask => "u", _ => unreachable!("Unused type kind: {self:#?}"), } } @@ -154,67 +154,7 @@ impl IntrinsicType { self.ptr } - pub fn c_scalar_type(&self) -> String { - match self.kind() { - TypeKind::Char(_) => String::from("char"), - TypeKind::Vector => String::from("int32_t"), - _ => format!( - "{prefix}{bits}_t", - prefix = self.kind().c_prefix(), - bits = self.inner_size() - ), - } - } - - pub fn c_promotion(&self) -> &str { - match *self { - IntrinsicType { - kind, - bit_len: Some(8), - .. - } => match kind { - TypeKind::Int(Sign::Signed) => "int", - TypeKind::Int(Sign::Unsigned) => "unsigned int", - TypeKind::Poly => "uint8_t", - _ => "", - }, - IntrinsicType { - kind: TypeKind::Poly, - bit_len: Some(bit_len), - .. - } => match bit_len { - 8 => unreachable!("handled above"), - 16 => "uint16_t", - 32 => "uint32_t", - 64 => "uint64_t", - 128 => "", - _ => panic!("invalid bit_len"), - }, - IntrinsicType { - kind: TypeKind::Float, - bit_len: Some(bit_len), - .. - } => match bit_len { - 16 => "float16_t", - 32 => "float", - 64 => "double", - 128 => "", - _ => panic!("invalid bit_len"), - }, - IntrinsicType { - kind: TypeKind::Char(_), - .. - } => "char", - _ => "", - } - } - - pub fn populate_random( - &self, - indentation: Indentation, - loads: u32, - language: &Language, - ) -> String { + pub fn populate_random(&self, indentation: Indentation, loads: u32) -> String { match self { IntrinsicType { bit_len: Some(bit_len @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 16 | 32 | 64)), @@ -224,13 +164,9 @@ impl IntrinsicType { vec_len, .. } => { - let (prefix, suffix) = match language { - Language::Rust => ('[', ']'), - Language::C => ('{', '}'), - }; let body_indentation = indentation.nested(); format!( - "{prefix}\n{body}\n{indentation}{suffix}", + "[\n{body}\n{indentation}]", body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) .format_with(",\n", |i, fmt| { let src = value_for_array(*bit_len, i); @@ -241,13 +177,7 @@ impl IntrinsicType { let mask = !0u64 >> (64 - *bit_len); let ones_compl = src ^ mask; let twos_compl = ones_compl + 1; - if (twos_compl == src) && (language == &Language::C) { - // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid - // undefined literal overflow behaviour. - fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1")) - } else { - fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) - } + fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) } else { fmt(&format_args!("{body_indentation}{src:#x}")) } @@ -261,20 +191,11 @@ impl IntrinsicType { vec_len, .. } => { - let (prefix, cast_prefix, cast_suffix, suffix) = match (language, bit_len) { - (&Language::Rust, 16) => ('[', "f16::from_bits(", ")", ']'), - (&Language::Rust, 32) => ('[', "f32::from_bits(", ")", ']'), - (&Language::Rust, 64) => ('[', "f64::from_bits(", ")", ']'), - (&Language::C, 16) => ('{', "cast(", ")", '}'), - (&Language::C, 32) => ('{', "cast(", ")", '}'), - (&Language::C, 64) => ('{', "cast(", ")", '}'), - _ => unreachable!(), - }; format!( - "{prefix}\n{body}\n{indentation}{suffix}", + "[\n{body}\n{indentation}]", body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) .format_with(",\n", |i, fmt| fmt(&format_args!( - "{indentation}{cast_prefix}{src:#x}{cast_suffix}", + "{indentation}f{bit_len}::from_bits({src:#x})", indentation = indentation.nested(), src = value_for_array(*bit_len, i) ))) @@ -287,14 +208,10 @@ impl IntrinsicType { vec_len, .. } => { - let (prefix, suffix) = match language { - Language::Rust => ('[', ']'), - Language::C => ('{', '}'), - }; let body_indentation = indentation.nested(); let effective_bit_len = 32; format!( - "{prefix}\n{body}\n{indentation}{suffix}", + "[\n{body}\n{indentation}]", body = (0..(vec_len.unwrap_or(1) * simd_len.unwrap_or(1) + loads - 1)) .format_with(",\n", |i, fmt| { let src = value_for_array(effective_bit_len, i); @@ -304,13 +221,7 @@ impl IntrinsicType { let mask = !0u64 >> (64 - effective_bit_len); let ones_compl = src ^ mask; let twos_compl = ones_compl + 1; - if (twos_compl == src) && (language == &Language::C) { - // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid - // undefined literal overflow behaviour. - fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1")) - } else { - fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) - } + fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) } else { fmt(&format_args!("{body_indentation}{src:#x}")) } @@ -335,42 +246,20 @@ impl IntrinsicType { _ => true, } } - - pub fn as_call_param_c(&self, name: &String) -> String { - if self.ptr { - format!("&{name}") - } else { - name.clone() - } - } } pub trait IntrinsicTypeDefinition: Deref { /// Determines the load function for this type. /// can be implemented in an `impl` block - fn get_load_function(&self, _language: Language) -> String; - - /// can be implemented in an `impl` block - fn get_lane_function(&self) -> String; + fn get_load_function(&self) -> String; /// Gets a string containing the typename for this type in C format. /// can be directly defined in `impl` blocks fn c_type(&self) -> String; + /// Gets a string containing the typename for this type in Rust format. /// can be directly defined in `impl` blocks - fn c_single_vector_type(&self) -> String; - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_c(&self, indentation: Indentation, additional: &str) -> String; - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_rust(&self) -> String { - String::from("format_args!(\"{__return_value:.150?}\")") - } + fn rust_type(&self) -> String; /// To enable architecture-specific logic fn rust_scalar_type(&self) -> String { @@ -380,13 +269,4 @@ pub trait IntrinsicTypeDefinition: Deref { bits = self.inner_size() ) } - - fn generate_final_type_cast(&self) -> String { - let type_data = self.c_promotion(); - if type_data.len() > 2 { - format!("({type_data})") - } else { - String::new() - } - } } diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index a1062b3a87..a1a2ed89b0 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -1,38 +1,32 @@ -use std::fs::File; +use std::{fs::File, io}; use rayon::prelude::*; use cli::ProcessedCli; use crate::common::{ - compile_c::CppCompilation, - gen_c::{write_main_cpp, write_mod_cpp}, - gen_rust::{ - compile_rust_programs, write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs, - write_main_rs, - }, + gen_c::write_wrapper_c, + gen_rust::{write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs}, intrinsic::Intrinsic, intrinsic_helpers::IntrinsicTypeDefinition, }; pub mod argument; pub mod cli; -pub mod compare; -pub mod compile_c; pub mod constraint; -pub mod gen_c; -pub mod gen_rust; -pub mod indentation; pub mod intrinsic; pub mod intrinsic_helpers; -pub mod values; + +mod gen_c; +mod gen_rust; +mod indentation; +mod values; /// Architectures must support this trait /// to be successfully tested. pub trait SupportedArchitectureTest { type IntrinsicImpl: IntrinsicTypeDefinition + Sync; - fn cli_options(&self) -> &ProcessedCli; fn intrinsics(&self) -> &[Intrinsic]; fn create(cli_options: ProcessedCli) -> Self; @@ -40,118 +34,40 @@ pub trait SupportedArchitectureTest { const NOTICE: &str; const PLATFORM_C_HEADERS: &[&str]; - const PLATFORM_C_DEFINITIONS: &str; - const PLATFORM_C_FORWARD_DECLARATIONS: &str; const PLATFORM_RUST_CFGS: &str; const PLATFORM_RUST_DEFINITIONS: &str; - fn cpp_compilation(&self) -> Option; - - fn build_c_file(&self) -> bool { - let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400); + fn arch_flags(&self) -> Vec<&str>; - let cpp_compiler_wrapped = self.cpp_compilation(); + fn generate_c_file(&self) { + let (chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); std::fs::create_dir_all("c_programs").unwrap(); self.intrinsics() .par_chunks(chunk_size) .enumerate() .map(|(i, chunk)| { - let c_filename = format!("c_programs/mod_{i}.cpp"); + let c_filename = format!("c_programs/wrapper_{i}.c"); let mut file = File::create(&c_filename).unwrap(); - let mod_file_write_result = write_mod_cpp( - &mut file, - Self::NOTICE, - Self::PLATFORM_C_HEADERS, - Self::PLATFORM_C_FORWARD_DECLARATIONS, - chunk, - ); - - if let Err(error) = mod_file_write_result { - return Err(format!("Error writing to mod_{i}.cpp: {error:?}")); - } - - // compile this cpp file into a .o file. - // - // This is done because `cpp_compiler_wrapped` is None when - // the --generate-only flag is passed - trace!("compiling mod_{i}.cpp"); - if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { - let compile_output = cpp_compiler - .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o")) - .map_err(|e| format!("Error compiling mod_{i}.cpp: {e:?}"))?; - - assert!( - compile_output.status.success(), - "{}", - String::from_utf8_lossy(&compile_output.stderr) - ); - - trace!("finished compiling mod_{i}.cpp"); - } - Ok(()) + write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk) }) - .collect::>() + .collect::>() .unwrap(); - - let mut file = File::create("c_programs/main.cpp").unwrap(); - write_main_cpp( - &mut file, - Self::PLATFORM_C_DEFINITIONS, - Self::PLATFORM_C_HEADERS, - self.intrinsics().iter().map(|i| i.name.as_str()), - ) - .unwrap(); - - // This is done because `cpp_compiler_wrapped` is None when - // the --generate-only flag is passed - if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { - // compile this cpp file into a .o file - trace!("compiling main.cpp"); - let output = cpp_compiler - .compile_object_file("main.cpp", "intrinsic-test-programs.o") - .unwrap(); - assert!(output.status.success(), "{output:?}"); - - let object_files = (0..chunk_count) - .map(|i| format!("mod_{i}.o")) - .chain(["intrinsic-test-programs.o".to_owned()]); - - let output = cpp_compiler - .link_executable(object_files, "intrinsic-test-programs") - .unwrap(); - assert!(output.status.success(), "{output:?}"); - } - - true } - fn build_rust_file(&self) -> bool { - std::fs::create_dir_all("rust_programs/src").unwrap(); + fn generate_rust_file(&self) { + let arch_flags = self.arch_flags(); - let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400); + std::fs::create_dir_all("rust_programs").unwrap(); + + let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len()); let mut cargo = File::create("rust_programs/Cargo.toml").unwrap(); write_bin_cargo_toml(&mut cargo, chunk_count).unwrap(); - let mut main_rs = File::create("rust_programs/src/main.rs").unwrap(); - write_main_rs( - &mut main_rs, - chunk_count, - Self::PLATFORM_RUST_CFGS, - "", - self.intrinsics().iter().map(|i| i.name.as_str()), - ) - .unwrap(); - - let target = &self.cli_options().target; - let profile = &self.cli_options().profile; - let toolchain = self.cli_options().toolchain.as_deref(); - let linker = self.cli_options().linker.as_deref(); - self.intrinsics() - .par_chunks(chunk_size) + .chunks(chunk_size) .enumerate() .map(|(i, chunk)| { std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?; @@ -165,6 +81,7 @@ pub trait SupportedArchitectureTest { Self::NOTICE, Self::PLATFORM_RUST_CFGS, Self::PLATFORM_RUST_DEFINITIONS, + i, chunk, )?; @@ -174,41 +91,20 @@ pub trait SupportedArchitectureTest { write_lib_cargo_toml(&mut file, &format!("mod_{i}"))?; + let build_rs_filename = format!("rust_programs/mod_{i}/build.rs"); + trace!("generating `{build_rs_filename}`"); + let mut file = File::create(build_rs_filename).unwrap(); + + write_build_rs(&mut file, i, &arch_flags).unwrap(); + Ok(()) }) .collect::>() .unwrap(); - - compile_rust_programs(toolchain, target, profile, linker) - } - - fn compare_outputs(&self) -> bool { - if self.cli_options().toolchain.is_some() { - let intrinsics_name_list = self - .intrinsics() - .iter() - .map(|i| i.name.clone()) - .collect::>(); - - compare::compare_outputs( - &intrinsics_name_list, - &self.cli_options().runner, - &self.cli_options().target, - &self.cli_options().profile, - ) - } else { - true - } } } -// pub fn chunk_info(intrinsic_count: usize) -> (usize, usize) { -// let available_parallelism = std::thread::available_parallelism().unwrap().get(); -// let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count)); - -// (chunk_size, intrinsic_count.div_ceil(chunk_size)) -// } - -pub fn manual_chunk(intrinsic_count: usize, chunk_size: usize) -> (usize, usize) { - (chunk_size, intrinsic_count.div_ceil(chunk_size)) +pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { + let ncores = std::thread::available_parallelism().unwrap(); + (intrinsic_count / ncores, ncores.into()) } diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index e5c846877c..9f57c99f12 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -15,27 +15,21 @@ fn main() { let args: Cli = clap::Parser::parse(); let processed_cli_options = ProcessedCli::new(args); - match processed_cli_options.target.as_str() { - "aarch64-unknown-linux-gnu" - | "armv7-unknown-linux-gnueabihf" - | "aarch64_be-unknown-linux-gnu" => run(ArmArchitectureTest::create(processed_cli_options)), - - "x86_64-unknown-linux-gnu" => run(X86ArchitectureTest::create(processed_cli_options)), - _ => std::process::exit(0), + if processed_cli_options.target.starts_with("arm") + | processed_cli_options.target.starts_with("aarch64") + { + run(ArmArchitectureTest::create(processed_cli_options)) + } else if processed_cli_options.target.starts_with("x86") { + run(X86ArchitectureTest::create(processed_cli_options)) + } else { + unimplemented!("Unsupported target {}", processed_cli_options.target) } } fn run(test_environment: impl SupportedArchitectureTest) { info!("building C binaries"); - if !test_environment.build_c_file() { - std::process::exit(2); - } + test_environment.generate_c_file(); + info!("building Rust binaries"); - if !test_environment.build_rust_file() { - std::process::exit(3); - } - info!("Running binaries"); - if !test_environment.compare_outputs() { - std::process::exit(1); - } + test_environment.generate_rust_file(); } diff --git a/crates/intrinsic-test/src/x86/compile.rs b/crates/intrinsic-test/src/x86/compile.rs deleted file mode 100644 index 65cd291b1b..0000000000 --- a/crates/intrinsic-test/src/x86/compile.rs +++ /dev/null @@ -1,59 +0,0 @@ -use crate::common::cli::ProcessedCli; -use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation}; - -pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { - let cpp_compiler = config.cpp_compiler.as_ref()?; - - // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations - let mut command = CompilationCommandBuilder::new() - .add_arch_flags(["icelake-client"]) - .set_compiler(cpp_compiler) - .set_target(&config.target) - .set_opt_level("2") - .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref()) - .set_project_root("c_programs") - .add_extra_flags(vec![ - "-ffp-contract=off", - "-Wno-narrowing", - "-mavx", - "-mavx2", - "-mavx512f", - "-msse2", - "-mavx512vl", - "-mavx512bw", - "-mavx512dq", - "-mavx512cd", - "-mavx512fp16", - "-msha512", - "-msm3", - "-msm4", - "-mavxvnni", - "-mavxvnniint8", - "-mavxneconvert", - "-mavxifma", - "-mavxvnniint16", - "-mavx512bf16", - "-mavx512bitalg", - "-mavx512ifma", - "-mavx512vbmi", - "-mavx512vbmi2", - "-mavx512vnni", - "-mavx512vpopcntdq", - "-mavx512vp2intersect", - "-mbmi", - "-mbmi2", - "-mgfni", - "-mvaes", - "-mvpclmulqdq", - "-ferror-limit=1000", - "-std=c++23", - ]); - - if !cpp_compiler.contains("clang") { - command = command.add_extra_flag("-flax-vector-conversions"); - } - - let cpp_compiler = command.into_cpp_compilation(); - - Some(cpp_compiler) -} diff --git a/crates/intrinsic-test/src/x86/config.rs b/crates/intrinsic-test/src/x86/config.rs index 491dbb5147..68737ab5ac 100644 --- a/crates/intrinsic-test/src/x86/config.rs +++ b/crates/intrinsic-test/src/x86/config.rs @@ -3,7 +3,6 @@ pub const NOTICE: &str = "\ // test are derived from an XML specification, published under the same license as the // `intrinsic-test` crate.\n"; -// Format f16 values (and vectors containing them) in a way that is consistent with C. pub const PLATFORM_RUST_DEFINITIONS: &str = r#" use core_arch::arch::x86_64::*; @@ -129,206 +128,11 @@ unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) } -#[inline] -fn debug_simd_finish( - formatter: &mut core::fmt::Formatter<'_>, - type_name: &str, - array: &[T; N], -) -> core::fmt::Result { - core::fmt::Formatter::debug_tuple_fields_finish( - formatter, - type_name, - &core::array::from_fn::<&dyn core::fmt::Debug, N, _>(|i| &array[i]), - ) -} - -trait DebugAs { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result; -} - -impl DebugAs for T { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "{self}") - } -} - -macro_rules! impl_debug_as { - ($simd:ty, $name:expr, $bits:expr, [$($type:ty),+]) => { - $( - impl DebugAs<$type> for $simd { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - const ELEMENT_BITS: usize = core::mem::size_of::<$type>() * 8; - const NUM_ELEMENTS: usize = $bits / ELEMENT_BITS; - let array = unsafe { core::mem::transmute::<_, [$type; NUM_ELEMENTS]>(*self) }; - debug_simd_finish(f, $name, &array) - } - } - )+ - }; -} - -impl_debug_as!(__m128i, "__m128i", 128, [u8, i8, u16, i16, u32, i32, u64, i64, f16]); -impl_debug_as!(__m256i, "__m256i", 256, [u8, i8, u16, i16, u32, i32, u64, i64]); -impl_debug_as!(__m512i, "__m512i", 512, [u8, i8, u16, i16, u32, i32, u64, i64]); -impl_debug_as!(__m128h, "__m128h", 128, [f32]); -impl_debug_as!(__m256h, "__m256h", 256, [f32]); -impl_debug_as!(__m512h, "__m512h", 512, [f32]); - -fn debug_as(x: V) -> impl core::fmt::Debug -where V: DebugAs -{ - struct DebugWrapper(V, core::marker::PhantomData); - impl, T> core::fmt::Debug for DebugWrapper { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - self.0.fmt(f) - } - } - DebugWrapper(x, core::marker::PhantomData) -} - -"#; - -pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#" -#ifndef X86_DECLARATIONS -#define X86_DECLARATIONS - typedef _Float16 float16_t; - typedef float float32_t; - typedef double float64_t; - - #define __int64 long long - #define __int32 int - - std::ostream& operator<<(std::ostream& os, _Float16 value); - std::ostream& operator<<(std::ostream& os, __m128i value); - std::ostream& operator<<(std::ostream& os, __m256i value); - std::ostream& operator<<(std::ostream& os, __m512i value); - std::ostream& operator<<(std::ostream& os, __mmask8 value); - - #define _mm512_extract_intrinsic_test_epi8(m, lane) \ - _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16) - - #define _mm512_extract_intrinsic_test_epi16(m, lane) \ - _mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8) - - #define _mm512_extract_intrinsic_test_epi32(m, lane) \ - _mm_extract_epi32(_mm512_extracti64x2_epi64((m), (lane) / 4), (lane) % 4) - - #define _mm512_extract_intrinsic_test_epi64(m, lane) \ - _mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2) - - // Load f16 (__m128h) and cast to integer (__m128i) - #define _mm_loadu_ph_to___m128i(mem_addr) _mm_castph_si128(_mm_loadu_ph(mem_addr)) - #define _mm256_loadu_ph_to___m256i(mem_addr) _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) - #define _mm512_loadu_ph_to___m512i(mem_addr) _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) - - // Load f32 (__m128) and cast to f16 (__m128h) - #define _mm_loadu_ps_to___m128h(mem_addr) _mm_castps_ph(_mm_loadu_ps(mem_addr)) - #define _mm256_loadu_ps_to___m256h(mem_addr) _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) - #define _mm512_loadu_ps_to___m512h(mem_addr) _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) - - // Load integer types and cast to double (__m128d, __m256d, __m512d) - #define _mm_loadu_epi16_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi16_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi16_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi32_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi32_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi32_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi64_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi64_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi64_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - // Load integer types and cast to float (__m128, __m256, __m512) - #define _mm_loadu_epi16_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi16_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi16_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi32_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi32_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi32_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi64_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi64_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi64_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - // T1 is the `To` type, T2 is the `From` type - template T1 cast(T2 x) { - if constexpr ((std::is_integral_v && std::is_integral_v) || (std::is_floating_point_v && std::is_floating_point_v)) { - return x; - } else if constexpr (sizeof(T1) <= sizeof(T2)) { - T1 ret{}; - std::memcpy(&ret, &x, sizeof(T1)); - return ret; - } else { - static_assert(sizeof(T1) == sizeof(T2) || std::is_convertible_v, - "T2 must either be convertible to T1, or have the same size as T1!"); - return T1{}; - } - } -#endif -"#; -pub const PLATFORM_C_DEFINITIONS: &str = r#" - -std::ostream& operator<<(std::ostream& os, _Float16 value) { - os << static_cast(value); - return os; -} - -std::ostream& operator<<(std::ostream& os, __m128i value) { - void* temp = malloc(sizeof(__m128i)); - _mm_storeu_si128((__m128i*)temp, value); - std::stringstream ss; - - ss << "0x"; - for(int i = 0; i < 16; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i]; - } - os << ss.str(); - return os; -} - -std::ostream& operator<<(std::ostream& os, __m256i value) { - void* temp = malloc(sizeof(__m256i)); - _mm256_storeu_si256((__m256i*)temp, value); - std::stringstream ss; - - ss << "0x"; - for(int i = 0; i < 32; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i]; - } - os << ss.str(); - return os; -} - -std::ostream& operator<<(std::ostream& os, __m512i value) { - void* temp = malloc(sizeof(__m512i)); - _mm512_storeu_si512((__m512i*)temp, value); - std::stringstream ss; - - ss << "0x"; - for(int i = 0; i < 64; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i]; - } - os << ss.str(); - return os; -} - -std::ostream& operator<<(std::ostream& os, __mmask8 value) { - os << static_cast(value); - return os; -} "#; pub const PLATFORM_RUST_CFGS: &str = r#" -#![cfg_attr(target_arch = "x86", feature(avx))] -#![cfg_attr(target_arch = "x86", feature(sse))] -#![cfg_attr(target_arch = "x86", feature(sse2))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_bf16))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))] -#![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))] -#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))] -#![feature(fmt_helpers_for_derive)] +#![feature(stdarch_x86_avx512_bf16)] +#![feature(stdarch_x86_avx512_f16)] +#![feature(stdarch_x86_rtm)] +#![feature(x86_amx_intrinsics)] "#; diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index f2baf07071..5d4798482a 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -1,4 +1,3 @@ -mod compile; mod config; mod constraint; mod intrinsic; @@ -7,7 +6,6 @@ mod xml_parser; use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; -use crate::common::compile_c::CppCompilation; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::X86IntrinsicType; @@ -15,33 +13,59 @@ use xml_parser::get_xml_intrinsics; pub struct X86ArchitectureTest { intrinsics: Vec>, - cli_options: ProcessedCli, } impl SupportedArchitectureTest for X86ArchitectureTest { type IntrinsicImpl = X86IntrinsicType; - fn cli_options(&self) -> &ProcessedCli { - &self.cli_options - } - fn intrinsics(&self) -> &[Intrinsic] { &self.intrinsics } - fn cpp_compilation(&self) -> Option { - compile::build_cpp_compilation(&self.cli_options) - } - const NOTICE: &str = config::NOTICE; - const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h", "cstddef", "cstdint"]; - const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS; - const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS; + const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"]; const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; + fn arch_flags(&self) -> Vec<&str> { + vec![ + "-mavx", + "-mavx2", + "-mavx512f", + "-msse2", + "-mavx512vl", + "-mavx512bw", + "-mavx512dq", + "-mavx512cd", + "-mavx512fp16", + "-msha", + "-msha512", + "-msm3", + "-msm4", + "-mavxvnni", + "-mavxvnniint8", + "-mavxneconvert", + "-mavxifma", + "-mavxvnniint16", + "-mavx512bf16", + "-mavx512bitalg", + "-mavx512ifma", + "-mavx512vbmi", + "-mavx512vbmi2", + "-mavx512vnni", + "-mavx512vpopcntdq", + "-mavx512vp2intersect", + "-mbmi", + "-mbmi2", + "-mgfni", + "-mvaes", + "-mvpclmulqdq", + "-mlzcnt", + ] + } + fn create(cli_options: ProcessedCli) -> Self { let mut intrinsics = get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file"); @@ -67,9 +91,6 @@ impl SupportedArchitectureTest for X86ArchitectureTest { .take(sample_size) .collect::>(); - Self { - intrinsics: intrinsics, - cli_options: cli_options, - } + Self { intrinsics } } } diff --git a/crates/intrinsic-test/src/x86/types.rs b/crates/intrinsic-test/src/x86/types.rs index 2391ee9c2d..cd7c41e06f 100644 --- a/crates/intrinsic-test/src/x86/types.rs +++ b/crates/intrinsic-test/src/x86/types.rs @@ -1,11 +1,8 @@ use std::str::FromStr; use itertools::Itertools; -use regex::Regex; use super::intrinsic::X86IntrinsicType; -use crate::common::cli::Language; -use crate::common::indentation::Indentation; use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; use crate::x86::xml_parser::Parameter; @@ -26,82 +23,16 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { .replace("const ", "") } - fn c_single_vector_type(&self) -> String { - // matches __m128, __m256 and similar types - let re = Regex::new(r"__m\d+").unwrap(); - if re.is_match(self.param.type_data.as_str()) { + fn rust_type(&self) -> String { + if self.is_simd() { self.param.type_data.clone() } else { - unreachable!("Shouldn't be called on this type") + format!("{}{}", self.kind.rust_prefix(), self.inner_size()) } } - // fn rust_type(&self) -> String { - // // handling edge cases first - // // the general handling is implemented below - // if let Some(val) = self.metadata.get("type") { - // match val.as_str() { - // "__m128 const *" => { - // return "&__m128".to_string(); - // } - // "__m128d const *" => { - // return "&__m128d".to_string(); - // } - // "const void*" => { - // return "&__m128d".to_string(); - // } - // _ => {} - // } - // } - - // if self.kind() == TypeKind::Void && self.ptr { - // // this has been handled by default settings in - // // the from_param function of X86IntrinsicType - // unreachable!() - // } - - // // general handling cases - // let core_part = if self.kind() == TypeKind::Mask { - // // all types of __mmask are handled here - // format!("__mask{}", self.bit_len.unwrap()) - // } else if self.simd_len.is_some() { - // // all types of __m vector types are handled here - // let re = Regex::new(r"\__m\d+[a-z]*").unwrap(); - // let rust_type = self - // .metadata - // .get("type") - // .map(|val| re.find(val).unwrap().as_str()); - // rust_type.unwrap().to_string() - // } else { - // format!( - // "{}{}", - // self.kind.rust_prefix().to_string(), - // self.bit_len.unwrap() - // ) - // }; - - // // extracting "memsize" so that even vector types can be involved - // let memwidth = self - // .metadata - // .get("memwidth") - // .map(|n| str::parse::(n).unwrap()); - // let prefix_part = if self.ptr && self.constant && self.bit_len.eq(&memwidth) { - // "&" - // } else if self.ptr && self.bit_len.eq(&memwidth) { - // "&mut " - // } else if self.ptr && self.constant { - // "*const " - // } else if self.ptr { - // "*mut " - // } else { - // "" - // }; - - // return prefix_part.to_string() + core_part.as_str(); - // } - /// Determines the load function for this type. - fn get_load_function(&self, _language: Language) -> String { + fn get_load_function(&self) -> String { let type_value = self.param.type_data.clone(); if type_value.len() == 0 { unimplemented!("the value for key 'type' is not present!"); @@ -168,82 +99,6 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { } } - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_c(&self, indentation: Indentation, additional: &str) -> String { - let lanes = if self.num_lanes() > 1 { - (0..self.num_lanes()) - .map(|idx| -> std::string::String { - let cast_type = self.c_promotion(); - let lane_fn = self.get_lane_function(); - if cast_type.len() > 2 { - format!("cast<{cast_type}>({lane_fn}(__return_value, {idx}))") - } else { - format!("{lane_fn}(__return_value, {idx})") - } - }) - .collect::>() - .join(r#" << ", " << "#) - } else { - format!( - "{promote}cast<{cast}>(__return_value)", - cast = match self.kind() { - TypeKind::Void => "void".to_string(), - TypeKind::Float if self.inner_size() == 64 => "double".to_string(), - TypeKind::Float if self.inner_size() == 32 => "float".to_string(), - TypeKind::Mask => format!( - "__mmask{}", - self.bit_len.expect(format!("self: {self:#?}").as_str()) - ), - TypeKind::Vector => format!( - "__m{}i", - self.bit_len.expect(format!("self: {self:#?}").as_str()) - ), - _ => self.c_scalar_type(), - }, - promote = self.generate_final_type_cast(), - ) - }; - - format!( - r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, - ty = if self.is_simd() { - format!("{}(", self.c_type()) - } else { - String::from("") - }, - close = if self.is_simd() { ")" } else { "" }, - ) - } - - /// Determines the get lane function for this type. - fn get_lane_function(&self) -> String { - let total_vector_bits: Option = self - .simd_len - .zip(self.bit_len) - .and_then(|(simd_len, bit_len)| Some(simd_len * bit_len)); - - match (self.bit_len, total_vector_bits) { - (Some(8), Some(128)) => String::from("(uint8_t)_mm_extract_epi8"), - (Some(16), Some(128)) => String::from("(uint16_t)_mm_extract_epi16"), - (Some(32), Some(128)) => String::from("(uint32_t)_mm_extract_epi32"), - (Some(64), Some(128)) => String::from("(uint64_t)_mm_extract_epi64"), - (Some(8), Some(256)) => String::from("(uint8_t)_mm256_extract_epi8"), - (Some(16), Some(256)) => String::from("(uint16_t)_mm256_extract_epi16"), - (Some(32), Some(256)) => String::from("(uint32_t)_mm256_extract_epi32"), - (Some(64), Some(256)) => String::from("(uint64_t)_mm256_extract_epi64"), - (Some(8), Some(512)) => String::from("(uint8_t)_mm512_extract_intrinsic_test_epi8"), - (Some(16), Some(512)) => String::from("(uint16_t)_mm512_extract_intrinsic_test_epi16"), - (Some(32), Some(512)) => String::from("(uint32_t)_mm512_extract_intrinsic_test_epi32"), - (Some(64), Some(512)) => String::from("(uint64_t)_mm512_extract_intrinsic_test_epi64"), - _ => unreachable!( - "invalid length for vector argument: {:?}, {:?}", - self.bit_len, self.simd_len - ), - } - } - fn rust_scalar_type(&self) -> String { let prefix = match self.data.kind { TypeKind::Mask => String::from("__mmask"), @@ -258,23 +113,6 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { }; format!("{prefix}{bits}") } - - fn print_result_rust(&self) -> String { - let return_value = match self.kind() { - // `_mm{256}_cvtps_ph` has return type __m128i but contains f16 values - TypeKind::Float if self.param.type_data == "__m128i" => { - "format_args!(\"{:.150?}\", debug_as::<_, f16>(__return_value))".to_string() - } - TypeKind::Int(_) - if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) => - { - format!("debug_as::<_, u{}>(__return_value)", self.inner_size()) - } - _ => "format_args!(\"{__return_value:.150?}\")".to_string(), - }; - - return_value - } } impl X86IntrinsicType {