diff --git a/CMakeLists.txt b/CMakeLists.txt index b341304..a03f2c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ set (CMAKE_CXX_STANDARD 17) set (CMAKE_CUDA_STANDARD 17) # Doesn't work? set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ") if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX -D_CRT_SECURE_NO_WARNINGS") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2") else() set(CMAKE_CXX_FLAGS @@ -30,7 +30,7 @@ endif() set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) if (MSVC) set(CMAKE_CUDA_FLAGS - "${CMAKE_CUDA_FLAGS} -Xcompiler=\"/WX\" -rdc=true") + "${CMAKE_CUDA_FLAGS} -Xcompiler=\"/WX\" -D_CRT_SECURE_NO_WARNINGS -rdc=true") set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -O3 -Xcompiler=\"/O2\"") else() @@ -46,9 +46,35 @@ endif() find_package(CUDA REQUIRED) # Required for CUDA_INCLUDE_DIRS # Add macro definitions used in tests. -add_compile_definitions( - CUDA_INC_DIR="${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - CUB_DIR=${CUDA_INC_DIR}) +if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0.0) + # CCCL's include directories have moved in CUDA 13 compared to CUDA 12. + # On Windows, CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES includes both include paths, which must be separated before including in compile definitions. + # On *nix, only the main ctk include dir is included + # It may be cleaner to switch to the more modern find_package(CUDAToolkit) and find_package(CCCL) + list(LENGTH CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES ctk_inc_dirs_length) + if (ctk_inc_dirs_length GREATER 1) + list(GET CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES 0 cuda_inc_dir) + list(GET CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES 1 cccl_inc_dir) + else() + list(GET CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES 0 cuda_inc_dir) + # Check the default location within the CTK if not in the variable. 
+ if (EXISTS "${cuda_inc_dir}/cccl") + set(cccl_inc_dir "${cuda_inc_dir}/cccl") + else() + set(cccl_inc_dir "${cuda_inc_dir}") + endif() + endif() + add_compile_definitions( + CUDA_INC_DIR="${cuda_inc_dir}" + CUB_DIR="${cccl_inc_dir}") + unset(cccl_inc_dir) + unset(cuda_inc_dir) + unset(ctk_inc_dirs_length) +else() + add_compile_definitions( + CUDA_INC_DIR="${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" + CUB_DIR=${CUDA_INC_DIR}) +endif() # Copy the example_headers directory for use at runtime by tests. file(COPY example_headers DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) @@ -100,7 +126,7 @@ function(add_stringify_command arg) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${arg}.jit WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMAND ./stringify ${CMAKE_CURRENT_SOURCE_DIR}/${arg} > ${arg}.jit + COMMAND $<TARGET_FILE:stringify> ${CMAKE_CURRENT_SOURCE_DIR}/${arg} > ${arg}.jit DEPENDS stringify) endfunction() add_executable(jitify2_preprocess jitify2_preprocess.cpp) @@ -118,7 +144,7 @@ add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/jitify2_test_kernels.cu.jit.hpp ${CMAKE_CURRENT_BINARY_DIR}/jitify2_test_kernels.cu.headers.jit.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMAND ${CMAKE_CURRENT_BINARY_DIR}/jitify2_preprocess -i --minify + COMMAND $<TARGET_FILE:jitify2_preprocess> -i --minify -o ${CMAKE_CURRENT_BINARY_DIR} -s jitify2_test_kernels.cu.headers jitify2_test_kernels.cu @@ -172,8 +198,12 @@ foreach(test ${TESTS}) endforeach(test) # Add "check" command that *builds and* runs tests, with verbose output. # (The default "test" command neither builds nor gives verbose output). 
-add_custom_target(check ALL COMMAND ${CMAKE_CTEST_COMMAND} --verbose - DEPENDS ${TESTS}) +# --build-config is required for multi-config generators, and uses COMMAND_EXPAND_LISTS to ensure that the flag and value are not provided as a single string to ctest +add_custom_target(check ALL + COMMAND ${CMAKE_CTEST_COMMAND} --verbose "$<IF:$<STREQUAL:$<CONFIG>,>,,--build-config;$<CONFIG>>" + DEPENDS ${TESTS} + COMMAND_EXPAND_LISTS +) # ---- # Docs diff --git a/example_headers/class_arg_kernel.cuh b/example_headers/class_arg_kernel.cuh index b452ba3..15d0999 100644 --- a/example_headers/class_arg_kernel.cuh +++ b/example_headers/class_arg_kernel.cuh @@ -50,9 +50,6 @@ class Managed { struct Arg : public Managed { const int x; Arg(int x_) : x(x_) {} - - // there can be no call to the copy constructor - Arg(const Arg& arg) = delete; }; template diff --git a/jitify2.hpp b/jitify2.hpp index a32e001..c2ad6fd 100644 --- a/jitify2.hpp +++ b/jitify2.hpp @@ -155,7 +155,7 @@ #include #include -#if __cplusplus >= 201703L +#if JITIFY_CPLUSPLUS >= 201703L #include #endif @@ -194,7 +194,7 @@ #include // For UndecorateSymbolName #include // For mkdir #include // For open, O_RDWR etc. -#include // For GetTempPath2A +#include // For GetTempPath2A, GetLongPathNameA #include // For _sopen_s #include // For _getpid #include // For SHGetFolderPathA @@ -2786,6 +2786,15 @@ inline bool path_exists(const char* filename, bool* is_dir = nullptr) { return ret; } +inline std::string quoted_path_if_needed(const std::string& p) { + // If a path includes spaces or single backslashes, the full path may need wrapping with quotes when passed to run_system_command, either as the executable or an include path. 
+ if (p.find(' ') == std::string::npos && p.find('\\') == std::string::npos) { + return p; + } else { + return "\"" + p + "\""; + } +} + inline const char* get_current_executable_path() { static const char* path = []() -> const char* { static char buffer[JITIFY_PATH_MAX + 1] = {}; @@ -4117,15 +4126,31 @@ inline int run_system_command(const char* command, if (output) { output->clear(); std::array buffer; - while (fgets(buffer.data(), buffer.size(), pipe)) { + while (fgets(buffer.data(), static_cast(buffer.size()), pipe)) { *output += buffer.data(); } + } else { + // Must always read from the pipe for the exit code from the command to be available + std::array buffer; + while (fgets(buffer.data(), static_cast(buffer.size()), pipe)) { } } const int result = JITIFY_PCLOSE(pipe); if (result == -1 && failure) { *failure = get_errno_string(); } - return result; + + // Extract the exit code from the called program if possible, otherwise return -1; + int exitCode = -1; + #ifdef _MSC_VER + // _pclose is documented as having the same return code format as for _cwait, but with the high and low order bytes swapped. However the _cwait docs do not describe a corresponding value. Just extracting the lsb seems to behave + exitCode = result & 0xFF; + #else + // Extract the exit code from the pclose result if it was a 'normal' exit + if (WIFEXITED(result)){ + exitCode = WEXITSTATUS(result); + } + #endif + return exitCode; } #endif // JITIFY_ENABLE_NVCC @@ -4135,9 +4160,15 @@ inline const char* guess_cuda_home() { if (env_jitify_cuda_home) return env_jitify_cuda_home; const char* env_cuda_home = std::getenv("CUDA_HOME"); if (env_cuda_home) return env_cuda_home; + // CUDA_PATH is set by the CUDA installer on windows + const char* env_cuda_path = std::getenv("CUDA_PATH"); + if (env_cuda_path) return env_cuda_path; // Guess the default location. 
#if defined _WIN32 || defined _WIN64 - return "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"; + constexpr int cuda_version_major = CUDA_VERSION / 1000; + constexpr int cuda_version_minor = (CUDA_VERSION % 1000) / 10; + static const std::string default_path = std::string("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v") + std::to_string(cuda_version_major) + "." + std::to_string(cuda_version_minor) + "\\"; // static: the returned c_str() pointer must outlive this call + return default_path.c_str(); #else return "/usr/local/cuda"; #endif @@ -4150,7 +4181,7 @@ class Nvcc { std::string nvcc_path_; static bool is_valid_nvcc(std::string nvcc_path) { - return run_system_command((nvcc_path + " --version").c_str()); + return run_system_command((quoted_path_if_needed(nvcc_path) + " --version").c_str()) == 0; } static std::string find_nvcc_path() { @@ -4180,7 +4211,7 @@ class Nvcc { std::string* failure = nullptr) const { // Note: We redirect stderr to stdout so that we capture it too. const std::string command = - detail::string_concat(nvcc_path_, " ", options, " ", "2>&1"); + detail::string_concat(quoted_path_if_needed(nvcc_path_), " ", options, " ", "2>&1"); return run_system_command(command.c_str(), output, failure); } }; @@ -4206,7 +4237,9 @@ inline std::string make_temp_dir() { char tmpdir[JITIFY_PATH_MAX + 1]; // Note: tmpdir is guaranteed to end with a '\'. 
if (!GetTempPath2A(sizeof(tmpdir), tmpdir)) return ""; - std::string path = tmpdir + "__jitify_" + std::to_string(uid); + // Get the long-form of the tmpdir + GetLongPathNameA(tmpdir, tmpdir, sizeof(tmpdir)); + std::string path = std::string(tmpdir) + "__jitify_" + std::to_string(uid); if (::_mkdir(path.c_str()) != 0) return ""; return path; #else @@ -4216,7 +4249,7 @@ inline std::string make_temp_dir() { #endif } -#if __cplusplus < 201703L && (!defined(_WIN32) && !defined(_WIN64)) +#if JITIFY_CPLUSPLUS < 201703L && (!defined(_WIN32) && !defined(_WIN64)) inline int delete_file_visitor(const char* path, const struct stat* sbuf, int type, struct FTW* ftwb) { (void)sbuf; @@ -4227,11 +4260,11 @@ inline int delete_file_visitor(const char* path, const struct stat* sbuf, #endif inline bool remove_all(const std::string& path) { -#if __cplusplus >= 201703L +#if JITIFY_CPLUSPLUS >= 201703L std::error_code ec; return std::filesystem::remove_all(path, ec) != static_cast(-1); -#else // __cplusplus < 201703L +#else // JITIFY_CPLUSPLUS < 201703L #if defined(_WIN32) || defined(_WIN64) // TODO: Implement this if anyone cares about it. return false; @@ -4244,7 +4277,7 @@ inline bool remove_all(const std::string& path) { const int max_depth = 20; return ::nftw(path.c_str(), delete_file_visitor, max_depth, flags) == 0; #endif // not Windows -#endif // __cplusplus < 201703L +#endif // JITIFY_CPLUSPLUS < 201703L } class TempDirectory { @@ -4349,8 +4382,8 @@ class NvccProgram { // Note: This ensures the cuda toolkit headers are found before any that // were embedded during preprocessing (which probably won't work with nvcc). 
options.emplace_back( - "-I", detail::path_join(detail::guess_cuda_home(), "include")); - options.emplace_back("-I", tmp_include_dir); + "-I", detail::quoted_path_if_needed(detail::path_join(detail::guess_cuda_home(), "include"))); + options.emplace_back("-I", detail::quoted_path_if_needed(tmp_include_dir)); static const char* const kJitifyExpressionPrefix = "__jitify_expression"; @@ -4388,7 +4421,7 @@ class NvccProgram { if (!options.find({"--dlink-time-opt, -dlto"}).empty()) { options.emplace_back("-ltoir", ""); options.emplace_back(tmp_source_file, ""); - if (nvcc(options, &log_, error)) return infer_nvcc_error_type(); + if (nvcc(options, &log_, error) != 0) return infer_nvcc_error_type(); if (!read_binary_file(tmp_ltoir_file, &nvvm_)) { if (error) *error = "Failed to read binary file: " + tmp_ltoir_file; return NVRTC_ERROR_PROGRAM_CREATION_FAILURE; @@ -4399,7 +4432,7 @@ class NvccProgram { options.emplace_back("-ptx", ""); options.emplace_back(tmp_source_file, ""); options.emplace_back("-o", tmp_ptx_file); - if (nvcc(options, &log_, error)) return infer_nvcc_error_type(); + if (nvcc(options, &log_, error) != 0) return infer_nvcc_error_type(); options.pop_back(); // Remove -o option options.pop_back(); // Remove source file options.pop_back(); // Remove -ptx @@ -4439,7 +4472,7 @@ class NvccProgram { options.emplace_back("-cubin", ""); options.emplace_back(tmp_ptx_file, ""); options.emplace_back("-o", tmp_cubin_file); - if (nvcc(options, &log_, error)) { + if (nvcc(options, &log_, error) != 0) { return NVRTC_ERROR_PROGRAM_CREATION_FAILURE; } if (!read_binary_file(tmp_cubin_file, &cubin_)) { @@ -4531,7 +4564,24 @@ inline nvrtcResult compile_program_nvrtc( header_sources_c.push_back(name_source.second.c_str()); } +#if defined(__CUDACC__) + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diag_suppress 550 + #else // __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma diag_suppress 550 + #endif // __NVCC_DIAG_PRAGMA_SUPPORT__ +#endif // defined(__CUDACC__) + bool pch_verbose = 
true; + +#if defined(__CUDACC__) + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diag_default 550 + #else // __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma diag_default 550 + #endif // __NVCC_DIAG_PRAGMA_SUPPORT__ +#endif // #if defined(__CUDACC__) + std::vector options_c; options_c.reserve(options.size()); for (const Option& option : options) { @@ -6626,7 +6676,8 @@ struct __add_reference_helper<_Tp, true> { }; template struct add_reference : public __add_reference_helper<_Tp> {}; - +)" +R"( namespace __jitify_detail { template struct is_int_or_cref { @@ -9687,9 +9738,19 @@ class LRUFileCache { file_suffix_(sanitize_filename(file_suffix)), lock_file_name_(path_join(path_, file_prefix_ + "lock")) {} + +// std::result_of was deprecated in c++17 and removed in c++20. +#if JITIFY_CPLUSPLUS >= 201703L + template + using invoke_result_type = typename std::invoke_result::type; +#else // JITIFY_CPLUSPLUS >= 201703L + template + using invoke_result_type = typename std::result_of::type; +#endif // JITIFY_CPLUSPLUS >= 201703L + template std::string get(const std::string& name, - typename std::result_of::type* result, + invoke_result_type* result, Construct construct, Serialize serialize, Deserialize deserialize, bool* hit = nullptr) const { if (path_.empty() || max_size_ == 0) { diff --git a/jitify2_test.cu b/jitify2_test.cu index 2aae9d2..4f44f1b 100644 --- a/jitify2_test.cu +++ b/jitify2_test.cu @@ -1017,15 +1017,22 @@ __global__ void my_kernel() {} )"; auto preprog = Program("my_program", source) ->preprocess({"-I.", "-Iexample_headers", "-Ifoo/bar", - "-I" CUDA_INC_DIR}); + "-I" CUDA_INC_DIR, "-I" CUB_DIR}); ASSERT_EQ(get_error(preprog), ""); auto compiled = preprog->compile(); ASSERT_EQ(get_error(compiled), ""); // Note: The '2' in "I2@" here is the index of the cuda include dir amongst // the "-I" options (excluding invalid paths like "foo/bar"). + // This is 3 on windows. 
+#if defined _WIN32 || defined _WIN64 + EXPECT_TRUE( + preprog->header_sources().at("cuda_fp16.h").find("__jitify_I3@") != + std::string::npos); +#else // defined _WIN32 || defined _WIN64 EXPECT_TRUE( preprog->header_sources().at("cuda_fp16.h").find("__jitify_I2@") != std::string::npos); +#endif // defined _WIN32 || defined _WIN64 std::string cwd = jitify2::detail::get_real_path("."); for (const auto& name_header : preprog->header_sources()) { const std::string& header_name = name_header.first; @@ -1036,7 +1043,7 @@ __global__ void my_kernel() {} } // Repeat without "-I.", which will rely on the implicit current working // directory include path for quote includes. - preprog = Program("my_program", source)->preprocess({"-I" CUDA_INC_DIR}); + preprog = Program("my_program", source)->preprocess({"-I" CUDA_INC_DIR, "-I" CUB_DIR}); compiled = preprog->compile(); ASSERT_EQ(get_error(compiled), ""); ASSERT_EQ(get_error(preprog), ""); @@ -1164,7 +1171,7 @@ __device__ T cube(T x) { return x * x * x; } // Note also that this isn't really recommended. It's likely better to use // angle-includes, or to use "-include" to add a completely new header. 
preprog = Program("my_program", source) - ->preprocess({"-DUSE_QUOTE_INCLUDE", "-I" CUDA_INC_DIR}); + ->preprocess({"-DUSE_QUOTE_INCLUDE", "-I" CUB_DIR, "-I" CUDA_INC_DIR}); ASSERT_EQ(get_error(preprog), ""); kernel = preprog->get_kernel( "my_kernel", {}, @@ -1360,6 +1367,11 @@ TEST(Jitify2Test, InvalidPrograms) { EXPECT_EQ(error.info("headers"), ""); } +#if defined(_MSC_VER) + // Disable deprecation warnings under windows for use of deprecated nvvm() method + #pragma warning(push) + #pragma warning(disable : 4996) +#endif // _MSC_VER TEST(Jitify2Test, CompileLTO_IR) { static const char* const source = R"( const int arch = __CUDA_ARCH__ / 10; @@ -1387,6 +1399,10 @@ const int arch = __CUDA_ARCH__ / 10; EXPECT_EQ(arch, current_arch); } } +#if defined(_MSC_VER) + // Restore warnings, re-enabling deprecated method warnings + #pragma warning(pop) +#endif // _MSC_VER TEST(Jitify2Test, LinkMultiplePrograms) { static const char* const source1 = R"( @@ -1772,15 +1788,15 @@ TEST(Jitify2Test, Option) { TEST(Jitify2Test, OptionsVec) { OptionsVec options0; EXPECT_TRUE(options0.ok()); - OptionsVec options1({Option("-arch", "sm_50"), Option("-G")}); + OptionsVec options1({Option("-arch", "sm_75"), Option("-G")}); EXPECT_TRUE(options1.ok()); - StringVec options_sv({"-arch", "sm_50", "-G"}); + StringVec options_sv({"-arch", "sm_75", "-G"}); OptionsVec options2(options_sv); EXPECT_TRUE(options2.ok()); - OptionsVec options3({"-arch", "sm_50", "-G"}); + OptionsVec options3({"-arch", "sm_75", "-G"}); EXPECT_TRUE(options3.ok()); - OptionsVec options({"--gpu-architecture", "compute_50", "-arch", "sm_50", + OptionsVec options({"--gpu-architecture", "compute_75", "-arch", "sm_75", "-maxrregcount=100", "-Ifoo", "-I=foo2", "--device-debug", "-G", "--restrict", "-restrict", "-lbar", "-l=bar2", "-lineinfo"}); @@ -1788,12 +1804,12 @@ TEST(Jitify2Test, OptionsVec) { EXPECT_EQ(options.size(), 12); EXPECT_EQ(options.serialize(), - StringVec({"--gpu-architecture", "compute_50", "-arch", "sm_50", + 
StringVec({"--gpu-architecture", "compute_75", "-arch", "sm_75", "-maxrregcount=100", "-Ifoo", "-I=foo2", "--device-debug", "-G", "--restrict", "-restrict", "-lbar", "-l=bar2", "-lineinfo"})); EXPECT_EQ(options.serialize_canonical(), - StringVec({"--gpu-architecture=compute_50", "-arch=sm_50", + StringVec({"--gpu-architecture=compute_75", "-arch=sm_75", "-maxrregcount=100", "-I=foo", "-I=foo2", "--device-debug", "-G", "--restrict", "-restrict", "-l=bar", "-l=bar2", "-lineinfo"})); @@ -1853,11 +1869,11 @@ const int arch = __CUDA_ARCH__ / 10; // Test explicit virtual architecture (compile to PTX). // Note: PTX is forwards compatible. - program = preprocessed->compile("", {}, {"-arch=compute_50"}); + program = preprocessed->compile("", {}, {"-arch=compute_75"}); ASSERT_GT(program->ptx().size(), 0); ASSERT_EQ(program->cubin().size(), 0); ASSERT_EQ(program->link()->load()->get_global_value("arch", &arch), ""); - EXPECT_EQ(arch, 50); + EXPECT_EQ(arch, 75); #define JITIFY_EXPECT_CUBIN_SIZE_IF_AVAILABLE(cubin_size) \ do { \ @@ -1892,7 +1908,7 @@ const int arch = __CUDA_ARCH__ / 10; // Test that preprocessing and compilation use separate arch flags. program = Program("arch_flags_program", source) - ->preprocess({"-arch=sm_50"}) + ->preprocess({"-arch=sm_75"}) ->compile("", {}, {"-arch=sm_."}); EXPECT_GT(program->ptx().size(), 0); JITIFY_EXPECT_CUBIN_SIZE_IF_AVAILABLE(program->cubin().size()); @@ -1928,10 +1944,14 @@ const int arch = __CUDA_ARCH__ / 10; #undef JITIFY_EXPECT_CUBIN_SIZE_IF_AVAILABLE +#if CUDA_VERSION >= 13000 + OptionsVec arch_flags = {"-arch=compute_75", "-arch=compute_80", "-arch=compute_86"}; +#else + OptionsVec arch_flags = {"-arch=compute_50", "-arch=compute_52", "-arch=compute_61"}; +#endif // Test that multiple architectures can be specified for preprocessing. 
program = Program("arch_flags_program", source) - ->preprocess({"-arch=compute_50", "-arch=compute_52", - "-arch=compute_61"}) + ->preprocess(arch_flags) ->compile("", {}, {"-arch=compute_."}); EXPECT_GT(program->ptx().size(), 0); EXPECT_EQ(program->cubin().size(), 0); @@ -2000,6 +2020,14 @@ __global__ void enum_kernel() {} Template type_kernel("type_kernel"); +#if defined(__CUDACC__) + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diag_suppress 3013 + #else // __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma diag_suppress 3013 + #endif // __NVCC_DIAG_PRAGMA_SUPPORT__ +#endif // defined(__CUDACC__) + #define JITIFY_TYPE_REFLECTION_TEST(T) \ EXPECT_EQ( \ preprog->get_kernel(type_kernel.instantiate())->lowered_name(), \ @@ -2013,6 +2041,14 @@ __global__ void enum_kernel() {} #undef JITIFY_TYPE_REFLECTION_TEST +#if defined(__CUDACC__) + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diag_default 3013 + #else // __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma diag_default 3013 + #endif // __NVCC_DIAG_PRAGMA_SUPPORT__ +#endif // #if defined(__CUDACC__) + typedef Derived derived_type; const Base& base = derived_type(); EXPECT_EQ(preprog->get_kernel(type_kernel.instantiate(instance_of(base))) @@ -2119,7 +2155,7 @@ __global__ void my_kernel() {} Program("curand_program", source) // Note: --remove-unused-globals is added to remove huge precomputed // arrays that come from CURAND. - ->preprocess({"-I" CUDA_INC_DIR, "--remove-unused-globals"}) + ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "--remove-unused-globals"}) ->get_kernel("my_kernel"); // TODO: Expand this test to actually call curand kernels and check outputs. (void)kernel; @@ -2144,11 +2180,13 @@ __global__ void my_kernel(thrust::counting_iterator begin, // Checks that basic Thrust headers can be compiled. 
#if CUDA_VERSION < 11000 const char* cppstd = "-std=c++03"; -#else +#elif CUDA_VERSION < 13000 const char* cppstd = "-std=c++14"; +#else + const char* cppstd = "-std=c++17"; #endif PreprocessedProgram preprog = Program("thrust_program", source) - ->preprocess({"-I" CUDA_INC_DIR, cppstd}); + ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, cppstd}); ASSERT_EQ(get_error(preprog), ""); ASSERT_EQ(get_error(preprog->compile()), ""); } @@ -2270,7 +2308,7 @@ TEST(Jitify2Test, LibCudaCxx) { // only supported for sm_60 and up on *nix and sm_70 and up on // Windows." Program("libcudacxx_program", source) - ->preprocess({"-I" CUDA_INC_DIR, "-arch=compute_70", + ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-arch=compute_75", "-no-builtin-headers", "-no-preinclude-workarounds", "-no-system-headers-workaround", "-no-replace-pragma-once"}) @@ -2283,7 +2321,7 @@ TEST(Jitify2Test, LibCudaCxx) { __global__ void my_kernel() {} )"; Program("libcudacxx_program", source) - ->preprocess({"-I" CUDA_INC_DIR, "-arch=compute_70", + ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-arch=compute_75", "-no-builtin-headers", "-no-preinclude-workarounds", "-no-system-headers-workaround", "-no-replace-pragma-once"}) ->get_kernel("my_kernel"); @@ -2297,7 +2335,7 @@ TEST(Jitify2Test, LibCudaCxxAndBuiltinLimits) { )"; PreprocessedProgram preprog = - Program("limits_program", source)->preprocess({"-I" CUDA_INC_DIR}); + Program("limits_program", source)->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR}); ASSERT_EQ(get_error(preprog), ""); CompiledProgram compiled = preprog->compile(); ASSERT_EQ(get_error(compiled), ""); @@ -2311,7 +2349,7 @@ TEST(Jitify2Test, LibCudaCxxAndBuiltinTuple) { )"; PreprocessedProgram preprog = - Program("tuple_program", source)->preprocess({"-I" CUDA_INC_DIR}); + Program("tuple_program", source)->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR}); ASSERT_EQ(get_error(preprog), ""); CompiledProgram compiled = preprog->compile(); ASSERT_EQ(get_error(compiled), ""); @@ -2528,7 +2566,11 
@@ bool read_binary_file(const char* filename, std::string* contents) { template void check_or_update_serialization_goldens( JitifyObjectMaker make_jitify_object) { +#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) + using JitifyObject = std::invoke_result_t; +#else // __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) using JitifyObject = typename std::result_of::type; +#endif // __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) constexpr size_t version = jitify2::serialization::kSerializationVersion; std::string object_type_name = jitify2::reflection::reflect(); // Remove namespace prefix from type name. @@ -2621,7 +2663,7 @@ __global__ void my_kernel() {} for (int i = 0; i < 3; ++i) { CompiledProgram compiled = jitify2::Program(program_name, source) - ->preprocess({"-I" CUDA_INC_DIR, "-pch"}) + ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-pch"}) ->compile(Template("my_kernel").instantiate(i)); ASSERT_EQ(get_error(compiled), ""); // Check that PCH succeeded. @@ -2663,7 +2705,7 @@ __global__ void my_kernel() {} // Start with PCH auto-resizing disabled. CompiledProgram compiled = jitify2::Program(program_name, source) - ->preprocess({"-I" CUDA_INC_DIR, "-pch", "-no-pch-auto-resize"}) + ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-pch", "-no-pch-auto-resize"}) ->compile(Template("my_kernel").instantiate(0)); ASSERT_EQ(get_error(compiled), ""); EXPECT_FALSE(compiled->log().find("creating precompiled header file") != @@ -2676,7 +2718,7 @@ __global__ void my_kernel() {} // Try again with PCH auto-resizing enabled. 
compiled = jitify2::Program(program_name, source) - ->preprocess({"-I" CUDA_INC_DIR, "-pch"}) + ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-pch"}) ->compile(Template("my_kernel").instantiate(1)); ASSERT_EQ(get_error(compiled), ""); EXPECT_FALSE(compiled->log().find("creating precompiled header file") != @@ -2689,7 +2731,7 @@ __global__ void my_kernel() {} // This time PCH generation should succeed. compiled = jitify2::Program(program_name, source) - ->preprocess({"-I" CUDA_INC_DIR, "-pch"}) + ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-pch"}) ->compile(Template("my_kernel").instantiate(2)); ASSERT_EQ(get_error(compiled), ""); EXPECT_TRUE(compiled->log().find("creating precompiled header file") !=