diff --git a/CMakeLists.txt b/CMakeLists.txt
index b341304..a03f2c5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,7 +8,7 @@ set (CMAKE_CXX_STANDARD 17)
 set (CMAKE_CUDA_STANDARD 17)  # Doesn't work?
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ")
 if (MSVC)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX -D_CRT_SECURE_NO_WARNINGS")
   set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2")
 else()
   set(CMAKE_CXX_FLAGS
@@ -30,7 +30,7 @@ endif()
 set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
 if (MSVC)
   set(CMAKE_CUDA_FLAGS
-    "${CMAKE_CUDA_FLAGS} -Xcompiler=\"/WX\" -rdc=true")
+    "${CMAKE_CUDA_FLAGS} -Xcompiler=\"/WX\" -D_CRT_SECURE_NO_WARNINGS -rdc=true")
   set(CMAKE_CUDA_FLAGS_RELEASE
     "${CMAKE_CUDA_FLAGS_RELEASE} -O3 -Xcompiler=\"/O2\"")
 else()
@@ -46,9 +46,35 @@ endif()
 find_package(CUDA REQUIRED) # Required for CUDA_INCLUDE_DIRS
 
 # Add macro definitions used in tests.
-add_compile_definitions(
-    CUDA_INC_DIR="${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
-    CUB_DIR=${CUDA_INC_DIR})
+if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0.0)
+  # CCCL's include directories have moved in CUDA 13 compared to CUDA 12.
+  # On Windows, CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES includes both include
+  # paths, which must be separated before being used in compile definitions.
+  # On *nix, only the main CTK include dir is included.
+  # It may be cleaner to switch to find_package(CUDAToolkit) and find_package(CCCL).
+  list(LENGTH CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES ctk_inc_dirs_length)
+  list(GET CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES 0 cuda_inc_dir)
+  if (ctk_inc_dirs_length GREATER 1)
+    list(GET CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES 1 cccl_inc_dir)
+  else()
+    # Check the default location within the CTK if not in the variable.
+    if (EXISTS "${cuda_inc_dir}/cccl")
+      set(cccl_inc_dir "${cuda_inc_dir}/cccl")
+    else()
+      set(cccl_inc_dir "${cuda_inc_dir}")
+    endif()
+  endif()
+  add_compile_definitions(
+      CUDA_INC_DIR="${cuda_inc_dir}"
+      CUB_DIR="${cccl_inc_dir}")
+  unset(cccl_inc_dir)
+  unset(cuda_inc_dir)
+  unset(ctk_inc_dirs_length)
+else()
+  add_compile_definitions(
+      CUDA_INC_DIR="${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
+      CUB_DIR=${CUDA_INC_DIR})
+endif()
 # Copy the example_headers directory for use at runtime by tests.
 file(COPY example_headers DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
 
@@ -100,7 +126,7 @@ function(add_stringify_command arg)
   add_custom_command(
       OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${arg}.jit
       WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-      COMMAND ./stringify ${CMAKE_CURRENT_SOURCE_DIR}/${arg} > ${arg}.jit
+      COMMAND $<TARGET_FILE:stringify> ${CMAKE_CURRENT_SOURCE_DIR}/${arg} > ${arg}.jit
       DEPENDS stringify)
 endfunction()
 add_executable(jitify2_preprocess jitify2_preprocess.cpp)
@@ -118,7 +144,7 @@ add_custom_command(
     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/jitify2_test_kernels.cu.jit.hpp
            ${CMAKE_CURRENT_BINARY_DIR}/jitify2_test_kernels.cu.headers.jit.cpp
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-    COMMAND ${CMAKE_CURRENT_BINARY_DIR}/jitify2_preprocess -i --minify
+    COMMAND $<TARGET_FILE:jitify2_preprocess> -i --minify
                 -o ${CMAKE_CURRENT_BINARY_DIR}
                 -s jitify2_test_kernels.cu.headers
                 jitify2_test_kernels.cu
@@ -172,8 +198,12 @@ foreach(test ${TESTS})
 endforeach(test)
 # Add "check" command that *builds and* runs tests, with verbose output.
 # (The default "test" command neither builds nor gives verbose output).
-add_custom_target(check ALL COMMAND ${CMAKE_CTEST_COMMAND} --verbose
-                  DEPENDS ${TESTS})
+# --build-config is required for multi-config generators. COMMAND_EXPAND_LISTS ensures that the flag and its value are passed to ctest as separate arguments rather than as a single string.
+add_custom_target(check ALL
+    COMMAND ${CMAKE_CTEST_COMMAND} --verbose "$<IF:$<STREQUAL:$<CONFIG>,>,,--build-config;$<CONFIG>>"
+    DEPENDS ${TESTS}
+    COMMAND_EXPAND_LISTS
+)
 
 # ----
 # Docs
diff --git a/example_headers/class_arg_kernel.cuh b/example_headers/class_arg_kernel.cuh
index b452ba3..15d0999 100644
--- a/example_headers/class_arg_kernel.cuh
+++ b/example_headers/class_arg_kernel.cuh
@@ -50,9 +50,6 @@ class Managed {
 struct Arg : public Managed {
   const int x;
   Arg(int x_) : x(x_) {}
-
-  // there can be no call to the copy constructor
-  Arg(const Arg& arg) = delete;
 };
 
 template <typename T>
diff --git a/jitify2.hpp b/jitify2.hpp
index a32e001..c2ad6fd 100644
--- a/jitify2.hpp
+++ b/jitify2.hpp
@@ -155,7 +155,7 @@
 #include <type_traits>
 #include <unordered_set>
 
-#if __cplusplus >= 201703L
+#if JITIFY_CPLUSPLUS >= 201703L
 #include <filesystem>
 #endif
 
@@ -194,7 +194,7 @@
 #include <dbghelp.h>      // For UndecorateSymbolName
 #include <direct.h>       // For mkdir
 #include <fcntl.h>        // For open, O_RDWR etc.
-#include <fileapi.h>      // For GetTempPath2A
+#include <fileapi.h>      // For GetTempPath2A, GetLongPathNameA
 #include <io.h>           // For _sopen_s
 #include <process.h>      // For _getpid
 #include <shlobj_core.h>  // For SHGetFolderPathA
@@ -2786,6 +2786,15 @@ inline bool path_exists(const char* filename, bool* is_dir = nullptr) {
   return ret;
 }
 
+inline std::string quoted_path_if_needed(const std::string& p) {
+  // Wraps p in double quotes when it contains a space or a backslash, so that it
+  // survives being passed to run_system_command as an executable or include path.
+  // Paths that are already quoted are returned unchanged to avoid double quoting.
+  const bool quoted = p.size() >= 2 && p.front() == '"' && p.back() == '"';
+  const bool needs_quotes = p.find_first_of(" \\") != std::string::npos;
+  return (quoted || !needs_quotes) ? p : "\"" + p + "\"";
+}
+
 inline const char* get_current_executable_path() {
   static const char* path = []() -> const char* {
     static char buffer[JITIFY_PATH_MAX + 1] = {};
@@ -4117,15 +4126,31 @@ inline int run_system_command(const char* command,
   if (output) {
     output->clear();
     std::array<char, 128> buffer;
-    while (fgets(buffer.data(), buffer.size(), pipe)) {
+    while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe)) {
       *output += buffer.data();
     }
+  } else {
+    // Must always read from the pipe for the exit code from the command to be available
+    std::array<char, 128> buffer;
+    while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe)) { }
   }
   const int result = JITIFY_PCLOSE(pipe);
   if (result == -1 && failure) {
     *failure = get_errno_string();
   }
-  return result;
+
+  // Extract the exit code from the called program if possible, otherwise return -1;
+  int exitCode = -1;
+  #ifdef _MSC_VER
+    // _pclose is documented as returning the same format as _cwait, but with the high- and low-order bytes swapped. However, the _cwait docs do not describe a corresponding value. Extracting the low-order byte appears to work in practice.
+    exitCode = result & 0xFF;
+  #else
+    // Extract the exit code from the pclose result if it was a 'normal' exit
+    if (WIFEXITED(result)){
+      exitCode = WEXITSTATUS(result);
+    }
+  #endif
+  return exitCode;
 }
 #endif  // JITIFY_ENABLE_NVCC
 
@@ -4135,9 +4160,15 @@ inline const char* guess_cuda_home() {
     if (env_jitify_cuda_home) return env_jitify_cuda_home;
     const char* env_cuda_home = std::getenv("CUDA_HOME");
     if (env_cuda_home) return env_cuda_home;
+    // CUDA_PATH is set by the CUDA installer on windows
+    const char* env_cuda_path = std::getenv("CUDA_PATH");
+    if (env_cuda_path) return env_cuda_path;
     // Guess the default location.
 #if defined _WIN32 || defined _WIN64
-    return "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA";
+    // Static storage is required: the returned pointer must outlive this call.
+    static const std::string default_path = "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v" +
+        std::to_string(CUDA_VERSION / 1000) + "." + std::to_string((CUDA_VERSION % 1000) / 10) + "\\";
+    return default_path.c_str();
 #else
     return "/usr/local/cuda";
 #endif
@@ -4150,7 +4181,7 @@ class Nvcc {
   std::string nvcc_path_;
 
   static bool is_valid_nvcc(std::string nvcc_path) {
-    return run_system_command((nvcc_path + " --version").c_str());
+    return run_system_command((quoted_path_if_needed(nvcc_path) + " --version").c_str()) == 0;
   }
 
   static std::string find_nvcc_path() {
@@ -4180,7 +4211,7 @@ class Nvcc {
                  std::string* failure = nullptr) const {
     // Note: We redirect stderr to stdout so that we capture it too.
     const std::string command =
-        detail::string_concat(nvcc_path_, " ", options, " ", "2>&1");
+        detail::string_concat(quoted_path_if_needed(nvcc_path_), " ", options, " ", "2>&1");
     return run_system_command(command.c_str(), output, failure);
   }
 };
@@ -4206,7 +4237,9 @@ inline std::string make_temp_dir() {
   char tmpdir[JITIFY_PATH_MAX + 1];
   // Note: tmpdir is guaranteed to end with a '\'.
   if (!GetTempPath2A(sizeof(tmpdir), tmpdir)) return "";
-  std::string path = tmpdir + "__jitify_" + std::to_string(uid);
+  // Get the long-form of the tmpdir
+  GetLongPathNameA(tmpdir, tmpdir, sizeof(tmpdir));
+  std::string path = std::string(tmpdir) + "__jitify_" + std::to_string(uid);
   if (::_mkdir(path.c_str()) != 0) return "";
   return path;
 #else
@@ -4216,7 +4249,7 @@ inline std::string make_temp_dir() {
 #endif
 }
 
-#if __cplusplus < 201703L && (!defined(_WIN32) && !defined(_WIN64))
+#if JITIFY_CPLUSPLUS < 201703L && (!defined(_WIN32) && !defined(_WIN64))
 inline int delete_file_visitor(const char* path, const struct stat* sbuf,
                                int type, struct FTW* ftwb) {
   (void)sbuf;
@@ -4227,11 +4260,11 @@ inline int delete_file_visitor(const char* path, const struct stat* sbuf,
 #endif
 
 inline bool remove_all(const std::string& path) {
-#if __cplusplus >= 201703L
+#if JITIFY_CPLUSPLUS >= 201703L
   std::error_code ec;
   return std::filesystem::remove_all(path, ec) !=
          static_cast<std::uintmax_t>(-1);
-#else  // __cplusplus < 201703L
+#else  // JITIFY_CPLUSPLUS < 201703L
 #if defined(_WIN32) || defined(_WIN64)
   // TODO: Implement this if anyone cares about it.
   return false;
@@ -4244,7 +4277,7 @@ inline bool remove_all(const std::string& path) {
   const int max_depth = 20;
   return ::nftw(path.c_str(), delete_file_visitor, max_depth, flags) == 0;
 #endif  // not Windows
-#endif  // __cplusplus < 201703L
+#endif  // JITIFY_CPLUSPLUS < 201703L
 }
 
 class TempDirectory {
@@ -4349,8 +4382,8 @@ class NvccProgram {
     // Note: This ensures the cuda toolkit headers are found before any that
     // were embedded during preprocessing (which probably won't work with nvcc).
     options.emplace_back(
-        "-I", detail::path_join(detail::guess_cuda_home(), "include"));
-    options.emplace_back("-I", tmp_include_dir);
+        "-I", detail::quoted_path_if_needed(detail::path_join(detail::guess_cuda_home(), "include")));
+    options.emplace_back("-I", detail::quoted_path_if_needed(tmp_include_dir));
 
     static const char* const kJitifyExpressionPrefix = "__jitify_expression";
 
@@ -4388,7 +4421,7 @@ class NvccProgram {
     if (!options.find({"--dlink-time-opt, -dlto"}).empty()) {
       options.emplace_back("-ltoir", "");
       options.emplace_back(tmp_source_file, "");
-      if (nvcc(options, &log_, error)) return infer_nvcc_error_type();
+      if (nvcc(options, &log_, error) != 0) return infer_nvcc_error_type();
       if (!read_binary_file(tmp_ltoir_file, &nvvm_)) {
         if (error) *error = "Failed to read binary file: " + tmp_ltoir_file;
         return NVRTC_ERROR_PROGRAM_CREATION_FAILURE;
@@ -4399,7 +4432,7 @@ class NvccProgram {
     options.emplace_back("-ptx", "");
     options.emplace_back(tmp_source_file, "");
     options.emplace_back("-o", tmp_ptx_file);
-    if (nvcc(options, &log_, error)) return infer_nvcc_error_type();
+    if (nvcc(options, &log_, error) != 0) return infer_nvcc_error_type();
     options.pop_back();  // Remove -o option
     options.pop_back();  // Remove source file
     options.pop_back();  // Remove -ptx
@@ -4439,7 +4472,7 @@ class NvccProgram {
       options.emplace_back("-cubin", "");
       options.emplace_back(tmp_ptx_file, "");
       options.emplace_back("-o", tmp_cubin_file);
-      if (nvcc(options, &log_, error)) {
+      if (nvcc(options, &log_, error) != 0) {
         return NVRTC_ERROR_PROGRAM_CREATION_FAILURE;
       }
       if (!read_binary_file(tmp_cubin_file, &cubin_)) {
@@ -4531,7 +4564,24 @@ inline nvrtcResult compile_program_nvrtc(
     header_sources_c.push_back(name_source.second.c_str());
   }
 
+#if defined(__CUDACC__)
+  #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+    #pragma nv_diag_suppress 550
+  #else  // __NVCC_DIAG_PRAGMA_SUPPORT__
+    #pragma diag_suppress 550
+  #endif  // __NVCC_DIAG_PRAGMA_SUPPORT__
+#endif  // defined(__CUDACC__)
+
   bool pch_verbose = true;
+
+#if defined(__CUDACC__)
+  #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+    #pragma nv_diag_default 550
+  #else  // __NVCC_DIAG_PRAGMA_SUPPORT__
+    #pragma diag_default 550
+  #endif  // __NVCC_DIAG_PRAGMA_SUPPORT__
+#endif  // #if defined(__CUDACC__)
+
   std::vector<const char*> options_c;
   options_c.reserve(options.size());
   for (const Option& option : options) {
@@ -6626,7 +6676,8 @@ struct __add_reference_helper<_Tp, true> {
 };
 template <typename _Tp>
 struct add_reference : public __add_reference_helper<_Tp> {};
-
+)"
+R"(
 namespace __jitify_detail {
 template <typename T>
 struct is_int_or_cref {
@@ -9687,9 +9738,19 @@ class LRUFileCache {
         file_suffix_(sanitize_filename(file_suffix)),
         lock_file_name_(path_join(path_, file_prefix_ + "lock")) {}
 
+
+// std::result_of was deprecated in c++17 and removed in c++20.
+#if JITIFY_CPLUSPLUS >= 201703L
+  template <typename T>
+  using invoke_result_type = typename std::invoke_result<T>::type;
+#else  // JITIFY_CPLUSPLUS >= 201703L
+  template <typename T>
+  using invoke_result_type = typename std::result_of<T()>::type;
+#endif // JITIFY_CPLUSPLUS >= 201703L
+
   template <class Construct, class Serialize, class Deserialize>
   std::string get(const std::string& name,
-                  typename std::result_of<Construct()>::type* result,
+                  invoke_result_type<Construct>* result,
                   Construct construct, Serialize serialize,
                   Deserialize deserialize, bool* hit = nullptr) const {
     if (path_.empty() || max_size_ == 0) {
diff --git a/jitify2_test.cu b/jitify2_test.cu
index 2aae9d2..4f44f1b 100644
--- a/jitify2_test.cu
+++ b/jitify2_test.cu
@@ -1017,15 +1017,22 @@ __global__ void my_kernel() {}
 )";
   auto preprog = Program("my_program", source)
                      ->preprocess({"-I.", "-Iexample_headers", "-Ifoo/bar",
-                                   "-I" CUDA_INC_DIR});
+                                   "-I" CUDA_INC_DIR, "-I" CUB_DIR});
   ASSERT_EQ(get_error(preprog), "");
   auto compiled = preprog->compile();
   ASSERT_EQ(get_error(compiled), "");
   // Note: The '2' in "I2@" here is the index of the cuda include dir amongst
   // the "-I" options (excluding invalid paths like "foo/bar").
+  // This is 3 on windows.
+#if defined _WIN32 || defined _WIN64
+  EXPECT_TRUE(
+      preprog->header_sources().at("cuda_fp16.h").find("__jitify_I3@") !=
+      std::string::npos);
+#else  // defined _WIN32 || defined _WIN64
   EXPECT_TRUE(
       preprog->header_sources().at("cuda_fp16.h").find("__jitify_I2@") !=
       std::string::npos);
+#endif  // defined _WIN32 || defined _WIN64
   std::string cwd = jitify2::detail::get_real_path(".");
   for (const auto& name_header : preprog->header_sources()) {
     const std::string& header_name = name_header.first;
@@ -1036,7 +1043,7 @@ __global__ void my_kernel() {}
   }
   // Repeat without "-I.", which will rely on the implicit current working
   // directory include path for quote includes.
-  preprog = Program("my_program", source)->preprocess({"-I" CUDA_INC_DIR});
+  preprog = Program("my_program", source)->preprocess({"-I" CUDA_INC_DIR, "-I" CUB_DIR});
   compiled = preprog->compile();
   ASSERT_EQ(get_error(compiled), "");
   ASSERT_EQ(get_error(preprog), "");
@@ -1164,7 +1171,7 @@ __device__ T cube(T x) { return x * x * x; }
   // Note also that this isn't really recommended. It's likely better to use
   // angle-includes, or to use "-include" to add a completely new header.
   preprog = Program("my_program", source)
-                ->preprocess({"-DUSE_QUOTE_INCLUDE", "-I" CUDA_INC_DIR});
+                ->preprocess({"-DUSE_QUOTE_INCLUDE", "-I" CUB_DIR, "-I" CUDA_INC_DIR});
   ASSERT_EQ(get_error(preprog), "");
   kernel = preprog->get_kernel(
       "my_kernel<int>", {},
@@ -1360,6 +1367,11 @@ TEST(Jitify2Test, InvalidPrograms) {
   EXPECT_EQ(error.info("headers"), "");
 }
 
+#if defined(_MSC_VER)
+  // Disable deprecation warnings under windows for use of deprecated nvvm() method
+  #pragma warning(push)
+  #pragma warning(disable : 4996)
+#endif  // _MSC_VER
 TEST(Jitify2Test, CompileLTO_IR) {
   static const char* const source = R"(
 const int arch = __CUDA_ARCH__ / 10;
@@ -1387,6 +1399,10 @@ const int arch = __CUDA_ARCH__ / 10;
     EXPECT_EQ(arch, current_arch);
   }
 }
+#if defined(_MSC_VER)
+  // Restore warnings, re-enabling deprecated method warnings
+  #pragma warning(pop)
+#endif  // _MSC_VER
 
 TEST(Jitify2Test, LinkMultiplePrograms) {
   static const char* const source1 = R"(
@@ -1772,15 +1788,15 @@ TEST(Jitify2Test, Option) {
 TEST(Jitify2Test, OptionsVec) {
   OptionsVec options0;
   EXPECT_TRUE(options0.ok());
-  OptionsVec options1({Option("-arch", "sm_50"), Option("-G")});
+  OptionsVec options1({Option("-arch", "sm_75"), Option("-G")});
   EXPECT_TRUE(options1.ok());
-  StringVec options_sv({"-arch", "sm_50", "-G"});
+  StringVec options_sv({"-arch", "sm_75", "-G"});
   OptionsVec options2(options_sv);
   EXPECT_TRUE(options2.ok());
-  OptionsVec options3({"-arch", "sm_50", "-G"});
+  OptionsVec options3({"-arch", "sm_75", "-G"});
   EXPECT_TRUE(options3.ok());
 
-  OptionsVec options({"--gpu-architecture", "compute_50", "-arch", "sm_50",
+  OptionsVec options({"--gpu-architecture", "compute_75", "-arch", "sm_75",
                       "-maxrregcount=100", "-Ifoo", "-I=foo2", "--device-debug",
                       "-G", "--restrict", "-restrict", "-lbar", "-l=bar2",
                       "-lineinfo"});
@@ -1788,12 +1804,12 @@ TEST(Jitify2Test, OptionsVec) {
 
   EXPECT_EQ(options.size(), 12);
   EXPECT_EQ(options.serialize(),
-            StringVec({"--gpu-architecture", "compute_50", "-arch", "sm_50",
+            StringVec({"--gpu-architecture", "compute_75", "-arch", "sm_75",
                        "-maxrregcount=100", "-Ifoo", "-I=foo2",
                        "--device-debug", "-G", "--restrict", "-restrict",
                        "-lbar", "-l=bar2", "-lineinfo"}));
   EXPECT_EQ(options.serialize_canonical(),
-            StringVec({"--gpu-architecture=compute_50", "-arch=sm_50",
+            StringVec({"--gpu-architecture=compute_75", "-arch=sm_75",
                        "-maxrregcount=100", "-I=foo", "-I=foo2",
                        "--device-debug", "-G", "--restrict", "-restrict",
                        "-l=bar", "-l=bar2", "-lineinfo"}));
@@ -1853,11 +1869,11 @@ const int arch = __CUDA_ARCH__ / 10;
 
   // Test explicit virtual architecture (compile to PTX).
   // Note: PTX is forwards compatible.
-  program = preprocessed->compile("", {}, {"-arch=compute_50"});
+  program = preprocessed->compile("", {}, {"-arch=compute_75"});
   ASSERT_GT(program->ptx().size(), 0);
   ASSERT_EQ(program->cubin().size(), 0);
   ASSERT_EQ(program->link()->load()->get_global_value("arch", &arch), "");
-  EXPECT_EQ(arch, 50);
+  EXPECT_EQ(arch, 75);
 
 #define JITIFY_EXPECT_CUBIN_SIZE_IF_AVAILABLE(cubin_size) \
   do {                                                    \
@@ -1892,7 +1908,7 @@ const int arch = __CUDA_ARCH__ / 10;
 
   // Test that preprocessing and compilation use separate arch flags.
   program = Program("arch_flags_program", source)
-                ->preprocess({"-arch=sm_50"})
+                ->preprocess({"-arch=sm_75"})
                 ->compile("", {}, {"-arch=sm_."});
   EXPECT_GT(program->ptx().size(), 0);
   JITIFY_EXPECT_CUBIN_SIZE_IF_AVAILABLE(program->cubin().size());
@@ -1928,10 +1944,14 @@ const int arch = __CUDA_ARCH__ / 10;
 
 #undef JITIFY_EXPECT_CUBIN_SIZE_IF_AVAILABLE
 
+#if CUDA_VERSION >= 13000
+  OptionsVec arch_flags = {"-arch=compute_75", "-arch=compute_80", "-arch=compute_86"};
+#else
+  OptionsVec arch_flags = {"-arch=compute_50", "-arch=compute_52", "-arch=compute_61"};
+#endif
   // Test that multiple architectures can be specified for preprocessing.
   program = Program("arch_flags_program", source)
-                ->preprocess({"-arch=compute_50", "-arch=compute_52",
-                              "-arch=compute_61"})
+                ->preprocess(arch_flags)
                 ->compile("", {}, {"-arch=compute_."});
   EXPECT_GT(program->ptx().size(), 0);
   EXPECT_EQ(program->cubin().size(), 0);
@@ -2000,6 +2020,14 @@ __global__ void enum_kernel() {}
 
   Template type_kernel("type_kernel");
 
+#if defined(__CUDACC__)
+  #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+    #pragma nv_diag_suppress 3013
+  #else  // __NVCC_DIAG_PRAGMA_SUPPORT__
+    #pragma diag_suppress 3013
+  #endif  // __NVCC_DIAG_PRAGMA_SUPPORT__
+#endif  // defined(__CUDACC__)
+
 #define JITIFY_TYPE_REFLECTION_TEST(T)                                   \
   EXPECT_EQ(                                                             \
       preprog->get_kernel(type_kernel.instantiate<T>())->lowered_name(), \
@@ -2013,6 +2041,14 @@ __global__ void enum_kernel() {}
 
 #undef JITIFY_TYPE_REFLECTION_TEST
 
+#if defined(__CUDACC__)
+  #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+    #pragma nv_diag_default 3013
+  #else  // __NVCC_DIAG_PRAGMA_SUPPORT__
+    #pragma diag_default 3013
+  #endif  // __NVCC_DIAG_PRAGMA_SUPPORT__
+#endif  // #if defined(__CUDACC__)
+
   typedef Derived<float> derived_type;
   const Base& base = derived_type();
   EXPECT_EQ(preprog->get_kernel(type_kernel.instantiate(instance_of(base)))
@@ -2119,7 +2155,7 @@ __global__ void my_kernel() {}
       Program("curand_program", source)
           // Note: --remove-unused-globals is added to remove huge precomputed
           // arrays that come from CURAND.
-          ->preprocess({"-I" CUDA_INC_DIR, "--remove-unused-globals"})
+          ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "--remove-unused-globals"})
           ->get_kernel("my_kernel");
   // TODO: Expand this test to actually call curand kernels and check outputs.
   (void)kernel;
@@ -2144,11 +2180,13 @@ __global__ void my_kernel(thrust::counting_iterator<int> begin,
   // Checks that basic Thrust headers can be compiled.
 #if CUDA_VERSION < 11000
   const char* cppstd = "-std=c++03";
-#else
+#elif CUDA_VERSION < 13000
   const char* cppstd = "-std=c++14";
+#else
+  const char* cppstd = "-std=c++17";
 #endif
   PreprocessedProgram preprog = Program("thrust_program", source)
-                                    ->preprocess({"-I" CUDA_INC_DIR, cppstd});
+                                    ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, cppstd});
   ASSERT_EQ(get_error(preprog), "");
   ASSERT_EQ(get_error(preprog->compile()), "");
 }
@@ -2270,7 +2308,7 @@ TEST(Jitify2Test, LibCudaCxx) {
     // only supported for sm_60 and up on *nix and sm_70 and up on
     // Windows."
     Program("libcudacxx_program", source)
-        ->preprocess({"-I" CUDA_INC_DIR, "-arch=compute_70",
+        ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-arch=compute_75",
                       "-no-builtin-headers", "-no-preinclude-workarounds",
                       "-no-system-headers-workaround",
                       "-no-replace-pragma-once"})
@@ -2283,7 +2321,7 @@ TEST(Jitify2Test, LibCudaCxx) {
 __global__ void my_kernel() {}
 )";
   Program("libcudacxx_program", source)
-      ->preprocess({"-I" CUDA_INC_DIR, "-arch=compute_70",
+      ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-arch=compute_75",
                     "-no-builtin-headers", "-no-preinclude-workarounds",
                     "-no-system-headers-workaround", "-no-replace-pragma-once"})
       ->get_kernel("my_kernel");
@@ -2297,7 +2335,7 @@ TEST(Jitify2Test, LibCudaCxxAndBuiltinLimits) {
 )";
 
   PreprocessedProgram preprog =
-      Program("limits_program", source)->preprocess({"-I" CUDA_INC_DIR});
+      Program("limits_program", source)->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR});
   ASSERT_EQ(get_error(preprog), "");
   CompiledProgram compiled = preprog->compile();
   ASSERT_EQ(get_error(compiled), "");
@@ -2311,7 +2349,7 @@ TEST(Jitify2Test, LibCudaCxxAndBuiltinTuple) {
 )";
 
   PreprocessedProgram preprog =
-      Program("tuple_program", source)->preprocess({"-I" CUDA_INC_DIR});
+      Program("tuple_program", source)->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR});
   ASSERT_EQ(get_error(preprog), "");
   CompiledProgram compiled = preprog->compile();
   ASSERT_EQ(get_error(compiled), "");
@@ -2528,7 +2566,11 @@ bool read_binary_file(const char* filename, std::string* contents) {
 template <class JitifyObjectMaker>
 void check_or_update_serialization_goldens(
     JitifyObjectMaker make_jitify_object) {
+#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+  using JitifyObject = std::invoke_result_t<JitifyObjectMaker>;
+#else  // __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
   using JitifyObject = typename std::result_of<JitifyObjectMaker()>::type;
+#endif  // __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
   constexpr size_t version = jitify2::serialization::kSerializationVersion;
   std::string object_type_name = jitify2::reflection::reflect<JitifyObject>();
   // Remove namespace prefix from type name.
@@ -2621,7 +2663,7 @@ __global__ void my_kernel() {}
   for (int i = 0; i < 3; ++i) {
     CompiledProgram compiled =
         jitify2::Program(program_name, source)
-            ->preprocess({"-I" CUDA_INC_DIR, "-pch"})
+            ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-pch"})
             ->compile(Template("my_kernel").instantiate(i));
     ASSERT_EQ(get_error(compiled), "");
     // Check that PCH succeeded.
@@ -2663,7 +2705,7 @@ __global__ void my_kernel() {}
   // Start with PCH auto-resizing disabled.
   CompiledProgram compiled =
       jitify2::Program(program_name, source)
-          ->preprocess({"-I" CUDA_INC_DIR, "-pch", "-no-pch-auto-resize"})
+          ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-pch", "-no-pch-auto-resize"})
           ->compile(Template("my_kernel").instantiate(0));
   ASSERT_EQ(get_error(compiled), "");
   EXPECT_FALSE(compiled->log().find("creating precompiled header file") !=
@@ -2676,7 +2718,7 @@ __global__ void my_kernel() {}
 
   // Try again with PCH auto-resizing enabled.
   compiled = jitify2::Program(program_name, source)
-                 ->preprocess({"-I" CUDA_INC_DIR, "-pch"})
+                 ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-pch"})
                  ->compile(Template("my_kernel").instantiate(1));
   ASSERT_EQ(get_error(compiled), "");
   EXPECT_FALSE(compiled->log().find("creating precompiled header file") !=
@@ -2689,7 +2731,7 @@ __global__ void my_kernel() {}
 
   // This time PCH generation should succeed.
   compiled = jitify2::Program(program_name, source)
-                 ->preprocess({"-I" CUDA_INC_DIR, "-pch"})
+                 ->preprocess({"-I" CUB_DIR, "-I" CUDA_INC_DIR, "-pch"})
                  ->compile(Template("my_kernel").instantiate(2));
   ASSERT_EQ(get_error(compiled), "");
   EXPECT_TRUE(compiled->log().find("creating precompiled header file") !=