diff --git a/Common/helper_image.h b/Common/helper_image.h
index 9b7edc062..7f1e2ee4b 100644
--- a/Common/helper_image.h
+++ b/Common/helper_image.h
@@ -373,13 +373,10 @@ inline bool sdkReadFile(const char *filename, T **data, unsigned int *len,
   // read all data elements
   T token;
 
-  while (!feof(fh)) {
-    fscanf(fh, "%f", &token);
+  while (fscanf(fh, "%f", &token) == 1) {
     data_read.push_back(token);
   }
 
-  // the last element is read twice
-  data_read.pop_back();
   fclose(fh);
 
   // check if the given handle is already initialized
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier.cu b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier.cu
index 46ffc886d..b1aa1f4a3 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier.cu
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier.cu
@@ -84,7 +84,7 @@ __device__ void reduceBlockData(cuda::barrier<cuda::thread_scope_block> &barrier
 __global__ void normVecByDotProductAWBarrier(float *vecA, float *vecB, double *partialResults, int size)
 {
 #if __CUDA_ARCH__ >= 700
-#pragma diag_suppress static_var_with_dynamic_init
+#pragma nv_diag_suppress 20054
     cg::thread_block cta  = cg::this_thread_block();
     cg::grid_group   grid = cg::this_grid();
     ;
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture.cu b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture.cu
index 061b700d8..21f9a2847 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture.cu
+++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture.cu
@@ -172,7 +172,7 @@ void runTest(int argc, char **argv)
         cudaMemcpy2D(d_idataPL, d_pitchBytes, h_idata, h_pitchBytes, nx * sizeof(float), ny, cudaMemcpyHostToDevice));
 
     // Array
-    checkCudaErrors(cudaMemcpyToArray(d_idataArray, 0, 0, h_idata, nx * ny * sizeof(float), cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy2DToArray(d_idataArray, 0, 0, h_idata, nx * sizeof(float), nx * sizeof(float), ny, cudaMemcpyHostToDevice));
 
     cudaTextureObject_t texRefPL;
     cudaTextureObject_t texRefArray;
diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture.cu b/Samples/0_Introduction/simpleTexture/simpleTexture.cu
index 6b418cef2..a013b0e5e 100644
--- a/Samples/0_Introduction/simpleTexture/simpleTexture.cu
+++ b/Samples/0_Introduction/simpleTexture/simpleTexture.cu
@@ -168,7 +168,7 @@ void runTest(int argc, char **argv)
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
     cudaArray            *cuArray;
     checkCudaErrors(cudaMallocArray(&cuArray, &channelDesc, width, height));
-    checkCudaErrors(cudaMemcpyToArray(cuArray, 0, 0, hData, size, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy2DToArray(cuArray, 0, 0, hData, width * sizeof(float), width * sizeof(float), height, cudaMemcpyHostToDevice));
 
     cudaTextureObject_t tex;
     cudaResourceDesc    texRes;
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/main.cpp b/Samples/2_Concepts_and_Techniques/convolutionTexture/main.cpp
index b7ba2d86d..b4da55796 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/main.cpp
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/main.cpp
@@ -109,7 +109,7 @@ int main(int argc, char **argv)
     }
 
     setConvolutionKernel(h_Kernel);
-    checkCudaErrors(cudaMemcpyToArray(a_Src, 0, 0, h_Input, imageW * imageH * sizeof(float), cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy2DToArray(a_Src, 0, 0, h_Input, imageW * sizeof(float), imageW * sizeof(float), imageH, cudaMemcpyHostToDevice));
 
     printf("Running GPU rows convolution (%u identical iterations)...\n", iterations);
     checkCudaErrors(cudaDeviceSynchronize());
@@ -134,7 +134,7 @@ int main(int argc, char **argv)
     sdkResetTimer(&hTimer);
     sdkStartTimer(&hTimer);
     checkCudaErrors(
-        cudaMemcpyToArray(a_Src, 0, 0, d_Output, imageW * imageH * sizeof(float), cudaMemcpyDeviceToDevice));
+        cudaMemcpy2DToArray(a_Src, 0, 0, d_Output, imageW * sizeof(float), imageW * sizeof(float), imageH, cudaMemcpyDeviceToDevice));
     checkCudaErrors(cudaDeviceSynchronize());
     sdkStopTimer(&hTimer);
     gpuTime = sdkGetTimerValue(&hTimer);
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/BmpUtil.cpp b/Samples/2_Concepts_and_Techniques/dct8x8/BmpUtil.cpp
index b3524caca..d22a21e0e 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/BmpUtil.cpp
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/BmpUtil.cpp
@@ -261,13 +261,21 @@ int PreLoadBmp(char *FileName, int *Width, int *Height)
         return 1; // invalid filename
     }
 
-    fread(&FileHeader, sizeof(BMPFileHeader), 1, fh);
+    if (fread(&FileHeader, sizeof(BMPFileHeader), 1, fh) != 1) {
+        fclose(fh);
+        return 2; // invalid file format
+    }
 
     if (FileHeader._bm_signature != 0x4D42) {
+        fclose(fh);
         return 2; // invalid file format
     }
 
-    fread(&InfoHeader, sizeof(BMPInfoHeader), 1, fh);
+    if (fread(&InfoHeader, sizeof(BMPInfoHeader), 1, fh) != 1) {
+        fclose(fh);
+        return 2; // invalid file format
+    }
 
     if (InfoHeader._bm_color_depth != 24) {
+        fclose(fh);
         return 3; // invalid color depth
@@ -302,6 +310,11 @@ void LoadBmpAsGray(char *FileName, int Stride, ROI ImSize, byte *Img)
     FILE         *fh;
     fh = fopen(FileName, "rb");
 
+// The diagnostic pragmas below are GCC/Clang-specific; other compilers report them as unknown pragmas.
+#if defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+#endif
     fread(&FileHeader, sizeof(BMPFileHeader), 1, fh);
     fread(&InfoHeader, sizeof(BMPInfoHeader), 1, fh);
 
@@ -315,6 +328,9 @@ void LoadBmpAsGray(char *FileName, int Stride, ROI ImSize, byte *Img)
             Img[i * Stride + j] = (byte)clamp_0_255(val);
         }
     }
+#if defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
 
     fclose(fh);
     return;
diff --git a/Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt
index d0d7e0979..b29e5bb2c 100644
--- a/Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt
+++ b/Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt
@@ -10,6 +10,8 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 110 120)
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
+# Suppress nvlink warning about stack size not being statically determined (due to device-side recursion)
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xnvlink --suppress-stack-size-warning")
 if(ENABLE_CUDA_DEBUG)
     set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G")        # enable cuda-gdb (may significantly affect performance on some targets)
 else()
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu
index b473e8dab..173b3466e 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu
@@ -146,7 +146,7 @@ static void childProcess(int id)
     // and import the pointer to the allocated buffer using
     // exportData filled in shared memory by the master process.
     for (i = 0; i < procCount; i++) {
-        checkCudaErrors(cudaMemPoolImportFromShareableHandle(&pools[i], (void *)shHandle[i], handleType, 0));
+        checkCudaErrors(cudaMemPoolImportFromShareableHandle(&pools[i], (void *)(uintptr_t)shHandle[i], handleType, 0));
 
         cudaMemAccessFlags accessFlags;
         cudaMemLocation    location;
@@ -416,7 +416,7 @@ static void parentProcess(char *app)
 
     // Launch the child processes!
     for (i = 0; i < shm->nprocesses; i++) {
-        char        devIdx[10];
+        char        devIdx[16];
         char *const args[] = {app, devIdx, NULL};
         Process     process;
 
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy.cu b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy.cu
index 513bc2a92..a3685b01b 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy.cu
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy.cu
@@ -261,7 +261,7 @@ __global__ void MatrixMulAsyncCopyLargeChunkAWBarrier(float *__restrict__ C,
                                                       int wB)
 {
 #if __CUDA_ARCH__ >= 700
-#pragma diag_suppress static_var_with_dynamic_init
+#pragma nv_diag_suppress 20054
     // Requires BLOCK_SIZE % 4 == 0
 
     __shared__ cuda::barrier<cuda::thread_scope_block> bar;
@@ -540,6 +540,7 @@ __global__ void MatrixMulAsyncCopyMultiStageSharedState(float *__restrict__ C,
     auto cta = cg::this_thread_block();
 
     const auto shape1 = cuda::aligned_size_t<alignof(float)>(sizeof(float));
+#pragma nv_diag_suppress 20054
     __shared__ cuda::pipeline_shared_state<cuda::thread_scope_block, maxPipelineStages> shared_state;
     constexpr int consumer_row_count = BLOCK_SIZE_X;
 
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp
index 09937acc9..6e52d040c 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp
@@ -650,7 +650,10 @@ bool inline findModulePath(const char *module_file, string &module_path, char **
             int   file_size = ftell(fp);
             char *buf       = new char[file_size + 1];
             fseek(fp, 0, SEEK_SET);
-            fread(buf, sizeof(char), file_size, fp);
+            // Keep the byte count fread() actually delivered: after a short read the
+            // tail of buf is uninitialized, so the terminator must go right after
+            // the last byte read rather than at file_size.
+            size_t num_read = fread(buf, sizeof(char), file_size, fp);
             fclose(fp);
-            buf[file_size] = '\0';
+            buf[num_read]  = '\0';
             ptx_source     = buf;
diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit.cpp b/Samples/3_CUDA_Features/ptxjit/ptxjit.cpp
index c68a6a5a8..aa2a7c189 100644
--- a/Samples/3_CUDA_Features/ptxjit/ptxjit.cpp
+++ b/Samples/3_CUDA_Features/ptxjit/ptxjit.cpp
@@ -82,7 +82,10 @@ bool inline findModulePath(const char *module_file, std::string &module_path, ch
             int   file_size = ftell(fp);
             char *buf       = new char[file_size + 1];
             fseek(fp, 0, SEEK_SET);
-            fread(buf, sizeof(char), file_size, fp);
+            // Keep the byte count fread() actually delivered: after a short read the
+            // tail of buf is uninitialized, so the terminator must go right after
+            // the last byte read rather than at file_size.
+            size_t num_read = fread(buf, sizeof(char), file_size, fp);
             fclose(fp);
-            buf[file_size] = '\0';
+            buf[num_read]  = '\0';
             ptx_source     = buf;
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/main.cpp b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/main.cpp
index 1d1ea9a9d..d5a1f77c3 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/main.cpp
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/main.cpp
@@ -54,6 +54,11 @@
 
 // Using updated (v2) interfaces for CUBLAS and CUSPARSE
 #include <cublas_v2.h>
+// Deprecation warnings are reported where deprecated functions are used, so the
+// suppression stays active for the rest of the file (no push/pop). GCC/Clang only.
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
 #include <cusparse.h>
 
 // Utilities and system includes
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf.cpp b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf.cpp
index 8794d2310..13f6232b7 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf.cpp
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf.cpp
@@ -42,6 +42,11 @@
  *
  */
 
+// Deprecation warnings are reported where deprecated functions are used, so the
+// suppression stays active for the rest of the file (no push/pop). GCC/Clang only.
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
 #include "cusolverRf.h"
 
 #include <assert.h>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp
index dde0734ab..07fb16ef3 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp
@@ -78,6 +78,11 @@
 #include <stdlib.h>
 #include <string.h>
 
+// Deprecation warnings are reported where deprecated functions are used, so the
+// suppression stays active for the rest of the file (no push/pop). GCC/Clang only.
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
 #include "cusolverSp.h"
 #include "cusparse.h"
 #include "helper_cuda.h"
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky.cpp b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky.cpp
index baee4342f..a3cc787db 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky.cpp
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky.cpp
@@ -16,6 +16,11 @@
 #include <stdlib.h>
 #include <string.h>
 
+// Deprecation warnings are reported where deprecated functions are used, so the
+// suppression stays active for the rest of the file (no push/pop). GCC/Clang only.
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
 #include "cusolverSp.h"
 #include "cusolverSp_LOWLEVEL_PREVIEW.h"
 #include "helper_cuda.h"
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR.cpp b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR.cpp
index b1e336b9f..1c30f6ecd 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR.cpp
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR.cpp
@@ -32,6 +32,11 @@
 #include <stdlib.h>
 #include <string.h>
 
+// Deprecation warnings are reported where deprecated functions are used, so the
+// suppression stays active for the rest of the file (no push/pop). GCC/Clang only.
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
 #include "cusolverSp.h"
 #include "cusolverSp_LOWLEVEL_PREVIEW.h"
 #include "helper_cuda.h"
diff --git a/Samples/5_Domain_Specific/dxtc/dxtc.cu b/Samples/5_Domain_Specific/dxtc/dxtc.cu
index 546506b26..ffa58b005 100644
--- a/Samples/5_Domain_Specific/dxtc/dxtc.cu
+++ b/Samples/5_Domain_Specific/dxtc/dxtc.cu
@@ -750,7 +750,16 @@ int main(int argc, char **argv)
     fseek(fp, sizeof(DDSHeader), SEEK_SET);
     uint  referenceSize = (W / 4) * (H / 4) * 8;
     uint *reference     = (uint *)malloc(referenceSize);
+#if defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+#endif
+#pragma nv_diag_suppress 1650
     fread(reference, referenceSize, 1, fp);
+#pragma nv_diag_default 1650
+#if defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
     fclose(fp);
 
     printf("\nChecking accuracy...\n");
diff --git a/Samples/7_libNVVM/device-side-launch/dsl.c b/Samples/7_libNVVM/device-side-launch/dsl.c
index 3dbce3908..c867d800f 100644
--- a/Samples/7_libNVVM/device-side-launch/dsl.c
+++ b/Samples/7_libNVVM/device-side-launch/dsl.c
@@ -79,7 +79,10 @@ static char *loadProgramSource(const char *filename, size_t *size)
         stat(filename, &statbuf);
         source = (char *)malloc(statbuf.st_size + 1);
         if (source) {
-            fread(source, statbuf.st_size, 1, fh);
-            source[statbuf.st_size] = 0;
-            *size                   = statbuf.st_size + 1;
+            /* Size the result by what fread() actually returned so a short
+             * read never exposes uninitialized heap bytes to the caller;
+             * *size still counts the terminating NUL, as before. */
+            *size         = fread(source, 1, statbuf.st_size, fh);
+            source[*size] = 0;
+            *size += 1;
         }
diff --git a/Samples/7_libNVVM/simple/simple.c b/Samples/7_libNVVM/simple/simple.c
index 25baa356e..db565b547 100644
--- a/Samples/7_libNVVM/simple/simple.c
+++ b/Samples/7_libNVVM/simple/simple.c
@@ -111,7 +111,10 @@ static char *loadProgramSource(const char *filename, size_t *size)
         stat(filename, &statbuf);
         source = malloc(statbuf.st_size + 1);
         assert(source);
-        fread(source, statbuf.st_size, 1, fh);
-        source[statbuf.st_size] = 0;
-        *size                   = statbuf.st_size + 1;
+        /* Size the result by what fread() actually returned so a short
+         * read never exposes uninitialized heap bytes to the caller;
+         * *size still counts the terminating NUL, as before. */
+        *size         = fread(source, 1, statbuf.st_size, fh);
+        source[*size] = 0;
+        *size += 1;
     }
diff --git a/Samples/7_libNVVM/uvmlite/uvmlite.c b/Samples/7_libNVVM/uvmlite/uvmlite.c
index cfe61cd96..6997620fe 100644
--- a/Samples/7_libNVVM/uvmlite/uvmlite.c
+++ b/Samples/7_libNVVM/uvmlite/uvmlite.c
@@ -135,7 +135,10 @@ static char *loadProgramSource(const char *filename, size_t *size)
         stat(filename, &statbuf);
         source = (char *)malloc(statbuf.st_size + 1);
         if (source) {
-            fread(source, statbuf.st_size, 1, fh);
-            source[statbuf.st_size] = 0;
-            *size                   = statbuf.st_size + 1;
+            /* Size the result by what fread() actually returned so a short
+             * read never exposes uninitialized heap bytes to the caller;
+             * *size still counts the terminating NUL, as before. */
+            *size         = fread(source, 1, statbuf.st_size, fh);
+            source[*size] = 0;
+            *size += 1;
         }