Merge pull request #220402 from ConnorBaker/fix/cuda-nvcc-compress-fatbins

cudaPackages: fix #220357; use -Xfatbin=-compress-all; prune default cudaCapabilities
2023-03-13 17:03:58 -04:00 · 2023-03-13 17:03:58 -04:00 · 13939e25a3
parent 39d3f1d90a bdfb61e117
commit 13939e25a3
4 changed files with 85 additions and 25 deletions
--- a/pkgs/development/compilers/cudatoolkit/common.nix
+++ b/pkgs/development/compilers/cudatoolkit/common.nix
@ -151,6 +151,10 @@ backendStdenv.mkDerivation rec {
  # Refer to comments in the overrides for cuda_nvcc for explanation
  # CUDA_TOOLKIT_ROOT_DIR is legacy,
  # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
+  # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the compiled
+  #   binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as
+  #   the default set of CUDA capabilities we build can regularly cause this to occur (for
+  #   example, with Magma).
  ''
    mkdir -p $out/nix-support
    cat <<EOF >> $out/nix-support/setup-hook
@ -160,7 +164,7 @@ backendStdenv.mkDerivation rec {
    if [ -z "\''${CUDAHOSTCXX-}" ]; then
      export CUDAHOSTCXX=${backendStdenv.cc}/bin;
    fi
-    export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
+    export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin -Xfatbin=-compress-all'
    EOF

    # Move some libraries to the lib output so that programs that
--- a/pkgs/development/compilers/cudatoolkit/flags.nix
+++ b/pkgs/development/compilers/cudatoolkit/flags.nix
@ -4,12 +4,8 @@
 }:

 # Type aliases
-# Gpu = {
-#   archName: String, # e.g., "Hopper"
-#   computeCapability: String, # e.g., "9.0"
-#   minCudaVersion: String, # e.g., "11.8"
-#   maxCudaVersion: String, # e.g., "12.0"
-# }
+# Gpu :: AttrSet
+#   - See the documentation in ./gpus.nix.

 let
  inherit (lib) attrsets lists strings trivial versions;
@ -34,22 +30,40 @@ let
  # gpus :: List Gpu
  gpus = builtins.import ./gpus.nix;

-  # isVersionIn :: Gpu -> Bool
+  # isSupported :: Gpu -> Bool
  isSupported = gpu:
    let
      inherit (gpu) minCudaVersion maxCudaVersion;
      lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion;
-      upperBoundSatisfied = !(strings.versionOlder maxCudaVersion cudaVersion);
+      upperBoundSatisfied = (maxCudaVersion == null)
+        || !(strings.versionOlder maxCudaVersion cudaVersion);
    in
    lowerBoundSatisfied && upperBoundSatisfied;

+  # isDefault :: Gpu -> Bool
+  isDefault = gpu:
+    let
+      inherit (gpu) dontDefaultAfter;
+      newGpu = dontDefaultAfter == null;
+      recentGpu = newGpu || strings.versionAtLeast dontDefaultAfter cudaVersion;
+    in
+    recentGpu;
+
  # supportedGpus :: List Gpu
  # GPUs which are supported by the provided CUDA version.
  supportedGpus = builtins.filter isSupported gpus;

+  # defaultGpus :: List Gpu
+  # GPUs which are supported by the provided CUDA version and we want to build for by default.
+  defaultGpus = builtins.filter isDefault supportedGpus;
+
  # supportedCapabilities :: List Capability
  supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;

+  # defaultCapabilities :: List Capability
+  # The default capabilities to target, if not overridden by the user.
+  defaultCapabilities = lists.map (gpu: gpu.computeCapability) defaultGpus;
+
  # cudaArchNameToVersions :: AttrSet String (List String)
  # Maps the name of a GPU architecture to different versions of that architecture.
  # For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
@ -151,6 +165,6 @@ assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
  # dropDot :: String -> String
  inherit dropDot;
 } // formatCapabilities {
-  cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
+  cudaCapabilities = config.cudaCapabilities or defaultCapabilities;
  enableForwardCompat = config.cudaForwardCompat or true;
 }
--- a/pkgs/development/compilers/cudatoolkit/gpus.nix
+++ b/pkgs/development/compilers/cudatoolkit/gpus.nix
@ -1,110 +1,148 @@
 [
+  # Type alias
+  # Gpu = {
+  #   archName: String
+  #     - The name of the microarchitecture.
+  #   computeCapability: String
+  #     - The compute capability of the GPU.
+  #   minCudaVersion: String
+  #     - The minimum (inclusive) CUDA version that supports this GPU.
+  #   dontDefaultAfter: null | String
+  #     - The last CUDA version (inclusive) for which this GPU is part of the default
+  #       capabilities we build; it is excluded for any newer CUDA version. null means we
+  #       always include this GPU in the default capabilities if it is supported.
+  #   maxCudaVersion: null | String
+  #     - The maximum (inclusive) CUDA version that supports this GPU. null means there is no
+  #       maximum.
+  # }
  {
    archName = "Kepler";
    computeCapability = "3.0";
    minCudaVersion = "10.0";
+    dontDefaultAfter = "10.2";
    maxCudaVersion = "10.2";
  }
  {
    archName = "Kepler";
    computeCapability = "3.2";
    minCudaVersion = "10.0";
+    dontDefaultAfter = "10.2";
    maxCudaVersion = "10.2";
  }
  {
    archName = "Kepler";
    computeCapability = "3.5";
    minCudaVersion = "10.0";
+    dontDefaultAfter = "11.0";
    maxCudaVersion = "11.8";
  }
  {
    archName = "Kepler";
    computeCapability = "3.7";
    minCudaVersion = "10.0";
+    dontDefaultAfter = "11.0";
    maxCudaVersion = "11.8";
  }
  {
    archName = "Maxwell";
    computeCapability = "5.0";
    minCudaVersion = "10.0";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = "11.0";
+    maxCudaVersion = null;
  }
  {
    archName = "Maxwell";
    computeCapability = "5.2";
    minCudaVersion = "10.0";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = "11.0";
+    maxCudaVersion = null;
  }
  {
    archName = "Maxwell";
    computeCapability = "5.3";
    minCudaVersion = "10.0";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = "11.0";
+    maxCudaVersion = null;
  }
  {
    archName = "Pascal";
    computeCapability = "6.0";
    minCudaVersion = "10.0";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
  {
    archName = "Pascal";
    computeCapability = "6.1";
    minCudaVersion = "10.0";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
  {
    archName = "Pascal";
    computeCapability = "6.2";
    minCudaVersion = "10.0";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
  {
    archName = "Volta";
    computeCapability = "7.0";
    minCudaVersion = "10.0";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
  {
    archName = "Volta";
    computeCapability = "7.2";
    minCudaVersion = "10.0";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
  {
    archName = "Turing";
    computeCapability = "7.5";
    minCudaVersion = "10.0";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
  {
    archName = "Ampere";
    computeCapability = "8.0";
    minCudaVersion = "11.2";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
  {
    archName = "Ampere";
    computeCapability = "8.6";
    minCudaVersion = "11.2";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
  {
    archName = "Ampere";
    computeCapability = "8.7";
    minCudaVersion = "11.5";
-    maxCudaVersion = "12.0";
+    # NOTE: This is purposefully lower than minCudaVersion (11.5) to ensure 8.7 is never a
+    #   capability we target by default. 8.7 is the Jetson Orin series of devices, which are a
+    #   very specific platform. We keep this entry here in case we ever want to target it
+    #   explicitly, but we don't want to target it by default.
+    dontDefaultAfter = "11.4";
+    maxCudaVersion = null;
  }
  {
    archName = "Ada";
    computeCapability = "8.9";
    minCudaVersion = "11.8";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
  {
    archName = "Hopper";
    computeCapability = "9.0";
    minCudaVersion = "11.8";
-    maxCudaVersion = "12.0";
+    dontDefaultAfter = null;
+    maxCudaVersion = null;
  }
 ]
--- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
+++ b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
@ -41,6 +41,10 @@ in
      # uses the last --compiler-bindir it gets on the command line.
      # FIXME: this results in "incompatible redefinition" warnings.
      # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
+      # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the
+      #   compiled binaries. If binaries grow over 2GB, they will fail to link. This is a problem
+      #   for us, as the default set of CUDA capabilities we build can regularly cause this to
+      #   occur (for example, with Magma).
      postInstall = (oldAttrs.postInstall or "") + ''
        mkdir -p $out/nix-support
        cat <<EOF >> $out/nix-support/setup-hook
@ -49,7 +53,7 @@ in
        if [ -z "\''${CUDAHOSTCXX-}" ]; then
          export CUDAHOSTCXX=${cc}/bin;
        fi
-        export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin'
+        export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin -Xfatbin=-compress-all'
        EOF
      '';
    });