
treewide: cuda: use propagatedBuildInputs, lib.getOutput

Someone Serge 2024-06-28 01:09:23 +00:00
parent f1ddae47e3
commit 82018339bd
23 changed files with 182 additions and 215 deletions
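
The pattern throughout: instead of enumerating a package's split outputs (`.dev`, `.lib`, `.static`) at every use site, consumers now list the package once and let its propagated outputs supply the rest, reaching for an explicit accessor only where a single output is genuinely wanted. A minimal sketch of those accessors (illustrative, not taken from the commit itself):

  # lib.getOutput returns the requested output if the derivation has one,
  # falling back to the default output / the derivation itself otherwise;
  # getDev, getLib and getBin are shorthands for "dev", "lib" and "bin".
  { lib, cudaPackages }:
  {
    buildInputs = [
      cudaPackages.libcublas                            # replaces libcublas.dev/.lib/.static
      (lib.getOutput "static" cudaPackages.cuda_cudart) # explicit output where one is truly needed
    ];
  }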

View file

@@ -47,12 +47,9 @@ stdenv.mkDerivation (finalAttrs: {
 ] ++ lib.optionals cudaSupport (
   with cudaPackages;
   [
-    cuda_cccl.dev
-    cuda_cudart.dev
-    cuda_cudart.lib
-    cuda_cudart.static
-    libcublas.dev
-    libcublas.lib
+    cuda_cccl
+    cuda_cudart
+    libcublas
 ]);
 cmakeFlags = [

View file

@@ -46,16 +46,12 @@ let
   ++ optionals metalSupport [ MetalKit ];
 cudaBuildInputs = with cudaPackages; [
-  cuda_cccl.dev # <nv/target>
+  cuda_cccl # <nv/target>
   # A temporary hack for reducing the closure size, remove once cudaPackages
   # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-  cuda_cudart.dev
-  cuda_cudart.lib
-  cuda_cudart.static
-  libcublas.dev
-  libcublas.lib
-  libcublas.static
+  cuda_cudart
+  libcublas
 ];
 rocmBuildInputs = with rocmPackages; [

View file

@@ -101,12 +101,12 @@ let
   };
   cudaToolkit = buildEnv {
-    name = "cuda-toolkit";
+    ignoreCollisions = true; # FIXME: find a cleaner way to do this without ignoring collisions
+    name = "cuda-merged";
     paths = [
       cudaPackages.cudatoolkit
-      cudaPackages.cuda_cudart
-      cudaPackages.cuda_cudart.static
+      (lib.getBin (cudaPackages.cuda_nvcc.__spliced.buildHost or cudaPackages.cuda_nvcc))
+      (lib.getLib cudaPackages.cuda_cudart)
+      (lib.getOutput "static" cudaPackages.cuda_cudart)
+      (lib.getLib cudaPackages.libcublas)
     ];
   };
@@ -140,10 +140,6 @@ in
 goBuild ((lib.optionalAttrs enableRocm {
   ROCM_PATH = rocmPath;
   CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
-}) // (lib.optionalAttrs enableCuda {
-  CUDA_LIB_DIR = "${cudaToolkit}/lib";
-  CUDACXX = "${cudaToolkit}/bin/nvcc";
-  CUDAToolkit_ROOT = cudaToolkit;
 }) // {
   inherit pname version src vendorHash;
@@ -151,6 +147,8 @@ goBuild ((lib.optionalAttrs enableRocm {
   cmake
 ] ++ lib.optionals enableRocm [
   rocmPackages.llvm.bintools
+] ++ lib.optionals enableCuda [
+  cudaPackages.cuda_nvcc
 ] ++ lib.optionals (enableRocm || enableCuda) [
   makeWrapper
 ] ++ lib.optionals stdenv.isDarwin
@@ -160,6 +158,7 @@ goBuild ((lib.optionalAttrs enableRocm {
   (rocmLibs ++ [ libdrm ])
 ++ lib.optionals enableCuda [
   cudaPackages.cuda_cudart
+  cudaPackages.libcublas
 ] ++ lib.optionals stdenv.isDarwin
   metalFrameworks;

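Two details in the ollama hunks above are worth calling out. `lib.getBin` selects the `bin` output (falling back to the default), and `__spliced.buildHost` picks the variant of a package built for the build platform but targeting the host, so the merged environment ships an nvcc that can actually run during a cross build. A rough sketch of that fallback; `getBuildHost` and `nvccForBuild` are illustrative names, not from the commit:

  # On cross builds Nixpkgs attaches a __spliced attrset holding the
  # per-platform variants; on native builds it may be absent, hence `or`.
  getBuildHost = pkg: pkg.__spliced.buildHost or pkg;
  nvccForBuild = lib.getBin (getBuildHost cudaPackages.cuda_nvcc);
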
View file

@@ -44,7 +44,7 @@ filterAndCreateOverrides {
   }:
   prevAttrs: {
     buildInputs = prevAttrs.buildInputs ++ [
-      libcublas.lib
+      libcublas
       numactl
       rdma-core
     ];
@@ -66,17 +66,17 @@ filterAndCreateOverrides {
     buildInputs =
       prevAttrs.buildInputs
       # Always depends on this
-      ++ [ libcublas.lib ]
+      ++ [ libcublas ]
       # Dependency from 12.0 and on
-      ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ]
+      ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink ]
       # Dependency from 12.1 and on
-      ++ lib.lists.optionals (cudaAtLeast "12.1") [ libcusparse.lib ];
+      ++ lib.lists.optionals (cudaAtLeast "12.1") [ libcusparse ];
     brokenConditions = prevAttrs.brokenConditions // {
       "libnvjitlink missing (CUDA >= 12.0)" =
-        !(cudaAtLeast "12.0" -> (libnvjitlink != null && libnvjitlink.lib != null));
+        !(cudaAtLeast "12.0" -> (libnvjitlink != null && libnvjitlink != null));
       "libcusparse missing (CUDA >= 12.1)" =
-        !(cudaAtLeast "12.1" -> (libcusparse != null && libcusparse.lib != null));
+        !(cudaAtLeast "12.1" -> (libcusparse != null && libcusparse != null));
     };
   };
@@ -90,16 +90,16 @@ filterAndCreateOverrides {
     buildInputs =
       prevAttrs.buildInputs
       # Dependency from 12.0 and on
-      ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ];
+      ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink ];
     brokenConditions = prevAttrs.brokenConditions // {
       "libnvjitlink missing (CUDA >= 12.0)" =
-        !(cudaAtLeast "12.0" -> (libnvjitlink != null && libnvjitlink.lib != null));
+        !(cudaAtLeast "12.0" -> (libnvjitlink != null && libnvjitlink != null));
     };
   };
   # TODO(@connorbaker): cuda_cudart.dev depends on crt/host_config.h, which is from
-  # cuda_nvcc.dev. It would be nice to be able to encode that.
+  # (getDev cuda_nvcc). It would be nice to be able to encode that.
   cuda_cudart =
     { addDriverRunpath, lib }:
     prevAttrs: {
@@ -248,8 +248,8 @@ filterAndCreateOverrides {
   prevAttrs: {
     buildInputs = prevAttrs.buildInputs ++ [
       freeglut
-      libcufft.lib
-      libcurand.lib
+      libcufft
+      libcurand
       libGLU
       libglvnd
       mesa

View file

@@ -307,7 +307,6 @@ backendStdenv.mkDerivation (finalAttrs: {
   # Make the CUDA-patched stdenv available
   passthru.stdenv = backendStdenv;
   meta = {
-    description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}";
     sourceProvenance = [ sourceTypes.binaryNativeCode ];

View file

@@ -45,11 +45,11 @@ backendStdenv.mkDerivation (finalAttrs: {
   [ nccl ]
   ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [ cudatoolkit ]
   ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [
-    cuda_nvcc.dev # crt/host_config.h
+    cuda_nvcc # crt/host_config.h
     cuda_cudart
   ]
   ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [
-    cuda_cccl.dev # <nv/target>
+    cuda_cccl # <nv/target>
   ]
   ++ lib.optionals mpiSupport [ mpi ];

View file

@@ -54,7 +54,7 @@ backendStdenv.mkDerivation (finalAttrs: {
   buildInputs =
     lib.optionals (cudaOlder "11.4") [ cudatoolkit ]
     ++ lib.optionals (cudaAtLeast "11.4") [
-      cuda_nvcc.dev # crt/host_config.h
+      cuda_nvcc # crt/host_config.h
       cuda_cudart
     ]
 # NOTE: CUDA versions in Nixpkgs only use a major and minor version. When we do comparisons

View file

@@ -57,23 +57,13 @@
   ]
   ++ lib.optionals guiSupport [ libX11 ]
   ++ lib.optionals cudaSupport (with cudaPackages; [
-    cuda_cudart.dev
-    cuda_cudart.lib
-    cuda_cudart.static
-    cuda_nvcc.dev
-    libcublas.dev
-    libcublas.lib
-    libcublas.static
-    libcurand.dev
-    libcurand.lib
-    libcurand.static
-    libcusolver.dev
-    libcusolver.lib
-    libcusolver.static
-    cudnn.dev
-    cudnn.lib
-    cudnn.static
-    cuda_cccl.dev
+    cuda_cudart
+    cuda_nvcc
+    libcublas
+    libcurand
+    libcusolver
+    cudnn
+    cuda_cccl
   ]);
   passthru = {

View file

@@ -372,28 +372,19 @@ effectiveStdenv.mkDerivation {
   doxygen
   graphviz-nox
 ] ++ lib.optionals enableCuda (with cudaPackages; [
-  cuda_cudart.lib
-  cuda_cudart.dev
-  cuda_cccl.dev # <thrust/*>
-  libnpp.dev # npp.h
-  libnpp.lib
-  libnpp.static
+  cuda_cudart
+  cuda_cccl # <thrust/*>
+  libnpp # npp.h
   nvidia-optical-flow-sdk
 ] ++ lib.optionals enableCublas [
   # May start using the default $out instead once
   # https://github.com/NixOS/nixpkgs/issues/271792
   # has been addressed
-  libcublas.static
-  libcublas.lib
-  libcublas.dev # cublas_v2.h
+  libcublas # cublas_v2.h
 ] ++ lib.optionals enableCudnn [
-  cudnn.dev # cudnn.h
-  cudnn.lib
-  cudnn.static
+  cudnn # cudnn.h
 ] ++ lib.optionals enableCufft [
-  libcufft.dev # cufft.h
-  libcufft.lib
-  libcufft.static
+  libcufft # cufft.h
 ]);
 propagatedBuildInputs = lib.optionals enablePython [ pythonPackages.numpy ];

View file

@@ -63,7 +63,7 @@ stdenv.mkDerivation rec {
   # TODO: add UCX support, which is recommended to use with cuda for the most robust OpenMPI build
   # https://github.com/openucx/ucx
   # https://www.open-mpi.org/faq/?category=buildcuda
-  ++ lib.optionals cudaSupport [ "--with-cuda=${cudaPackages.cuda_cudart}" "--enable-dlopen" ]
+  ++ lib.optionals cudaSupport [ "--with-cuda=${lib.getDev cudaPackages.cuda_cudart}" "--enable-dlopen" ]
   ++ lib.optionals fabricSupport [ "--with-psm2=${lib.getDev libpsm2}" "--with-libfabric=${lib.getDev libfabric}" ]
   ;

View file

@@ -33,19 +33,15 @@ let
   stdenv = if cudaSupport then backendStdenv else inputs.stdenv;
-  cudaJoined = symlinkJoin {
-    name = "cuda-packages-unsplit";
-    paths = with cudaPackages; [
-      cuda_cudart # cuda_runtime.h
-      libcublas
-      libcurand
-      cuda_cccl
-    ] ++ lib.optionals (cudaPackages ? cuda_profiler_api) [
-      cuda_profiler_api # cuda_profiler_api.h
-    ] ++ lib.optionals (!(cudaPackages ? cuda_profiler_api)) [
-      cuda_nvprof # cuda_profiler_api.h
-    ];
-  };
+  cudaComponents = with cudaPackages; [
+    cuda_cudart # cuda_runtime.h
+    libcublas
+    libcurand
+    cuda_cccl
+    # cuda_profiler_api.h
+    (cudaPackages.cuda_profiler_api or cudaPackages.cuda_nvprof)
+  ];
 in
 stdenv.mkDerivation {
   inherit pname version;
@@ -68,9 +64,7 @@ stdenv.mkDerivation {
     pythonPackages.wheel
   ] ++ lib.optionals stdenv.cc.isClang [
     llvmPackages.openmp
-  ] ++ lib.optionals cudaSupport [
-    cudaJoined
-  ];
+  ] ++ lib.optionals cudaSupport cudaComponents;
   propagatedBuildInputs = lib.optionals pythonSupport [
     pythonPackages.numpy
@@ -93,7 +87,6 @@ stdenv.mkDerivation {
     "-DFAISS_OPT_LEVEL=${optLevel}"
   ] ++ lib.optionals cudaSupport [
     "-DCMAKE_CUDA_ARCHITECTURES=${flags.cmakeCudaArchitecturesString}"
-    "-DCUDAToolkit_INCLUDE_DIR=${cudaJoined}/include"
   ];
   buildFlags = [

View file

@@ -134,19 +134,15 @@ stdenv.mkDerivation {
   blas
   python3
 ] ++ lists.optionals cudaSupport (with effectiveCudaPackages; [
-  cuda_cudart.dev # cuda_runtime.h
-  cuda_cudart.lib # cudart
-  cuda_cudart.static # cudart_static
-  libcublas.dev # cublas_v2.h
-  libcublas.lib # cublas
-  libcusparse.dev # cusparse.h
-  libcusparse.lib # cusparse
+  cuda_cudart # cuda_runtime.h
+  libcublas # cublas_v2.h
+  libcusparse # cusparse.h
 ] ++ lists.optionals (cudaOlder "11.8") [
-  cuda_nvprof.dev # <cuda_profiler_api.h>
+  cuda_nvprof # <cuda_profiler_api.h>
 ] ++ lists.optionals (cudaAtLeast "11.8") [
-  cuda_profiler_api.dev # <cuda_profiler_api.h>
+  cuda_profiler_api # <cuda_profiler_api.h>
 ] ++ lists.optionals (cudaAtLeast "12.0") [
-  cuda_cccl.dev # <nv/target>
+  cuda_cccl # <nv/target>
 ]) ++ lists.optionals rocmSupport [
   rocmPackages.clr
   rocmPackages.hipblas

View file

@@ -36,17 +36,15 @@ stdenv.mkDerivation rec {
   buildInputs = assert (blas.isILP64 == lapack.isILP64); [
     blas lapack
     metis
-    gfortran.cc.lib
+    (lib.getLib gfortran.cc)
     gmp
     mpfr
   ] ++ lib.optionals stdenv.cc.isClang [
     openmp
   ] ++ lib.optionals enableCuda [
-    cudaPackages.cuda_cudart.dev
-    cudaPackages.cuda_cudart.lib
-    cudaPackages.cuda_cccl.dev
-    cudaPackages.libcublas.dev
-    cudaPackages.libcublas.lib
+    cudaPackages.cuda_cudart
+    cudaPackages.cuda_cccl
+    cudaPackages.libcublas
   ];
   preConfigure = ''
@@ -63,8 +61,8 @@ stdenv.mkDerivation rec {
     "CFLAGS=-DBLAS64"
   ] ++ lib.optionals enableCuda [
     "CUDA_PATH=${cudaPackages.cuda_nvcc}"
-    "CUDART_LIB=${cudaPackages.cuda_cudart.lib}/lib/libcudart.so"
-    "CUBLAS_LIB=${cudaPackages.libcublas.lib}/lib/libcublas.so"
+    "CUDART_LIB=${lib.getLib cudaPackages.cuda_cudart}/lib/libcudart.so"
+    "CUBLAS_LIB=${lib.getLib cudaPackages.libcublas}/lib/libcublas.so"
   ] ++ lib.optionals stdenv.isDarwin [
     # Unless these are set, the build will attempt to use `Accelerate` on darwin, see:
     # https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/v5.13.0/SuiteSparse_config/SuiteSparse_config.mk#L368

View file

@@ -14,15 +14,15 @@
   inherit (cudaPackages) backendStdenv flags;
   cuda-common-redist = with cudaPackages; [
-    cuda_cudart.dev # cuda_runtime.h
-    cuda_cudart.lib
-    cuda_cccl.dev # <nv/target>
-    libcublas.dev # cublas_v2.h
-    libcublas.lib
-    libcusolver.dev # cusolverDn.h
-    libcusolver.lib
-    libcusparse.dev # cusparse.h
-    libcusparse.lib
+    (lib.getDev cuda_cudart) # cuda_runtime.h
+    (lib.getLib cuda_cudart)
+    (lib.getDev cuda_cccl) # <nv/target>
+    (lib.getDev libcublas) # cublas_v2.h
+    (lib.getLib libcublas)
+    (lib.getDev libcusolver) # cusolverDn.h
+    (lib.getLib libcusolver)
+    (lib.getDev libcusparse) # cusparse.h
+    (lib.getLib libcusparse)
   ];
   cuda-native-redist = symlinkJoin {

View file

@@ -55,7 +55,7 @@ stdenv.mkDerivation rec {
   LDFLAGS = lib.optionals enableCuda [
     # Fake libnvidia-ml.so (the real one is deployed impurely)
-    "-L${cudaPackages.cuda_nvml_dev}/lib/stubs"
+    "-L${lib.getLib cudaPackages.cuda_nvml_dev}/lib/stubs"
   ];
   configureFlags = [

View file

@@ -21,9 +21,8 @@ let
   name = "cudatoolkit-joined-${cudaPackages.cudaVersion}";
   paths = with cudaPackages; [
     cuda_cccl # <nv/target>
-    cuda_cccl.dev
     cuda_cudart
-    cuda_nvcc.dev # <crt/host_defines.h>
+    cuda_nvcc # <crt/host_defines.h>
     cuda_nvprof
     cuda_nvrtc
     cuda_nvtx

View file

@@ -34,12 +34,12 @@ let
   cudaLibPath = lib.makeLibraryPath (
     with cudaPackages;
     [
-      cuda_cudart.lib # libcudart.so
-      cuda_cupti.lib # libcupti.so
-      cudnn.lib # libcudnn.so
-      libcufft.lib # libcufft.so
-      libcusolver.lib # libcusolver.so
-      libcusparse.lib # libcusparse.so
+      (lib.getLib cuda_cudart) # libcudart.so
+      (lib.getLib cuda_cupti) # libcupti.so
+      (lib.getLib cudnn) # libcudnn.so
+      (lib.getLib libcufft) # libcufft.so
+      (lib.getLib libcusolver) # libcusolver.so
+      (lib.getLib libcusparse) # libcusparse.so
     ]
   );

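Passing `lib.getLib` explicitly in the hunk above is mostly about making the intent visible: `lib.makeLibraryPath` already resolves each entry to its `lib` output, roughly as in this sketch (paraphrased from nixpkgs' lib/strings.nix, not a verbatim copy):

  # makeLibraryPath maps every package to <lib output>/lib and joins with ":".
  makeLibraryPath = packages:
    lib.concatStringsSep ":" (map (pkg: "${lib.getLib pkg}/lib") packages);
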
View file

@@ -55,7 +55,6 @@ let
   inherit (cudaPackages)
     cudaFlags
     cudaVersion
-    cudnn
     nccl
     ;
@@ -80,18 +79,26 @@ let
   broken = effectiveStdenv.isDarwin || nccl.meta.unsupported;
 };
+# Bazel wants a merged cudnn at configuration time
+cudnnMerged = symlinkJoin {
+  name = "cudnn-merged";
+  paths = with cudaPackages; [
+    (lib.getDev cudnn)
+    (lib.getLib cudnn)
+  ];
+};
 # These are necessary at build time and run time.
 cuda_libs_joined = symlinkJoin {
   name = "cuda-joined";
   paths = with cudaPackages; [
-    cuda_cudart.lib # libcudart.so
-    cuda_cudart.static # libcudart_static.a
-    cuda_cupti.lib # libcupti.so
-    libcublas.lib # libcublas.so
-    libcufft.lib # libcufft.so
-    libcurand.lib # libcurand.so
-    libcusolver.lib # libcusolver.so
-    libcusparse.lib # libcusparse.so
+    (lib.getLib cuda_cudart) # libcudart.so
+    (lib.getLib cuda_cupti) # libcupti.so
+    (lib.getLib libcublas) # libcublas.so
+    (lib.getLib libcufft) # libcufft.so
+    (lib.getLib libcurand) # libcurand.so
+    (lib.getLib libcusolver) # libcusolver.so
+    (lib.getLib libcusparse) # libcusparse.so
   ];
 };
 # These are only necessary at build time.
@@ -101,20 +108,23 @@ let
   cuda_libs_joined
   # Binaries
-  cudaPackages.cuda_nvcc.bin # nvcc
+  (lib.getBin cuda_nvcc) # nvcc
+  # Archives
+  (lib.getOutput "static" cuda_cudart) # libcudart_static.a
   # Headers
-  cuda_cccl.dev # block_load.cuh
-  cuda_cudart.dev # cuda.h
-  cuda_cupti.dev # cupti.h
-  cuda_nvcc.dev # See https://github.com/google/jax/issues/19811
-  cuda_nvml_dev # nvml.h
-  cuda_nvtx.dev # nvToolsExt.h
-  libcublas.dev # cublas_api.h
-  libcufft.dev # cufft.h
-  libcurand.dev # curand.h
-  libcusolver.dev # cusolver_common.h
-  libcusparse.dev # cusparse.h
+  (lib.getDev cuda_cccl) # block_load.cuh
+  (lib.getDev cuda_cudart) # cuda.h
+  (lib.getDev cuda_cupti) # cupti.h
+  (lib.getDev cuda_nvcc) # See https://github.com/google/jax/issues/19811
+  (lib.getDev cuda_nvml_dev) # nvml.h
+  (lib.getDev cuda_nvtx) # nvToolsExt.h
+  (lib.getDev libcublas) # cublas_api.h
+  (lib.getDev libcufft) # cufft.h
+  (lib.getDev libcurand) # curand.h
+  (lib.getDev libcusolver) # cusolver_common.h
+  (lib.getDev libcusparse) # cusparse.h
 ];
 };
@@ -308,10 +318,10 @@ let
   + lib.optionalString cudaSupport ''
     build --config=cuda
     build --action_env CUDA_TOOLKIT_PATH="${cuda_build_deps_joined}"
-    build --action_env CUDNN_INSTALL_PATH="${cudnn}"
-    build --action_env TF_CUDA_PATHS="${cuda_build_deps_joined},${cudnn},${nccl}"
+    build --action_env CUDNN_INSTALL_PATH="${cudnnMerged}"
+    build --action_env TF_CUDA_PATHS="${cuda_build_deps_joined},${cudnnMerged},${lib.getDev nccl}"
     build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudaVersion}"
-    build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
+    build --action_env TF_CUDNN_VERSION="${lib.versions.major cudaPackages.cudnn.version}"
     build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.realArches}"
   ''
   +
@@ -431,13 +441,13 @@ buildPythonPackage {
   # for more info.
   postInstall = lib.optionalString cudaSupport ''
     mkdir -p $out/bin
-    ln -s ${cudaPackages.cuda_nvcc.bin}/bin/ptxas $out/bin/ptxas
+    ln -s ${lib.getExe' cudaPackages.cuda_nvcc "ptxas"} $out/bin/ptxas
     find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while read lib; do
       patchelf --add-rpath "${
         lib.makeLibraryPath [
           cuda_libs_joined
-          cudnn
+          (lib.getLib cudaPackages.cudnn)
           nccl
         ]
       }" "$lib"

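The `cudnnMerged` helper introduced above (and mirrored in the tensorflow changes below) exists because Bazel's CUDA configuration expects one conventional prefix with `include/` next to `lib/`, while the split cudnn package scatters those across outputs. `symlinkJoin` rebuilds such a prefix out of symlinks; a minimal sketch, assuming the usual cudaPackages layout:

  # One store path whose include/ and lib/ point back into cudnn's
  # dev and lib outputs respectively.
  cudnnMerged = symlinkJoin {
    name = "cudnn-merged";
    paths = [ (lib.getDev cudaPackages.cudnn) (lib.getLib cudaPackages.cudnn) ];
  };
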
View file

@@ -116,7 +116,13 @@ let
   # cudaPackages.cudnn led to this:
   # https://github.com/tensorflow/tensorflow/issues/60398
   cudnnAttribute = "cudnn_8_6";
-  cudnn = cudaPackages.${cudnnAttribute};
+  cudnnMerged = symlinkJoin {
+    name = "cudnn-merged";
+    paths = [
+      (lib.getDev cudaPackages.${cudnnAttribute})
+      (lib.getLib cudaPackages.${cudnnAttribute})
+    ];
+  };
   gentoo-patches = fetchzip {
     url = "https://dev.gentoo.org/~perfinion/patches/tensorflow-patches-2.12.0.tar.bz2";
     hash = "sha256-SCRX/5/zML7LmKEPJkcM5Tebez9vv/gmE4xhT/jyqWs=";
@@ -130,19 +136,30 @@ let
   withTensorboard = (pythonOlder "3.6") || tensorboardSupport;
-  # FIXME: migrate to redist cudaPackages
-  cudatoolkit_joined = symlinkJoin {
-    name = "${cudatoolkit.name}-merged";
-    paths =
-      [
-        cudatoolkit.lib
-        cudatoolkit.out
-      ]
-      ++ lib.optionals (lib.versionOlder cudatoolkit.version "11") [
-        # for some reason some of the required libs are in the targets/x86_64-linux
-        # directory; not sure why but this works around it
-        "${cudatoolkit}/targets/${stdenv.system}"
-      ];
+  cudaComponents = with cudaPackages; [
+    (cuda_nvcc.__spliced.buildHost or cuda_nvcc)
+    (cuda_nvprune.__spliced.buildHost or cuda_nvprune)
+    cuda_cccl # block_load.cuh
+    cuda_cudart # cuda.h
+    cuda_cupti # cupti.h
+    cuda_nvcc # See https://github.com/google/jax/issues/19811
+    cuda_nvml_dev # nvml.h
+    cuda_nvtx # nvToolsExt.h
+    libcublas # cublas_api.h
+    libcufft # cufft.h
+    libcurand # curand.h
+    libcusolver # cusolver_common.h
+    libcusparse # cusparse.h
+  ];
+  cudatoolkitDevMerged = symlinkJoin {
+    name = "cuda-${cudaPackages.cudaVersion}-dev-merged";
+    paths = lib.concatMap (p: [
+      (lib.getBin p)
+      (lib.getDev p)
+      (lib.getLib p)
+      (lib.getOutput "static" p) # Makes for a very fat closure
+    ]) cudaComponents;
+  };
   # Tensorflow expects bintools at hard-coded paths, e.g. /usr/bin/ar
@@ -321,7 +338,7 @@ let
   ]
   ++ lib.optionals cudaSupport [
     cudatoolkit
-    cudnn
+    cudnnMerged
   ]
   ++ lib.optionals mklSupport [ mkl ]
   ++ lib.optionals stdenv.isDarwin [
@@ -402,7 +419,7 @@ let
   TF_NEED_MPI = tfFeature cudaSupport;
   TF_NEED_CUDA = tfFeature cudaSupport;
-  TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
+  TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkitDevMerged},${cudnnMerged},${lib.getLib nccl}";
   TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities;
   # Needed even when we override stdenv: e.g. for ar
@@ -653,7 +670,7 @@ buildPythonPackage {
   find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while read lib; do
     addOpenGLRunpath "$lib"
-    patchelf --set-rpath "${cudatoolkit}/lib:${cudatoolkit.lib}/lib:${cudnn}/lib:${nccl}/lib:$(patchelf --print-rpath "$lib")" "$lib"
+    patchelf --set-rpath "${cudatoolkit}/lib:${cudatoolkit.lib}/lib:${cudnnMerged}/lib:${lib.getLib nccl}/lib:$(patchelf --print-rpath "$lib")" "$lib"
   done
 '';

View file

@@ -301,11 +301,11 @@ buildPythonPackage rec {
   preConfigure =
     lib.optionalString cudaSupport ''
       export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
-      export CUPTI_INCLUDE_DIR=${cudaPackages.cuda_cupti.dev}/include
-      export CUPTI_LIBRARY_DIR=${cudaPackages.cuda_cupti.lib}/lib
+      export CUPTI_INCLUDE_DIR=${lib.getDev cudaPackages.cuda_cupti}/include
+      export CUPTI_LIBRARY_DIR=${lib.getLib cudaPackages.cuda_cupti}/lib
     ''
     + lib.optionalString (cudaSupport && cudaPackages ? cudnn) ''
-      export CUDNN_INCLUDE_DIR=${cudnn.dev}/include
+      export CUDNN_INCLUDE_DIR=${lib.getLib cudnn}/include
       export CUDNN_LIB_DIR=${cudnn.lib}/lib
     ''
     + lib.optionalString rocmSupport ''
@@ -453,42 +453,31 @@ buildPythonPackage rec {
   ++ lib.optionals cudaSupport (
     with cudaPackages;
     [
-      cuda_cccl.dev # <thrust/*>
-      cuda_cudart.dev # cuda_runtime.h and libraries
-      cuda_cudart.lib
-      cuda_cudart.static
-      cuda_cupti.dev # For kineto
-      cuda_cupti.lib # For kineto
-      cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too
-      cuda_nvml_dev.dev # <nvml.h>
-      cuda_nvrtc.dev
-      cuda_nvrtc.lib
-      cuda_nvtx.dev
-      cuda_nvtx.lib # -llibNVToolsExt
-      libcublas.dev
-      libcublas.lib
-      libcufft.dev
-      libcufft.lib
-      libcurand.dev
-      libcurand.lib
-      libcusolver.dev
-      libcusolver.lib
-      libcusparse.dev
-      libcusparse.lib
+      cuda_cccl # <thrust/*>
+      cuda_cudart # cuda_runtime.h and libraries
+      cuda_cupti # For kineto
+      cuda_nvcc # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too
+      cuda_nvml_dev # <nvml.h>
+      cuda_nvrtc
+      cuda_nvtx # -llibNVToolsExt
+      libcublas
+      libcufft
+      libcurand
+      libcusolver
+      libcusparse
     ]
     ++ lists.optionals (cudaPackages ? cudnn) [
-      cudnn.dev
-      cudnn.lib
+      cudnn
    ]
    ++ lists.optionals useSystemNccl [
      # Some platforms do not support NCCL (i.e., Jetson)
-      nccl.dev # Provides nccl.h AND a static copy of NCCL!
+      nccl # Provides nccl.h AND a static copy of NCCL!
    ]
    ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
-      cuda_nvprof.dev # <cuda_profiler_api.h>
+      cuda_nvprof # <cuda_profiler_api.h>
    ]
    ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [
-      cuda_profiler_api.dev # <cuda_profiler_api.h>
+      cuda_profiler_api # <cuda_profiler_api.h>
    ]
  )
  ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ]

View file

@@ -100,9 +100,9 @@ buildPythonPackage rec {
   with cudaPackages;
   [
     cuda_cudart # cuda_runtime.h, -lcudart
-    cuda_cccl.dev # <thrust/*>
-    libcusparse.dev # cusparse.h
-    libcublas.dev # cublas_v2.h
+    cuda_cccl # <thrust/*>
+    libcusparse # cusparse.h
+    libcublas # cublas_v2.h
     libcusolver # cusolverDn.h
   ]
 ))

View file

@@ -66,11 +66,11 @@ buildPythonPackage {
   [
     # flash-attn build
     cuda_cudart # cuda_runtime_api.h
-    libcusparse.dev # cusparse.h
-    cuda_cccl.dev # nv/target
-    libcublas.dev # cublas_v2.h
-    libcusolver.dev # cusolverDn.h
-    libcurand.dev # curand_kernel.h
+    libcusparse # cusparse.h
+    cuda_cccl # nv/target
+    libcublas # cublas_v2.h
+    libcusolver # cusolverDn.h
+    libcurand # curand_kernel.h
   ]
 );

View file

@@ -57,21 +57,14 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   CoreVideo
   MetalKit
 ] ++ lib.optionals cudaSupport ( with cudaPackages; [
-  # A temporary hack for reducing the closure size, remove once cudaPackages
-  # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-  cuda_cccl.dev # provides nv/target
-  cuda_cudart.dev
-  cuda_cudart.lib
-  cuda_cudart.static
-  libcublas.dev
-  libcublas.lib
-  libcublas.static
+  cuda_cccl # provides nv/target
+  cuda_cudart
+  libcublas
 ]);
 postPatch = let
   cudaOldStr = "-lcuda ";
-  cudaNewStr = "-lcuda -L${cudaPackages.cuda_cudart.lib}/lib/stubs ";
+  cudaNewStr = "-lcuda -L${cudaPackages.cuda_cudart}/lib/stubs ";
 in lib.optionalString cudaSupport ''
   substituteInPlace Makefile \
     --replace '${cudaOldStr}' '${cudaNewStr}'
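
Why the consumer side can become this terse: with multi-output derivations, the default output of the redist packages is set up to propagate the outputs consumers almost always need, so a plain `cuda_cudart` in buildInputs behaves like the old three-line enumeration. The mechanism is the stdenv multiple-outputs machinery rather than anything CUDA-specific; a hedged sketch of the shape (`propagatedBuildOutputs` is the stdenv hook variable, but the exact outputs the CUDA builders propagate may differ):

  # In the producing derivation:
  outputs = [ "out" "dev" "lib" "static" ];
  # The multiple-outputs setup hook propagates the listed sibling
  # outputs to consumers of the dev output:
  propagatedBuildOutputs = "lib";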