diff --git a/maintainers/maintainer-list.nix b/maintainers/maintainer-list.nix
index a342f1801b58..5ff5d1c6c499 100644
--- a/maintainers/maintainer-list.nix
+++ b/maintainers/maintainer-list.nix
@@ -385,6 +385,13 @@
     githubId = 2526296;
     name = "Adrien Bustany";
   };
+  abysssol = {
+    name = "abysssol";
+    email = "abysssol@pm.me";
+    matrix = "@abysssol:tchncs.de";
+    github = "abysssol";
+    githubId = 76763323;
+  };
   acairncross = {
     email = "acairncross@gmail.com";
     github = "acairncross";
diff --git a/pkgs/tools/misc/ollama/cmake-include.patch b/pkgs/tools/misc/ollama/cmake-include.patch
new file mode 100644
index 000000000000..013ed66bf91c
--- /dev/null
+++ b/pkgs/tools/misc/ollama/cmake-include.patch
@@ -0,0 +1,7 @@
+--- a/llm/llama.cpp/examples/server/CMakeLists.txt
++++ b/llm/llama.cpp/examples/server/CMakeLists.txt
+@@ -11,3 +11,4 @@
+     TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
+ endif()
+ target_compile_features(${TARGET} PRIVATE cxx_std_11)
++include (../../../ext_server/CMakeLists.txt) # ollama
diff --git a/pkgs/tools/misc/ollama/default.nix b/pkgs/tools/misc/ollama/default.nix
index 2176582e1fe9..6ce576644d49 100644
--- a/pkgs/tools/misc/ollama/default.nix
+++ b/pkgs/tools/misc/ollama/default.nix
@@ -1,50 +1,182 @@
 { lib
 , buildGoModule
 , fetchFromGitHub
-, llama-cpp
+, fetchpatch
+, buildEnv
+, linkFarm
+, overrideCC
+, makeWrapper
+, stdenv
+
+, cmake
+, gcc12
+, clblast
+, libdrm
+, rocmPackages
+, cudaPackages
+, linuxPackages
+, darwin
+
+, enableRocm ? false
+, enableCuda ? false
 }:
 
-buildGoModule rec {
+let
   pname = "ollama";
-  version = "0.1.17";
+  version = "0.1.24";
+
+  warnIfNotLinux = warning: (lib.warnIfNot stdenv.isLinux warning stdenv.isLinux);
+  gpuWarning = api: "building ollama with ${api} is only supported on linux; falling back to cpu";
+  rocmIsEnabled = enableRocm && (warnIfNotLinux (gpuWarning "rocm"));
+  cudaIsEnabled = enableCuda && (warnIfNotLinux (gpuWarning "cuda"));
+  enableLinuxGpu = rocmIsEnabled || cudaIsEnabled;
+
+  appleFrameworks = darwin.apple_sdk_11_0.frameworks;
+  metalFrameworks = [
+    appleFrameworks.Accelerate
+    appleFrameworks.Metal
+    appleFrameworks.MetalKit
+    appleFrameworks.MetalPerformanceShaders
+  ];
 
   src = fetchFromGitHub {
     owner = "jmorganca";
     repo = "ollama";
     rev = "v${version}";
-    hash = "sha256-eXukNn9Lu1hF19GEi7S7a96qktsjnmXCUp38gw+3MzY=";
+    hash = "sha256-GwZA1QUH8I8m2bGToIcMMaB5MBnioQP4+n1SauUJYP8=";
+    fetchSubmodules = true;
+  };
+
+  preparePatch = patch: hash: fetchpatch {
+    url = "file://${src}/llm/patches/${patch}";
+    inherit hash;
+    stripLen = 1;
+    extraPrefix = "llm/llama.cpp/";
+  };
+
+  inherit (lib) licenses platforms maintainers;
+  ollama = {
+    inherit pname version src;
+    vendorHash = "sha256-wXRbfnkbeXPTOalm7SFLvHQ9j46S/yLNbFy+OWNSamQ=";
+
+    nativeBuildInputs = [
+      cmake
+    ] ++ lib.optionals enableLinuxGpu [
+      makeWrapper
+    ] ++ lib.optionals stdenv.isDarwin
+      metalFrameworks;
+    patches = [
+      # remove uses of `git` in the `go generate` script
+      # instead use `patch` where necessary
+      ./remove-git.patch
+      # replace a hardcoded use of `g++` with `$CXX`
+      ./replace-gcc.patch
+
+      # ollama's patches of llama.cpp's example server
+      # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
+      (preparePatch "01-cache.diff" "sha256-PC4yN98hFvK+PEITiDihL8ki3bJuLVXrAm0CGf8GPJE=")
+      (preparePatch "02-shutdown.diff" "sha256-cElAp9Z9exxN964vB/YFuBhZoEcoAwGSMCnbh+l/V4Q=")
+    ];
+    postPatch = ''
+      # use a patch from the nix store in the `go generate` script
+      substituteInPlace llm/generate/gen_common.sh \
+        --subst-var-by cmakeIncludePatch '${./cmake-include.patch}'
+      # `ollama/llm/generate/gen_common.sh` -> "avoid duplicate main symbols when we link into the cgo binary"
+      substituteInPlace llm/llama.cpp/examples/server/server.cpp \
+        --replace-fail 'int main(' 'int __main('
+      # replace inaccurate version number with actual release version
+      substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
+    '';
+    preBuild = ''
+      export OLLAMA_SKIP_PATCHING=true
+      # build llama.cpp libraries for ollama
+      go generate ./...
+    '';
+
+    ldflags = [
+      "-s"
+      "-w"
+      "-X=github.com/jmorganca/ollama/version.Version=${version}"
+      "-X=github.com/jmorganca/ollama/server.mode=release"
+    ];
+
+    meta = {
+      description = "Get up and running with large language models locally";
+      homepage = "https://github.com/jmorganca/ollama";
+      license = licenses.mit;
+      platforms = platforms.unix;
+      mainProgram = "ollama";
+      maintainers = with maintainers; [ abysssol dit7ya elohmeier ];
+    };
   };
 
-  patches = [
-    # disable passing the deprecated gqa flag to llama-cpp-server
-    # see https://github.com/ggerganov/llama.cpp/issues/2975
-    ./disable-gqa.patch
-    # replace the call to the bundled llama-cpp-server with the one in the llama-cpp package
-    ./set-llamacpp-path.patch
-  ];
-
-  postPatch = ''
-    substituteInPlace llm/llama.go \
-      --subst-var-by llamaCppServer "${llama-cpp}/bin/llama-cpp-server"
-    substituteInPlace server/routes_test.go --replace "0.0.0" "${version}"
-  '';
-
-  vendorHash = "sha256-yGdCsTJtvdwHw21v0Ot6I8gxtccAvNzZyRu1T0vaius=";
-
-  ldflags = [
-    "-s"
-    "-w"
-    "-X=github.com/jmorganca/ollama/version.Version=${version}"
-    "-X=github.com/jmorganca/ollama/server.mode=release"
-  ];
-
-  meta = with lib; {
-    description = "Get up and running with large language models locally";
-    homepage = "https://github.com/jmorganca/ollama";
-    license = licenses.mit;
-    mainProgram = "ollama";
-    maintainers = with maintainers; [ dit7ya elohmeier ];
-    platforms = platforms.unix;
+  rocmClang = linkFarm "rocm-clang" {
+    llvm = rocmPackages.llvm.clang;
   };
-}
+  rocmPath = buildEnv {
+    name = "rocm-path";
+    paths = [
+      rocmPackages.rocm-device-libs
+      rocmClang
+    ];
+  };
+  rocmVars = {
+    ROCM_PATH = rocmPath;
+    CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
+  };
+
+  cudaToolkit = buildEnv {
+    name = "cuda-toolkit";
+    ignoreCollisions = true; # FIXME: find a cleaner way to do this without ignoring collisions
+    paths = [
+      cudaPackages.cudatoolkit
+      cudaPackages.cuda_cudart
+    ];
+  };
+  cudaVars = {
+    CUDA_LIB_DIR = "${cudaToolkit}/lib";
+    CUDACXX = "${cudaToolkit}/bin/nvcc";
+    CUDAToolkit_ROOT = cudaToolkit;
+  };
+
+  linuxGpuLibs = {
+    buildInputs = lib.optionals rocmIsEnabled [
+      rocmPackages.clr
+      rocmPackages.hipblas
+      rocmPackages.rocblas
+      rocmPackages.rocsolver
+      rocmPackages.rocsparse
+      libdrm
+    ] ++ lib.optionals cudaIsEnabled [
+      cudaPackages.cuda_cudart
+    ];
+  };
+
+  appleGpuLibs = { buildInputs = metalFrameworks; };
+
+  runtimeLibs = lib.optionals rocmIsEnabled [
+    rocmPackages.rocm-smi
+  ] ++ lib.optionals cudaIsEnabled [
+    linuxPackages.nvidia_x11
+  ];
+  runtimeLibWrapper = {
+    postFixup = ''
+      mv "$out/bin/${pname}" "$out/bin/.${pname}-unwrapped"
+      makeWrapper "$out/bin/.${pname}-unwrapped" "$out/bin/${pname}" \
+        --suffix LD_LIBRARY_PATH : '${lib.makeLibraryPath runtimeLibs}'
+    '';
+  };
+
+  goBuild =
+    if cudaIsEnabled then
+      buildGoModule.override { stdenv = overrideCC stdenv gcc12; }
+    else
+      buildGoModule;
+in
+goBuild (ollama
+  // (lib.optionalAttrs rocmIsEnabled rocmVars)
+  // (lib.optionalAttrs cudaIsEnabled cudaVars)
+  // (lib.optionalAttrs enableLinuxGpu linuxGpuLibs)
+  // (lib.optionalAttrs enableLinuxGpu runtimeLibWrapper)
+
+  // (lib.optionalAttrs stdenv.isDarwin appleGpuLibs))
diff --git a/pkgs/tools/misc/ollama/disable-gqa.patch b/pkgs/tools/misc/ollama/disable-gqa.patch
deleted file mode 100644
index b54440cd3d53..000000000000
--- a/pkgs/tools/misc/ollama/disable-gqa.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-diff --git a/llm/llama.go b/llm/llama.go
-index 0b460e9..b79e04a 100644
---- a/llm/llama.go
-+++ b/llm/llama.go
-@@ -299,10 +299,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
- 	params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
- 	}
- 
--	if opts.NumGQA > 0 {
--		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
--	}
--
- 	if len(adapters) > 0 {
- 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
- 		params = append(params, "--lora", adapters[0])
diff --git a/pkgs/tools/misc/ollama/remove-git.patch b/pkgs/tools/misc/ollama/remove-git.patch
new file mode 100644
index 000000000000..9ef4487051ff
--- /dev/null
+++ b/pkgs/tools/misc/ollama/remove-git.patch
@@ -0,0 +1,21 @@
+--- a/llm/generate/gen_common.sh
++++ b/llm/generate/gen_common.sh
+@@ -60,6 +60,9 @@
+ }
+ 
+ apply_patches() {
++    patch -i '@cmakeIncludePatch@' "${LLAMACPP_DIR}/examples/server/CMakeLists.txt"
++    return
++
+     # Wire up our CMakefile
+     if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
+         echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
+@@ -113,6 +116,8 @@
+ 
+ # Keep the local tree clean after we're done with the build
+ cleanup() {
++    return
++
+     (cd ${LLAMACPP_DIR}/examples/server/ && git checkout CMakeLists.txt server.cpp)
+ 
+     if [ -n "$(ls -A ../patches/*.diff)" ]; then
diff --git a/pkgs/tools/misc/ollama/replace-gcc.patch b/pkgs/tools/misc/ollama/replace-gcc.patch
new file mode 100644
index 000000000000..2ebd24e1dc3f
--- /dev/null
+++ b/pkgs/tools/misc/ollama/replace-gcc.patch
@@ -0,0 +1,11 @@
+--- a/llm/generate/gen_common.sh
++++ b/llm/generate/gen_common.sh
+@@ -86,7 +89,7 @@
+     cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
+     cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
+     mkdir -p ${BUILD_DIR}/lib/
+-    g++ -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
++    $CXX -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
+         ${GCC_ARCH} \
+         ${WHOLE_ARCHIVE} ${BUILD_DIR}/examples/server/libext_server.a ${NO_WHOLE_ARCHIVE} \
+         ${BUILD_DIR}/common/libcommon.a \
diff --git a/pkgs/tools/misc/ollama/set-llamacpp-path.patch b/pkgs/tools/misc/ollama/set-llamacpp-path.patch
deleted file mode 100644
index e90e552bab45..000000000000
--- a/pkgs/tools/misc/ollama/set-llamacpp-path.patch
+++ /dev/null
@@ -1,23 +0,0 @@
-diff --git a/llm/llama.go b/llm/llama.go
-index f23d5d8..6563550 100644
---- a/llm/llama.go
-+++ b/llm/llama.go
-@@ -25,7 +25,6 @@ import (
- 	"github.com/jmorganca/ollama/api"
- )
- 
--//go:embed llama.cpp/*/build/*/bin/*
- var llamaCppEmbed embed.FS
- 
- type ModelRunner struct {
-@@ -33,6 +32,10 @@ type ModelRunner struct {
- }
- 
- func chooseRunners(workDir, runnerType string) []ModelRunner {
-+	return []ModelRunner{
-+		{Path: "@llamaCppServer@"},
-+	}
-+
- 	buildPath := path.Join("llama.cpp", runnerType, "build")
- 	var runners []string
-
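
The new `enableRocm` and `enableCuda` arguments both default to `false`, so the CPU-only build remains the default and GPU support is opt-in at evaluation time. A minimal usage sketch in Nix, assuming the expression above is instantiated through `callPackage` as `pkgs.ollama` (the `ollama-rocm` attribute name is illustrative only):

# overlay.nix - request the ROCm build via the standard override mechanism;
# on non-linux platforms the warnIfNotLinux guard falls back to the cpu build
final: prev: {
  ollama-rocm = prev.ollama.override { enableRocm = true; };
}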