Merge pull request #289108 from abysssol/update-ollama-0.1.24
ollama: 0.1.17 -> 0.1.24
commit 36c6317517
maintainers/maintainer-list.nix

@@ -385,6 +385,13 @@
     githubId = 2526296;
     name = "Adrien Bustany";
   };
+  abysssol = {
+    name = "abysssol";
+    email = "abysssol@pm.me";
+    matrix = "@abysssol:tchncs.de";
+    github = "abysssol";
+    githubId = 76763323;
+  };
   acairncross = {
     email = "acairncross@gmail.com";
     github = "acairncross";
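Note: the new entry becomes an attribute of lib.maintainers, which is how the
updated ollama derivation below references it in meta.maintainers. A minimal
evaluable sketch (an assumption of this edit, not part of the commit; run with
`nix-instantiate --eval`):

  let lib = (import <nixpkgs> { }).lib;
  in lib.maintainers.abysssol.githubId  # evaluates to 76763323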
pkgs/tools/misc/ollama/cmake-include.patch (new file, 7 lines)
@@ -0,0 +1,7 @@
--- a/llm/llama.cpp/examples/server/CMakeLists.txt
+++ b/llm/llama.cpp/examples/server/CMakeLists.txt
@@ -11,3 +11,4 @@
 TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
 endif()
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
+include (../../../ext_server/CMakeLists.txt) # ollama
pkgs/tools/misc/ollama/default.nix

@@ -1,50 +1,182 @@
 { lib
 , buildGoModule
 , fetchFromGitHub
-, llama-cpp
+, fetchpatch
+, buildEnv
+, linkFarm
+, overrideCC
+, makeWrapper
+, stdenv
+
+, cmake
+, gcc12
+, clblast
+, libdrm
+, rocmPackages
+, cudaPackages
+, linuxPackages
+, darwin
+
+, enableRocm ? false
+, enableCuda ? false
 }:
 
-buildGoModule rec {
+let
   pname = "ollama";
-  version = "0.1.17";
+  version = "0.1.24";
+
+  warnIfNotLinux = warning: (lib.warnIfNot stdenv.isLinux warning stdenv.isLinux);
+  gpuWarning = api: "building ollama with ${api} is only supported on linux; falling back to cpu";
+  rocmIsEnabled = enableRocm && (warnIfNotLinux (gpuWarning "rocm"));
+  cudaIsEnabled = enableCuda && (warnIfNotLinux (gpuWarning "cuda"));
+  enableLinuxGpu = rocmIsEnabled || cudaIsEnabled;
+
+  appleFrameworks = darwin.apple_sdk_11_0.frameworks;
+  metalFrameworks = [
+    appleFrameworks.Accelerate
+    appleFrameworks.Metal
+    appleFrameworks.MetalKit
+    appleFrameworks.MetalPerformanceShaders
+  ];
 
   src = fetchFromGitHub {
     owner = "jmorganca";
     repo = "ollama";
     rev = "v${version}";
-    hash = "sha256-eXukNn9Lu1hF19GEi7S7a96qktsjnmXCUp38gw+3MzY=";
+    hash = "sha256-GwZA1QUH8I8m2bGToIcMMaB5MBnioQP4+n1SauUJYP8=";
+    fetchSubmodules = true;
   };
 
-  patches = [
-    # disable passing the deprecated gqa flag to llama-cpp-server
-    # see https://github.com/ggerganov/llama.cpp/issues/2975
-    ./disable-gqa.patch
-
-    # replace the call to the bundled llama-cpp-server with the one in the llama-cpp package
-    ./set-llamacpp-path.patch
-  ];
-
-  postPatch = ''
-    substituteInPlace llm/llama.go \
-      --subst-var-by llamaCppServer "${llama-cpp}/bin/llama-cpp-server"
-    substituteInPlace server/routes_test.go --replace "0.0.0" "${version}"
-  '';
-
-  vendorHash = "sha256-yGdCsTJtvdwHw21v0Ot6I8gxtccAvNzZyRu1T0vaius=";
-
-  ldflags = [
-    "-s"
-    "-w"
-    "-X=github.com/jmorganca/ollama/version.Version=${version}"
-    "-X=github.com/jmorganca/ollama/server.mode=release"
-  ];
-
-  meta = with lib; {
-    description = "Get up and running with large language models locally";
-    homepage = "https://github.com/jmorganca/ollama";
-    license = licenses.mit;
-    mainProgram = "ollama";
-    maintainers = with maintainers; [ dit7ya elohmeier ];
-    platforms = platforms.unix;
-  };
-}
+  preparePatch = patch: hash: fetchpatch {
+    url = "file://${src}/llm/patches/${patch}";
+    inherit hash;
+    stripLen = 1;
+    extraPrefix = "llm/llama.cpp/";
+  };
+
+  inherit (lib) licenses platforms maintainers;
+  ollama = {
+    inherit pname version src;
+    vendorHash = "sha256-wXRbfnkbeXPTOalm7SFLvHQ9j46S/yLNbFy+OWNSamQ=";
+
+    nativeBuildInputs = [
+      cmake
+    ] ++ lib.optionals enableLinuxGpu [
+      makeWrapper
+    ] ++ lib.optionals stdenv.isDarwin
+      metalFrameworks;
+
+    patches = [
+      # remove uses of `git` in the `go generate` script
+      # instead use `patch` where necessary
+      ./remove-git.patch
+      # replace a hardcoded use of `g++` with `$CXX`
+      ./replace-gcc.patch
+
+      # ollama's patches of llama.cpp's example server
+      # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
+      (preparePatch "01-cache.diff" "sha256-PC4yN98hFvK+PEITiDihL8ki3bJuLVXrAm0CGf8GPJE=")
+      (preparePatch "02-shutdown.diff" "sha256-cElAp9Z9exxN964vB/YFuBhZoEcoAwGSMCnbh+l/V4Q=")
+    ];
+    postPatch = ''
+      # use a patch from the nix store in the `go generate` script
+      substituteInPlace llm/generate/gen_common.sh \
+        --subst-var-by cmakeIncludePatch '${./cmake-include.patch}'
+      # `ollama/llm/generate/gen_common.sh` -> "avoid duplicate main symbols when we link into the cgo binary"
+      substituteInPlace llm/llama.cpp/examples/server/server.cpp \
+        --replace-fail 'int main(' 'int __main('
+      # replace inaccurate version number with actual release version
+      substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
+    '';
+    preBuild = ''
+      export OLLAMA_SKIP_PATCHING=true
+      # build llama.cpp libraries for ollama
+      go generate ./...
+    '';
+
+    ldflags = [
+      "-s"
+      "-w"
+      "-X=github.com/jmorganca/ollama/version.Version=${version}"
+      "-X=github.com/jmorganca/ollama/server.mode=release"
+    ];
+
+    meta = {
+      description = "Get up and running with large language models locally";
+      homepage = "https://github.com/jmorganca/ollama";
+      license = licenses.mit;
+      platforms = platforms.unix;
+      mainProgram = "ollama";
+      maintainers = with maintainers; [ abysssol dit7ya elohmeier ];
+    };
+  };
+
+  rocmClang = linkFarm "rocm-clang" {
+    llvm = rocmPackages.llvm.clang;
+  };
+  rocmPath = buildEnv {
+    name = "rocm-path";
+    paths = [
+      rocmPackages.rocm-device-libs
+      rocmClang
+    ];
+  };
+  rocmVars = {
+    ROCM_PATH = rocmPath;
+    CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
+  };
+
+  cudaToolkit = buildEnv {
+    name = "cuda-toolkit";
+    ignoreCollisions = true; # FIXME: find a cleaner way to do this without ignoring collisions
+    paths = [
+      cudaPackages.cudatoolkit
+      cudaPackages.cuda_cudart
+    ];
+  };
+  cudaVars = {
+    CUDA_LIB_DIR = "${cudaToolkit}/lib";
+    CUDACXX = "${cudaToolkit}/bin/nvcc";
+    CUDAToolkit_ROOT = cudaToolkit;
+  };
+
+  linuxGpuLibs = {
+    buildInputs = lib.optionals rocmIsEnabled [
+      rocmPackages.clr
+      rocmPackages.hipblas
+      rocmPackages.rocblas
+      rocmPackages.rocsolver
+      rocmPackages.rocsparse
+      libdrm
+    ] ++ lib.optionals cudaIsEnabled [
+      cudaPackages.cuda_cudart
+    ];
+  };
+
+  appleGpuLibs = { buildInputs = metalFrameworks; };
+
+  runtimeLibs = lib.optionals rocmIsEnabled [
+    rocmPackages.rocm-smi
+  ] ++ lib.optionals cudaIsEnabled [
+    linuxPackages.nvidia_x11
+  ];
+  runtimeLibWrapper = {
+    postFixup = ''
+      mv "$out/bin/${pname}" "$out/bin/.${pname}-unwrapped"
+      makeWrapper "$out/bin/.${pname}-unwrapped" "$out/bin/${pname}" \
+        --suffix LD_LIBRARY_PATH : '${lib.makeLibraryPath runtimeLibs}'
+    '';
+  };
+
+  goBuild =
+    if cudaIsEnabled then
+      buildGoModule.override { stdenv = overrideCC stdenv gcc12; }
+    else
+      buildGoModule;
+in
+goBuild (ollama
+  // (lib.optionalAttrs rocmIsEnabled rocmVars)
+  // (lib.optionalAttrs cudaIsEnabled cudaVars)
+  // (lib.optionalAttrs enableLinuxGpu linuxGpuLibs)
+  // (lib.optionalAttrs enableLinuxGpu runtimeLibWrapper)
+
+  // (lib.optionalAttrs stdenv.isDarwin appleGpuLibs))
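Note: the rewrite introduces two user-facing toggles, enableRocm and
enableCuda, both defaulting to false. A minimal usage sketch (an assumption of
this edit, not part of the commit; standard .override mechanics from
callPackage are assumed):

  # e.g. in an overlay or in environment.systemPackages
  (pkgs.ollama.override {
    enableRocm = true;  # AMD GPU offloading; linux only, otherwise falls back to cpu
  })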
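Note: the cpu fallback works because lib.warnIfNot cond msg val returns val
while emitting msg as an evaluation warning when cond is false, so
warnIfNotLinux yields stdenv.isLinux itself and the && short-circuits the GPU
path on non-linux systems. A self-contained sketch (an assumption of this
edit; run with `nix-instantiate --eval`):

  let
    lib = (import <nixpkgs> { }).lib;
    isLinux = false;  # pretend we are evaluating on darwin
    warnIfNotLinux = warning: lib.warnIfNot isLinux warning isLinux;
    rocmIsEnabled = true && warnIfNotLinux "falling back to cpu";
  in
  rocmIsEnabled  # evaluates to false, after printing the warning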
pkgs/tools/misc/ollama/disable-gqa.patch (deleted file)

@@ -1,15 +0,0 @@
diff --git a/llm/llama.go b/llm/llama.go
index 0b460e9..b79e04a 100644
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -299,10 +299,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
         params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
     }
 
-    if opts.NumGQA > 0 {
-        params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
-    }
-
     if len(adapters) > 0 {
         // TODO: applying multiple adapters is not supported by the llama.cpp server yet
         params = append(params, "--lora", adapters[0])
pkgs/tools/misc/ollama/remove-git.patch (new file, 21 lines)
@@ -0,0 +1,21 @@
--- a/llm/generate/gen_common.sh
+++ b/llm/generate/gen_common.sh
@@ -60,6 +60,9 @@
 }
 
 apply_patches() {
+    patch -i '@cmakeIncludePatch@' "${LLAMACPP_DIR}/examples/server/CMakeLists.txt"
+    return
+
     # Wire up our CMakefile
     if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
         echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
@@ -113,6 +116,8 @@
 
 # Keep the local tree clean after we're done with the build
 cleanup() {
+    return
+
     (cd ${LLAMACPP_DIR}/examples/server/ && git checkout CMakeLists.txt server.cpp)
 
     if [ -n "$(ls -A ../patches/*.diff)" ]; then
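Note: the '@cmakeIncludePatch@' placeholder applied above only resolves
because the derivation's postPatch substitutes it with '${./cmake-include.patch}',
and interpolating a path literal into a Nix string copies that file into the
store. A tiny sketch of the mechanism (an assumption of this edit; evaluate in
a directory containing cmake-include.patch):

  "${./cmake-include.patch}"
  # => "/nix/store/<hash>-cmake-include.patch"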
pkgs/tools/misc/ollama/replace-gcc.patch (new file, 11 lines)
@@ -0,0 +1,11 @@
--- a/llm/generate/gen_common.sh
+++ b/llm/generate/gen_common.sh
@@ -86,7 +89,7 @@
     cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
     cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
     mkdir -p ${BUILD_DIR}/lib/
-    g++ -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
+    $CXX -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
         ${GCC_ARCH} \
         ${WHOLE_ARCHIVE} ${BUILD_DIR}/examples/server/libext_server.a ${NO_WHOLE_ARCHIVE} \
         ${BUILD_DIR}/common/libcommon.a \
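Note: swapping the hardcoded g++ for $CXX matters because the cc-wrapper in a
Nix build exports CXX to point at the active stdenv's compiler; that is what
lets the derivation above pin gcc12 for CUDA via overrideCC. A runnable sketch
(an assumption of this edit, not part of the commit; build with `nix-build`):

  with import <nixpkgs> { };
  (overrideCC stdenv gcc12).mkDerivation {
    name = "cxx-check";
    dontUnpack = true;
    installPhase = ''
      echo "CXX resolves to: $CXX" > $out  # a gcc12 cc-wrapper path
    '';
  }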
pkgs/tools/misc/ollama/set-llamacpp-path.patch (deleted file)

@@ -1,23 +0,0 @@
diff --git a/llm/llama.go b/llm/llama.go
index f23d5d8..6563550 100644
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -25,7 +25,6 @@ import (
     "github.com/jmorganca/ollama/api"
 )
 
-//go:embed llama.cpp/*/build/*/bin/*
 var llamaCppEmbed embed.FS
 
 type ModelRunner struct {
@@ -33,6 +32,10 @@ type ModelRunner struct {
 }
 
 func chooseRunners(workDir, runnerType string) []ModelRunner {
+    return []ModelRunner{
+        {Path: "@llamaCppServer@"},
+    }
+
     buildPath := path.Join("llama.cpp", runnerType, "build")
     var runners []string