# NixOS module that extends every `systemd.services.<name>` entry with
# `confinement.*` options. When `confinement.enable` is set for a service, the
# service gets a `RootDirectory` with a tmpfs mounted on `/`, and a drop-in
# unit fragment is generated (and added to `systemd.packages`) that
# bind-mounts, read-only, every store path in the closure of
# `confinement.packages`.
{ config, pkgs, lib, ... }:

let
  # The submodule below binds its own `config` (the per-service configuration),
  # so keep a handle on the system-wide configuration under another name.
  toplevelConfig = config;
  inherit (lib) types;
  inherit (import ../system/boot/systemd-lib.nix {
    inherit config pkgs lib;
  }) mkPathSafeName;
in {
  options.systemd.services = lib.mkOption {
    type = types.attrsOf (types.submodule ({ name, config, ... }: {
      options.confinement.enable = lib.mkOption {
        type = types.bool;
        default = false;
        description = ''
          If set, all the required runtime store paths for this service are
          bind-mounted into a tmpfs-based <citerefentry>
            <refentrytitle>chroot</refentrytitle>
            <manvolnum>2</manvolnum>
          </citerefentry>.
        '';
      };

      options.confinement.packages = lib.mkOption {
        type = types.listOf (types.either types.str types.package);
        default = [];
        description = let
          # Renders a reference to one of the `serviceConfig` sub-options.
          mkScOption = optName: "<option>serviceConfig.${optName}</option>";
        in ''
          Additional packages or strings with context to add to the closure of
          the chroot. By default, this includes all the packages from the
          ${lib.concatMapStringsSep ", " mkScOption [
            "ExecReload" "ExecStartPost" "ExecStartPre" "ExecStop"
            "ExecStopPost"
          ]} and ${mkScOption "ExecStart"} options.
          Only the latter (${mkScOption "ExecStart"}) will be used if
          ${mkScOption "RootDirectoryStartOnly"} is enabled.
          Also, the store paths listed in <option>path</option> are not
          included in the closure as well as paths from other options except
          those listed above.
        '';
      };

      options.confinement.binSh = lib.mkOption {
        type = types.nullOr types.path;
        default = toplevelConfig.environment.binsh;
        defaultText = "config.environment.binsh";
        example = lib.literalExample "\${pkgs.dash}/bin/dash";
        description = ''
          The program to make available as /bin/sh inside the chroot. If this
          is set to null, no /bin/sh is provided at all.

          This is useful for some applications, which for example use the
          <citerefentry>
            <refentrytitle>system</refentrytitle>
            <manvolnum>3</manvolnum>
          </citerefentry> library function to execute commands.
        '';
      };

      options.confinement.mode = lib.mkOption {
        type = types.enum [ "full-apivfs" "chroot-only" ];
        default = "full-apivfs";
        description = ''
          The value full-apivfs (the default) sets up private /dev, /proc,
          /sys and /tmp file systems in a separate user name space.

          If this is set to chroot-only, only the file system name space is
          set up along with the call to <citerefentry>
            <refentrytitle>chroot</refentrytitle>
            <manvolnum>2</manvolnum>
          </citerefentry>.

          This doesn't cover network namespaces and is solely for file system
          level isolation.
        '';
      };

      # Per-service settings that only take effect when confinement is enabled.
      config = let
        rootName = "${mkPathSafeName name}-chroot";
        inherit (config.confinement) binSh;
      in lib.mkIf config.confinement.enable {
        serviceConfig = {
          # An empty directory in the store serves as the chroot root; the
          # tmpfs is then mounted on top of "/".
          RootDirectory = pkgs.runCommand rootName {} "mkdir \"$out\"";
          TemporaryFileSystem = "/";
          MountFlags = lib.mkDefault "private";
        } // lib.optionalAttrs (config.confinement.mode == "full-apivfs") {
          MountAPIVFS = true;
          PrivateDevices = true;
          PrivateTmp = true;
          PrivateUsers = true;
          ProtectControlGroups = true;
          ProtectKernelModules = true;
          ProtectKernelTunables = true;
        };

        # Seed the closure with whatever the Exec* options reference, plus the
        # /bin/sh program if one is configured.
        confinement.packages = let
          startOnly = config.serviceConfig.RootDirectoryStartOnly or false;
          execOpts = if startOnly then [ "ExecStart" ] else [
            "ExecReload" "ExecStart" "ExecStartPost" "ExecStartPre" "ExecStop"
            "ExecStopPost"
          ];
          # An Exec* option may hold either a single value or a list of
          # commands. Flatten so that list values contribute their elements
          # rather than a nested list, which the option type
          # (listOf (either str package)) would reject.
          execPkgs = lib.concatMap (opt: let
            isSet = config.serviceConfig ? ${opt};
          in lib.flatten (lib.optional isSet config.serviceConfig.${opt})) execOpts;
        in execPkgs ++ lib.optional (binSh != null) binSh;
      };
    }));
  };

  # Reject serviceConfig settings that are flagged as unsupported together
  # with confinement; only confined services contribute assertions.
  config.assertions = lib.concatLists (lib.mapAttrsToList (name: cfg: let
    whatOpt = optName: "The 'serviceConfig' option '${optName}' for"
                    + " service '${name}' is enabled in conjunction with"
                    + " 'confinement.enable'";
  in lib.optionals cfg.confinement.enable [
    # `or` (attribute selection with default) binds tighter than `!`; the
    # parentheses only make that explicit.
    { assertion = !(cfg.serviceConfig.RootDirectoryStartOnly or false);
      message = "${whatOpt "RootDirectoryStartOnly"}, but right now systemd"
              + " doesn't support restricting bind-mounts to 'ExecStart'."
              + " Please either define a separate service or find a way to run"
              + " commands other than ExecStart within the chroot.";
    }
    { assertion = !(cfg.serviceConfig.DynamicUser or false);
      message = "${whatOpt "DynamicUser"}. Please create a dedicated user via"
              + " the 'users.users' option instead as this combination is"
              + " currently not supported.";
    }
  ]) config.systemd.services);

  # For every confined service, build a package containing a
  # lib/systemd/system/<name>.service fragment with one BindReadOnlyPaths=
  # line per store path in the closure of `confinement.packages`.
  config.systemd.packages = lib.concatLists (lib.mapAttrsToList (name: cfg: let
    # Text file whose contents (and string context) reference all the
    # requested packages; it is the root for the closure computation.
    rootPaths = let
      contents = lib.concatStringsSep "\n" cfg.confinement.packages;
    in pkgs.writeText "${mkPathSafeName name}-string-contexts.txt" contents;

    chrootPaths = pkgs.runCommand "${mkPathSafeName name}-chroot-paths" {
      closureInfo = pkgs.closureInfo { inherit rootPaths; };
      serviceName = "${name}.service";
      # The helper file above is part of its own closure but does not need to
      # be visible inside the chroot.
      excludedPath = rootPaths;
    } ''
      mkdir -p "$out/lib/systemd/system"
      serviceFile="$out/lib/systemd/system/$serviceName"

      echo '[Service]' > "$serviceFile"

      # /bin/sh is special here, because the option value could contain a
      # symlink and we need to properly resolve it.
      ${lib.optionalString (cfg.confinement.binSh != null) ''
        binsh=${lib.escapeShellArg cfg.confinement.binSh}
        realprog="$(readlink -e "$binsh")"
        echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile"
      ''}

      # -r keeps backslashes in the input lines literal.
      while read -r storePath; do
        if [ -L "$storePath" ]; then
          # Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths,
          # so let's just bind-mount the target to that location.
          echo "BindReadOnlyPaths=$(readlink -e "$storePath"):$storePath"
        elif [ "$storePath" != "$excludedPath" ]; then
          echo "BindReadOnlyPaths=$storePath"
        fi
      done < "$closureInfo/store-paths" >> "$serviceFile"
    '';
  in lib.optional cfg.confinement.enable chrootPaths) config.systemd.services);
}