From 565f22d27aba3c53fcf4155dba639f2071d08ed6 Mon Sep 17 00:00:00 2001
From: lejonet
Date: Thu, 1 Mar 2018 12:47:13 +0100
Subject: [PATCH] nixos/ceph: init module (#35299)

All 5 daemon types can be enabled and configured through the module and
the module both creates the ceph.conf required but also creates and
enables specific services for each daemon, based on the systemd service
files that upstream provides.
---
 lib/maintainers.nix                           |   1 +
 nixos/modules/misc/ids.nix                    |   2 +
 nixos/modules/module-list.nix                 |   1 +
 .../services/network-filesystems/ceph.nix     | 386 ++++++++++++++++++
 nixos/release.nix                             |   1 +
 nixos/tests/ceph.nix                          | 140 +++++++
 6 files changed, 531 insertions(+)
 create mode 100644 nixos/modules/services/network-filesystems/ceph.nix
 create mode 100644 nixos/tests/ceph.nix

diff --git a/lib/maintainers.nix b/lib/maintainers.nix
index a2aa88a807de..71b2a7a08bba 100644
--- a/lib/maintainers.nix
+++ b/lib/maintainers.nix
@@ -380,6 +380,7 @@
   ledif = "Adam Fidel ";
   leemachin = "Lee Machin ";
   leenaars = "Michiel Leenaars ";
+  lejonet = "Daniel Kuehn ";
   leonardoce = "Leonardo Cecchi ";
   lethalman = "Luca Bruno ";
   lewo = "Antoine Eiche ";
diff --git a/nixos/modules/misc/ids.nix b/nixos/modules/misc/ids.nix
index c0c6a6ef9244..8d775ffc82d3 100644
--- a/nixos/modules/misc/ids.nix
+++ b/nixos/modules/misc/ids.nix
@@ -304,6 +304,7 @@
       mighttpd2 = 285;
       hass = 286;
       monero = 287;
+      ceph = 288;
 
       # When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399!
 
@@ -576,6 +577,7 @@
       mighttpd2 = 285;
       hass = 286;
       monero = 287;
+      ceph = 288;
 
       # When adding a gid, make sure it doesn't match an existing
       # uid. Users and groups with the same name should have equal
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 13a32b968dcb..3bb65c6b295a 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -439,6 +439,7 @@
   ./services/network-filesystems/u9fs.nix
   ./services/network-filesystems/yandex-disk.nix
   ./services/network-filesystems/xtreemfs.nix
+  ./services/network-filesystems/ceph.nix
   ./services/networking/amuled.nix
   ./services/networking/aria2.nix
   ./services/networking/asterisk.nix
diff --git a/nixos/modules/services/network-filesystems/ceph.nix b/nixos/modules/services/network-filesystems/ceph.nix
new file mode 100644
index 000000000000..5de8ae79a246
--- /dev/null
+++ b/nixos/modules/services/network-filesystems/ceph.nix
@@ -0,0 +1,386 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  ceph = pkgs.ceph;
+  cfg = config.services.ceph;
+  # function that translates "camelCaseOptions" to "camel case options", credits to tilpner in #nixos@freenode
+  translateOption = replaceStrings upperChars (map (s: " ${s}") lowerChars);
+  # Builds one "ceph-<daemonType>-<id>" systemd service per entry of `daemons` and
+  # merges them; extraServiceConfig is layered over the default serviceConfig.
+  generateDaemonList = (daemonType: daemons: extraServiceConfig:
+    mkMerge (
+      map (daemon:
+        { "ceph-${daemonType}-${daemon}" = generateServiceFile daemonType daemon cfg.global.clusterName ceph extraServiceConfig; }
+      ) daemons
+    )
+  );
+  # Service definition for a single daemon. rgw daemons run the radosgw binary
+  # with id "client.<daemonId>"; every other type runs ceph-<daemonType>.
+  generateServiceFile = (daemonType: daemonId: clusterName: ceph: extraServiceConfig: {
+    enable = true;
+    description = "Ceph ${builtins.replaceStrings lowerChars upperChars daemonType} daemon ${daemonId}";
+    after = [ "network-online.target" "local-fs.target" "time-sync.target" ] ++ optional (daemonType == "osd") "ceph-mon.target";
+    wants = [ "network-online.target" "local-fs.target" "time-sync.target" ];
+    partOf = [ "ceph-${daemonType}.target" ];
+    wantedBy = [ "ceph-${daemonType}.target" ];
+
+    serviceConfig = {
+      LimitNOFILE = 1048576;
+      LimitNPROC = 1048576;
+      Environment = "CLUSTER=${clusterName}";
+      ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
+      PrivateDevices = "yes";
+      PrivateTmp = "true";
+      ProtectHome = "true";
+      ProtectSystem = "full";
+      Restart = "on-failure";
+      StartLimitBurst = "5";
+      StartLimitInterval = "30min";
+      ExecStart = "${ceph.out}/bin/${if daemonType == "rgw" then "radosgw" else "ceph-${daemonType}"} -f --cluster ${clusterName} --id ${if daemonType == "rgw" then "client.${daemonId}" else daemonId} --setuser ceph --setgroup ceph";
+    } // extraServiceConfig
+      // optionalAttrs (daemonType == "osd") {
+        ExecStartPre = "${ceph.out}/libexec/ceph/ceph-osd-prestart.sh --id ${daemonId} --cluster ${clusterName}";
+        # OSDs need access to their backing block devices, so undo the
+        # PrivateDevices default set above.
+        PrivateDevices = "no";
+      };
+  } // optionalAttrs (builtins.elem daemonType [ "mds" "mon" "rgw" "mgr" ]) { preStart = ''
+        daemonPath="/var/lib/ceph/${if daemonType == "rgw" then "radosgw" else daemonType}/${clusterName}-${daemonId}"
+        if [ ! -d ''$daemonPath ]; then
+          mkdir -m 755 -p ''$daemonPath
+          chown -R ceph:ceph ''$daemonPath
+        fi
+      '';
+    } // optionalAttrs (daemonType == "osd") { path = [ pkgs.getopt ]; }
+  );
+  # Per-daemon-type target ("ceph-<daemonType>") that is part of, and ordered before, ceph.target.
+  generateTargetFile = (daemonType:
+    {
+      "ceph-${daemonType}" = {
+        description = "Ceph target allowing to start/stop all ceph-${daemonType} services at once";
+        partOf = [ "ceph.target" ];
+        before = [ "ceph.target" ];
+      };
+    }
+  );
+in
+{
+  options.services.ceph = {
+    # Ceph has a monolithic configuration file but different sections for
+    # each daemon, a separate client section and a global section
+    enable = mkEnableOption "Ceph global configuration";
+
+    global = {
+      fsid = mkOption {
+        type = types.str;
+        example = ''
+          433a2193-4f8a-47a0-95d2-209d7ca2cca5
+        '';
+        description = ''
+          Filesystem ID, a generated uuid, its must be generated and set before
+          attempting to start a cluster
+        '';
+      };
+
+      clusterName = mkOption {
+        type = types.str;
+        default = "ceph";
+        description = ''
+          Name of cluster
+        '';
+      };
+
+      monInitialMembers = mkOption {
+        type = with types; nullOr commas;
+        default = null;
+        example = ''
+          node0, node1, node2
+        '';
+        description = ''
+          List of hosts that will be used as monitors at startup.
+        '';
+      };
+
+      monHost = mkOption {
+        type = with types; nullOr commas;
+        default = null;
+        example = ''
+          10.10.0.1, 10.10.0.2, 10.10.0.3
+        '';
+        description = ''
+          List of hostname shortnames/IP addresses of the initial monitors.
+        '';
+      };
+
+      maxOpenFiles = mkOption {
+        type = types.int;
+        default = 131072;
+        description = ''
+          Max open files for each OSD daemon.
+        '';
+      };
+
+      authClusterRequired = mkOption {
+        type = types.enum [ "cephx" "none" ];
+        default = "cephx";
+        description = ''
+          Enables requiring daemons to authenticate with eachother in the cluster.
+        '';
+      };
+
+      authServiceRequired = mkOption {
+        type = types.enum [ "cephx" "none" ];
+        default = "cephx";
+        description = ''
+          Enables requiring clients to authenticate with the cluster to access services in the cluster (e.g. radosgw, mds or osd).
+        '';
+      };
+
+      authClientRequired = mkOption {
+        type = types.enum [ "cephx" "none" ];
+        default = "cephx";
+        description = ''
+          Enables requiring the cluster to authenticate itself to the client.
+        '';
+      };
+
+      publicNetwork = mkOption {
+        type = with types; nullOr commas;
+        default = null;
+        example = ''
+          10.20.0.0/24, 192.168.1.0/24
+        '';
+        description = ''
+          A comma-separated list of subnets that will be used as public networks in the cluster.
+        '';
+      };
+
+      clusterNetwork = mkOption {
+        type = with types; nullOr commas;
+        default = null;
+        example = ''
+          10.10.0.0/24, 192.168.0.0/24
+        '';
+        description = ''
+          A comma-separated list of subnets that will be used as cluster networks in the cluster.
+        '';
+      };
+    };
+
+    mgr = {
+      enable = mkEnableOption "Ceph MGR daemon";
+      daemons = mkOption {
+        type = with types; listOf str;
+        default = [];
+        example = ''
+          [ "name1" "name2" ];
+        '';
+        description = ''
+          A list of names for manager daemons that should have a service created. The names correspond
+          to the id part in ceph i.e. [ "name1" ] would result in mgr.name1
+        '';
+      };
+      extraConfig = mkOption {
+        type = with types; attrsOf str;
+        default = {};
+        description = ''
+          Extra configuration to add to the global section for manager daemons.
+        '';
+      };
+    };
+
+    mon = {
+      enable = mkEnableOption "Ceph MON daemon";
+      daemons = mkOption {
+        type = with types; listOf str;
+        default = [];
+        example = ''
+          [ "name1" "name2" ];
+        '';
+        description = ''
+          A list of monitor daemons that should have a service created. The names correspond
+          to the id part in ceph i.e. [ "name1" ] would result in mon.name1
+        '';
+      };
+      extraConfig = mkOption {
+        type = with types; attrsOf str;
+        default = {};
+        description = ''
+          Extra configuration to add to the monitor section.
+        '';
+      };
+    };
+
+    osd = {
+      enable = mkEnableOption "Ceph OSD daemon";
+      daemons = mkOption {
+        type = with types; listOf str;
+        default = [];
+        example = ''
+          [ "name1" "name2" ];
+        '';
+        description = ''
+          A list of OSD daemons that should have a service created. The names correspond
+          to the id part in ceph i.e. [ "name1" ] would result in osd.name1
+        '';
+      };
+      extraConfig = mkOption {
+        type = with types; attrsOf str;
+        default = {
+          "osd journal size" = "10000";
+          "osd pool default size" = "3";
+          "osd pool default min size" = "2";
+          "osd pool default pg num" = "200";
+          "osd pool default pgp num" = "200";
+          "osd crush chooseleaf type" = "1";
+        };
+        description = ''
+          Extra configuration to add to the OSD section.
+        '';
+      };
+    };
+
+    mds = {
+      enable = mkEnableOption "Ceph MDS daemon";
+      daemons = mkOption {
+        type = with types; listOf str;
+        default = [];
+        example = ''
+          [ "name1" "name2" ];
+        '';
+        description = ''
+          A list of metadata service daemons that should have a service created. The names correspond
+          to the id part in ceph i.e. [ "name1" ] would result in mds.name1
+        '';
+      };
+      extraConfig = mkOption {
+        type = with types; attrsOf str;
+        default = {};
+        description = ''
+          Extra configuration to add to the MDS section.
+        '';
+      };
+    };
+
+    rgw = {
+      enable = mkEnableOption "Ceph RadosGW daemon";
+      daemons = mkOption {
+        type = with types; listOf str;
+        default = [];
+        example = ''
+          [ "name1" "name2" ];
+        '';
+        description = ''
+          A list of rados gateway daemons that should have a service created. The names correspond
+          to the id part in ceph i.e. [ "name1" ] would result in client.name1, radosgw daemons
+          aren't daemons to cluster in the sense that OSD, MGR or MON daemons are. They are simply
+          daemons, from ceph, that uses the cluster as a backend.
+        '';
+      };
+    };
+
+    client = {
+      enable = mkEnableOption "Ceph client configuration";
+      extraConfig = mkOption {
+        # Each attribute is a whole ceph.conf section (section name -> settings),
+        # because the set is merged into the top level of the generated INI.
+        type = with types; attrsOf (attrsOf str);
+        default = {};
+        example = ''
+          {
+            # This would create a section for a radosgw daemon named node0 and related
+            # configuration for it
+            "client.radosgw.node0" = { "some config option" = "true"; };
+          };
+        '';
+        description = ''
+          Extra configuration to add to the client section. Configuration for rados gateways
+          would be added here, with their own sections, see example.
+        '';
+      };
+    };
+  };
+
+  config = mkIf config.services.ceph.enable {
+    assertions = [
+      { assertion = cfg.global.fsid != "";
+        message = "fsid has to be set to a valid uuid for the cluster to function";
+      }
+      { assertion = cfg.mgr.enable == true;
+        message = "ceph 12.x requires atleast 1 MGR daemon enabled for the cluster to function";
+      }
+      { assertion = cfg.mon.enable == true -> cfg.mon.daemons != [];
+        message = "have to set id of atleast one MON if you're going to enable Monitor";
+      }
+      { assertion = cfg.mds.enable == true -> cfg.mds.daemons != [];
+        message = "have to set id of atleast one MDS if you're going to enable Metadata Service";
+      }
+      { assertion = cfg.osd.enable == true -> cfg.osd.daemons != [];
+        message = "have to set id of atleast one OSD if you're going to enable OSD";
+      }
+      { assertion = cfg.mgr.enable == true -> cfg.mgr.daemons != [];
+        message = "have to set id of atleast one MGR if you're going to enable MGR";
+      }
+    ];
+
+    warnings = optional (cfg.global.monInitialMembers == null)
+      ''Not setting up a list of members in monInitialMembers requires that you set the host variable for each mon daemon or else the cluster won't function'';
+
+    environment.etc."ceph/ceph.conf".text = let
+      # Translate camelCaseOptions to the expected camel case option for ceph.conf
+      translatedGlobalConfig = mapAttrs' (name: value: nameValuePair (translateOption name) value) cfg.global;
+      # Merge the extraConfig set for mgr daemons, as mgr don't have their own section
+      globalAndMgrConfig = translatedGlobalConfig // optionalAttrs cfg.mgr.enable cfg.mgr.extraConfig;
+      # Remove all name-value pairs with null values from the attribute set to avoid making empty sections in the ceph.conf
+      # This second translateOption pass also covers the mgr.extraConfig keys; already-translated global keys contain no upper-case letters and pass through unchanged.
+      globalConfig = mapAttrs' (name: value: nameValuePair (translateOption name) value) (filterAttrs (name: value: value != null) globalAndMgrConfig);
+      totalConfig = {
+          "global" = globalConfig;
+        } // optionalAttrs (cfg.mon.enable && cfg.mon.extraConfig != {}) { "mon" = cfg.mon.extraConfig; }
+          // optionalAttrs (cfg.mds.enable && cfg.mds.extraConfig != {}) { "mds" = cfg.mds.extraConfig; }
+          // optionalAttrs (cfg.osd.enable && cfg.osd.extraConfig != {}) { "osd" = cfg.osd.extraConfig; }
+          // optionalAttrs (cfg.client.enable && cfg.client.extraConfig != {}) cfg.client.extraConfig;
+      in
+        generators.toINI {} totalConfig;
+
+    users.extraUsers = singleton {
+      name = "ceph";
+      uid = config.ids.uids.ceph;
+      # Make "ceph" the primary group so it matches --setgroup ceph and the ceph:ceph ownership used above.
+      group = "ceph";
+      description = "Ceph daemon user";
+    };
+
+    users.extraGroups = singleton {
+      name = "ceph";
+      gid = config.ids.gids.ceph;
+    };
+
+    systemd.services = let
+      services = []
+        ++ optional cfg.mon.enable (generateDaemonList "mon" cfg.mon.daemons { RestartSec = "10"; })
+        ++ optional cfg.mds.enable (generateDaemonList "mds" cfg.mds.daemons { StartLimitBurst = "3"; })
+        ++ optional cfg.osd.enable (generateDaemonList "osd" cfg.osd.daemons { StartLimitBurst = "30"; RestartSec = "20s"; })
+        ++ optional cfg.rgw.enable (generateDaemonList "rgw" cfg.rgw.daemons { })
+        ++ optional cfg.mgr.enable (generateDaemonList "mgr" cfg.mgr.daemons { StartLimitBurst = "3"; });
+      in
+        mkMerge services;
+
+    systemd.targets = let
+      targets = [
+        { "ceph" = { description = "Ceph target allowing to start/stop all ceph service instances at once"; }; }
+      ] ++ optional cfg.mon.enable (generateTargetFile "mon")
+        ++ optional cfg.mds.enable (generateTargetFile "mds")
+        ++ optional cfg.osd.enable (generateTargetFile "osd")
+        ++ optional cfg.rgw.enable (generateTargetFile "rgw")
+        ++ optional cfg.mgr.enable (generateTargetFile "mgr");
+      in
+        mkMerge targets;
+
+    systemd.tmpfiles.rules = [
+      "d /run/ceph 0770 ceph ceph -"
+    ];
+  };
+}
diff --git a/nixos/release.nix b/nixos/release.nix
index 558bbbf9a9d4..473b11313bef 100644
--- a/nixos/release.nix
+++ b/nixos/release.nix
@@ -230,6 +230,7 @@ in rec {
   tests.borgbackup = callTest tests/borgbackup.nix {};
   tests.buildbot = callTest tests/buildbot.nix {};
   tests.cadvisor = callTestOnTheseSystems ["x86_64-linux"] tests/cadvisor.nix {};
+  tests.ceph = callTestOnTheseSystems ["x86_64-linux"] tests/ceph.nix {};
   tests.chromium = (callSubTestsOnTheseSystems ["x86_64-linux"] tests/chromium.nix {}).stable;
   tests.cjdns = callTest tests/cjdns.nix {};
   tests.cloud-init = callTest tests/cloud-init.nix {};
diff --git a/nixos/tests/ceph.nix b/nixos/tests/ceph.nix
new file mode 100644
index 000000000000..b9993062c079
--- /dev/null
+++ b/nixos/tests/ceph.nix
@@ -0,0 +1,140 @@
+import ./make-test.nix ({pkgs, ...}: rec {
+  name = "All-in-one-basic-ceph-cluster";
+  meta = with pkgs.stdenv.lib.maintainers; {
+    maintainers = [ lejonet ];
+  };
+
+  nodes = {
+    aio = { config, pkgs, ... }: {
+      virtualisation = {
+        emptyDiskImages = [ 20480 20480 ];
+        vlans = [ 1 ];
+      };
+
+      networking = {
+        firewall.allowPing = true;
+        useDHCP = false;
+        interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
+          { address = "192.168.1.1"; prefixLength = 24; }
+        ];
+      };
+
+      environment.systemPackages = with pkgs; [
+        bash
+        sudo
+        ceph
+        xfsprogs
+      ];
+      nixpkgs.config.packageOverrides = super: {
+        ceph = super.ceph.override({ nss = super.nss; libxfs = super.libxfs; libaio = super.libaio; jemalloc = super.jemalloc; });
+      };
+
+      boot.kernelModules = [ "xfs" ];
+
+      services.ceph.enable = true;
+      services.ceph.global = {
+        fsid = "066ae264-2a5d-4729-8001-6ad265f50b03";
+        monInitialMembers = "aio";
+        monHost = "192.168.1.1";
+      };
+
+      services.ceph.mon = {
+        enable = true;
+        daemons = [ "aio" ];
+      };
+
+      services.ceph.mgr = {
+        enable = true;
+        daemons = [ "aio" ];
+      };
+
+      services.ceph.osd = {
+        enable = true;
+        daemons = [ "0" "1" ];
+      };
+    };
+  };
+
+  testScript = { nodes, ... }: ''
+    startAll;
+
+    $aio->waitForUnit("network.target");
+
+    # Create the ceph-related directories
+    $aio->mustSucceed(
+      "mkdir -p /var/lib/ceph/mgr/ceph-aio/",
+      "mkdir -p /var/lib/ceph/mon/ceph-aio/",
+      "mkdir -p /var/lib/ceph/osd/ceph-{0..1}/",
+      "chown ceph:ceph -R /var/lib/ceph/"
+    );
+
+    # Bootstrap ceph-mon daemon
+    $aio->mustSucceed(
+      "mkdir -p /var/lib/ceph/bootstrap-osd && chown ceph:ceph /var/lib/ceph/bootstrap-osd",
+      "sudo -u ceph ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'",
+      "ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'",
+      "ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring",
+      "monmaptool --create --add aio 192.168.1.1 --fsid 066ae264-2a5d-4729-8001-6ad265f50b03 /tmp/monmap",
+      "sudo -u ceph ceph-mon --mkfs -i aio --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring",
+      "touch /var/lib/ceph/mon/ceph-aio/done",
+      "systemctl start ceph-mon-aio"
+    );
+    $aio->waitForUnit("ceph-mon-aio");
+
+    # Can't check ceph status until a mon is up
+    $aio->succeed("ceph -s | grep 'mon: 1 daemons'");
+
+    # Start the ceph-mgr daemon, it has no deps and hardly any setup
+    $aio->mustSucceed(
+      "ceph auth get-or-create mgr.aio mon 'allow profile mgr' osd 'allow *' mds 'allow *' > /var/lib/ceph/mgr/ceph-aio/keyring",
+      "systemctl start ceph-mgr-aio"
+    );
+    $aio->waitForUnit("ceph-mgr-aio");
+    $aio->waitUntilSucceeds("ceph -s | grep 'quorum aio'");
+
+    # Bootstrap both OSDs
+    $aio->mustSucceed(
+      "mkfs.xfs /dev/vdb",
+      "mkfs.xfs /dev/vdc",
+      "mount /dev/vdb /var/lib/ceph/osd/ceph-0",
+      "mount /dev/vdc /var/lib/ceph/osd/ceph-1",
+      "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-0/keyring --name osd.0 --add-key AQBCEJNa3s8nHRAANvdsr93KqzBznuIWm2gOGg==",
+      "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-1/keyring --name osd.1 --add-key AQBEEJNac00kExAAXEgy943BGyOpVH1LLlHafQ==",
+      "echo '{\"cephx_secret\": \"AQBCEJNa3s8nHRAANvdsr93KqzBznuIWm2gOGg==\"}' | ceph osd new 55ba2294-3e24-478f-bee0-9dca4c231dd9 -i -",
+      "echo '{\"cephx_secret\": \"AQBEEJNac00kExAAXEgy943BGyOpVH1LLlHafQ==\"}' | ceph osd new 5e97a838-85b6-43b0-8950-cb56d554d1e5 -i -"
+    );
+
+    # Initialize the OSDs with regular filestore
+    $aio->mustSucceed(
+      "ceph-osd -i 0 --mkfs --osd-uuid 55ba2294-3e24-478f-bee0-9dca4c231dd9",
+      "ceph-osd -i 1 --mkfs --osd-uuid 5e97a838-85b6-43b0-8950-cb56d554d1e5",
+      "chown -R ceph:ceph /var/lib/ceph/osd",
+      "systemctl start ceph-osd-0",
+      "systemctl start ceph-osd-1"
+    );
+
+    $aio->waitUntilSucceeds("ceph osd stat | grep '2 osds: 2 up, 2 in'");
+    $aio->waitUntilSucceeds("ceph -s | grep 'mgr: aio(active)'");
+    $aio->waitUntilSucceeds("ceph -s | grep 'HEALTH_OK'");
+
+    $aio->mustSucceed(
+      "ceph osd pool create aio-test 100 100",
+      "ceph osd pool ls | grep 'aio-test'",
+      "ceph osd pool rename aio-test aio-other-test",
+      "ceph osd pool ls | grep 'aio-other-test'",
+      "ceph -s | grep '1 pools, 100 pgs'",
+      "ceph osd getcrushmap -o crush",
+      "crushtool -d crush -o decrushed",
+      "sed 's/step chooseleaf firstn 0 type host/step chooseleaf firstn 0 type osd/' decrushed > modcrush",
+      "crushtool -c modcrush -o recrushed",
+      "ceph osd setcrushmap -i recrushed",
+      "ceph osd pool set aio-other-test size 2"
+    );
+    $aio->waitUntilSucceeds("ceph -s | grep 'HEALTH_OK'");
+    $aio->waitUntilSucceeds("ceph -s | grep '100 active+clean'");
+    $aio->mustFail(
+      "ceph osd pool ls | grep 'aio-test'",
+      "ceph osd pool delete aio-other-test aio-other-test --yes-i-really-really-mean-it"
+    );
+  '';
+})