From d2ce318bd4676a45acb9c0cb3836c0d230648ef3 Mon Sep 17 00:00:00 2001 From: Paul-Henri Froidmont Date: Mon, 8 Aug 2022 14:25:30 +0200 Subject: [PATCH] nixos/patroni: init --- .../from_md/release-notes/rl-2211.section.xml | 8 + .../manual/release-notes/rl-2211.section.md | 3 + nixos/modules/module-list.nix | 1 + .../services/cluster/patroni/default.nix | 268 ++++++++++++++++++ nixos/tests/all-tests.nix | 1 + nixos/tests/patroni.nix | 204 +++++++++++++ pkgs/servers/sql/patroni/default.nix | 5 + 7 files changed, 490 insertions(+) create mode 100644 nixos/modules/services/cluster/patroni/default.nix create mode 100644 nixos/tests/patroni.nix diff --git a/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml index 0d0e2647a070..47f8fbb3abc5 100644 --- a/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml +++ b/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml @@ -191,6 +191,14 @@ services.tempo. + + + Patroni, + a template for PostgreSQL HA with ZooKeeper, etcd or Consul. + Available as + services.patroni. + +
diff --git a/nixos/doc/manual/release-notes/rl-2211.section.md b/nixos/doc/manual/release-notes/rl-2211.section.md index c560da8df27a..2fd8b1bbe753 100644 --- a/nixos/doc/manual/release-notes/rl-2211.section.md +++ b/nixos/doc/manual/release-notes/rl-2211.section.md @@ -75,6 +75,9 @@ In addition to numerous new and upgraded packages, this release has the followin - [Grafana Tempo](https://www.grafana.com/oss/tempo/), a distributed tracing store. Available as [services.tempo](#opt-services.tempo.enable). +- [Patroni](https://github.com/zalando/patroni), a template for PostgreSQL HA with ZooKeeper, etcd or Consul. +Available as [services.patroni](options.html#opt-services.patroni.enable). + ## Backward Incompatibilities {#sec-release-22.11-incompatibilities} diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index 837bc7635a06..c1f435ec569c 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -329,6 +329,7 @@ ./services/cluster/kubernetes/proxy.nix ./services/cluster/kubernetes/scheduler.nix ./services/cluster/pacemaker/default.nix + ./services/cluster/patroni/default.nix ./services/cluster/spark/default.nix ./services/computing/boinc/client.nix ./services/computing/foldingathome/client.nix diff --git a/nixos/modules/services/cluster/patroni/default.nix b/nixos/modules/services/cluster/patroni/default.nix new file mode 100644 index 000000000000..1685351e48d3 --- /dev/null +++ b/nixos/modules/services/cluster/patroni/default.nix @@ -0,0 +1,268 @@ +{ config, lib, pkgs, ... 
}: +with lib; +let + cfg = config.services.patroni; + defaultUser = "patroni"; + defaultGroup = "patroni"; + format = pkgs.formats.yaml { }; + + # boto doesn't support Python 3.10 yet, so Patroni is built against the Python 3.9 package set + patroni = pkgs.patroni.override { pythonPackages = pkgs.python39Packages; }; + + configFileName = "patroni-${cfg.scope}-${cfg.name}.yaml"; + configFile = format.generate configFileName cfg.settings; +in +{ + options.services.patroni = { + + enable = mkEnableOption "Patroni"; + + postgresqlPackage = mkOption { + type = types.package; + example = literalExpression "pkgs.postgresql_14"; + description = mdDoc '' + PostgreSQL package to use. + Plugins can be enabled like this `pkgs.postgresql_14.withPackages (p: [ p.pg_safeupdate p.postgis ])`. + ''; + }; + + postgresqlDataDir = mkOption { + type = types.path; + defaultText = literalExpression ''"/var/lib/postgresql/''${config.services.patroni.postgresqlPackage.psqlSchema}"''; + example = "/var/lib/postgresql/14"; + default = "/var/lib/postgresql/${cfg.postgresqlPackage.psqlSchema}"; + description = mdDoc '' + The data directory for PostgreSQL. If both this option and `dataDir` are left at their default values + this directory will automatically be created before the PostgreSQL server starts, otherwise + the sysadmin is responsible for ensuring the directory exists with appropriate ownership + and permissions. + ''; + }; + + postgresqlPort = mkOption { + type = types.port; + default = 5432; + description = mdDoc '' + The port on which PostgreSQL listens. + ''; + }; + + user = mkOption { + type = types.str; + default = defaultUser; + example = "postgres"; + description = mdDoc '' + The user for the service. If left as the default value this user will automatically be created, + otherwise the sysadmin is responsible for ensuring the user exists. + ''; + }; + + group = mkOption { + type = types.str; + default = defaultGroup; + example = "postgres"; + description = mdDoc '' + The group for the service. 
If left as the default value this group will automatically be created, + otherwise the sysadmin is responsible for ensuring the group exists. + ''; + }; + + dataDir = mkOption { + type = types.path; + default = "/var/lib/patroni"; + description = mdDoc '' + Folder where Patroni data will be written, used by Raft as well if enabled. + ''; + }; + + scope = mkOption { + type = types.str; + example = "cluster1"; + description = mdDoc '' + Cluster name. + ''; + }; + + name = mkOption { + type = types.str; + example = "node1"; + description = mdDoc '' + The name of the host. Must be unique for the cluster. + ''; + }; + + namespace = mkOption { + type = types.str; + default = "/service"; + description = mdDoc '' + Path within the configuration store where Patroni will keep information about the cluster. + ''; + }; + + nodeIp = mkOption { + type = types.str; + example = "192.168.1.1"; + description = mdDoc '' + IP address of this node. + ''; + }; + + otherNodesIps = mkOption { + type = types.listOf types.str; + example = [ "192.168.1.2" "192.168.1.3" ]; + description = mdDoc '' + IP addresses of the other nodes. + ''; + }; + + restApiPort = mkOption { + type = types.port; + default = 8008; + description = mdDoc '' + The port on which Patroni's REST API listens. + ''; + }; + + raft = mkOption { + type = types.bool; + default = false; + description = mdDoc '' + This will configure Patroni to use its own RAFT implementation instead of using a dedicated DCS. + ''; + }; + + raftPort = mkOption { + type = types.port; + default = 5010; + description = mdDoc '' + The port on which RAFT listens. + ''; + }; + + softwareWatchdog = mkOption { + type = types.bool; + default = false; + description = mdDoc '' + This will configure Patroni to use the software watchdog built into the Linux kernel + as described in the [documentation](https://patroni.readthedocs.io/en/latest/watchdog.html#setting-up-software-watchdog-on-linux). 
+ ''; + }; + + settings = mkOption { + type = format.type; + default = { }; + description = mdDoc '' + The primary patroni configuration. See the [documentation](https://patroni.readthedocs.io/en/latest/SETTINGS.html) + for possible values. + Secrets should be passed in by using the `environmentFiles` option. + ''; + }; + + environmentFiles = mkOption { + type = with types; attrsOf (nullOr (oneOf [ str path package ])); + default = { }; + example = { + PATRONI_REPLICATION_PASSWORD = "/secret/file"; + PATRONI_SUPERUSER_PASSWORD = "/secret/file"; + }; + description = mdDoc "Environment variables made available to Patroni, each set to the content of the given file; useful for providing secrets from files."; + }; + }; + + config = mkIf cfg.enable { + + services.patroni.settings = { + scope = cfg.scope; + name = cfg.name; + namespace = cfg.namespace; + + restapi = { + listen = "${cfg.nodeIp}:${toString cfg.restApiPort}"; + connect_address = "${cfg.nodeIp}:${toString cfg.restApiPort}"; + }; + + raft = mkIf cfg.raft { + data_dir = "${cfg.dataDir}/raft"; + self_addr = "${cfg.nodeIp}:${toString cfg.raftPort}"; + partner_addrs = map (ip: "${ip}:${toString cfg.raftPort}") cfg.otherNodesIps; + }; + + postgresql = { + listen = "${cfg.nodeIp}:${toString cfg.postgresqlPort}"; + connect_address = "${cfg.nodeIp}:${toString cfg.postgresqlPort}"; + data_dir = cfg.postgresqlDataDir; + bin_dir = "${cfg.postgresqlPackage}/bin"; + pgpass = "${cfg.dataDir}/pgpass"; + }; + + watchdog = mkIf cfg.softwareWatchdog { + mode = "required"; + device = "/dev/watchdog"; + safety_margin = 5; + }; + }; + + + users = { + users = mkIf (cfg.user == defaultUser) { + patroni = { + group = cfg.group; + isSystemUser = true; + }; + }; + groups = mkIf (cfg.group == defaultGroup) { + patroni = { }; + }; + }; + + systemd.services = { + patroni = { + description = "Runners to orchestrate a high-availability PostgreSQL"; + + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + + script = '' + ${concatStringsSep "\n" (attrValues (mapAttrs (name: path: ''export 
${name}="$(< ${escapeShellArg path})"'') cfg.environmentFiles))} + exec ${patroni}/bin/patroni ${configFile} + ''; + + serviceConfig = mkMerge [ + { + User = cfg.user; + Group = cfg.group; + Type = "simple"; + Restart = "on-failure"; + TimeoutSec = 30; + ExecReload = "${pkgs.coreutils}/bin/kill -s HUP $MAINPID"; + KillMode = "process"; + } + (mkIf (cfg.postgresqlDataDir == "/var/lib/postgresql/${cfg.postgresqlPackage.psqlSchema}" && cfg.dataDir == "/var/lib/patroni") { + StateDirectory = "patroni patroni/raft postgresql postgresql/${cfg.postgresqlPackage.psqlSchema}"; + StateDirectoryMode = "0750"; + }) + ]; + }; + }; + + boot.kernelModules = mkIf cfg.softwareWatchdog [ "softdog" ]; + + services.udev.extraRules = mkIf cfg.softwareWatchdog '' + KERNEL=="watchdog", OWNER="${cfg.user}", GROUP="${cfg.group}", MODE="0600" + ''; + + environment.systemPackages = [ + patroni + cfg.postgresqlPackage + (mkIf cfg.raft pkgs.python39Packages.pysyncobj) + ]; + + environment.etc."${configFileName}".source = configFile; + + environment.sessionVariables = { + PATRONICTL_CONFIG_FILE = "/etc/${configFileName}"; + }; + }; + + meta.maintainers = [ maintainers.phfroidmont ]; +} diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix index affb179a92d3..a0af42b0056a 100644 --- a/nixos/tests/all-tests.nix +++ b/nixos/tests/all-tests.nix @@ -417,6 +417,7 @@ in { pam-u2f = handleTest ./pam/pam-u2f.nix {}; pam-ussh = handleTest ./pam/pam-ussh.nix {}; pass-secret-service = handleTest ./pass-secret-service.nix {}; + patroni = handleTest ./patroni.nix {}; pantalaimon = handleTest ./matrix/pantalaimon.nix {}; pantheon = handleTest ./pantheon.nix {}; paperless = handleTest ./paperless.nix {}; diff --git a/nixos/tests/patroni.nix b/nixos/tests/patroni.nix new file mode 100644 index 000000000000..f512fddcdbdd --- /dev/null +++ b/nixos/tests/patroni.nix @@ -0,0 +1,204 @@ +import ./make-test-python.nix ({ pkgs, lib, ... 
}: + + let + nodesIps = [ + "192.168.1.1" + "192.168.1.2" + "192.168.1.3" + ]; + + createNode = index: { pkgs, ... }: + let + ip = builtins.elemAt nodesIps index; # since we already use IPs to identify servers + in + { + networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [ + { address = ip; prefixLength = 16; } + ]; + + networking.firewall.allowedTCPPorts = [ 5432 8008 5010 ]; + + environment.systemPackages = [ pkgs.jq ]; + + services.patroni = { + + enable = true; + + postgresqlPackage = pkgs.postgresql_14.withPackages (p: [ p.pg_safeupdate ]); + + scope = "cluster1"; + name = "node${toString(index + 1)}"; + nodeIp = ip; + otherNodesIps = builtins.filter (h: h != ip) nodesIps; + softwareWatchdog = true; + + settings = { + bootstrap = { + dcs = { + ttl = 30; + loop_wait = 10; + retry_timeout = 10; + maximum_lag_on_failover = 1048576; + }; + initdb = [ + { encoding = "UTF8"; } + "data-checksums" + ]; + }; + + postgresql = { + use_pg_rewind = true; + use_slots = true; + authentication = { + replication = { + username = "replicator"; + }; + superuser = { + username = "postgres"; + }; + rewind = { + username = "rewind"; + }; + }; + parameters = { + listen_addresses = ip; + wal_level = "replica"; + hot_standby_feedback = "on"; + unix_socket_directories = "/tmp"; + }; + pg_hba = [ + "host replication replicator 192.168.1.0/24 md5" + # Unsafe, do not use for anything other than tests + "host all all 0.0.0.0/0 trust" + ]; + }; + + etcd3 = { + host = "192.168.1.4:2379"; + }; + }; + + environmentFiles = { + PATRONI_REPLICATION_PASSWORD = pkgs.writeText "replication-password" "postgres"; + PATRONI_SUPERUSER_PASSWORD = pkgs.writeText "superuser-password" "postgres"; + PATRONI_REWIND_PASSWORD = pkgs.writeText "rewind-password" "postgres"; + }; + }; + + # Disable start rate limiting (a [Unit] setting) so Patroni is always restarted and the tests never hang + systemd.services.patroni.startLimitIntervalSec = 0; + }; + in + { + name = "patroni"; + + nodes = { + node1 = createNode 0; + node2 = 
createNode 1; + node3 = createNode 2; + + etcd = { pkgs, ... }: { + + networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [ + { address = "192.168.1.4"; prefixLength = 16; } + ]; + + services.etcd = { + enable = true; + listenClientUrls = [ "http://192.168.1.4:2379" ]; + }; + + networking.firewall.allowedTCPPorts = [ 2379 ]; + }; + + client = { pkgs, ... }: { + environment.systemPackages = [ pkgs.postgresql_14 ]; + + networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [ + { address = "192.168.2.1"; prefixLength = 16; } + ]; + + services.haproxy = { + enable = true; + config = '' + global + maxconn 100 + + defaults + log global + mode tcp + retries 2 + timeout client 30m + timeout connect 4s + timeout server 30m + timeout check 5s + + listen cluster1 + bind 127.0.0.1:5432 + option httpchk + http-check expect status 200 + default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions + ${builtins.concatStringsSep "\n" (map (ip: "server postgresql_${ip}_5432 ${ip}:5432 maxconn 100 check port 8008") nodesIps)} + ''; + }; + }; + }; + + + + testScript = '' + nodes = [node1, node2, node3] + + def wait_for_all_nodes_ready(expected_replicas=2): + booted_nodes = filter(lambda node: node.booted, nodes) + for node in booted_nodes: + print(node.succeed("patronictl list cluster1")) + node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'length') == {expected_replicas + 1} ]") + node.wait_until_succeeds("[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]") + node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^running$\"))) | length') == {expected_replicas} ]") + print(node.succeed("patronictl list cluster1")) + client.wait_until_succeeds("psql -h 127.0.0.1 -U postgres --command='select 1;'") + + def run_dummy_queries(): + client.succeed("psql -h 
127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'") + client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101") + client.succeed("psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'") + + start_all() + + with subtest("should bootstrap a new patroni cluster"): + wait_for_all_nodes_ready() + + with subtest("should be able to insert and select"): + client.succeed("psql -h 127.0.0.1 -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'") + client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100") + + with subtest("should restart after all nodes are crashed"): + for node in nodes: + node.crash() + for node in nodes: + node.start() + wait_for_all_nodes_ready() + + with subtest("should be able to run queries while any one node is crashed"): + masterNodeName = node1.succeed("patronictl list -f json cluster1 | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip() + masterNodeIndex = int(masterNodeName[-1]) - 1 + + # Move the master node to the end of the list to avoid multiple failovers (makes the test faster and more consistent) + nodes.append(nodes.pop(masterNodeIndex)) + + for node in nodes: + node.crash() + wait_for_all_nodes_ready(1) + + # Execute some queries while a node is down. + run_dummy_queries() + + # Restart crashed node. + node.start() + wait_for_all_nodes_ready() + + # Execute some queries with the node back up. 
+ run_dummy_queries() + ''; + }) diff --git a/pkgs/servers/sql/patroni/default.nix b/pkgs/servers/sql/patroni/default.nix index e3d5089dfa0a..6eaaa4a7366a 100644 --- a/pkgs/servers/sql/patroni/default.nix +++ b/pkgs/servers/sql/patroni/default.nix @@ -1,6 +1,7 @@ { lib , pythonPackages , fetchFromGitHub +, nixosTests }: pythonPackages.buildPythonApplication rec { @@ -53,6 +54,10 @@ pythonPackages.buildPythonApplication rec { pythonImportsCheck = [ "patroni" ]; + passthru.tests = { + patroni = nixosTests.patroni; + }; + meta = with lib; { homepage = "https://patroni.readthedocs.io/en/latest/"; description = "A Template for PostgreSQL HA with ZooKeeper, etcd or Consul";