From f9e2f76a590d11cbeaa10e3953ddc96110bf1b3b Mon Sep 17 00:00:00 2001 From: Christian Albrecht Date: Fri, 1 Mar 2019 07:56:59 +0100 Subject: [PATCH] nixos/kubernetes: Add systemd path units to protect services from crashing and clobbering the logs when certificates are not in place yet and make sure services are activated when certificates are ready. To prevent errors similar to "kube-controller-manager.path: Failed to enter waiting state: Too many open files" fs.inotify.max_user_instances has to be increased. --- .../services/cluster/kubernetes/apiserver.nix | 41 ++++++++- .../cluster/kubernetes/controller-manager.nix | 22 ++++- .../services/cluster/kubernetes/flannel.nix | 8 +- .../services/cluster/kubernetes/kubelet.nix | 19 ++++- .../services/cluster/kubernetes/pki.nix | 84 ++++++++++++++++++- nixos/tests/kubernetes/base.nix | 5 +- 6 files changed, 168 insertions(+), 11 deletions(-) diff --git a/nixos/modules/services/cluster/kubernetes/apiserver.nix b/nixos/modules/services/cluster/kubernetes/apiserver.nix index 81e45b417de3..08f929060aa0 100644 --- a/nixos/modules/services/cluster/kubernetes/apiserver.nix +++ b/nixos/modules/services/cluster/kubernetes/apiserver.nix @@ -272,7 +272,25 @@ in ###### implementation config = mkMerge [ - (mkIf cfg.enable { + (mkIf cfg.enable (let + apiserverPaths = [ + cfg.clientCaFile + cfg.etcd.caFile + cfg.etcd.certFile + cfg.etcd.keyFile + cfg.kubeletClientCaFile + cfg.kubeletClientCertFile + cfg.kubeletClientKeyFile + cfg.serviceAccountKeyFile + cfg.tlsCertFile + cfg.tlsKeyFile + ]; + etcdPaths = [ + config.services.etcd.certFile + config.services.etcd.keyFile + config.services.etcd.trustedCaFile + ]; + in { systemd.services.kube-apiserver = { description = "Kubernetes APIServer Service"; wantedBy = [ "kubernetes.target" ]; @@ -341,6 +359,25 @@ in Restart = "on-failure"; RestartSec = 5; }; + unitConfig.ConditionPathExists = apiserverPaths; + }; + + systemd.paths.kube-apiserver = { + wantedBy = [ "kube-apiserver.service" ]; + pathConfig = { + PathExists = apiserverPaths; + PathChanged = apiserverPaths; + }; + }; + + systemd.services.etcd.unitConfig.ConditionPathExists = etcdPaths; + + systemd.paths.etcd = { + wantedBy = [ "etcd.service" ]; + pathConfig = { + PathExists = etcdPaths; + PathChanged = etcdPaths; + }; }; services.etcd = { @@ -421,7 +458,7 @@ in }; }; - }) + })) ]; diff --git a/nixos/modules/services/cluster/kubernetes/controller-manager.nix b/nixos/modules/services/cluster/kubernetes/controller-manager.nix index dff97f144d55..27b28311adbc 100644 --- a/nixos/modules/services/cluster/kubernetes/controller-manager.nix +++ b/nixos/modules/services/cluster/kubernetes/controller-manager.nix @@ -104,7 +104,16 @@ in }; ###### implementation - config = mkIf cfg.enable { + config = mkIf cfg.enable (let + controllerManagerPaths = [ + cfg.rootCaFile + cfg.tlsCertFile + cfg.tlsKeyFile + top.pki.certs.controllerManagerClient.cert + top.pki.certs.controllerManagerClient.key + ]; + in { + systemd.services.kube-controller-manager = { description = "Kubernetes Controller Manager Service"; wantedBy = [ "kubernetes.target" ]; @@ -142,6 +151,15 @@ in Group = "kubernetes"; }; path = top.path; + unitConfig.ConditionPathExists = controllerManagerPaths; + }; + + systemd.paths.kube-controller-manager = { + wantedBy = [ "kube-controller-manager.service" ]; + pathConfig = { + PathExists = controllerManagerPaths; + PathChanged = controllerManagerPaths; + }; }; services.kubernetes.pki.certs = with top.lib; { @@ -158,5 +176,5 @@ in }; services.kubernetes.controllerManager.kubeconfig.server = mkDefault top.apiserverAddress; - }; + }); } diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix index 93ee2fd65eeb..ef06acb6de39 100644 --- a/nixos/modules/services/cluster/kubernetes/flannel.nix +++ b/nixos/modules/services/cluster/kubernetes/flannel.nix @@ -55,13 +55,15 @@ in ${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker systemctl restart docker ''; + unitConfig.ConditionPathExists = [ "/run/flannel/subnet.env" ]; serviceConfig.Type = "oneshot"; }; - systemd.paths."flannel-subnet-env" = { - wantedBy = [ "flannel.service" ]; + systemd.paths.flannel-subnet-env = { + wantedBy = [ "mk-docker-opts.service" ]; pathConfig = { - PathModified = "/run/flannel/subnet.env"; + PathExists = [ "/run/flannel/subnet.env" ]; + PathChanged = [ "/run/flannel/subnet.env" ]; Unit = "mk-docker-opts.service"; }; }; diff --git a/nixos/modules/services/cluster/kubernetes/kubelet.nix b/nixos/modules/services/cluster/kubernetes/kubelet.nix index c94bb28bf7fb..86402cba7c48 100644 --- a/nixos/modules/services/cluster/kubernetes/kubelet.nix +++ b/nixos/modules/services/cluster/kubernetes/kubelet.nix @@ -241,7 +241,13 @@ in ###### implementation config = mkMerge [ - (mkIf cfg.enable { + (mkIf cfg.enable (let + kubeletPaths = [ + cfg.clientCaFile + cfg.tlsCertFile + cfg.tlsKeyFile + ]; + in { services.kubernetes.kubelet.seedDockerImages = [infraContainer]; systemd.services.kubelet = { @@ -308,6 +314,15 @@ in ''; WorkingDirectory = top.dataDir; }; + unitConfig.ConditionPathExists = kubeletPaths; + }; + + systemd.paths.kubelet = { + wantedBy = [ "kubelet.service" ]; + pathConfig = { + PathExists = kubeletPaths; + PathChanged = kubeletPaths; + }; }; # Allways include cni plugins @@ -336,7 +351,7 @@ in }; services.kubernetes.kubelet.kubeconfig.server = mkDefault top.apiserverAddress; - }) + })) (mkIf (cfg.enable && cfg.manifests != {}) { environment.etc = mapAttrs' (name: manifest: diff --git a/nixos/modules/services/cluster/kubernetes/pki.nix b/nixos/modules/services/cluster/kubernetes/pki.nix index 38deca23a990..8ad17d4dfb4e 100644 --- a/nixos/modules/services/cluster/kubernetes/pki.nix +++ b/nixos/modules/services/cluster/kubernetes/pki.nix @@ -119,6 +119,29 @@ in cfsslCertPathPrefix = "${config.services.cfssl.dataDir}/cfssl"; cfsslCert = "${cfsslCertPathPrefix}.pem"; cfsslKey = "${cfsslCertPathPrefix}-key.pem"; + + certmgrPaths = [ + top.caFile + certmgrAPITokenPath + ]; + addonManagerPaths = mkIf top.addonManager.enable [ + cfg.certs.addonManager.cert + cfg.certs.addonManager.key + cfg.certs.clusterAdmin.cert + cfg.certs.clusterAdmin.key + ]; + flannelPaths = [ + cfg.certs.flannelClient.cert + cfg.certs.flannelClient.key + ]; + proxyPaths = mkIf top.proxy.enable [ + cfg.certs.kubeProxyClient.cert + cfg.certs.kubeProxyClient.key + ]; + schedulerPaths = mkIf top.scheduler.enable [ + cfg.certs.schedulerClient.cert + cfg.certs.schedulerClient.key + ]; in { @@ -230,6 +253,18 @@ in mapAttrs mkSpec cfg.certs; }; + systemd.services.certmgr = { + unitConfig.ConditionPathExists = certmgrPaths; + }; + + systemd.paths.certmgr = { + wantedBy = [ "certmgr.service" ]; + pathConfig = { + PathExists = certmgrPaths; + PathChanged = certmgrPaths; + }; + }; + #TODO: Get rid of kube-addon-manager in the future for the following reasons # - it is basically just a shell script wrapped around kubectl # - it assumes that it is clusterAdmin or can gain clusterAdmin rights through serviceAccount @@ -255,7 +290,18 @@ in export KUBECONFIG=${clusterAdminKubeconfig} ${kubectl}/bin/kubectl apply -f ${concatStringsSep " \\\n -f " files} ''; - })]); + }) + { + unitConfig.ConditionPathExists = addonManagerPaths; + }]); + + systemd.paths.kube-addon-manager = mkIf top.addonManager.enable { + wantedBy = [ "kube-addon-manager.service" ]; + pathConfig = { + PathExists = addonManagerPaths; + PathChanged = addonManagerPaths; + }; + }; environment.etc.${cfg.etcClusterAdminKubeconfig}.source = mkIf (!isNull cfg.etcClusterAdminKubeconfig) clusterAdminKubeconfig; @@ -337,6 +383,42 @@ in }; }; + systemd.services.flannel = { + unitConfig.ConditionPathExists = flannelPaths; + }; + + systemd.paths.flannel = { + wantedBy = [ "flannel.service" ]; + pathConfig = { + PathExists = flannelPaths; + PathChanged = flannelPaths; + }; + }; + + systemd.services.kube-proxy = mkIf top.proxy.enable { + unitConfig.ConditionPathExists = proxyPaths; + }; + + systemd.paths.kube-proxy = mkIf top.proxy.enable { + wantedBy = [ "kube-proxy.service" ]; + pathConfig = { + PathExists = proxyPaths; + PathChanged = proxyPaths; + }; + }; + + systemd.services.kube-scheduler = mkIf top.scheduler.enable { + unitConfig.ConditionPathExists = schedulerPaths; + }; + + systemd.paths.kube-scheduler = mkIf top.scheduler.enable { + wantedBy = [ "kube-scheduler.service" ]; + pathConfig = { + PathExists = schedulerPaths; + PathChanged = schedulerPaths; + }; + }; + services.kubernetes = { apiserver = mkIf top.apiserver.enable (with cfg.certs.apiServer; { diff --git a/nixos/tests/kubernetes/base.nix b/nixos/tests/kubernetes/base.nix index ec1a75e74c41..212023859f6d 100644 --- a/nixos/tests/kubernetes/base.nix +++ b/nixos/tests/kubernetes/base.nix @@ -30,7 +30,10 @@ let { config, pkgs, lib, nodes, ... }: mkMerge [ { - boot.postBootCommands = "rm -fr /var/lib/kubernetes/secrets /tmp/shared/*"; + boot = { + postBootCommands = "rm -fr /var/lib/kubernetes/secrets /tmp/shared/*"; + kernel.sysctl = { "fs.inotify.max_user_instances" = 256; }; + }; virtualisation.memorySize = mkDefault 1536; virtualisation.diskSize = mkDefault 4096; networking = {