diff --git a/nixos/modules/misc/ids.nix b/nixos/modules/misc/ids.nix
index 4d91e266d85b..a5ce50ac2610 100644
--- a/nixos/modules/misc/ids.nix
+++ b/nixos/modules/misc/ids.nix
@@ -169,6 +169,7 @@
     opentsdb = 159;
     scollector = 160;
     bosun = 161;
+    kubernetes = 158;
 
     # When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399!
 
@@ -302,6 +303,7 @@
     liquidsoap = 155;
     scollector = 156;
     bosun = 157;
+    kubernetes = 158;
 
     # When adding a gid, make sure it doesn't match an existing uid. And don't use gids above 399!
 
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index cf9e7fa3bfa1..8cdc9d2dd4c1 100755
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -395,6 +395,7 @@
   ./virtualisation/container-config.nix
   ./virtualisation/containers.nix
   ./virtualisation/docker.nix
+  ./virtualisation/kubernetes.nix
   ./virtualisation/libvirtd.nix
   ./virtualisation/lxc.nix
   #./virtualisation/nova.nix
diff --git a/nixos/modules/virtualisation/docker.nix b/nixos/modules/virtualisation/docker.nix
index 11d3f576728f..9ae9624fd481 100644
--- a/nixos/modules/virtualisation/docker.nix
+++ b/nixos/modules/virtualisation/docker.nix
@@ -103,6 +103,12 @@
           LimitNPROC = 1048576;
         } // proxy_env;
 
+        postStart = ''
+          while ! [ -e /var/run/docker.sock ]; do
+            sleep 0.1
+          done
+        '';
+
         # Presumably some containers are running we don't want to interrupt
         restartIfChanged = false;
       };
diff --git a/nixos/modules/virtualisation/kubernetes.nix b/nixos/modules/virtualisation/kubernetes.nix
new file mode 100644
index 000000000000..50388cf2e121
--- /dev/null
+++ b/nixos/modules/virtualisation/kubernetes.nix
@@ -0,0 +1,461 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  cfg = config.virtualisation.kubernetes;
+
+in {
+
+  ###### interface
+
+  options.virtualisation.kubernetes = {
+    package = mkOption {
+      description = "Kubernetes package to use.";
+      default = pkgs.kubernetes;
+      type = types.package;
+    };
+
+    verbose = mkOption {
+      description = "Kubernetes enable verbose mode for debugging";
+      default = false;
+      type = types.bool;
+    };
+
+    etcdServers = mkOption {
+      description = "Kubernetes list of etcd servers to watch.";
+      default = [ "127.0.0.1:4001" ];
+      type = types.listOf types.str;
+    };
+
+    roles = mkOption {
+      description = ''
+        Kubernetes role that this machine should take.
+
+        Master role will enable etcd, apiserver, scheduler and controller manager
+        services. Node role will enable etcd, docker, kubelet and proxy services.
+      '';
+      default = [];
+      type = types.listOf (types.enum ["master" "node"]);
+    };
+
+    dataDir = mkOption {
+      description = "Kubernetes root directory for managing kubelet files.";
+      default = "/var/lib/kubernetes";
+      type = types.path;
+    };
+
+    apiserver = {
+      enable = mkOption {
+        description = "Whether to enable kubernetes apiserver.";
+        default = false;
+        type = types.bool;
+      };
+
+      address = mkOption {
+        description = "Kubernetes apiserver listening address.";
+        default = "127.0.0.1";
+        type = types.str;
+      };
+
+      publicAddress = mkOption {
+        description = ''
+          Kubernetes apiserver public listening address used for read only and
+          secure port.
+        '';
+        default = cfg.apiserver.address;
+        type = types.str;
+      };
+
+      port = mkOption {
+        description = "Kubernetes apiserver listening port.";
+        default = 8080;
+        type = types.int;
+      };
+
+      readOnlyPort = mkOption {
+        description = "Kubernetes apiserver read-only port.";
+        default = 7080;
+        type = types.int;
+      };
+
+      securePort = mkOption {
+        description = "Kubernetes apiserver secure port.";
+        default = 6443;
+        type = types.int;
+      };
+
+      tlsCertFile = mkOption {
+        description = "Kubernetes apiserver certificate file.";
+        default = "";
+        type = types.str;
+      };
+
+      tlsPrivateKeyFile = mkOption {
+        description = "Kubernetes apiserver private key file.";
+        default = "";
+        type = types.str;
+      };
+
+      tokenAuth = mkOption {
+        description = ''
+          Kubernetes apiserver token authentication file. See
+
+        '';
+        default = {};
+        example = literalExample ''
+          {
+            alice = "abc123";
+            bob = "xyz987";
+          }
+        '';
+        type = types.attrsOf types.str;
+      };
+
+      authorizationMode = mkOption {
+        description = ''
+          Kubernetes apiserver authorization mode (AlwaysAllow/AlwaysDeny/ABAC). See
+
+        '';
+        default = "AlwaysAllow";
+        type = types.enum ["AlwaysAllow" "AlwaysDeny" "ABAC"];
+      };
+
+      authorizationPolicy = mkOption {
+        description = ''
+          Kubernetes apiserver authorization policy file. See
+
+        '';
+        default = [];
+        example = literalExample ''
+          [
+            {user = "admin";}
+            {user = "scheduler"; readonly = true; kind= "pods";}
+            {user = "scheduler"; kind = "bindings";}
+            {user = "kubelet"; readonly = true; kind = "bindings";}
+            {user = "kubelet"; kind = "events";}
+            {user= "alice"; ns = "projectCaribou";}
+            {user = "bob"; readonly = true; ns = "projectCaribou";}
+          ]
+        '';
+        type = types.listOf types.attrs;
+      };
+
+      allowPrivileged = mkOption {
+        description = "Whether to allow privileged containers on kubernetes.";
+        default = false;
+        type = types.bool;
+      };
+
+      portalNet = mkOption {
+        description = "Kubernetes CIDR notation IP range from which to assign portal IPs";
+        default = "10.10.10.10/16";
+        type = types.str;
+      };
+
+      extraOpts = mkOption {
+        description = "Kubernetes apiserver extra command line options.";
+        default = "";
+        type = types.str;
+      };
+    };
+
+    scheduler = {
+      enable = mkOption {
+        description = "Whether to enable kubernetes scheduler.";
+        default = false;
+        type = types.bool;
+      };
+
+      address = mkOption {
+        description = "Kubernetes scheduler listening address.";
+        default = "127.0.0.1";
+        type = types.str;
+      };
+
+      port = mkOption {
+        description = "Kubernetes scheduler listening port.";
+        default = 10251;
+        type = types.int;
+      };
+
+      master = mkOption {
+        description = "Kubernetes apiserver address";
+        default = "${cfg.apiserver.address}:${toString cfg.apiserver.port}";
+        type = types.str;
+      };
+
+      extraOpts = mkOption {
+        description = "Kubernetes scheduler extra command line options.";
+        default = "";
+        type = types.str;
+      };
+    };
+
+    controllerManager = {
+      enable = mkOption {
+        description = "Whether to enable kubernetes controller manager.";
+        default = false;
+        type = types.bool;
+      };
+
+      address = mkOption {
+        description = "Kubernetes controller manager listening address.";
+        default = "127.0.0.1";
+        type = types.str;
+      };
+
+      port = mkOption {
+        description = "Kubernetes controller manager listening port.";
+        default = 10252;
+        type = types.int;
+      };
+
+      master = mkOption {
+        description = "Kubernetes apiserver address";
+        default = "${cfg.apiserver.address}:${toString cfg.apiserver.port}";
+        type = types.str;
+      };
+
+      machines = mkOption {
+        description = "Kubernetes apiserver list of machines to schedule onto";
+        default = [];
+        type = types.listOf types.str;
+      };
+
+      extraOpts = mkOption {
+        description = "Kubernetes controller manager extra command line options.";
+        default = "";
+        type = types.str;
+      };
+    };
+
+    kubelet = {
+      enable = mkOption {
+        description = "Whether to enable kubernetes kubelet.";
+        default = false;
+        type = types.bool;
+      };
+
+      address = mkOption {
+        description = "Kubernetes kubelet info server listening address.";
+        default = "0.0.0.0";
+        type = types.str;
+      };
+
+      port = mkOption {
+        description = "Kubernetes kubelet info server listening port.";
+        default = 10250;
+        type = types.int;
+      };
+
+      hostname = mkOption {
+        description = "Kubernetes kubelet hostname override";
+        default = config.networking.hostName;
+        type = types.str;
+      };
+
+      allowPrivileged = mkOption {
+        description = "Whether to allow kubernetes containers to request privileged mode.";
+        default = false;
+        type = types.bool;
+      };
+
+      extraOpts = mkOption {
+        description = "Kubernetes kubelet extra command line options.";
+        default = "";
+        type = types.str;
+      };
+    };
+
+    proxy = {
+      enable = mkOption {
+        description = "Whether to enable kubernetes proxy.";
+        default = false;
+        type = types.bool;
+      };
+
+      address = mkOption {
+        description = "Kubernetes proxy listening address.";
+        default = "0.0.0.0";
+        type = types.str;
+      };
+
+      extraOpts = mkOption {
+        description = "Kubernetes proxy extra command line options.";
+        default = "";
+        type = types.str;
+      };
+    };
+  };
+
+  ###### implementation
+
+  config = mkMerge [
+    (mkIf cfg.apiserver.enable {
+      systemd.services.kubernetes-apiserver = {
+        description = "Kubernetes Api Server";
+        wantedBy = [ "multi-user.target" ];
+        after = [ "network-interfaces.target" "etcd.service" ];
+        serviceConfig = {
+          ExecStart = let
+            authorizationPolicyFile =
+              pkgs.writeText "kubernetes-policy"
+                (builtins.toJSON cfg.apiserver.authorizationPolicy);
+            tokenAuthFile =
+              pkgs.writeText "kubernetes-auth"
+                (concatImapStringsSep "\n" (i: v: v + "," + (toString i))
+                  (mapAttrsToList (name: token: token + "," + name) cfg.apiserver.tokenAuth));
+          in ''${cfg.package}/bin/kube-apiserver \
+            -etcd_servers=${concatMapStringsSep "," (f: "http://${f}") cfg.etcdServers} \
+            -address=${cfg.apiserver.address} \
+            -port=${toString cfg.apiserver.port} \
+            -read_only_port=${toString cfg.apiserver.readOnlyPort} \
+            -public_address_override=${cfg.apiserver.publicAddress} \
+            -allow_privileged=${if cfg.apiserver.allowPrivileged then "true" else "false"} \
+            ${optionalString (cfg.apiserver.tlsCertFile!="")
+              "-tls_cert_file=${cfg.apiserver.tlsCertFile}"} \
+            ${optionalString (cfg.apiserver.tlsPrivateKeyFile!="")
+              "-tls_private_key_file=${cfg.apiserver.tlsPrivateKeyFile}"} \
+            ${optionalString (cfg.apiserver.tokenAuth!={})
+              "-token_auth_file=${tokenAuthFile}"} \
+            -authorization_mode=${cfg.apiserver.authorizationMode} \
+            ${optionalString (cfg.apiserver.authorizationMode == "ABAC")
+              "-authorization_policy_file=${authorizationPolicyFile}"} \
+            ${optionalString (cfg.apiserver.tlsCertFile!="" && cfg.apiserver.tlsPrivateKeyFile!="")
+              "-secure_port=${toString cfg.apiserver.securePort}"} \
+            -portal_net=${cfg.apiserver.portalNet} \
+            -logtostderr=true \
+            ${optionalString cfg.verbose "-v=6 -log_flush_frequency=1s"} \
+            ${cfg.apiserver.extraOpts}
+          '';
+          User = "kubernetes";
+        };
+        postStart = ''
+          until ${pkgs.curl}/bin/curl -s -o /dev/null 'http://${cfg.apiserver.address}:${toString cfg.apiserver.port}/'; do
+            sleep 1;
+          done
+        '';
+      };
+    })
+
+    (mkIf cfg.scheduler.enable {
+      systemd.services.kubernetes-scheduler = {
+        description = "Kubernetes Scheduler Service";
+        wantedBy = [ "multi-user.target" ];
+        after = [ "network-interfaces.target" "kubernetes-apiserver.service" ];
+        serviceConfig = {
+          ExecStart = ''${cfg.package}/bin/kube-scheduler \
+            -address=${cfg.scheduler.address} \
+            -port=${toString cfg.scheduler.port} \
+            -master=${cfg.scheduler.master} \
+            -logtostderr=true \
+            ${optionalString cfg.verbose "-v=6 -log_flush_frequency=1s"} \
+            ${cfg.scheduler.extraOpts}
+          '';
+          User = "kubernetes";
+        };
+      };
+    })
+
+    (mkIf cfg.controllerManager.enable {
+      systemd.services.kubernetes-controller-manager = {
+        description = "Kubernetes Controller Manager Service";
+        wantedBy = [ "multi-user.target" ];
+        after = [ "network-interfaces.target" "kubernetes-apiserver.service" ];
+        serviceConfig = {
+          ExecStart = ''${cfg.package}/bin/kube-controller-manager \
+            -address=${cfg.controllerManager.address} \
+            -port=${toString cfg.controllerManager.port} \
+            -master=${cfg.controllerManager.master} \
+            ${optionalString (cfg.controllerManager.machines != [])
+              "-machines=${concatStringsSep "," cfg.controllerManager.machines}"} \
+            -logtostderr=true \
+            ${optionalString cfg.verbose "-v=6 -log_flush_frequency=1s"} \
+            ${cfg.controllerManager.extraOpts}
+          '';
+          User = "kubernetes";
+        };
+      };
+    })
+
+    (mkIf cfg.kubelet.enable {
+      systemd.services.kubernetes-kubelet = {
+        description = "Kubernetes Kubelet Service";
+        wantedBy = [ "multi-user.target" ];
+        after = [ "network-interfaces.target" "etcd.service" "docker.service" ];
+        serviceConfig = {
+          ExecStart = ''${cfg.package}/bin/kubelet \
+            -etcd_servers=${concatMapStringsSep "," (f: "http://${f}") cfg.etcdServers} \
+            -address=${cfg.kubelet.address} \
+            -port=${toString cfg.kubelet.port} \
+            -hostname_override=${cfg.kubelet.hostname} \
+            -allow_privileged=${if cfg.kubelet.allowPrivileged then "true" else "false"} \
+            -root_dir=${cfg.dataDir} \
+            -logtostderr=true \
+            ${optionalString cfg.verbose "-v=6 -log_flush_frequency=1s"} \
+            ${cfg.kubelet.extraOpts}
+          '';
+          User = "kubernetes";
+          PermissionsStartOnly = true;
+          WorkingDirectory = cfg.dataDir;
+        };
+      };
+    })
+
+    (mkIf cfg.proxy.enable {
+      systemd.services.kubernetes-proxy = {
+        description = "Kubernetes Proxy Service";
+        wantedBy = [ "multi-user.target" ];
+        after = [ "network-interfaces.target" "etcd.service" ];
+        serviceConfig = {
+          ExecStart = ''${cfg.package}/bin/kube-proxy \
+            -etcd_servers=${concatMapStringsSep "," (s: "http://${s}") cfg.etcdServers} \
+            -bind_address=${cfg.proxy.address} \
+            -logtostderr=true \
+            ${optionalString cfg.verbose "-v=6 -log_flush_frequency=1s"} \
+            ${cfg.proxy.extraOpts}
+          '';
+        };
+      };
+    })
+
+    (mkIf (any (el: el == "master") cfg.roles) {
+      virtualisation.kubernetes.apiserver.enable = mkDefault true;
+      virtualisation.kubernetes.scheduler.enable = mkDefault true;
+      virtualisation.kubernetes.controllerManager.enable = mkDefault true;
+    })
+
+    (mkIf (any (el: el == "node") cfg.roles) {
+      virtualisation.docker.enable = mkDefault true;
+      virtualisation.kubernetes.kubelet.enable = mkDefault true;
+      virtualisation.kubernetes.proxy.enable = mkDefault true;
+    })
+
+    (mkIf (any (el: el == "node" || el == "master") cfg.roles) {
+      services.etcd.enable = mkDefault true;
+    })
+
+    (mkIf (
+      cfg.apiserver.enable ||
+      cfg.scheduler.enable ||
+      cfg.controllerManager.enable ||
+      cfg.kubelet.enable ||
+      cfg.proxy.enable
+    ) {
+      environment.systemPackages = [ cfg.package ];
+
+      users.extraUsers = singleton {
+        name = "kubernetes";
+        uid = config.ids.uids.kubernetes;
+        description = "Kubernetes user";
+        extraGroups = [ "docker" ];
+        group = "kubernetes";
+        home = cfg.dataDir;
+        createHome = true;
+      };
+      users.extraGroups.kubernetes.gid = config.ids.gids.kubernetes;
+    })
+
+  ];
+}
diff --git a/nixos/release.nix b/nixos/release.nix
index 5c08b26c3cf2..7036db4144ae 100644
--- a/nixos/release.nix
+++ b/nixos/release.nix
@@ -258,6 +258,7 @@ in rec {
   tests.ipv6 = callTest tests/ipv6.nix {};
   tests.jenkins = callTest tests/jenkins.nix {};
   tests.kde4 = callTest tests/kde4.nix {};
+  tests.kubernetes = callTest tests/kubernetes.nix {};
   tests.latestKernel.login = callTest tests/login.nix { latestKernel = true; };
   tests.login = callTest tests/login.nix {};
   #tests.logstash = callTest tests/logstash.nix {};
diff --git a/nixos/tests/kubernetes.nix b/nixos/tests/kubernetes.nix
new file mode 100644
index 000000000000..b8532fe092e0
--- /dev/null
+++ b/nixos/tests/kubernetes.nix
@@ -0,0 +1,175 @@
+# This test runs two node kubernetes cluster and checks if simple redis pod works
+
+import ./make-test.nix rec {
+  name = "kubernetes";
+
+  redisMaster = builtins.toFile "redis-master-pod.yaml" ''
+    id: redis-master-pod
+    kind: Pod
+    apiVersion: v1beta1
+    desiredState:
+      manifest:
+        version: v1beta1
+        id: redis-master-pod
+        containers:
+          - name: master
+            image: master:5000/scratch
+            cpu: 100
+            ports:
+              - name: redis-server
+                containerPort: 6379
+                hostPort: 6379
+            volumeMounts:
+              - name: nix-store
+                mountPath: /nix/store
+                readOnly: true
+              # merged with the nix-store mount above (duplicate volumeMounts key is invalid YAML)
+              - name: system-profile
+                mountPath: /bin
+                readOnly: true
+            command:
+              - /bin/redis-server
+        volumes:
+          - name: nix-store
+            source:
+              hostDir:
+                path: /nix/store
+          - name: system-profile
+            source:
+              hostDir:
+                path: /run/current-system/sw/bin
+    labels:
+      name: redis
+      role: master
+  '';
+
+  nodes = {
+    master =
+      { config, pkgs, nodes, ... }:
+      {
+        virtualisation.kubernetes = {
+          roles = ["master" "node"];
+          controllerManager.machines = ["master" "node"];
+          kubelet.extraOpts = "-network_container_image=master:5000/pause";
+          apiserver.address = "0.0.0.0";
+          verbose = true;
+        };
+        virtualisation.docker.extraOptions = "--iptables=false --ip-masq=false -b cbr0 --insecure-registry master:5000";
+
+        services.etcd = {
+          listenPeerUrls = ["http://0.0.0.0:7001"];
+          initialAdvertisePeerUrls = ["http://master:7001"];
+          initialCluster = ["master=http://master:7001" "node=http://node:7001"];
+        };
+        services.dockerRegistry.enable = true;
+        services.dockerRegistry.host = "0.0.0.0";
+        services.dockerRegistry.port = 5000;
+
+        virtualisation.vlans = [ 1 2 ];
+        networking.bridges = {
+          cbr0.interfaces = [ "eth2" ];
+        };
+        networking.interfaces = {
+          cbr0 = {
+            ipAddress = "10.10.0.1";
+            prefixLength = 24;
+          };
+        };
+        networking.localCommands = ''
+          ip route add 10.10.0.0/16 dev cbr0
+          ip route flush cache
+        '';
+        networking.extraHosts = "127.0.0.1 master";
+
+        networking.firewall.enable = false;
+        #networking.firewall.allowedTCPPorts = [ 4001 7001 ];
+
+        environment.systemPackages = [ pkgs.redis ];
+      };
+
+    node =
+      { config, pkgs, nodes, ... }:
+      {
+        virtualisation.kubernetes = {
+          roles = ["node"];
+          kubelet.extraOpts = "-network_container_image=master:5000/pause";
+          verbose = true;
+        };
+        virtualisation.docker.extraOptions = "--iptables=false --ip-masq=false -b cbr0 --insecure-registry master:5000";
+        services.etcd = {
+          listenPeerUrls = ["http://0.0.0.0:7001"];
+          initialAdvertisePeerUrls = ["http://node:7001"];
+          initialCluster = ["master=http://master:7001" "node=http://node:7001"];
+        };
+
+        virtualisation.vlans = [ 1 2 ];
+        networking.bridges = {
+          cbr0.interfaces = [ "eth2" ];
+        };
+        networking.interfaces = {
+          cbr0 = {
+            ipAddress = "10.10.1.1";
+            prefixLength = 24;
+          };
+        };
+        networking.localCommands = ''
+          ip route add 10.10.0.0/16 dev cbr0
+          ip route flush cache
+        '';
+        networking.extraHosts = "127.0.0.1 node";
+
+        networking.firewall.enable = false;
+        #networking.firewall.allowedTCPPorts = [ 4001 7001 ];
+
+        environment.systemPackages = [ pkgs.redis ];
+      };
+
+    client =
+      { config, pkgs, nodes, ... }:
+      {
+        virtualisation.docker.enable = true;
+        virtualisation.docker.extraOptions = "--insecure-registry master:5000";
+        environment.systemPackages = [ pkgs.kubernetes ];
+        environment.etc."test/redis-master-pod.yaml".source = redisMaster;
+        environment.etc."test/pause".source = "${pkgs.kubernetes}/bin/kube-pause";
+        environment.etc."test/Dockerfile".source = pkgs.writeText "Dockerfile" ''
+          FROM scratch
+          ADD pause /
+          ENTRYPOINT ["/pause"]
+        '';
+      };
+  };
+
+  testScript = ''
+    startAll;
+
+    $master->waitForUnit("kubernetes-apiserver.service");
+    $master->waitForUnit("kubernetes-scheduler.service");
+    $master->waitForUnit("kubernetes-controller-manager.service");
+    $master->waitForUnit("kubernetes-kubelet.service");
+    $master->waitForUnit("kubernetes-proxy.service");
+
+    $node->waitForUnit("kubernetes-kubelet.service");
+    $node->waitForUnit("kubernetes-proxy.service");
+
+    $master->waitUntilSucceeds("kubecfg list minions | grep master");
+    $master->waitUntilSucceeds("kubecfg list minions | grep node");
+
+    $client->waitForUnit("docker.service");
+    $client->succeed("tar cv --files-from /dev/null | docker import - scratch");
+    $client->succeed("docker tag scratch master:5000/scratch");
+    $master->waitForUnit("docker-registry.service");
+    $client->succeed("docker push master:5000/scratch");
+    $client->succeed("mkdir -p /root/pause");
+    $client->succeed("cp /etc/test/pause /root/pause/");
+    $client->succeed("cp /etc/test/Dockerfile /root/pause/");
+    $client->succeed("cd /root/pause && docker build -t master:5000/pause .");
+    $client->succeed("docker push master:5000/pause");
+
+    subtest "simple pod", sub {
+      $client->succeed("kubectl create -f ${redisMaster} -s http://master:8080");
+      $client->waitUntilSucceeds("kubectl get pods -s http://master:8080 | grep redis-master | grep -i running");
+    }
+
+  '';
+}
diff --git a/pkgs/applications/networking/cluster/kubernetes/default.nix b/pkgs/applications/networking/cluster/kubernetes/default.nix
index 76b29b13c000..7af4c0deb9eb 100644
--- a/pkgs/applications/networking/cluster/kubernetes/default.nix
+++ b/pkgs/applications/networking/cluster/kubernetes/default.nix
@@ -15,9 +15,12 @@ stdenv.mkDerivation rec {
 
   preBuild = "patchShebangs ./hack";
 
+  postBuild = ''go build --ldflags '-extldflags "-static" -s' build/pause/pause.go'';
+
   installPhase = ''
     mkdir -p "$out/bin"
     cp _output/local/go/bin/* "$out/bin/"
+    cp pause $out/bin/kube-pause
   '';
 
   preFixup = ''