From 716b0dfaaf12afe83ff54b793dc52c022ab62155 Mon Sep 17 00:00:00 2001
From: illustris
Date: Sun, 27 Feb 2022 11:52:18 +0530
Subject: [PATCH] nixos/hadoop: add gateway role

---
 .../services/cluster/hadoop/default.nix       | 31 ++++++------
 .../modules/services/cluster/hadoop/hdfs.nix  | 21 ++++-----
 .../modules/services/cluster/hadoop/yarn.nix  | 10 ++--
 nixos/tests/hadoop/hadoop.nix                 | 47 +++++++++++--------
 4 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix
index 636bb4067277..5c7ea79c3017 100644
--- a/nixos/modules/services/cluster/hadoop/default.nix
+++ b/nixos/modules/services/cluster/hadoop/default.nix
@@ -149,6 +149,8 @@ with lib;
       description = "Directories containing additional config files to be added to HADOOP_CONF_DIR";
     };
 
+    gatewayRole.enable = mkEnableOption "gateway role for deploying hadoop configs";
+
     package = mkOption {
       type = types.package;
       default = pkgs.hadoop;
@@ -158,21 +160,16 @@ with lib;
 
   };
 
-  config = mkMerge [
-    (mkIf (builtins.hasAttr "yarn" config.users.users ||
-      builtins.hasAttr "hdfs" config.users.users ||
-      builtins.hasAttr "httpfs" config.users.users) {
-      users.groups.hadoop = {
-        gid = config.ids.gids.hadoop;
-      };
-      environment = {
-        systemPackages = [ cfg.package ];
-        etc."hadoop-conf".source = let
-          hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
-        in "${hadoopConf}";
-        variables.HADOOP_CONF_DIR = "/etc/hadoop-conf/";
-      };
-    })
-
-  ];
+  config = mkIf cfg.gatewayRole.enable {
+    users.groups.hadoop = {
+      gid = config.ids.gids.hadoop;
+    };
+    environment = {
+      systemPackages = [ cfg.package ];
+      etc."hadoop-conf".source = let
+        hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+      in "${hadoopConf}";
+      variables.HADOOP_CONF_DIR = "/etc/hadoop-conf/";
+    };
+  };
 }
diff --git a/nixos/modules/services/cluster/hadoop/hdfs.nix b/nixos/modules/services/cluster/hadoop/hdfs.nix
index 61d9941298aa..1725dc62d0cc 100644
--- a/nixos/modules/services/cluster/hadoop/hdfs.nix
+++ b/nixos/modules/services/cluster/hadoop/hdfs.nix
@@ -51,6 +51,8 @@ let
         };
       };
 
+      services.hadoop.gatewayRole.enable = true;
+
       networking.firewall.allowedTCPPorts = mkIf
        ((builtins.hasAttr "openFirewall" serviceOptions) && serviceOptions.openFirewall)
        allowedTCPPorts;
@@ -145,17 +147,13 @@ in
       ];
     })
-    (mkIf
-      (
-        cfg.hdfs.namenode.enable || cfg.hdfs.datanode.enable || cfg.hdfs.journalnode.enable || cfg.hdfs.zkfc.enable
-      )
-      {
-        users.users.hdfs = {
-          description = "Hadoop HDFS user";
-          group = "hadoop";
-          uid = config.ids.uids.hdfs;
-        };
-      })
+    (mkIf cfg.gatewayRole.enable {
+      users.users.hdfs = {
+        description = "Hadoop HDFS user";
+        group = "hadoop";
+        uid = config.ids.uids.hdfs;
+      };
+    })
     (mkIf cfg.hdfs.httpfs.enable {
       users.users.httpfs = {
         description = "Hadoop HTTPFS user";
         group = "httpfs";
         isSystemUser = true;
       };
     })
+
   ];
 }
diff --git a/nixos/modules/services/cluster/hadoop/yarn.nix b/nixos/modules/services/cluster/hadoop/yarn.nix
index 90ae75a44b79..9f0d3f85db72 100644
--- a/nixos/modules/services/cluster/hadoop/yarn.nix
+++ b/nixos/modules/services/cluster/hadoop/yarn.nix
@@ -49,10 +49,7 @@ in
   };
 
   config = mkMerge [
-    (mkIf (
-      cfg.yarn.resourcemanager.enable || cfg.yarn.nodemanager.enable
-    ) {
-
+    (mkIf cfg.gatewayRole.enable {
       users.users.yarn = {
         description = "Hadoop YARN user";
         group = "hadoop";
@@ -74,6 +71,9 @@ in
           Restart = "always";
         };
       };
+
+      services.hadoop.gatewayRole.enable = true;
+
       networking.firewall.allowedTCPPorts = (mkIf cfg.yarn.resourcemanager.openFirewall [
         8088 # resourcemanager.webapp.address
         8030 # resourcemanager.scheduler.address
@@ -119,6 +119,8 @@ in
         };
       };
 
+      services.hadoop.gatewayRole.enable = true;
+
       networking.firewall.allowedTCPPortRanges = [
         (mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
       ];
diff --git a/nixos/tests/hadoop/hadoop.nix b/nixos/tests/hadoop/hadoop.nix
index e84a56f302f5..42c238ef853d 100644
--- a/nixos/tests/hadoop/hadoop.nix
+++ b/nixos/tests/hadoop/hadoop.nix
@@ -145,7 +145,14 @@ import ../make-test-python.nix ({ package, ... }: {
         };
       };
     };
-  };
+    client = { options, ... }: {
+      services.hadoop = {
+        gatewayRole.enable = true;
+        inherit package coreSite hdfsSite;
+        yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
+      };
+    };
+  };
 
   testScript = ''
     start_all()
@@ -202,26 +209,26 @@ import ../make-test-python.nix ({ package, ... }: {
     # DN should have started by now, but confirm anyway
     dn1.wait_for_unit("hdfs-datanode")
     # Print states of namenodes
-    dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
+    client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
     # Wait for cluster to exit safemode
-    dn1.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
-    dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
+    client.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
+    client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
     # test R/W
-    dn1.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
-    assert "testfilecontents" in dn1.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
+    client.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
+    assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
 
     # Test NN failover
     nn1.succeed("systemctl stop hdfs-namenode")
-    assert "active" in dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
-    dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
-    assert "testfilecontents" in dn1.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
+    assert "active" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
+    client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
+    assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
 
     nn1.succeed("systemctl start hdfs-namenode")
     nn1.wait_for_open_port(9870)
    nn1.wait_for_open_port(8022)
     nn1.wait_for_open_port(8020)
-    assert "standby" in dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
-    dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
+    assert "standby" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
+    client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
 
     #### YARN tests ####
@@ -237,21 +244,21 @@ import ../make-test-python.nix ({ package, ... }: {
     nm1.wait_for_unit("yarn-nodemanager")
     nm1.wait_for_open_port(8042)
     nm1.wait_for_open_port(8040)
-    nm1.wait_until_succeeds("yarn node -list | grep Nodes:1")
-    nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
-    nm1.succeed("sudo -u yarn yarn node -list | systemd-cat")
+    client.wait_until_succeeds("yarn node -list | grep Nodes:1")
+    client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
+    client.succeed("sudo -u yarn yarn node -list | systemd-cat")
 
     # Test RM failover
     rm1.succeed("systemctl stop yarn-resourcemanager")
-    assert "standby" not in nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
-    nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
+    assert "standby" not in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
+    client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
     rm1.succeed("systemctl start yarn-resourcemanager")
     rm1.wait_for_unit("yarn-resourcemanager")
     rm1.wait_for_open_port(8088)
-    assert "standby" in nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
-    nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
+    assert "standby" in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
+    client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
 
-    assert "Estimated value of Pi is" in nm1.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~lib/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
-    assert "SUCCEEDED" in nm1.succeed("yarn application -list -appStates FINISHED")
+    assert "Estimated value of Pi is" in client.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~lib/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
+    assert "SUCCEEDED" in client.succeed("yarn application -list -appStates FINISHED")
   '';
 })
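
Usage sketch (illustration only, not part of the patch): with the gateway role, a host that only needs the cluster configuration in /etc/hadoop-conf plus the hadoop CLI tools, and runs no HDFS or YARN daemons, can enable services.hadoop.gatewayRole.enable directly, just like the `client` node added to the VM test above. The file name and the coreSite value below are assumptions for the example; only gatewayRole.enable comes from this patch.

  # gateway-host.nix (hypothetical): a config-only Hadoop client machine
  { ... }: {
    services.hadoop = {
      # From this patch: ships /etc/hadoop-conf, sets HADOOP_CONF_DIR,
      # and adds the hadoop package to environment.systemPackages.
      gatewayRole.enable = true;
      # Assumed cluster-specific setting so the CLI can reach the cluster;
      # a real deployment would also set hdfsSite/yarnSite as needed.
      coreSite."fs.defaultFS" = "hdfs://ns1/";
    };
  }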