2018-05-21 02:09:31 +01:00
|
|
|
{ config, lib, pkgs, ...}:
|
2021-10-20 21:31:12 +01:00
|
|
|
with lib;
|
2018-05-21 02:09:31 +01:00
|
|
|
let
|
|
|
|
# Shorthand for this module's configuration subtree (`services.hadoop`).
cfg = config.services.hadoop;
|
2021-10-20 21:31:12 +01:00
|
|
|
# Value handed to `hdfs --config` by every unit in this module: the result of
# importing ./conf.nix (given cfg, pkgs and lib), interpolated into a string
# with a trailing "/".
# NOTE(review): presumably a derivation holding the generated Hadoop config
# files; confirm against conf.nix.
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
|
|
|
|
# Shared option declaration; each HDFS daemon's option set inherits it so the
# wording and default stay identical across daemons.
restartIfChanged = lib.mkOption {
  type = lib.types.bool;
  default = false;
  description = ''
    Automatically restart the service on config change.
    This can be set to false to defer restarts on clusters running critical applications.
    Please consider the security implications of inadvertently running an older version,
    and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
  '';
};
|
2018-05-21 02:09:31 +01:00
|
|
|
in
|
|
|
|
{
|
|
|
|
options.services.hadoop.hdfs = {
|
2021-10-20 21:31:12 +01:00
|
|
|
# Options for the HDFS NameNode daemon.
namenode = {
  # mkEnableOption prefixes its argument with "Whether to enable ", so only
  # the component name is passed; the previous text rendered as
  # "Whether to enable Whether to run ...".
  enable = mkEnableOption "HDFS NameNode";

  # When set, the hdfs-namenode unit runs `hdfs namenode -format
  # -nonInteractive` in its preStart.
  formatOnInit = mkOption {
    type = types.bool;
    default = false;
    description = ''
      Format HDFS namenode on first start. This is useful for quickly spinning up ephemeral HDFS clusters with a single namenode.
      For HA clusters, initialization involves multiple steps across multiple nodes. Follow [this guide](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html)
      to initialize an HA cluster manually.
    '';
  };

  inherit restartIfChanged;

  # Gates the networking.firewall.allowedTCPPorts entries for the namenode.
  openFirewall = mkOption {
    type = types.bool;
    default = true;
    description = ''
      Open firewall ports for namenode
    '';
  };
};
|
2021-10-20 21:31:12 +01:00
|
|
|
# Options for the HDFS DataNode daemon.
datanode = {
  # mkEnableOption prefixes its argument with "Whether to enable ", so only
  # the component name is passed; the previous text rendered as
  # "Whether to enable Whether to run ...".
  enable = mkEnableOption "HDFS DataNode";

  inherit restartIfChanged;

  # Gates the networking.firewall.allowedTCPPorts entries for the datanode.
  openFirewall = mkOption {
    type = types.bool;
    default = true;
    description = ''
      Open firewall ports for datanode
    '';
  };
};
|
2021-11-02 06:29:58 +00:00
|
|
|
# Options for the HDFS JournalNode daemon.
journalnode = {
  # mkEnableOption prefixes its argument with "Whether to enable ", so only
  # the component name is passed; the previous text rendered as
  # "Whether to enable Whether to run ...".
  enable = mkEnableOption "HDFS JournalNode";

  inherit restartIfChanged;

  # Gates the networking.firewall.allowedTCPPorts entries for the journalnode.
  openFirewall = mkOption {
    type = types.bool;
    default = true;
    description = ''
      Open firewall ports for journalnode
    '';
  };
};
|
|
|
|
# Options for the HDFS ZooKeeper failover controller. No openFirewall option
# is declared for this daemon.
zkfc = {
  # mkEnableOption prefixes its argument with "Whether to enable ", so only
  # the component name is passed; the previous text rendered as
  # "Whether to enable Whether to run ...".
  enable = mkEnableOption "HDFS ZooKeeper failover controller";

  inherit restartIfChanged;
};
|
2021-11-02 13:04:04 +00:00
|
|
|
# Options for the HDFS HTTPfs server.
httpfs = {
  # mkEnableOption prefixes its argument with "Whether to enable ", so only
  # the component name is passed; the previous text rendered as
  # "Whether to enable Whether to run ...".
  enable = mkEnableOption "HDFS HTTPfs server";

  # Exported to the hdfs-httpfs unit as the HTTPFS_TEMP environment variable
  # and created by that unit's preStart.
  tempPath = mkOption {
    type = types.path;
    default = "/tmp/hadoop/httpfs";
    description = ''
      HTTPFS_TEMP path used by HTTPFS
    '';
  };

  inherit restartIfChanged;

  # Gates the networking.firewall.allowedTCPPorts entry for HTTPFS.
  openFirewall = mkOption {
    type = types.bool;
    default = true;
    description = ''
      Open firewall ports for HTTPFS
    '';
  };
};
|
2018-05-21 02:09:31 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
config = mkMerge [
|
2021-11-02 16:16:48 +00:00
|
|
|
# NameNode: systemd unit plus, when requested, its firewall ports.
(mkIf cfg.hdfs.namenode.enable {
  systemd.services.hdfs-namenode = {
    description = "Hadoop HDFS NameNode";
    wantedBy = [ "multi-user.target" ];
    restartIfChanged = cfg.hdfs.namenode.restartIfChanged;

    # Only defined when formatOnInit is set. The trailing `|| true` makes the
    # command exit successfully even if the format step fails.
    preStart = mkIf cfg.hdfs.namenode.formatOnInit ''
      ${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
    '';

    serviceConfig = {
      ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
      Restart = "always";
      SyslogIdentifier = "hdfs-namenode";
      User = "hdfs";
    };
  };

  networking.firewall.allowedTCPPorts = mkIf cfg.hdfs.namenode.openFirewall [
    9870 # namenode.http-address
    8020 # namenode.rpc-address
    8022 # namenode.servicerpc-address
  ];
})
|
2021-11-02 16:16:48 +00:00
|
|
|
# DataNode: systemd unit plus, when requested, its firewall ports.
(mkIf cfg.hdfs.datanode.enable {
  systemd.services.hdfs-datanode = {
    description = "Hadoop HDFS DataNode";
    wantedBy = [ "multi-user.target" ];
    restartIfChanged = cfg.hdfs.datanode.restartIfChanged;

    serviceConfig = {
      ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
      Restart = "always";
      SyslogIdentifier = "hdfs-datanode";
      User = "hdfs";
    };
  };

  networking.firewall.allowedTCPPorts = mkIf cfg.hdfs.datanode.openFirewall [
    9864 # datanode.http.address
    9866 # datanode.address
    9867 # datanode.ipc.address
  ];
})
|
2021-11-02 16:16:48 +00:00
|
|
|
# JournalNode: systemd unit plus, when requested, its firewall ports.
(mkIf cfg.hdfs.journalnode.enable {
  systemd.services.hdfs-journalnode = {
    description = "Hadoop HDFS JournalNode";
    wantedBy = [ "multi-user.target" ];
    restartIfChanged = cfg.hdfs.journalnode.restartIfChanged;

    serviceConfig = {
      ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} journalnode";
      Restart = "always";
      SyslogIdentifier = "hdfs-journalnode";
      User = "hdfs";
    };
  };

  networking.firewall.allowedTCPPorts = mkIf cfg.hdfs.journalnode.openFirewall [
    8480 # dfs.journalnode.http-address
    8485 # dfs.journalnode.rpc-address
  ];
})
|
2021-11-02 16:16:48 +00:00
|
|
|
# ZooKeeper failover controller: systemd unit only; no firewall ports are
# opened for it here.
(mkIf cfg.hdfs.zkfc.enable {
  systemd.services.hdfs-zkfc = {
    description = "Hadoop HDFS ZooKeeper failover controller";
    wantedBy = [ "multi-user.target" ];
    restartIfChanged = cfg.hdfs.zkfc.restartIfChanged;

    serviceConfig = {
      ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} zkfc";
      Restart = "always";
      SyslogIdentifier = "hdfs-zkfc";
      User = "hdfs";
    };
  };
})
|
2021-11-02 16:16:48 +00:00
|
|
|
# HTTPfs: systemd unit plus, when requested, its firewall port.
(mkIf cfg.hdfs.httpfs.enable {
  systemd.services.hdfs-httpfs = {
    description = "Hadoop httpfs";
    wantedBy = [ "multi-user.target" ];
    inherit (cfg.hdfs.httpfs) restartIfChanged;

    # Exposes the configured temp directory to both preStart and ExecStart.
    environment.HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath;

    # Create the temp directory before the server starts. The expansion is
    # quoted so a tempPath containing whitespace is not word-split by the shell.
    preStart = ''
      mkdir -p "$HTTPFS_TEMP"
    '';

    serviceConfig = {
      User = "httpfs";
      SyslogIdentifier = "hdfs-httpfs";
      ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} httpfs";
      Restart = "always";
    };
  };

  networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.httpfs.openFirewall [
    14000 # httpfs.http.port
  ]);
})
|
2018-05-21 02:09:31 +01:00
|
|
|
# Declare the shared `hdfs` account whenever at least one of the daemons that
# run as User = "hdfs" (namenode, datanode, journalnode, zkfc) is enabled.
(
  let
    needsHdfsUser =
      cfg.hdfs.namenode.enable
      || cfg.hdfs.datanode.enable
      || cfg.hdfs.journalnode.enable
      || cfg.hdfs.zkfc.enable;
  in
  mkIf needsHdfsUser {
    users.users.hdfs = {
      uid = config.ids.uids.hdfs;
      group = "hadoop";
      description = "Hadoop HDFS user";
    };
  }
)
|
2021-11-02 16:16:48 +00:00
|
|
|
# Dedicated system account for the hdfs-httpfs unit, which runs as
# User = "httpfs".
(mkIf cfg.hdfs.httpfs.enable {
  users.users.httpfs = {
    isSystemUser = true;
    group = "hadoop";
    description = "Hadoop HTTPFS user";
  };
})
|
2018-05-21 02:09:31 +01:00
|
|
|
];
|
|
|
|
}
|