# Configuration
Look at the file [DFSConfigKeys.java][keys] for an exhaustive list of supported properties.
*   `site` (object)
    Properties added to the "hdfs-site.xml" file.
*   `opts` (string)
    NameNode options.
Example:
{
  "ryba": {
    "hdfs": {
      "nn": {
        "java_opts": "-Xms1024m -Xmx1024m",
        "include": ["in.my.cluster"],
        "exclude": "not.in.my.cluster"
      }
    }
  }
}
module.exports = (service) ->
options = service.options
## Identities
options.hadoop_group = merge {}, service.deps.hadoop_core.options.hadoop_group, options.hadoop_group
options.group = merge {}, service.deps.hadoop_core.options.hdfs.group, options.group
options.user = merge {}, service.deps.hadoop_core.options.hdfs.user, options.user
## Environment
# Layout
options.pid_dir ?= service.deps.hadoop_core.options.hdfs.pid_dir
options.log_dir ?= service.deps.hadoop_core.options.hdfs.log_dir
options.conf_dir ?= '/etc/hadoop-hdfs-namenode/conf'
# Java
options.java_home ?= service.deps.java.options.java_home
options.hadoop_opts ?= service.deps.hadoop_core.options.hadoop_opts
options.hadoop_namenode_init_heap ?= '-Xms1024m'
options.heapsize ?= '1024m'
options.newsize ?= '200m'
options.java_opts ?= ""
options.hadoop_heap ?= service.deps.hadoop_core.options.hadoop_heap
# Misc
options.clean_logs ?= false
options.fqdn ?= service.node.fqdn
options.hostname ?= service.node.hostname
options.iptables ?= service.deps.iptables and service.deps.iptables.options.action is 'start'
options.hadoop_policy ?= {}
options.hdfs_krb5_user = service.deps.hadoop_core.options.hdfs.krb5_user
options.test_krb5_user ?= service.deps.test_user.options.krb5.user
## System Options
options.opts ?= {}
options.opts.base ?= ''
options.opts.java_properties ?= {}
options.opts.jvm ?= {}
options.opts.jvm['-Xms'] ?= options.heapsize
options.opts.jvm['-Xmx'] ?= options.heapsize
options.opts.jvm['-XX:NewSize='] ?= options.newsize # Should be 1/8 of the NameNode heap size
options.opts.jvm['-XX:MaxNewSize='] ?= options.newsize # Should be 1/8 of the NameNode heap size
## Configuration
# Hadoop core-site.xml
options.core_site = merge {}, service.deps.hadoop_core.options.core_site, options.core_site or {}
# Number of minutes after which the checkpoint gets deleted
options.core_site['fs.trash.interval'] ?= '10080' #1 week
# Hadoop hdfs-site.xml
options.hdfs_site ?= {}
options.hdfs_site['dfs.http.policy'] ?= 'HTTPS_ONLY' # HTTP_ONLY or HTTPS_ONLY or HTTP_AND_HTTPS
# Data
# Comma separated list of paths. Use the list of directories.
# For example, /data/1/hdfs/nn,/data/2/hdfs/nn.
options.hdfs_site['dfs.namenode.name.dir'] ?= ['file:///var/hdfs/name']
options.hdfs_site['dfs.namenode.name.dir'] = options.hdfs_site['dfs.namenode.name.dir'].join ',' if Array.isArray options.hdfs_site['dfs.namenode.name.dir']
# Network
options.slaves = service.deps.hdfs_dn.map (srv) -> srv.node.fqdn
options.hdfs_site['dfs.hosts'] ?= "#{options.conf_dir}/dfs.include"
options.include ?= service.deps.hdfs_dn.map (srv) -> srv.node.fqdn
options.include = string.lines options.include if typeof options.include is 'string'
options.hdfs_site['dfs.hosts.exclude'] ?= "#{options.conf_dir}/dfs.exclude"
options.exclude ?= []
options.exclude = string.lines options.exclude if typeof options.exclude is 'string'
options.hdfs_site['fs.permissions.umask-mode'] ?= '026' # Directories are created 0751, files 0640
# If "true", access tokens are used as capabilities
# for accessing datanodes. If "false", no access tokens are checked on
# accessing datanodes.
options.hdfs_site['dfs.block.access.token.enable'] ?= if options.core_site['hadoop.security.authentication'] is 'kerberos' then 'true' else 'false'
options.hdfs_site['dfs.block.local-path-access.user'] ?= ''
options.hdfs_site['dfs.namenode.safemode.threshold-pct'] ?= '0.99'
# Fix HDP Companion File bug
options.hdfs_site['dfs.https.namenode.https-address'] = null
# Activate ACLs
options.hdfs_site['dfs.namenode.acls.enabled'] ?= 'true'
options.hdfs_site['dfs.namenode.accesstime.precision'] ?= null
## Kerberos
options.krb5 ?= {}
options.krb5.realm ?= service.deps.krb5_client.options.etc_krb5_conf?.libdefaults?.default_realm
throw Error 'Required Options: "realm"' unless options.krb5.realm
options.krb5.admin ?= service.deps.krb5_client.options.admin[options.krb5.realm]
# Configuration in "hdfs-site.xml"
options.hdfs_site['dfs.namenode.kerberos.principal'] ?= "nn/_HOST@#{options.krb5.realm}"
options.hdfs_site['dfs.namenode.keytab.file'] ?= '/etc/security/keytabs/nn.service.keytab'
options.hdfs_site['dfs.namenode.kerberos.internal.spnego.principal'] ?= "HTTP/_HOST@#{options.krb5.realm}"
options.hdfs_site['dfs.namenode.kerberos.https.principal'] = "HTTP/_HOST@#{options.krb5.realm}"
options.hdfs_site['dfs.web.authentication.kerberos.principal'] ?= "HTTP/_HOST@#{options.krb5.realm}"
options.hdfs_site['dfs.web.authentication.kerberos.keytab'] ?= '/etc/security/keytabs/spnego.service.keytab'
## Configuration for HDFS High Availability (HA)

Add the High Availability specific properties to the "hdfs-site.xml" file. The inserted properties are similar to the ones of a client or slave configuration, with the additional "dfs.namenode.shared.edits.dir" property.

The default configuration implements the "sshfence" fencing method. This method SSHes to the target node and uses fuser to kill the process listening on the service's TCP port.
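As a rough sketch, here is the kind of HA property set this section ends up generating for a hypothetical two-NameNode cluster. The nameservice name, hostnames and JournalNode port below are assumptions for illustration, not values produced by this module.

```coffee
# Sketch only: "mycluster", the hosts and the JournalNode port 8485 are assumptions.
ha_hdfs_site_sample =
  'dfs.nameservices': 'mycluster'
  'dfs.ha.namenodes.mycluster': 'master1,master2'
  'dfs.namenode.rpc-address.mycluster.master1': 'master1.cluster:8020'
  'dfs.namenode.rpc-address.mycluster.master2': 'master2.cluster:8020'
  'dfs.namenode.https-address.mycluster.master1': 'master1.cluster:9871'
  'dfs.namenode.https-address.mycluster.master2': 'master2.cluster:9871'
  'dfs.namenode.shared.edits.dir': 'qjournal://jn1.cluster:8485;jn2.cluster:8485;jn3.cluster:8485/mycluster'
  'dfs.ha.automatic-failover.enabled': 'true'
  'dfs.client.failover.proxy.provider.mycluster': 'org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
ha_core_site_sample =
  'fs.defaultFS': 'hdfs://mycluster'
```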
# HDFS Single Node configuration
if service.instances.length is 1
options.core_site['fs.defaultFS'] ?= "hdfs://#{service.node.fqdn}:8020"
options.hdfs_site['dfs.ha.automatic-failover.enabled'] ?= 'false'
options.hdfs_site['dfs.namenode.http-address'] ?= '0.0.0.0:9870'
options.hdfs_site['dfs.namenode.https-address'] ?= '0.0.0.0:9871'
options.hdfs_site['dfs.nameservices'] = null
# HDFS HA configuration
else if service.instances.length is 2
throw Error "Required Option: options.nameservice" unless options.nameservice
options.hdfs_site['dfs.nameservices'] ?= ''
options.hdfs_site['dfs.nameservices'] += "#{options.nameservice} " unless options.nameservice in options.hdfs_site['dfs.nameservices'].split ' '
options.hdfs_site["dfs.client.failover.proxy.provider.#{options.nameservice}"] ?= 'org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
options.core_site['fs.defaultFS'] ?= "hdfs://#{options.nameservice}"
options.active_nn_host ?= service.instances[0].node.fqdn
options.standby_nn_host = service.instances.filter( (instance) -> instance.node.fqdn isnt options.active_nn_host )[0].node.fqdn
for srv in service.deps.hdfs_nn
srv.options.hostname ?= srv.node.hostname
for srv in service.deps.hdfs_jn
options.hdfs_site['dfs.journalnode.kerberos.principal'] ?= srv.options.hdfs_site['dfs.journalnode.kerberos.principal']
else throw Error "Invalid number of NameNodes, got #{service.instances.length}, expecting 1 or 2"
Since HDFS-6376, the local nameservice must be explicitly declared as internal in order to also expose other nameservices, for distcp purposes.
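As an illustration (the nameservice names are made up, and the remote entry would be added by hand rather than by this module): a cluster whose own nameservice is `mycluster` and which also declares a remote `backupcluster` for distcp would list both in `dfs.nameservices` but keep only its own in `dfs.internal.nameservices`:

```coffee
# Illustration only: nameservice names are assumptions.
distcp_nameservices_sample =
  'dfs.nameservices': 'mycluster backupcluster'   # every nameservice the cluster knows about
  'dfs.internal.nameservices': 'mycluster'        # only the nameservice(s) this cluster actually serves
```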
options.hdfs_site['dfs.internal.nameservices'] ?= ''
if options.nameservice not in options.hdfs_site['dfs.internal.nameservices'].split ','
options.hdfs_site['dfs.internal.nameservices'] += "#{if options.hdfs_site['dfs.internal.nameservices'] isnt '' then ',' else ''}#{options.nameservice}"
options.hdfs_site["dfs.ha.namenodes.#{options.nameservice}"] = (for srv in service.deps.hdfs_nn then srv.options.hostname).join ','
for srv in service.deps.hdfs_nn
options.hdfs_site['dfs.namenode.http-address'] = null
options.hdfs_site['dfs.namenode.https-address'] = null
options.hdfs_site["dfs.namenode.rpc-address.#{options.nameservice}.#{srv.options.hostname}"] ?= "#{srv.node.fqdn}:8020"
options.hdfs_site["dfs.namenode.http-address.#{options.nameservice}.#{srv.options.hostname}"] ?= "#{srv.node.fqdn}:9870"
options.hdfs_site["dfs.namenode.https-address.#{options.nameservice}.#{srv.options.hostname}"] ?= "#{srv.node.fqdn}:9871"
options.hdfs_site["dfs.client.failover.proxy.provider.#{options.nameservice}"] ?= 'org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
options.hdfs_site['dfs.ha.automatic-failover.enabled'] ?= 'true'
options.hdfs_site['dfs.namenode.shared.edits.dir'] = (for srv in service.deps.hdfs_jn then "#{srv.node.fqdn}:#{srv.options.hdfs_site['dfs.journalnode.rpc-address'].split(':')[1]}").join ';'
options.hdfs_site['dfs.namenode.shared.edits.dir'] = "qjournal://#{options.hdfs_site['dfs.namenode.shared.edits.dir']}/#{options.nameservice}"
## SSL
options.ssl = merge {}, service.deps.hadoop_core.options.ssl, options.ssl
options.ssl_server = merge {}, service.deps.hadoop_core.options.ssl_server, options.ssl_server or {},
'ssl.server.keystore.location': "#{options.conf_dir}/keystore"
'ssl.server.truststore.location': "#{options.conf_dir}/truststore"
options.ssl_client = merge {}, service.deps.hadoop_core.options.ssl_client, options.ssl_client or {},
'ssl.client.truststore.location': "#{options.conf_dir}/truststore"
### Fencing

To prevent a split-brain scenario, in addition to the Quorum Journal Manager protecting the edit log writes, the "sshfence" method opens an SSH connection from the new active NameNode to the previously active, malfunctioning one in order to "shoot it in the head" (STONITH). If the previous master machine is dead, the SSH connection will fail, so another fencing method should be configured as a fallback to avoid blocking the failover.
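Rendered with the default `hdfs` system user, the fencing properties configured below would look something like this. The user name and its home directory are assumptions matching common packaging defaults, not values guaranteed by this module.

```coffee
# Sketch only: the "hdfs" user and its home directory are assumptions.
fencing_sample =
  'dfs.ha.fencing.methods': 'sshfence(hdfs)\nshell(/bin/true)'
  'dfs.ha.fencing.ssh.connect-timeout': '30000'
  'dfs.ha.fencing.ssh.private-key-files': '/var/lib/hadoop-hdfs/.ssh/id_rsa'
```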
options.hdfs_site['dfs.ha.fencing.methods'] ?= """
sshfence(#{options.user.name})
shell(/bin/true)
"""
options.hdfs_site['dfs.ha.fencing.ssh.connect-timeout'] ?= '30000'
options.hdfs_site['dfs.ha.fencing.ssh.private-key-files'] ?= "#{options.user.home}/.ssh/id_rsa"
## Upgrade Domain

Upgrade domains try to address the limitations of the block placement policy during rolling upgrades. The idea is to group DataNodes in a new dimension called the upgrade domain, in addition to the existing rack-based grouping.
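For example, the `options.hosts` entries built below pair each DataNode with its rack position; a sketch with made-up hostnames and upgrade domain identifiers:

```coffee
# Sketch only: DataNode FQDNs and upgrade domain identifiers are assumptions.
hosts_sample = [
  { hostName: 'dn1.cluster', upgradeDomain: 'ud01' }
  { hostName: 'dn2.cluster', upgradeDomain: 'ud02' }
  { hostName: 'dn3.cluster', upgradeDomain: 'ud01' }
]
```

This list is presumably rendered elsewhere into the JSON hosts file consumed by the CombinedHostFileManager configured below.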
# Default configuration from Hadoop 2
options.hdfs_site['dfs.namenode.hosts.provider.classname'] ?= 'org.apache.hadoop.hdfs.server.blockmanagement.HostFileManager'
if options.upgrade_domain
# To enable upgrade domains, use the CombinedHostFileManager
options.hdfs_site['dfs.namenode.hosts.provider.classname'] = 'org.apache.hadoop.hdfs.server.blockmanagement.CombinedHostFileManager'
# enable upgrade domains
for srv in service.deps.hdfs_dn
throw Error "rack position not specified" unless srv.options.rack.position?
options.hosts ?= service.deps.hdfs_dn.map (srv) -> hostName: srv.node.fqdn, upgradeDomain: srv.options.rack.position
options.hdfs_site['dfs.block.replicator.classname'] ?= 'org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyWithUpgradeDomain'
options.hdfs_site['dfs.hosts'] ?= "#{options.conf_dir}/dfs.hosts"
## Metrics
options.metrics = merge {}, service.deps.metrics?.options, options.metrics
options.metrics.config ?= {}
options.metrics.sinks ?= {}
options.metrics.sinks.file_enabled ?= true
options.metrics.sinks.ganglia_enabled ?= false
options.metrics.sinks.graphite_enabled ?= false
# File sink
if options.metrics.sinks.file_enabled
options.metrics.config["namenode.sink.file.class"] ?= 'org.apache.hadoop.metrics2.sink.FileSink'
options.metrics.config["*.sink.file.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.file.config if service.deps.metrics?.options?.sinks?.file_enabled
options.metrics.config['namenode.sink.file.filename'] ?= 'namenode-metrics.out'
# Ganglia sink, accepted properties are "servers" and "supportsparse"
if options.metrics.sinks.ganglia_enabled
options.metrics.config["namenode.sink.ganglia.class"] ?= 'org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31'
options.metrics.config["*.sink.ganglia.#{k}"] ?= v for k, v of options.sinks.ganglia.config if service.deps.metrics?.options?.sinks?.ganglia_enabled
# Graphite Sink
if options.metrics.sinks.graphite_enabled
throw Error 'Missing server_host ryba.hdfs.nn.metrics.sinks.graphite.config.server_host' unless options.metrics.sinks.graphite.config.server_host?
throw Error 'Missing server_port ryba.hdfs.nn.metrics.sinks.graphite.config.server_port' unless options.metrics.sinks.graphite.config.server_port?
options.metrics.config["namenode.sink.graphite.class"] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
options.metrics.config["*.sink.graphite.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.graphite.config if service.deps.metrics?.options?.sinks?.graphite_enabled
## Log4J

Inherits the log4j configuration from the ryba/log4j module. The rendered file uses the variable `options.log4j.properties`.
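When a remote log collector is configured, the socket appender defined below ends up, once rendered, as log4j properties along these lines. The key layout follows the standard `log4j.appender.<NAME>` convention; the exact rendering is performed by the appender helper, so this is only a sketch.

```coffee
# Sketch only: the rendered key names assume the standard "log4j.appender.<NAME>" layout.
log4j_socket_sample =
  'log4j.appender.SOCKET': 'org.apache.log4j.net.SocketAppender'
  'log4j.appender.SOCKET.Application': '${hadoop.log.application}'
  'log4j.appender.SOCKET.RemoteHost': '${hadoop.log.remote_host}'
  'log4j.appender.SOCKET.Port': '${hadoop.log.remote_port}'
  'log4j.appender.SOCKET.ReconnectionDelay': '10000'
```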
options.log4j = merge {}, service.deps.log4j?.options, options.log4j
options.log4j.properties ?= {}
options.log4j.root_logger ?= 'INFO,RFA'
options.log4j.security_logger ?= 'INFO,DRFAS'
options.log4j.audit_logger ?= 'INFO,RFAAUDIT'
# adding SOCKET appender
if options.log4j.remote_host? and options.log4j.remote_port?
options.log4j.socket_client ?= "SOCKET"
# Root logger
if options.log4j.root_logger.indexOf(options.log4j.socket_client) is -1
then options.log4j.root_logger += ",#{options.log4j.socket_client}"
# Security Logger
if options.log4j.security_logger.indexOf(options.log4j.socket_client) is -1
then options.log4j.security_logger += ",#{options.log4j.socket_client}"
# Audit Logger
if options.log4j.audit_logger.indexOf(options.log4j.socket_client) is -1
then options.log4j.audit_logger += ",#{options.log4j.socket_client}"
# Adding Application name, remote host and port values in namenode's opts
options.opts['hadoop.log.application'] ?= 'namenode'
options.opts['hadoop.log.remote_host'] ?= options.log4j.remote_host
options.opts['hadoop.log.remote_port'] ?= options.log4j.remote_port
options.log4j.socket_opts ?=
Application: '${hadoop.log.application}'
RemoteHost: '${hadoop.log.remote_host}'
Port: '${hadoop.log.remote_port}'
ReconnectionDelay: '10000'
options.log4j.properties = merge options.log4j.properties, appender
type: 'org.apache.log4j.net.SocketAppender'
name: options.log4j.socket_client
logj4: options.log4j.properties
properties: options.log4j.socket_opts
## Export configuration
for srv in service.deps.hdfs_dn
for property in [
'dfs.namenode.kerberos.principal'
'dfs.namenode.kerberos.internal.spnego.principal'
'dfs.namenode.kerberos.https.principal'
'dfs.web.authentication.kerberos.principal'
'dfs.ha.automatic-failover.enabled'
'dfs.nameservices'
'dfs.internal.nameservices'
'fs.permissions.umask-mode'
'dfs.block.access.token.enable'
] then srv.options.hdfs_site[property] ?= options.hdfs_site[property]
srv.options.hdfs_site["dfs.client.failover.proxy.provider.#{options.nameservice}"] ?= options.hdfs_site["dfs.client.failover.proxy.provider.#{options.nameservice}"]
for property in [
'fs.defaultFS'
] then srv.options.core_site[property] ?= options.core_site[property]
for property of options.hdfs_site
ok = false
ok = true if /^dfs\.namenode\.\w+-address/.test property
ok = true if property.indexOf('dfs.ha.namenodes.') is 0
continue unless ok
srv.options.hdfs_site[property] = options.hdfs_site[property]
for srv in service.deps.hdfs_jn
for property in [
'dfs.namenode.kerberos.principal'
'dfs.nameservices'
'dfs.internal.nameservices'
'fs.permissions.umask-mode'
'dfs.block.access.token.enable'
] then srv.options.hdfs_site[property] ?= options.hdfs_site[property]
for property in [
'fs.defaultFS'
] then srv.options.core_site[property] ?= options.core_site[property]
for property of options.hdfs_site
ok = false
ok = true if /^dfs\.namenode\.\w+-address/.test property
ok = true if property.indexOf('dfs.ha.namenodes.') is 0
continue unless ok
srv.options.hdfs_site[property] = options.hdfs_site[property]
## Test
options.test = merge {}, service.deps.test_user.options, options.test or {}
## Wait
options.wait_zookeeper_server = service.deps.zookeeper_server[0].options.wait
options.wait_hdfs_jn = service.deps.hdfs_jn[0].options.wait
options.wait_hdfs_dn = service.deps.hdfs_dn[0].options.wait
options.wait = {}
options.wait.conf_dir = options.conf_dir
options.wait.ipc = for srv in service.deps.hdfs_nn
nameservice = if options.nameservice then ".#{options.nameservice}" else ''
hostname = if options.nameservice then ".#{srv.node.hostname}" else ''
if srv.options.hdfs_site["dfs.namenode.rpc-address#{nameservice}#{hostname}"]
[fqdn, port] = srv.options.hdfs_site["dfs.namenode.rpc-address#{nameservice}#{hostname}"].split(':')
else
fqdn = srv.node.fqdn
port = 8020
host: fqdn, port: port
options.wait.http = for srv in service.deps.hdfs_nn
protocol = if options.hdfs_site['dfs.http.policy'] is 'HTTP_ONLY' then 'http' else 'https'
nameservice = if options.nameservice then ".#{options.nameservice}" else ''
hostname = if options.nameservice then ".#{srv.node.hostname}" else ''
if srv.options.hdfs_site["dfs.namenode.rpc-address#{nameservice}#{hostname}"]
[fqdn, port] = srv.options.hdfs_site["dfs.namenode.#{protocol}-address#{nameservice}#{hostname}"].split(':')
else
fqdn = srv.node.fqdn
port = if options.hdfs_site['dfs.http.policy'] is 'HTTP_ONLY' then '9870' else '9871'
host: fqdn, port: port
options.wait.krb5_user = service.deps.hadoop_core.options.hdfs.krb5_user
## Dependencies
string = require '@nikitajs/core/lib/misc/string'
{merge} = require '@nikitajs/core/lib/misc'
appender = require '../../lib/appender'