Hadoop HDFS JournalNode Configure
The JournalNode uses properties defined inside the "ryba/hadoop/hdfs" module. It also declares a new property "dfs.journalnode.edits.dir".
hdp.hdfs.site['dfs.journalnode.edits.dir']
(string | array)
The directory where the JournalNode writes its transaction logs, for example "/var/run/hadoop-hdfs/journalnode_edit_dir". An array is joined into a comma-separated string. The option is required and the module throws an error when it is missing.
Example:
    {
      "site": {
        "dfs.journalnode.edits.dir": "/var/run/hadoop-hdfs/journalnode_edit_dir"
      }
    }
    module.exports = (service) ->
      options = service.options
Environment
      options.pid_dir ?= service.deps.hadoop_core.options.hdfs.pid_dir
      options.log_dir ?= service.deps.hadoop_core.options.hdfs.log_dir
      options.conf_dir ?= '/etc/hadoop-hdfs-journalnode/conf'
      options.hadoop_opts ?= service.deps.hadoop_core.options.hadoop_opts
      # Java
      options.java_home ?= service.deps.java.options.java_home
      options.hadoop_heap ?= service.deps.hadoop_core.options.hadoop_heap
      options.newsize ?= '200m'
      options.heapsize ?= '1024m'
      # Misc
      options.clean_logs ?= false
      options.iptables ?= service.deps.iptables and service.deps.iptables.options.action is 'start'
      options.fqdn = service.node.fqdn
      options.hdfs_krb5_user = service.deps.hadoop_core.options.hdfs.krb5_user
Identities
      options.hadoop_group = merge {}, service.deps.hadoop_core.options.hadoop_group, options.hadoop_group
      options.group = merge {}, service.deps.hadoop_core.options.hdfs.group, options.group
      options.user = merge {}, service.deps.hadoop_core.options.hdfs.user, options.user
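The `merge` helper (imported in the Dependencies section below) performs a recursive merge in which later arguments take precedence, so user-provided options win over the `hadoop_core` defaults. A minimal sketch of the behaviour these identity options rely on, with purely illustrative values:

    # Illustration only: values and the "nofile" key are hypothetical.
    defaults =
      name: 'hdfs'
      limits: nofile: 64000
    override =
      limits: nofile: 128000
    merged = merge {}, defaults, override
    # merged is { name: 'hdfs', limits: { nofile: 128000 } }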
System Options
      options.opts ?= {}
      options.opts.base ?= ''
      options.opts.java_properties ?= {}
      options.opts.jvm ?= {}
      options.opts.jvm['-Xms'] ?= options.heapsize
      options.opts.jvm['-Xmx'] ?= options.heapsize
      options.opts.jvm['-XX:NewSize='] ?= options.newsize # should be about 1/8 of the heap size
      options.opts.jvm['-XX:MaxNewSize='] ?= options.newsize # should be about 1/8 of the heap size
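The keys of `options.opts.jvm` already carry their full flag prefix ('-Xms', '-XX:NewSize='), so rendering them into a JVM argument string is a plain concatenation. A minimal sketch of how such a map could be flattened; `render_jvm_opts` is a name of our own, not part of this module:

    # Illustration only: concatenate each flag prefix with its value.
    render_jvm_opts = (jvm) ->
      ("#{flag}#{value}" for flag, value of jvm).join ' '
    # With the defaults above, render_jvm_opts options.opts.jvm
    # yields '-Xms1024m -Xmx1024m -XX:NewSize=200m -XX:MaxNewSize=200m'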
Configuration
      options.core_site = merge {}, service.deps.hadoop_core.options.core_site, options.core_site or {}
      options.hdfs_site ?= {}
      options.hdfs_site['dfs.journalnode.rpc-address'] ?= '0.0.0.0:8485'
      options.hdfs_site['dfs.journalnode.http-address'] ?= '0.0.0.0:8480'
      options.hdfs_site['dfs.journalnode.https-address'] ?= '0.0.0.0:8481'
      options.hdfs_site['dfs.http.policy'] ?= 'HTTPS_ONLY'
      # The recommendation is to use dedicated disks to optimize fsync operations
      options.hdfs_site['dfs.journalnode.edits.dir'] = options.hdfs_site['dfs.journalnode.edits.dir'].join ',' if Array.isArray options.hdfs_site['dfs.journalnode.edits.dir']
      # options.hdfs_site['dfs.journalnode.edits.dir'] ?= ['/var/hdfs/edits']
      throw Error "Required Option \"hdfs_site['dfs.journalnode.edits.dir']\": got #{JSON.stringify options.hdfs_site['dfs.journalnode.edits.dir']}" unless options.hdfs_site['dfs.journalnode.edits.dir']
Kerberos
      options.krb5 ?= {}
      options.krb5.realm ?= service.deps.krb5_client.options.etc_krb5_conf?.libdefaults?.default_realm
      throw Error 'Required Options: "realm"' unless options.krb5.realm
      # options.krb5.admin ?= service.deps.krb5_client.options.admin[options.krb5.realm]
      # TODO: Principal should be "jn/{host}@{realm}", however, there are
      # no properties to declare a separate keytab for the jn and spnego principals
      options.hdfs_site['dfs.journalnode.kerberos.internal.spnego.principal'] = "HTTP/_HOST@#{options.krb5.realm}"
      options.hdfs_site['dfs.journalnode.kerberos.principal'] = "HTTP/_HOST@#{options.krb5.realm}"
      options.hdfs_site['dfs.journalnode.keytab.file'] = '/etc/security/keytabs/spnego.service.keytab'
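At runtime, Hadoop substitutes the `_HOST` token in a principal with the node's fully qualified hostname. A sketch of that substitution for illustration; Hadoop performs this internally, and `resolve_principal` is a name of our own:

    # Illustration only: mimics Hadoop's internal _HOST expansion.
    resolve_principal = (principal, fqdn) ->
      principal.replace '_HOST', fqdn
    # resolve_principal 'HTTP/_HOST@MY.REALM', 'jn1.cluster.local'
    # yields 'HTTP/jn1.cluster.local@MY.REALM'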
SSL
      options.ssl = merge {}, service.deps.hadoop_core.options.ssl, options.ssl
      options.ssl_server = merge {}, service.deps.hadoop_core.options.ssl_server, options.ssl_server or {}
      options.ssl_client = merge {}, service.deps.hadoop_core.options.ssl_client, options.ssl_client or {}
Metrics
      options.metrics = merge {}, service.deps.metrics?.options, options.metrics
      options.metrics.config ?= {}
      options.metrics.config["*.period"] ?= '60'
      options.metrics.sinks ?= {}
      options.metrics.sinks.file_enabled ?= true
      options.metrics.sinks.ganglia_enabled ?= false
      options.metrics.sinks.graphite_enabled ?= false
      # File sink
      if options.metrics.sinks.file_enabled
        options.metrics.config["*.sink.file.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.file.config if service.deps.metrics?.options?.sinks?.file_enabled
        options.metrics.config["journalnode.sink.file.class"] ?= 'org.apache.hadoop.metrics2.sink.FileSink'
        options.metrics.config['journalnode.sink.file.filename'] ?= 'journalnode-metrics.out'
      # Ganglia sink, accepted properties are "servers" and "supportsparse"
      if options.metrics.sinks.ganglia_enabled
        options.metrics.config["*.sink.ganglia.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.ganglia.config if service.deps.metrics?.options?.sinks?.ganglia_enabled
        options.metrics.config["journalnode.sink.ganglia.class"] ?= 'org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31'
      # Graphite sink
      if options.metrics.sinks.graphite_enabled
        throw Error 'Missing remote_host ryba.hdfs.jn.metrics.sinks.graphite.config.server_host' unless options.metrics.sinks.graphite.config.server_host?
        throw Error 'Missing remote_port ryba.hdfs.jn.metrics.sinks.graphite.config.server_port' unless options.metrics.sinks.graphite.config.server_port?
        options.metrics.config["journalnode.sink.graphite.class"] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
        options.metrics.config["*.sink.graphite.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.graphite.config if service.deps.metrics?.options?.sinks?.graphite_enabled
Wait
      options.wait_krb5_client = service.deps.krb5_client.options.wait
      options.wait_zookeeper_server = service.deps.zookeeper_server[0].options.wait
      options.wait = {}
      options.wait.rpc = for srv in service.deps.hdfs_jn
        srv.options.hdfs_site ?= {}
        srv.options.hdfs_site['dfs.journalnode.rpc-address'] ?= '0.0.0.0:8485'
        [_, port] = srv.options.hdfs_site['dfs.journalnode.rpc-address'].split ':'
        host: srv.node.fqdn, port: port
      options.wait.http = for srv in service.deps.hdfs_jn
        srv.options.hdfs_site ?= {}
        policy = srv.options.hdfs_site['dfs.http.policy'] or options.hdfs_site['dfs.http.policy']
        address = if policy is 'HTTP_ONLY'
          srv.options.hdfs_site['dfs.journalnode.http-address'] or '0.0.0.0:8480'
        else
          srv.options.hdfs_site['dfs.journalnode.https-address'] or '0.0.0.0:8481'
        [_, port] = address.split ':'
        host: srv.node.fqdn, port: port
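For example, with two JournalNodes on default ports and the default 'HTTPS_ONLY' policy, the computed addresses would look like the sketch below; hostnames are illustrative, and the ports stay strings because they come from `split`:

    # Approximate shape of options.wait:
    # rpc: [
    #   { host: 'jn1.cluster.local', port: '8485' }
    #   { host: 'jn2.cluster.local', port: '8485' }
    # ]
    # http: [
    #   { host: 'jn1.cluster.local', port: '8481' }
    #   { host: 'jn2.cluster.local', port: '8481' }
    # ]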
Dependencies
    {merge} = require '@nikitajs/core/lib/misc'