MapReduce JobHistoryServer (JHS) Configure
# Configure handler for the MapReduce JobHistoryServer.
# Receives the `service` descriptor and completes `service.options` in place,
# reading defaults from the services exposed under `service.deps`.
module.exports = (service) ->
options = service.options
Identities
# Group and user identities start from the hadoop_core definitions. Values
# already set on this service are passed last to `merge` (presumably later
# arguments win -- confirm against the helper).
core_identities = service.deps.hadoop_core.options
options.hadoop_group = merge {}, core_identities.hadoop_group, options.hadoop_group
options.group = merge {}, core_identities.mapred.group, options.group
options.user = merge {}, core_identities.mapred.user, options.user
Kerberos
# Kerberos settings default to what the krb5_client dependency exposes.
options.krb5 ?= {}
# Fall back to the client's libdefaults.default_realm when no realm is set.
options.krb5.realm ?= service.deps.krb5_client.options.etc_krb5_conf?.libdefaults?.default_realm
# The realm is used as the key of the admin lookup and inside the JHS
# principal, so fail early when it is missing.
throw Error 'Required Options: "realm"' unless options.krb5.realm
# Admin information is read from the krb5_client `admin` map, keyed by realm.
options.krb5.admin ?= service.deps.krb5_client.options.admin[options.krb5.realm]
# Kerberos Test Principal
options.test_krb5_user ?= service.deps.test_user.options.krb5.user
Environment
# Layout
# NOTE(review): the default home points at the YARN NodeManager directory --
# confirm this is intended for the JobHistoryServer.
options.home ?= '/usr/hdp/current/hadoop-yarn-nodemanager'
options.log_dir ?= '/var/log/hadoop/mapreduce'
options.pid_dir ?= '/var/run/hadoop/mapreduce'
options.conf_dir ?= '/etc/hadoop-mapreduce-historyserver/conf'
# Java
# JVM related settings default to the values of the java and hadoop_core services.
options.java_home ?= service.deps.java.options.java_home
options.hadoop_heap ?= service.deps.hadoop_core.options.hadoop_heap
options.hadoop_opts ?= service.deps.hadoop_core.options.hadoop_opts
options.hadoop_client_opts ?= service.deps.hadoop_core.options.hadoop_client_opts
options.heapsize ?= '900'
# Misc
# Enabled by default only when an iptables dependency exists and its action is 'start'.
options.iptables ?= service.deps.iptables and service.deps.iptables.options.action is 'start'
# Plain assignment: always taken from hadoop_core, any local value is overwritten.
options.hdfs_krb5_user = service.deps.hadoop_core.options.hdfs.krb5_user
Configuration
# Hadoop core "core-site.xml"
# Based on the first hdfs_client dependency, with local properties passed last to `merge`.
options.core_site = merge {}, service.deps.hdfs_client[0].options.core_site, options.core_site or {}
# HDFS client "hdfs-site.xml"
# Same construction as core_site, from the first hdfs_client dependency.
options.hdfs_site = merge {}, service.deps.hdfs_client[0].options.hdfs_site, options.hdfs_site or {}
# YARN client "yarn-site.xml"
# Options will be exported by the YARN RM
options.yarn_site ?= {}
# MapRed JHS "mapred-site.xml"
options.mapred_site ?= {}
# Kerberos identity of the JHS; the principal embeds this node's FQDN and the realm.
options.mapred_site['mapreduce.jobhistory.keytab'] ?= "/etc/security/keytabs/jhs.service.keytab"
options.mapred_site['mapreduce.jobhistory.principal'] ?= "jhs/#{service.node.fqdn}@#{options.krb5.realm}"
# Fix: src in "[DFSConfigKeys.java][keys]" and [HDP port list] mention 13562 while companion files mention 8081
options.mapred_site['mapreduce.shuffle.port'] ?= '13562'
# Service endpoints, all bound to this node's FQDN.
options.mapred_site['mapreduce.jobhistory.address'] ?= "#{service.node.fqdn}:10020"
options.mapred_site['mapreduce.jobhistory.webapp.address'] ?= "#{service.node.fqdn}:19888"
options.mapred_site['mapreduce.jobhistory.webapp.https.address'] ?= "#{service.node.fqdn}:19889"
options.mapred_site['mapreduce.jobhistory.admin.address'] ?= "#{service.node.fqdn}:10033"
Note: As of version "2.4.0", the property "mapreduce.jobhistory.http.policy" isn't honored. Instead, the property "yarn.http.policy" is used. It is exported from the yarn_rm.
# options.yarn_site['yarn.http.policy'] ?= service.deps.yarn_rm.options.yarn_site['yarn.http.policy']
# The property is still defaulted here; this module reads it back when it
# selects the webapp address used for the `wait.webapp` targets.
options.mapred_site['mapreduce.jobhistory.http.policy'] ?= 'HTTPS_ONLY'
# See './hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java#158'
# yarn.site['mapreduce.jobhistory.webapp.spnego-principal']
# yarn.site['mapreduce.jobhistory.webapp.spnego-keytab-file']
Configuration for Staging Directories
The property "yarn.app.mapreduce.am.staging-dir" is an alternative to "done-dir" and "intermediate-done-dir". According to Cloudera: Configure mapreduce.jobhistory.intermediate-done-dir and mapreduce.jobhistory.done-dir in mapred-site.xml. Create these two directories. Set permissions on mapreduce.jobhistory.intermediate-done-dir to 1777. Set permissions on mapreduce.jobhistory.done-dir to 750.
If "yarn.app.mapreduce.am.staging-dir" is active (that is, if the other two properties are unset), a "history" folder must be created and owned by the MapReduce user. On startup, the JHS will create two folders inside it:
hdfs dfs -ls /user/history
Found 2 items
drwxrwx--- - mapred hadoop 0 2015-08-04 23:21 /user/history/done
drwxrwxrwt - mapred hadoop 0 2015-08-04 23:21 /user/history/done_intermediate
# Plain assignment: the staging directory is always forced to "/user", any
# value provided by the caller is overwritten.
options.mapred_site['yarn.app.mapreduce.am.staging-dir'] = "/user" # default to "/tmp/hadoop-yarn/staging"
# options.mapred_site['mapreduce.jobhistory.done-dir'] ?= '/mr-history/done' # Directory where history files are managed by the MR JobHistory Server.
# options.mapred_site['mapreduce.jobhistory.intermediate-done-dir'] ?= '/mr-history/tmp' # Directory where history files are written by MapReduce jobs.
# Both directories are explicitly set to null so the staging-dir setting applies
# (presumably a null value removes the property when the file is written -- confirm).
options.mapred_site['mapreduce.jobhistory.done-dir'] = null
options.mapred_site['mapreduce.jobhistory.intermediate-done-dir'] = null
Job Recovery
The following properties provide persistent state to the Job History Server. They are referenced by [the druid hadoop configuration][druid] and [the Ambari 2.3 stack][amb-mr-site]. Job Recovery is activated by default.
# Job recovery defaults (enabled, LevelDB state store class and its path).
# Each property is only assigned when it has no value yet.
recovery_defaults = {'mapreduce.jobhistory.recovery.enable': 'true', 'mapreduce.jobhistory.recovery.store.class': 'org.apache.hadoop.mapreduce.v2.hs.HistoryServerLeveldbStateStoreService', 'mapreduce.jobhistory.recovery.store.leveldb.path': '/var/mapred/jhs'}
options.mapred_site[property] ?= value for property, value of recovery_defaults
SSL
# SSL settings start from the hadoop_core definitions, followed by local values.
options.ssl = merge {}, service.deps.hadoop_core.options.ssl, options.ssl
# The keystore and truststore locations are passed as the last `merge` argument
# and point inside this service's conf_dir (presumably the last argument takes
# precedence -- confirm against the helper).
options.ssl_server = merge {}, service.deps.hadoop_core.options.ssl_server, options.ssl_server or {},
'ssl.server.keystore.location': "#{options.conf_dir}/keystore"
'ssl.server.truststore.location': "#{options.conf_dir}/truststore"
# Same construction for the client side; only the truststore location is set.
options.ssl_client = merge {}, service.deps.hadoop_core.options.ssl_client, options.ssl_client or {},
'ssl.client.truststore.location': "#{options.conf_dir}/truststore"
Metrics
# Metrics built from the hadoop_core metrics definition.
# NOTE(review): `options.metrics` is rebuilt from `service.deps.metrics` further
# down in this file; this section looks like an older variant -- confirm it is
# still wanted.
options.metrics = merge {}, service.deps.hadoop_core.options.metrics, options.metrics
options.metrics.config ?= {}
# NOTE(review): `options.metrics.sinks` is read without a guard here, it must be
# provided by hadoop_core or by the caller.
if options.metrics.sinks.file_enabled
options.metrics.config["*.sink.file.#{k}"] ?= v for k, v of options.metrics.sinks.file
if options.metrics.sinks.graphite_enabled
# Both the graphite host and port are required once the sink is enabled.
throw Error 'Unvalid metrics sink, please provide ryba.metrics.sinks.graphite.config.server_host and server_port' unless options.metrics.sinks.graphite.config.server_host? and options.metrics.sinks.graphite.config.server_port?
options.metrics.config["*.sink.graphite.#{k}"] ?= v for k, v of options.metrics.sinks.graphite.config
# NOTE(review): this uses the "historyserver." prefix while the later metrics
# section uses "jobhistoryserver." -- confirm which prefix is expected.
options.metrics.config["historyserver.sink.graphite.class"] ?= options.metrics.sinks.graphite.class
options.metrics.config["mrappmaster.sink.graphite.class"] ?= options.metrics.sinks.graphite.class
Metrics
# Metrics start from the options of the optional `metrics` dependency, followed
# by the local values.
options.metrics = merge {}, service.deps.metrics?.options, options.metrics
options.metrics.config ?= {}
options.metrics.sinks ?= {}
# Only the file sink is enabled by default.
options.metrics.sinks.file_enabled ?= true
options.metrics.sinks.ganglia_enabled ?= false
options.metrics.sinks.graphite_enabled ?= false
# File sink
if options.metrics.sinks.file_enabled
# The file sink settings of the metrics dependency are copied only when that
# dependency exists and has its own file sink enabled.
options.metrics.config["*.sink.file.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.file.config if service.deps.metrics?.options?.sinks?.file_enabled
# Sink class and output file name for the "mrappmaster" and "jobhistoryserver" prefixes.
options.metrics.config['mrappmaster.sink.file.class'] ?= 'org.apache.hadoop.metrics2.sink.FileSink'
options.metrics.config['jobhistoryserver.sink.file.class'] ?= 'org.apache.hadoop.metrics2.sink.FileSink'
options.metrics.config['mrappmaster.sink.file.filename'] ?= 'mrappmaster-metrics.out'
options.metrics.config['jobhistoryserver.sink.file.filename'] ?= 'jobhistoryserver-metrics.out'
# Ganglia sink, accepted properties are "servers" and "supportsparse"
if options.metrics.sinks.ganglia_enabled
# Sink class for the "mrappmaster" and "jobhistoryserver" prefixes.
options.metrics.config["mrappmaster.sink.ganglia.class"] ?= 'org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31'
options.metrics.config["jobhistoryserver.sink.ganglia.class"] ?= 'org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31'
# Copy the Ganglia settings from the metrics dependency, the same source the
# guard on this line tests and the one used by the file and graphite sinks.
# The loop previously iterated over `options.sinks.ganglia.config`; `options.sinks`
# is not set anywhere in this configure step, so enabling Ganglia raised a
# TypeError unless the caller happened to provide that object.
options.metrics.config["*.sink.ganglia.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.ganglia.config if service.deps.metrics?.options?.sinks?.ganglia_enabled
# Graphite Sink
if options.metrics.sinks.graphite_enabled
# Both `server_host` and `server_port` must be defined in the graphite sink
# config once the sink is enabled; each one is checked separately.
throw Error 'Missing remote_host ryba.mapred_jhs.metrics.sinks.graphite.config.server_host' unless options.metrics.sinks.graphite.config.server_host?
throw Error 'Missing remote_port ryba.mapred_jhs.metrics.sinks.graphite.config.server_port' unless options.metrics.sinks.graphite.config.server_port?
# Sink class for the "mrappmaster" and "jobhistoryserver" prefixes.
options.metrics.config["mrappmaster.sink.graphite.class"] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
options.metrics.config["jobhistoryserver.sink.graphite.class"] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
# The graphite settings of the metrics dependency are copied only when that
# dependency exists and has its own graphite sink enabled.
options.metrics.config["*.sink.graphite.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.graphite.config if service.deps.metrics?.options?.sinks?.graphite_enabled
Wait
# Wait information of the first HDFS NameNode dependency.
options.wait_hdfs_nn ?= service.deps.hdfs_nn[0].options.wait
# Plain assignment: any `wait` object provided by the caller is replaced.
options.wait = {}
# One {host, port} entry per JobHistoryServer instance, for its IPC address.
options.wait.tcp = for srv in service.deps.mapred_jhs
# The defaults are written into the options of the iterated instance, because
# that instance may not have a mapred_site or an address set yet.
srv.options.mapred_site ?= {}
srv.options.mapred_site['mapreduce.jobhistory.address'] ?= "#{srv.node.fqdn}:10020"
# The port stays a string, as produced by `split`.
[fqdn, port] = srv.options.mapred_site['mapreduce.jobhistory.address'].split ':'
host: fqdn, port: port
# One {host, port} entry per JobHistoryServer instance, for its web UI.
options.wait.webapp = for srv in service.deps.mapred_jhs
# The policy is read from the local options, not from the iterated instance.
# Any value other than 'HTTP_ONLY' selects the HTTPS address.
protocol = if options.mapred_site['mapreduce.jobhistory.http.policy'] is 'HTTP_ONLY' then '' else 'https.'
srv.options.mapred_site ?= {}
srv.options.mapred_site['mapreduce.jobhistory.webapp.address'] ?= "#{srv.node.fqdn}:19888"
srv.options.mapred_site['mapreduce.jobhistory.webapp.https.address'] ?= "#{srv.node.fqdn}:19889"
[fqdn, port] = srv.options.mapred_site["mapreduce.jobhistory.webapp.#{protocol}address"].split ':'
host: fqdn, port: port
Dependencies
{merge} = require '@nikitajs/core/lib/misc'