MapReduce JobHistoryServer Install

Install and configure the MapReduce Job History Server (JHS).

Run the command "./bin/ryba install -m ryba/hadoop/mapred_jhs" to install the Job History Server.
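
Like every ryba action, this file exports a header and a handler which the command above loads by module path. A minimal sketch of that contract, where the consuming script is hypothetical:

# Illustration only, from a separate script, not this module:
# action = require 'ryba/hadoop/mapred_jhs'
# action.header           # 'MapReduce JHS Install'
# typeof action.handler   # 'function'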

module.exports = header: 'MapReduce JHS Install', handler: ({options}) ->

Register

  @registry.register 'hconfigure', 'ryba/lib/hconfigure'
  @registry.register 'hdp_select', 'ryba/lib/hdp_select'

IPTables

| Service    | Port  | Proto | Parameter                                 |
|------------|-------|-------|-------------------------------------------|
| jobhistory | 10020 | tcp   | mapreduce.jobhistory.address              |
| jobhistory | 19888 | http  | mapreduce.jobhistory.webapp.address       |
| jobhistory | 19889 | https | mapreduce.jobhistory.webapp.https.address |
| jobhistory | 13562 | tcp   | mapreduce.shuffle.port                    |
| jobhistory | 10033 | tcp   | mapreduce.jobhistory.admin.address        |

IPTables rules are only inserted if the parameter "iptables.action" is set to "start" (default value).
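
The ports in the table derive from the "host:port" value of each "mapred_site" property. A sketch of assumed defaults, for illustration only since the cluster configuration may override any of them:

  # Illustration only, unused by the recipe: assumed default addresses from
  # which the ports in the table above are extracted.
  jhs_default_addresses =
    'mapreduce.jobhistory.address': '0.0.0.0:10020'
    'mapreduce.jobhistory.webapp.address': '0.0.0.0:19888'
    'mapreduce.jobhistory.webapp.https.address': '0.0.0.0:19889'
    'mapreduce.jobhistory.admin.address': '0.0.0.0:10033'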

  jhs_shuffle_port = options.mapred_site['mapreduce.shuffle.port']
  jhs_port = options.mapred_site['mapreduce.jobhistory.address'].split(':')[1]
  jhs_webapp_port = options.mapred_site['mapreduce.jobhistory.webapp.address'].split(':')[1]
  jhs_webapp_https_port = options.mapred_site['mapreduce.jobhistory.webapp.https.address'].split(':')[1]
  jhs_admin_port = options.mapred_site['mapreduce.jobhistory.admin.address'].split(':')[1]
  @tools.iptables
    header: 'IPTables'
    if: options.iptables
    rules: [
      { chain: 'INPUT', jump: 'ACCEPT', dport: jhs_port, protocol: 'tcp', state: 'NEW', comment: "MapRed JHS Server" }
      { chain: 'INPUT', jump: 'ACCEPT', dport: jhs_webapp_port, protocol: 'tcp', state: 'NEW', comment: "MapRed JHS WebApp" }
      { chain: 'INPUT', jump: 'ACCEPT', dport: jhs_webapp_https_port, protocol: 'tcp', state: 'NEW', comment: "MapRed JHS WebApp" }
      { chain: 'INPUT', jump: 'ACCEPT', dport: jhs_shuffle_port, protocol: 'tcp', state: 'NEW', comment: "MapRed JHS Shuffle" }
      { chain: 'INPUT', jump: 'ACCEPT', dport: jhs_admin_port, protocol: 'tcp', state: 'NEW', comment: "MapRed JHS Admin Server" }
    ]

Service

Install the "hadoop-mapreduce-historyserver" service and register it with the init system: an rc.d startup script inside "/etc/init.d" on RHEL/CentOS 6, or a systemd unit on RHEL/CentOS 7, activated on startup in both cases.

  @call header: 'Service', ->
    @service
      name: 'hadoop-mapreduce-historyserver'
    # A single object cannot carry two "name" keys (the last one wins),
    # so each package gets its own call.
    @hdp_select
      name: 'hadoop-mapreduce-client' # Not checked
    @hdp_select
      name: 'hadoop-mapreduce-historyserver'
    @service.init
      if_os: name: ['redhat','centos'], version: '6'
      header: 'Initd Script'
      target: '/etc/init.d/hadoop-mapreduce-historyserver'
      source: "#{__dirname}/../resources/hadoop-mapreduce-historyserver.j2"
      local: true
      context: options: options
      mode: 0o0755
    @call
      if_os: name: ['redhat','centos'], version: '7'
    , ->
      @service.init
        header: 'Systemd Script'
        target: '/usr/lib/systemd/system/hadoop-mapreduce-historyserver.service'
        source: "#{__dirname}/../resources/hadoop-mapreduce-historyserver-systemd.j2"
        local: true
        context: options: options
        mode: 0o0644
      @system.tmpfs
        header: 'Run dir'
        mount: "#{options.pid_dir}"
        uid: options.user.name
        gid: options.hadoop_group.name
        perm: '0755'
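
A hypothetical post-install verification, not part of the original recipe; the "verify" flag is an assumption and the action is skipped unless it is defined:

  # Illustration only: confirm the service is known to the init system,
  # whether SysV (chkconfig) or systemd.
  @system.execute
    header: 'Service Check'
    cmd: 'chkconfig --list hadoop-mapreduce-historyserver || systemctl cat hadoop-mapreduce-historyserver'
    if: options.verify # hypothetical flag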

Layout

Create the log and pid directories.

  @call header: 'Layout', ->
    @system.mkdir
      target: "#{options.log_dir}"
      uid: options.user.name
      gid: options.hadoop_group.name
      mode: 0o0755
    @system.mkdir
      target: "#{options.pid_dir}"
      uid: options.user.name
      gid: options.hadoop_group.name
      mode: 0o0755
    @system.mkdir
      target: options.mapred_site['mapreduce.jobhistory.recovery.store.leveldb.path']
      uid: options.user.name
      gid: options.hadoop_group.name
      mode: 0o0750
      parent: true
      if: options.mapred_site['mapreduce.jobhistory.recovery.store.class'] is 'org.apache.hadoop.mapreduce.v2.hs.HistoryServerLeveldbStateStoreService'
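
The LevelDB directory is only created when the recovery store class matches. A sketch of the related "mapred_site" properties; "mapreduce.jobhistory.recovery.enable" is the standard Hadoop property, the path value is an assumption:

  # Assumed configuration excerpt enabling JHS state recovery through LevelDB:
  # 'mapreduce.jobhistory.recovery.enable': 'true'
  # 'mapreduce.jobhistory.recovery.store.class': 'org.apache.hadoop.mapreduce.v2.hs.HistoryServerLeveldbStateStoreService'
  # 'mapreduce.jobhistory.recovery.store.leveldb.path': '/var/lib/hadoop-mapreduce/jhs-recovery'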

Configure

Write the Hadoop configuration files ("core-site.xml", "hdfs-site.xml", "yarn-site.xml" and "mapred-site.xml"), the Log4j properties and the environment scripts. The "mapred-env.sh" file inside the Hadoop configuration directory is enriched with the location of the directory storing the process pid.

Templated values include the heap size ("HADOOP_HEAPSIZE") and the pid directory ("HADOOP_PID_DIR").

  @hconfigure
    header: 'Core Site'
    target: "#{options.conf_dir}/core-site.xml"
    source: "#{__dirname}/../../resources/core_hadoop/core-site.xml"
    local: true
    properties: options.core_site
    backup: true
  @hconfigure
    header: 'HDFS Site'
    target: "#{options.conf_dir}/hdfs-site.xml"
    properties: options.hdfs_site
    backup: true
  @hconfigure
    header: 'YARN Site'
    target: "#{options.conf_dir}/yarn-site.xml"
    properties: options.yarn_site
    backup: true
  @hconfigure
    header: 'MapRed Site'
    target: "#{options.conf_dir}/mapred-site.xml"
    properties: options.mapred_site
    backup: true
  @file
    header: 'Log4j'
    target: "#{options.conf_dir}/log4j.properties"
    source: "#{__dirname}/../resources/log4j.properties"
    local: true
  @file.render
    header: 'Hadoop Env'
    target: "#{options.conf_dir}/hadoop-env.sh"
    source: "#{__dirname}/../resources/hadoop-env.sh.j2"
    local: true
    context:
      HADOOP_HEAPSIZE: options.hadoop_heap
      HADOOP_PID_DIR: options.pid_dir
      HADOOP_OPTS: options.hadoop_opts
      HADOOP_CLIENT_OPTS: options.hadoop_client_opts
      HADOOP_LOG_DIR: options.log_dir
      java_home: options.java_home
    uid: options.user.name
    gid: options.hadoop_group.name
    mode: 0o0755
    backup: true
  @file.render
    header: 'MapRed Env'
    target: "#{options.conf_dir}/mapred-env.sh"
    source: "#{__dirname}/../resources/mapred-env.sh.j2"
    context:
      HADOOP_HEAPSIZE: options.hadoop_heap
      HADOOP_PID_DIR: options.pid_dir
      HADOOP_OPTS: options.hadoop_opts
      HADOOP_CLIENT_OPTS: options.hadoop_client_opts
      HADOOP_LOG_DIR: options.log_dir
    local: true
    uid: options.user.name
    gid: options.hadoop_group.name
    mode: 0o0755
    backup: true
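
For reference, the rendered "mapred-env.sh" is expected to export values along these lines; the variable names are standard Hadoop ones, the sample values are assumptions:

  # export HADOOP_MAPRED_PID_DIR=/var/run/hadoop-mapreduce   # from options.pid_dir
  # export HADOOP_MAPRED_LOG_DIR=/var/log/hadoop-mapreduce   # from options.log_dir
  # export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000            # from options.hadoop_heap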

Configure the "hadoop-metrics2.properties" file to connect Hadoop to a metrics collector such as Ganglia or Graphite.

  @file.properties
    header: 'Metrics'
    target: "#{options.conf_dir}/hadoop-metrics2.properties"
    content: options.metrics.config
    backup: true
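
A sketch of what "options.metrics.config" might contain when targeting Graphite; the sink class is the standard Hadoop one, while the host, port, period and prefix are assumptions:

  # Illustration only, unused by the recipe: a Graphite-oriented metrics2 config.
  sample_metrics_config = """
  *.sink.graphite.class=org.apache.hadoop.metrics2.sink.GraphiteSink
  *.period=10
  jobhistoryserver.sink.graphite.server_host=graphite.example.com
  jobhistoryserver.sink.graphite.server_port=2003
  jobhistoryserver.sink.graphite.metrics_prefix=hadoop.jhs
  """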

SSL

  @call header: 'SSL', ->
    @hconfigure
      target: "#{options.conf_dir}/ssl-server.xml"
      properties: options.ssl_server
    @hconfigure
      target: "#{options.conf_dir}/ssl-client.xml"
      properties: options.ssl_client
    # Client: import certificate to all hosts
    @java.keystore_add
      keystore: options.ssl_client['ssl.client.truststore.location']
      storepass: options.ssl_client['ssl.client.truststore.password']
      caname: "hadoop_root_ca"
      cacert: options.ssl.cacert.source
      local: options.ssl.cacert.local
    # Server: import certificates, private and public keys to hosts with a server
    @java.keystore_add
      keystore: options.ssl_server['ssl.server.keystore.location']
      storepass: options.ssl_server['ssl.server.keystore.password']
      key: options.ssl.key.source
      cert: options.ssl.cert.source
      keypass: options.ssl_server['ssl.server.keystore.keypassword']
      name: options.ssl.key.name
      local: options.ssl.key.local
    @java.keystore_add
      keystore: options.ssl_server['ssl.server.keystore.location']
      storepass: options.ssl_server['ssl.server.keystore.password']
      caname: "hadoop_root_ca"
      cacert: options.ssl.cacert.source
      local: options.ssl.cacert.local
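
A hypothetical check, not part of the original recipe, confirming the CA certificate landed in the client truststore under the "hadoop_root_ca" alias; the "verify" flag is an assumption:

  # Illustration only: list the imported CA entry with keytool.
  @system.execute
    header: 'SSL Check'
    cmd: "keytool -list -alias hadoop_root_ca -keystore #{options.ssl_client['ssl.client.truststore.location']} -storepass #{options.ssl_client['ssl.client.truststore.password']}"
    if: options.verify # hypothetical flag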

Kerberos

Create the Kerberos service principal, by default in the form "jhs/{host}@{realm}", and place its keytab inside "/etc/security/keytabs/jhs.service.keytab" with ownership set to "mapred:hadoop" and permissions set to "0600".

  @krb5.addprinc options.krb5.admin,
    header: 'Kerberos'
    principal: options.mapred_site['mapreduce.jobhistory.principal']
    randkey: true
    keytab: options.mapred_site['mapreduce.jobhistory.keytab']
    uid: options.user.name
    gid: options.hadoop_group.name
    mode: 0o0600
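
A hypothetical check, not part of the original recipe: obtain then discard a ticket with the service keytab to prove the keytab and principal match. The "verify" flag is an assumption, and a literal "_HOST" in the principal would first have to be resolved to the node FQDN:

  # Illustration only: authenticate with the keytab, then destroy the ticket.
  @system.execute
    header: 'Kerberos Check'
    cmd: "kinit -kt #{options.mapred_site['mapreduce.jobhistory.keytab']} #{options.mapred_site['mapreduce.jobhistory.principal']} && kdestroy"
    if: options.verify # hypothetical flag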

HDFS Layout

The layout is inspired by Hadoop recommendations. The script below exits with status 2 when no directory was created; the "code_skipped: 2" option tells ryba to report the action as skipped rather than failed.

  @system.execute
    header: 'HDFS Layout'
    cmd: mkcmd.hdfs options.hdfs_krb5_user, """
    modified=""
    if ! hdfs --config #{options.conf_dir} dfs -test -d #{options.mapred_site['yarn.app.mapreduce.am.staging-dir']}/history; then
      hdfs --config #{options.conf_dir} dfs -mkdir -p #{options.mapred_site['yarn.app.mapreduce.am.staging-dir']}/history
      hdfs --config #{options.conf_dir} dfs -chmod 0755 #{options.mapred_site['yarn.app.mapreduce.am.staging-dir']}/history
      hdfs --config #{options.conf_dir} dfs -chown #{options.user.name}:#{options.hadoop_group.name} #{options.mapred_site['yarn.app.mapreduce.am.staging-dir']}/history
      modified=1
    fi
    if ! hdfs --config #{options.conf_dir} dfs -test -d /app-logs; then
      hdfs --config #{options.conf_dir} dfs -mkdir -p /app-logs
      hdfs --config #{options.conf_dir} dfs -chmod 1777 /app-logs
      hdfs --config #{options.conf_dir} dfs -chown #{options.user.name} /app-logs
      modified=1
    fi
    if [ "$modified" != "1" ]; then exit 2; fi
    """
    code_skipped: 2
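
The guarded test/mkdir/chmod/chown sequence above is a reusable idempotency pattern. A generalized sketch; the "hdfs_mkdir" helper is hypothetical and not part of ryba:

  # Illustration only: build the same guarded HDFS directory creation snippet.
  hdfs_mkdir = (conf_dir, dir, mode, owner) -> """
  if ! hdfs --config #{conf_dir} dfs -test -d #{dir}; then
    hdfs --config #{conf_dir} dfs -mkdir -p #{dir}
    hdfs --config #{conf_dir} dfs -chmod #{mode} #{dir}
    hdfs --config #{conf_dir} dfs -chown #{owner} #{dir}
    modified=1
  fi
  """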

Dependencies

mkcmd = require '../../lib/mkcmd' # builds shell commands wrapped with the proper user and Kerberos context