Menu

Hadoop YARN Timeline Server Install

The Timeline Server is a stand-alone server daemon and doesn't need to be co-located with any other service.

module.exports = header: 'YARN ATS Install', handler: ({options}) ->

Register

  # Ryba-specific actions used further down, as [action name, module] pairs.
  # They are registered in the listed order.
  ryba_actions = [
    ['hconfigure', 'ryba/lib/hconfigure']
    ['hdp_select', 'ryba/lib/hdp_select']
    [['file', 'jaas'], 'ryba/lib/file_jaas']
    [['hdfs','put'], 'ryba/lib/actions/hdfs/put']
    [['hdfs','chown'], 'ryba/lib/actions/hdfs/chown']
    [['hdfs','mkdir'], 'ryba/lib/actions/hdfs/mkdir']
  ]
  for [action_name, module_path] in ryba_actions
    @registry.register action_name, module_path

Identities

By default, the "hadoop-yarn-timelineserver" package creates the following entries:

cat /etc/passwd | grep yarn
yarn:x:2403:2403:Hadoop YARN User:/var/lib/hadoop-yarn:/bin/bash
cat /etc/group | grep hadoop
hadoop:x:499:hdfs
  # Declare the shared Hadoop group first, then the service group and user.
  # Each definition comes straight from the options.
  @system.group {header: 'Hadoop Group'}, options.hadoop_group
  @system.group {header: 'Group'}, options.group
  @system.user {header: 'User'}, options.user

Wait

  # Delegate to the Kerberos client wait module, flagged with `once: true`,
  # passing it the dedicated wait options.
  @call {once: true}, 'masson/core/krb5_client/wait', options.wait_krb5_client

IPTables

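| Service  | Port  | Proto     | Parameter                                  |
|----------|-------|-----------|--------------------------------------------|
| timeline | 10200 | tcp/http  | yarn.timeline-service.address              |
| timeline | 8188  | tcp/http  | yarn.timeline-service.webapp.address       |
| timeline | 8190  | tcp/https | yarn.timeline-service.webapp.https.address |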

IPTables rules are only inserted if the parameter "iptables.action" is set to "start" (default value).

  # Return the port part of a "host:port" value stored in yarn-site.
  port_of = (property) ->
    options.yarn_site[property].split(':')[1]
  # Build an INPUT/ACCEPT rule for new TCP connections on the given port.
  accept_rule = (dport, comment) ->
    { chain: 'INPUT', jump: 'ACCEPT', dport: dport, protocol: 'tcp', state: 'NEW', comment: comment }
  rpc_port = port_of 'yarn.timeline-service.address'
  http_port = port_of 'yarn.timeline-service.webapp.address'
  https_port = port_of 'yarn.timeline-service.webapp.https.address'
  # Rules are only applied when `options.iptables` is truthy.
  @tools.iptables
    header: 'IPTables'
    if: options.iptables
    rules: [
      accept_rule(rpc_port, "Yarn Timeserver RPC")
      accept_rule(http_port, "Yarn Timeserver HTTP")
      accept_rule(https_port, "Yarn Timeserver HTTPS")
    ]

Service

Install the "hadoop-yarn-timelineserver" service, deploy its startup script ("/etc/init.d/hadoop-yarn-timelineserver" on RedHat/CentOS 6, a systemd unit on RedHat/CentOS 7) and define its startup strategy.

  # Install the Timeline Server package, select its HDP version and deploy the
  # startup script matching the OS release.
  @call header: 'Service', ->
    @service
      name: 'hadoop-yarn-timelineserver'
    # The original object literal declared `name` twice ('hadoop-yarn-client',
    # then 'hadoop-yarn-timelineserver'); the first value was silently
    # overridden. Only the effective value is kept, so behaviour is unchanged.
    # NOTE(review): if 'hadoop-yarn-client' must be selected too, it needs its
    # own `@hdp_select` call.
    @hdp_select
      name: 'hadoop-yarn-timelineserver'
    # RedHat/CentOS 6: SysV init script rendered from a local template.
    @service.init
      if_os: name: ['redhat','centos'], version: '6'
      header: 'Initd Script'
      target: '/etc/init.d/hadoop-yarn-timelineserver'
      source: "#{__dirname}/../resources/hadoop-yarn-timelineserver.j2"
      local: true
      context: options: options
      mode: 0o0755
    # RedHat/CentOS 7: systemd unit plus a tmpfs declaration for the PID
    # directory.
    @call
      if_os: name: ['redhat','centos'], version: '7'
    , ->
      @service.init
        header: 'Systemd Script'
        target: '/usr/lib/systemd/system/hadoop-yarn-timelineserver.service'
        source: "#{__dirname}/../resources/hadoop-yarn-timelineserver-systemd.j2"
        local: true
        context: options: options
        mode: 0o0644
      @system.tmpfs
        header: 'Run dir'
        mount: "#{options.pid_dir}"
        uid: options.user.name
        gid: options.hadoop_group.name
        perm: '0755'

Layout

  # Create the local directories used by the Timeline Server and copy the
  # leveldbjni jar into its temporary directory.
  @call header: 'Layout', ->
    # Path of the leveldbjni jar; set by the `ls` callback below and read by
    # the copy step that follows it.
    leveldb_jar = null
    @system.mkdir
      target: "#{options.conf_dir}"
    @system.mkdir
      target: "#{options.pid_dir}"
      uid: options.user.name
      gid: options.hadoop_group.name
      mode: 0o755
    @system.mkdir
      target: "#{options.log_dir}"
      uid: options.user.name
      gid: options.group.name
      parent: true
    @system.mkdir
      target: options.yarn_site['yarn.timeline-service.leveldb-timeline-store.path']
      uid: options.user.name
      gid: options.hadoop_group.name
      mode: 0o0750
      parent: true
    # Directory also passed as `hadoop_java_io_tmpdir` when rendering yarn-env.sh.
    @system.mkdir
      target: "#{options.log_dir}/tmp" 
      uid: options.user.name
      gid: options.hadoop_group.name
      mode: 0o0750
      parent: true
    @call ->
      @system.execute
        cmd: 'ls /usr/hdp/current/hadoop-hdfs-client/lib/leveldbjni*  | tail -n1'
      , (err, data) ->
        # No `cb` is defined in this scope: the previous `return cb err`
        # raised a ReferenceError that masked the real failure. Rethrow the
        # original error instead.
        throw err if err
        leveldb_jar = data.stdout.trim()
    # Wrapped in its own `@call` so that `leveldb_jar` is read only after the
    # callback above has assigned it.
    @call ->
      @system.copy
        header: 'Copy leveldb jar'
        source: leveldb_jar
        target: "#{options.log_dir}/tmp/#{path.basename leveldb_jar}"
        uid: options.user.name
        gid: options.hadoop_group.name

Configuration

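Update the Hadoop configuration files ("core-site.xml", "hdfs-site.xml" and "yarn-site.xml"), the Log4j properties and the "yarn-env.sh" and "hadoop-env.sh" environment scripts.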

  # core-site.xml is the only site file seeded from a local default template.
  @hconfigure
    header: 'Core Site'
    target: "#{options.conf_dir}/core-site.xml"
    source: "#{__dirname}/../../resources/core_hadoop/core-site.xml"
    local: true
    properties: options.core_site
    backup: true
  # hdfs-site.xml and yarn-site.xml are written from the options only.
  site_files = [
    ['HDFS Site', 'hdfs-site.xml', options.hdfs_site]
    ['YARN Site', 'yarn-site.xml', options.yarn_site]
  ]
  for [site_header, site_file, site_properties] in site_files
    @hconfigure
      header: site_header
      target: "#{options.conf_dir}/#{site_file}"
      properties: site_properties
      backup: true
  @file
    header: 'Log4j'
    target: "#{options.conf_dir}/log4j.properties"
    source: "#{__dirname}/../resources/log4j.properties"
    local: true
  @call header: 'Environment', ->
    # Assemble the JVM option string: base options, then "-Dkey=value" system
    # properties, then raw JVM flags concatenated with their value.
    ats_opts = options.opts.base
    for property, value of options.opts.java_properties
      ats_opts += " -D#{property}=#{value}"
    for flag, value of options.opts.jvm
      ats_opts += " #{flag}#{value}"
    @file.render
      target: "#{options.conf_dir}/yarn-env.sh"
      source: "#{__dirname}/../resources/yarn-env.sh.j2"
      local: true
      context:
        # True whenever a Kerberos realm is present in the options.
        security_enabled: options.krb5.realm?
        hadoop_yarn_home: options.home
        java64_home: options.java_home
        yarn_log_dir: options.log_dir
        yarn_pid_dir: options.pid_dir
        hadoop_libexec_dir: ''
        hadoop_java_io_tmpdir: "#{options.log_dir}/tmp"
        yarn_heapsize: options.heapsize
        apptimelineserver_heapsize: options.heapsize
        yarn_ats_jaas_file: "#{options.conf_dir}/yarn-ats.jaas"
        # Option string assembled above.
        YARN_TIMELINESERVER_OPTS: ats_opts
      uid: options.user.name
      gid: options.hadoop_group.name
      mode: 0o0755
      backup: true
  # hadoop-env.sh only receives the log and PID directories and the Java home.
  @file.render
    header: 'Env'
    target: "#{options.conf_dir}/hadoop-env.sh"
    source: "#{__dirname}/../resources/hadoop-env.sh.j2"
    local: true
    context:
      HADOOP_LOG_DIR: options.log_dir
      HADOOP_PID_DIR: options.pid_dir
      java_home: options.java_home
    uid: options.user.name
    gid: options.hadoop_group.name
    mode: 0o750
    backup: true
    eof: true

Configure the "hadoop-metrics2.properties" to connect Hadoop to a Metrics collector like Ganglia or Graphite.

  # Write the metrics configuration from the options, keeping a backup of any
  # previous file.
  metrics_target = "#{options.conf_dir}/hadoop-metrics2.properties"
  @file.properties
    header: 'Metrics'
    target: metrics_target
    content: options.metrics.config
    backup: true

HDFS Layout

See:

Note, this is not documented anywhere and might not be considered as a best practice.

  # Create the HDFS directory backing the file-system application history
  # store. Skipped unless that store class is the one configured in yarn-site.
  @call header: 'HDFS layout', ->
    return unless options.yarn_site['yarn.timeline-service.generic-application-history.store-class'] is "org.apache.hadoop.yarn.server.applicationhistoryservice.FileSystemApplicationHistoryStore"
    dir = options.yarn_site['yarn.timeline-service.fs-history-store.uri']
    # Wait until the parent directory is visible in HDFS.
    @wait.execute
      cmd: mkcmd.hdfs options.hdfs_krb5_user, "hdfs --config #{options.conf_dir} dfs -test -d #{path.dirname dir}"
    @system.execute
      cmd: mkcmd.hdfs options.hdfs_krb5_user, """
      hdfs --config #{options.conf_dir} dfs -mkdir -p #{dir}
      hdfs --config #{options.conf_dir} dfs -chown #{options.user.name} #{dir}
      hdfs --config #{options.conf_dir} dfs -chmod 1777 #{dir}
      """
      # The previous guard, `[[ hdfs --config … dfs -d … ]]`, was not a valid
      # bash conditional and used the non-existent `dfs -d`, so it could never
      # succeed and the commands above always ran. Use the same
      # `dfs -test -d` check, wrapped with `mkcmd.hdfs`, as the wait step.
      unless_exec: mkcmd.hdfs options.hdfs_krb5_user, "hdfs --config #{options.conf_dir} dfs -test -d #{dir}"

  # Create the HDFS "active" and "done" directories of the entity group
  # file-system store. Skipped unless the timeline service version is "1.5".
  @call header: 'YARN ATS 1.5', ->
    return unless options.yarn_site['yarn.timeline-service.version'] is "1.5"
    active_dir = options.yarn_site['yarn.timeline-service.entity-group-fs-store.active-dir']
    done_dir = options.yarn_site['yarn.timeline-service.entity-group-fs-store.done-dir']
    # Both guards below replace `[[ hdfs --config … dfs -d … ]]`, which was not
    # a valid bash conditional and never succeeded. They now use
    # `dfs -test -d` wrapped with `mkcmd.hdfs`, like the other HDFS commands.
    @system.execute
      cmd: mkcmd.hdfs options.hdfs_krb5_user, """
      hdfs --config #{options.conf_dir} dfs -mkdir -p #{active_dir}
      hdfs --config #{options.conf_dir} dfs -chown #{options.user.name}:#{options.hadoop_group.name} #{active_dir}
      hdfs --config #{options.conf_dir} dfs -chmod 0777 #{active_dir}
      """
      unless_exec: mkcmd.hdfs options.hdfs_krb5_user, "hdfs --config #{options.conf_dir} dfs -test -d #{active_dir}"
    @system.execute
      # Ownership now uses `options.user`, like every other step in this
      # file; the previous `options.ats_user` is referenced nowhere else.
      # NOTE(review): confirm no caller provides a distinct `ats_user`.
      cmd: mkcmd.hdfs options.hdfs_krb5_user, """
      hdfs --config #{options.conf_dir} dfs -mkdir -p #{done_dir}
      hdfs --config #{options.conf_dir} dfs -chown #{options.user.name}:#{options.hadoop_group.name} #{done_dir}
      hdfs --config #{options.conf_dir} dfs -chmod 0700 #{done_dir}
      """
      unless_exec: mkcmd.hdfs options.hdfs_krb5_user, "hdfs --config #{options.conf_dir} dfs -test -d #{done_dir}"

SSL

  # Write the SSL server/client configuration files and populate the Java
  # truststore and keystore they reference.
  @call header: 'SSL', ->
    @hconfigure
      target: "#{options.conf_dir}/ssl-server.xml"
      properties: options.ssl_server
    @hconfigure
      target: "#{options.conf_dir}/ssl-client.xml"
      properties: options.ssl_client
    {ssl, ssl_server, ssl_client} = options
    # Truststore: import the root CA certificate.
    @java.keystore_add
      keystore: ssl_client['ssl.client.truststore.location']
      storepass: ssl_client['ssl.client.truststore.password']
      caname: "hadoop_root_ca"
      cacert: ssl.cacert.source
      local: ssl.cacert.local
    # Keystore: import the private key together with its certificate.
    @java.keystore_add
      keystore: ssl_server['ssl.server.keystore.location']
      storepass: ssl_server['ssl.server.keystore.password']
      key: ssl.key.source
      cert: ssl.cert.source
      keypass: ssl_server['ssl.server.keystore.keypassword']
      name: ssl.key.name
      local: ssl.key.local
    # Keystore: also import the root CA certificate.
    @java.keystore_add
      keystore: ssl_server['ssl.server.keystore.location']
      storepass: ssl_server['ssl.server.keystore.password']
      caname: "hadoop_root_ca"
      cacert: ssl.cacert.source
      local: ssl.cacert.local

Kerberos

Create the Kerberos service principal by default in the form of "ats/{host}@{realm}" and place its keytab inside "/etc/security/keytabs/ats.service.keytab" with ownership set to the YARN user and group (`options.user` and `options.group`) and permissions set to "0600".

  # Create the service principal with a random key and export its keytab,
  # readable by the service user only.
  krb5_admin = options.krb5.admin
  # The "_HOST" placeholder in the configured principal is replaced by this
  # node's FQDN.
  ats_principal = options.yarn_site['yarn.timeline-service.principal'].replace '_HOST', options.fqdn
  ats_keytab = options.yarn_site['yarn.timeline-service.keytab']
  @krb5.addprinc krb5_admin,
    header: 'Kerberos'
    principal: ats_principal
    randkey: true
    keytab: ats_keytab
    uid: options.user.name
    gid: options.group.name
    mode: 0o0600

Kerberos JAAS

The JAAS file is used by the Timeline Server to initiate a secure connection with Zookeeper.

  # JAAS "Client" section built from the Timeline Server principal (with
  # "_HOST" replaced by this node's FQDN) and its keytab.
  jaas_client =
    principal: options.yarn_site['yarn.timeline-service.principal'].replace '_HOST', options.fqdn
    keyTab: options.yarn_site['yarn.timeline-service.keytab']
  # Same path as the `yarn_ats_jaas_file` value given to the yarn-env.sh
  # template.
  @file.jaas
    header: 'Kerberos JAAS'
    target: "#{options.conf_dir}/yarn-ats.jaas"
    content: Client: jaas_client
    uid: options.user.name
    gid: options.hadoop_group.name

Dependencies

path = require 'path'
mkcmd = require '../../lib/mkcmd'