Menu

Configuration

module.exports = (service) ->
  # `options` points at `service.options` itself, so every default assigned
  # through it below is written into the service configuration in place.
  options = service.options

Environment

  # Default environment variables; entries already present in `options.env`
  # are left untouched.
  options.env ?= {}
  env_defaults =
    'TEZ_CONF_DIR': '/etc/tez/conf'
    'TEZ_JARS': '/usr/hdp/current/tez-client/*:/usr/hdp/current/tez-client/lib/*'
    'HADOOP_CLASSPATH': '$TEZ_CONF_DIR:$TEZ_JARS:$HADOOP_CLASSPATH'
  for env_name, env_default of env_defaults
    options.env[env_name] ?= env_default
  # Misc: the hostname is always taken from the node, `force_check` is only a default
  options.hostname = service.node.hostname
  options.force_check ?= false

Kerberos

  # Both values are read from the configuration of the dependent services.
  {hadoop_core, test_user} = service.deps
  # Kerberos HDFS admin: always overwritten with the hadoop_core value
  options.hdfs_krb5_user = hadoop_core.options.hdfs.krb5_user
  # Kerberos test principal: only applied when not already configured
  options.test_krb5_user ?= test_user.options.krb5.user

Configuration

  options.tez_site ?= {}
  # `${hdp.version}` is written out literally: CoffeeScript only interpolates
  # `#{...}`, hence the single-quoted string.
  options.tez_site['tez.lib.uris'] ?= '/hdp/apps/${hdp.version}/tez/tez.tar.gz'
  # Kept for documentation purposes, in case we run into HDFS_DELEGATION_TOKEN
  # issues in Hive queries.
  # Former name: options.tez_site['tez.am.am.complete.cancel.delegation.tokens'] ?= 'false'
  # Renamed to: options.tez_site['tez.cancel.delegation.tokens.on.completion'] ?= 'false'
  # Validation (not implemented), error reported otherwise:
  # java.lang.IllegalArgumentException: tez.runtime.io.sort.mb 512 should be larger than 0 and should be less than the available task memory (MB):364
  # throw Error '' options.tez_site['tez.runtime.io.sort.mb']

Resource Allocation

  # Application Master memory (MB) used when `tez.am.resource.memory.mb` is not set
  memory_per_container = 512
  # Allocation bounds read from the first YARN ResourceManager
  rm_yarn_site = service.deps.yarn_rm[0].options.yarn_site
  rm_memory_max_mb = rm_yarn_site['yarn.scheduler.maximum-allocation-mb']
  rm_memory_min_mb = rm_yarn_site['yarn.scheduler.minimum-allocation-mb']
  # Cap the requested value at the maximum first, then raise it to the minimum
  am_memory_mb = Math.max rm_memory_min_mb, Math.min(rm_memory_max_mb, options.tez_site['tez.am.resource.memory.mb'] or memory_per_container)
  options.tez_site['tez.am.resource.memory.mb'] = am_memory_mb
  # Reuse an -Xmx value found in the existing Java options, else take 80% of
  # the AM memory; the result only serves as a default for the Java options.
  xmx_match = /-Xmx(.*?)m/.exec options.tez_site['hive.tez.java.opts']
  tez_memory_xmx = Math.min rm_memory_max_mb, (xmx_match?[1] or Math.floor 0.8 * am_memory_mb)
  options.tez_site['hive.tez.java.opts'] ?= "-Xmx#{tez_memory_xmx}m"

Deprecation warnings

Convert deprecated values between HDP 2.1 and HDP 2.2.

  # Map of deprecated property names to the names replacing them.
  deprecated =
    'tez.am.java.opts': 'tez.am.launch.cmd-opts'
    'tez.am.env': 'tez.am.launch.env'
    'tez.am.shuffle-vertex-manager.min-src-fraction': 'tez.shuffle-vertex-manager.min-src-fraction'
    'tez.am.shuffle-vertex-manager.max-src-fraction': 'tez.shuffle-vertex-manager.max-src-fraction'
    'tez.am.shuffle-vertex-manager.enable.auto-parallel': 'tez.shuffle-vertex-manager.enable.auto-parallel'
    'tez.am.shuffle-vertex-manager.desired-task-input-size': 'tez.shuffle-vertex-manager.desired-task-input-size'
    'tez.am.shuffle-vertex-manager.min-task-parallelism': 'tez.shuffle-vertex-manager.min-task-parallelism'
    'tez.am.grouping.split-count': 'tez.grouping.split-count'
    'tez.am.grouping.by-length': 'tez.grouping.by-length'
    'tez.am.grouping.by-count': 'tez.grouping.by-count'
    'tez.am.grouping.max-size': 'tez.grouping.max-size'
    'tez.am.grouping.min-size': 'tez.grouping.min-size'
    'tez.am.grouping.rack-split-reduction': 'tez.grouping.rack-split-reduction'
    'tez.am.am.complete.cancel.delegation.tokens': 'tez.cancel.delegation.tokens.on.completion'
    'tez.am.max.task.attempts': 'tez.am.task.max.failed.attempts'
    'tez.generate.dag.viz': 'tez.generate.debug.artifacts'
    'tez.runtime.intermediate-output.key.comparator.class': 'tez.runtime.key.comparator.class'
    'tez.runtime.intermediate-output.key.class': 'tez.runtime.key.class'
    'tez.runtime.intermediate-output.value.class': 'tez.runtime.value.class'
    'tez.runtime.intermediate-output.should-compress': 'tez.runtime.compress'
    'tez.runtime.intermediate-output.compress.codec': 'tez.runtime.compress.codec'
    'tez.runtime.intermediate-input.key.secondary.comparator.class': 'tez.runtime.key.secondary.comparator.class'
    'tez.runtime.broadcast.data-via-events.enabled': 'tez.runtime.transfer.data-via-events.enabled'
    'tez.runtime.broadcast.data-via-events.max-size': 'tez.runtime.transfer.data-via-events.max-size'
    'tez.runtime.shuffle.input.buffer.percent': 'tez.runtime.shuffle.fetch.buffer.percent'
    'tez.runtime.task.input.buffer.percent': 'tez.runtime.task.input.post-merge.buffer.percent'
    'tez.runtime.job.counters.max': 'tez.am.counters.max.keys'
    'tez.runtime.job.counters.group.name.max': 'tez.am.counters.group-name.max.keys'
    'tez.runtime.job.counters.counter.name.max': 'tez.am.counters.name.max.keys'
    'tez.runtime.job.counters.groups.max': 'tez.am.counters.groups.max.keys'
    'tez.task.merge.progress.records': 'tez.runtime.merge.progress.records'
    'tez.runtime.metrics.session.id': 'tez.runtime.framework.metrics.session.id'
    'tez.task.scale.memory.additional.reservation.fraction.per-io': 'tez.task.scale.memory.additional-reservation.fraction.per-io'
    'tez.task.scale.memory.additional.reservation.fraction.max': 'tez.task.scale.memory.additional-reservation.fraction.max'
    'tez.task.initial.memory.scale.ratios': 'tez.task.scale.memory.ratios'
    'tez.resource.calculator.process-tree.class': 'tez.task.resource.calculator.process-tree.class'
  # Copy every deprecated property found in `tez_site` to its replacement
  # (overwriting the replacement) and print a warning for it.
  for previous, current of deprecated
    # Existence test rather than truthiness: a deprecated property set to
    # `false`, `0` or '' is still a configured value and must be migrated.
    continue unless options.tez_site[previous]?
    options.tez_site[current] = options.tez_site[previous]
    console.log "Deprecated property '#{previous}' [WARN]"

  # Default history logging class, chosen from the first YARN Timeline Server
  # when one is declared. The test is on the truthiness of `length`: the former
  # `length?` check was also satisfied by an empty array, after which
  # `yarn_ts[0].options` raised a TypeError.
  if service.deps.yarn_ts?.length
    if service.deps.yarn_ts[0].options.yarn_site['yarn.timeline-service.version'] is '1.5'
      options.tez_site['tez.history.logging.service.class'] ?= 'org.apache.tez.dag.history.logging.ats.ATSV15HistoryLoggingService'
    else
      options.tez_site['tez.history.logging.service.class'] ?= 'org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService'
  else
    # No Timeline Server available
    options.tez_site['tez.history.logging.service.class'] ?= 'org.apache.tez.dag.history.logging.proto.ProtoHistoryLoggingService'

Tez Ports

Enrich the YARN NodeManagers with additional IPTables rules.

  # Range of ports that the AM can use when binding for client connections
  options.tez_site['tez.am.client.am.port-range'] ?= '34816-36864'
  # Register one rule per NodeManager; the range separator is switched from
  # '-' to ':' for the rule's `dport` value.
  for yarn_nm_srv in service.deps.yarn_nm
    yarn_nm_srv.options.iptables_rules.push
      chain: 'INPUT'
      jump: 'ACCEPT'
      dport: options.tez_site['tez.am.client.am.port-range'].replace '-', ':'
      protocol: 'tcp'
      state: 'NEW'
      comment: "Tez AM Range"

UI

  options.ui ?= {}
  # Unless configured, the UI is enabled exactly when an httpd dependency exists
  options.ui.enabled ?= !!service.deps.httpd
  if options.ui.enabled
    options.ui.env ?= {}
    options.ui.env.hosts ?= {}
    # httpd supplies the defaults unless both the history URL and the HTML path
    # are already provided
    unless options.tez_site['tez.tez-ui.history-url.base'] and options.ui.html_path
      unless service.deps.httpd
        # Throw an Error object rather than a bare string so that the failure
        # carries a `message` and a stack trace
        throw Error "Install masson/commons/httpd on #{service.node.fqdn} or specify tez_site['tez.tez-ui.history-url.base'] and ui.html_path if ui.enabled"
      options.tez_site['tez.tez-ui.history-url.base'] ?= "http://#{service.node.fqdn}/tez-ui"
      options.ui.html_path ?= "#{service.deps.httpd.options.user.home}/tez-ui"
    rm_site = service.deps.yarn_rm[0].options.yarn_site
    # With ResourceManager HA, the web address properties are suffixed with the RM id
    id = if rm_site['yarn.resourcemanager.ha.enabled'] is 'true' then ".#{rm_site['yarn.resourcemanager.ha.id']}" else ''
    # Dependencies are only inspected when the timeline host is not configured
    unless options.ui.env.hosts.timeline?
      # Truthiness of `length` (not `length?`): an empty `yarn_ts` array must
      # fall back to `yarn_tr` instead of failing on `yarn_ts[0].options`.
      # NOTE(review): `yarn_tr` is assumed to be available whenever `yarn_ts`
      # is missing or empty - confirm against the service dependencies.
      timeline_srv = if service.deps.yarn_ts?.length then service.deps.yarn_ts[0] else service.deps.yarn_tr[0]
      timeline_site = timeline_srv.options.yarn_site
      options.ui.env.hosts.timeline =
        if timeline_site['yarn.http.policy'] is 'HTTP_ONLY'
          "http://" + timeline_site['yarn.timeline-service.webapp.address']
        else
          "https://" + timeline_site['yarn.timeline-service.webapp.https.address']
    options.ui.env.hosts.rm ?=
      if rm_site['yarn.http.policy'] is 'HTTP_ONLY'
        "http://" + rm_site["yarn.resourcemanager.webapp.address#{id}"]
      else
        "https://" + rm_site["yarn.resourcemanager.webapp.https.address#{id}"]
    ## Tez Site when UI is enabled
    options.tez_site['tez.runtime.convert.user-payload.to.history-text'] ?= 'true'