Menu

YARN NodeManager Configuration

Exemple

{ "ryba": { "yarn": { "nm": {
    "opts": "",
    "heapsize": "1024"
} } } }
module.exports = (service) ->
  options = service.options

Identities

  options.hadoop_group = merge {}, service.deps.hadoop_core.options.hadoop_group, options.hadoop_group
  options.group = merge {}, service.deps.hadoop_core.options.yarn.group, options.group
  options.user = merge {}, service.deps.hadoop_core.options.yarn.user, options.user

Kerberos

  options.krb5 ?= {}
  options.krb5.realm ?= service.deps.krb5_client.options.etc_krb5_conf?.libdefaults?.default_realm
  throw Error 'Required Options: "realm"' unless options.krb5.realm
  options.krb5.admin ?= service.deps.krb5_client.options.admin[options.krb5.realm]

Environment

  # Layout
  options.home ?= '/usr/hdp/current/hadoop-yarn-nodemanager'
  options.log_dir ?= '/var/log/hadoop/yarn'
  options.pid_dir ?= '/var/run/hadoop/yarn'
  options.conf_dir ?= '/etc/hadoop-yarn-nodemanager/conf'
  options.hadoop_conf_dir ?= service.deps.hadoop_core.options.conf_dir
  # Java
  options.java_home ?= service.deps.java.options.java_home
  options.heapsize ?= '1024m'
  options.newsize ?= '200m'
  # Misc
  options.fqdn ?= service.node.fqdn
  options.iptables ?= service.deps.iptables and service.deps.iptables.options.action is 'start'
  options.iptables_rules ?= []
  options.libexec ?= '/usr/hdp/current/hadoop-client/libexec'
  options.hdfs_krb5_user = service.deps.hadoop_core.options.hdfs.krb5_user

Configuration

  # Hadoop core "core-site.xml"
  options.core_site = merge {}, service.deps.hdfs_client[0].options.core_site, options.core_site or {}
  # HDFS client "hdfs-site.xml"
  options.hdfs_site = merge {}, service.deps.hdfs_client[0].options.hdfs_site, options.hdfs_site or {}
  # Yarn NodeManager "yarn-site.xml"
  options.yarn_site ?= {}
  options.yarn_site['yarn.http.policy'] ?= 'HTTPS_ONLY' # HTTP_ONLY or HTTPS_ONLY or HTTP_AND_HTTPS
  # Working Directories (see capacity for server resource discovery)
  options.yarn_site['yarn.nodemanager.local-dirs'] ?= ['/var/yarn/local']
  options.yarn_site['yarn.nodemanager.local-dirs'] = options.yarn_site['yarn.nodemanager.local-dirs'].join ',' if Array.isArray options.yarn_site['yarn.nodemanager.local-dirs']
  options.yarn_site['yarn.nodemanager.log-dirs'] ?= ['/var/yarn/logs']
  options.yarn_site['yarn.nodemanager.log-dirs'] = options.yarn_site['yarn.nodemanager.log-dirs'].join ',' if Array.isArray options.yarn_site['yarn.nodemanager.log-dirs']
  # Configuration
  # options.yarn_site['yarn.scheduler.minimum-allocation-mb'] ?= null # Make sure we erase hdp default value
  # options.yarn_site['yarn.scheduler.maximum-allocation-mb'] ?= null # Make sure we erase hdp default value
  options.yarn_site['yarn.nodemanager.address'] ?= "#{service.node.fqdn}:45454"
  options.yarn_site['yarn.nodemanager.localizer.address'] ?= "#{service.node.fqdn}:8040"
  options.yarn_site['yarn.nodemanager.webapp.address'] ?= "#{service.node.fqdn}:8042"
  options.yarn_site['yarn.nodemanager.webapp.https.address'] ?= "#{service.node.fqdn}:8044"
  options.yarn_site['yarn.nodemanager.remote-app-log-dir'] ?= "/app-logs"
  options.yarn_site['yarn.nodemanager.keytab'] ?= '/etc/security/keytabs/nm.service.keytab'
  options.yarn_site['yarn.nodemanager.principal'] ?= "nm/_HOST@#{options.krb5.realm}"
  options.yarn_site['yarn.nodemanager.vmem-pmem-ratio'] ?= '2.1'
  # Cloudera recommand setting [vmem-check to false on Centos/RHEL 6 due to its aggressive allocation of virtual memory](http://blog.cloudera.com/blog/2014/04/apache-hadoop-yarn-avoiding-6-time-consuming-gotchas/)
  # by default, "yarn.nodemanager.vmem-check-enabled" is true (see in yarn-default.xml)
  options.yarn_site['yarn.nodemanager.pmem-check-enabled'] ?= 'true'
  options.yarn_site['yarn.nodemanager.vmem-check-enabled'] ?= 'true'
  options.yarn_site['yarn.nodemanager.resource.percentage-physical-cpu-limit'] ?= '100'
  options.yarn_site['yarn.nodemanager.container-executor.class'] ?= 'org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor'
  options.yarn_site['yarn.nodemanager.linux-container-executor.group'] ?= 'yarn'
  options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage'] ?= 'false' # By default, iyarn.nodemanager.container-executor.clasf spare CPU cycles are available, containers are allowed to exceed the CPU limits set for them
  # Fix bug in HDP companion files (missing "s")
  options.yarn_site['yarn.nodemanager.log.retain-second'] ?= null
  options.yarn_site['yarn.nodemanager.log.retain-seconds'] ?= '604800'
  # Configurations for History Server (Not sure wether this should be deployed on NMs):
  # console.log 'Warning yarn_nm/yarn_site[yarn.log-aggregation-enable] set to false'
  options.yarn_site['yarn.app.mapreduce.am.env'] ?= 'LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native'
  options.yarn_site['yarn.log-aggregation-enable'] ?= 'true'
  options.yarn_site['yarn.log-aggregation.retain-seconds'] ?= '2592000' #  30 days, how long to keep aggregation logs before deleting them. -1 disables. Be careful, set this too small and you will spam the name node.
  options.yarn_site['yarn.log-aggregation.retain-check-interval-seconds'] ?= '-1' # Time between checks for aggregated log retention. If set to 0 or a negative value then the value is computed as one-tenth of the aggregated log retention time. Be careful, set this too small and you will spam the name node.

System Options

  options.opts ?= {}
  options.opts.base ?= ''
  options.opts.java_properties ?= {}
  options.opts.jvm ?= {}
  options.opts.jvm['-Xms'] ?= options.heapsize
  options.opts.jvm['-Xmx'] ?= options.heapsize
  options.opts.jvm['-XX:NewSize='] ?= options.newsize #should be 1/8 of datanode heapsize
  options.opts.jvm['-XX:MaxNewSize='] ?= options.newsize #should be 1/8 of datanode heapsize

Container Executor

YARN containers in a secure cluster use the operating system facilities to offer execution isolation for containers. Secure containers execute under the credentials of the job user. The operating system enforces access restriction for the container. The container must run as the use that submitted the application.

Secure Containers work only in the context of secured YARN clusters.

  options.container_executor ?= {}
  options.container_executor['yarn.nodemanager.local-dirs'] ?= options.yarn_site['yarn.nodemanager.local-dirs']
  options.container_executor['yarn.nodemanager.linux-container-executor.group'] ?= options.yarn_site['yarn.nodemanager.linux-container-executor.group']
  options.container_executor['yarn.nodemanager.log-dirs'] = options.yarn_site['yarn.nodemanager.log-dirs']
  options.container_executor['banned.users'] ?= 'hdfs,yarn,mapred,bin'
  options.container_executor['min.user.id'] ?= '0'

YARN 3 Container Executor docker properties

  options.container_executor['docker'] ?= {}
  options.container_executor['docker']['module.enabled'] ?= 'true'
  options.container_executor['docker']['docker.privileged-containers.enabled'] ?= 'false'
  options.container_executor['docker']['docker.privileged-containers.registries'] ?= ''
  options.container_executor['docker']['docker.trusted.registries'] ?= ''
  options.container_executor['docker']['docker.allowed.capabilities'] ?= 'SYS_CHROOT,MKNOD,SETFCAP,SETPCAP,FSETID,CHOWN,AUDIT_WRITE,SETGID,NET_RAW,FOWNER,SETUID,DAC_OVERRIDE,KILL,NET_BIND_SERVICE'
  options.container_executor['docker']['docker.allowed.networks'] ?= 'host,bridge'
  options.container_executor['docker']['docker.allowed.ro-mounts'] ?= '/etc/passwd,/etc/group' # allow end user to submit their services as without docker
  options.container_executor['docker']['docker.allowed.rw-mounts'] ?= "#{[options.container_executor['yarn.nodemanager.local-dirs'].split(',')...,options.container_executor['yarn.nodemanager.log-dirs'].split(',')...].join(',')}"

Docker in YARN

  options.yarn_site['yarn.nodemanager.runtime.linux.allowed-runtimes'] ?= 'default,docker'
  options.yarn_site['yarn.nodemanager.runtime.linux.docker.allowed-container-network'] ?= 'host,bridge'
  options.yarn_site['yarn.nodemanager.runtime.linux.docker.default-container-network'] ?= 'host'
  options.yarn_site['yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed'] ?= 'false'
  options.yarn_site['yarn.nodemanager.runtime.linux.docker.privileged-containers.acl'] ?= ''
  options.yarn_site['yarn.nodemanager.runtime.linux.docker.capabilities'] ?= 'SYS_CHROOT,MKNOD,SETFCAP,SETPCAP,FSETID,CHOWN,AUDIT_WRITE,SETGID,NET_RAW,FOWNER,SETUID,DAC_OVERRIDE,KILL,NET_BIND_SERVICE'

Work Preserving Recovery

See ResourceManager for additionnal informations.

  options.yarn_site['yarn.nodemanager.recovery.enabled'] ?= 'true'
  options.yarn_site['yarn.nodemanager.recovery.dir'] ?= '/var/yarn/recovery-state'
  # log debug
  options.yarn_site['yarn.nodemanager.delete.debug-delay-sec'] ?= '3600'

Configuration for CGroups

Resources:

  • YARN-600: Hook up cgroups CPU settings to the number of virtual cores allocated

  • YARN-810: CGroup ceiling enforcement on CPU

  • Using YARN with Cgroups

  • VCore Configuration In Hadoop

    isLinuxContainer = options.yarn_site['yarn.nodemanager.container-executor.class'] is 'org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor'

    options.yarn_site['yarn.nodemanager.linux-container-executor.resources-handler.class'] ?= 'org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler'

    options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.hierarchy'] ?= '/hadoop-yarn'

    options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.mount'] ?= 'true'

    options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.mount-path'] ?= '/cgroup'

    options.yarn_site['yarn.nodemanager.linux-container-executor.resources-handler.class'] ?= 'org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler' hierarchy = options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.hierarchy'] ?= "/#{options.user.name}" options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.mount'] ?= 'false' options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.mount-path'] ?= '/cgroup'# deprecete /cgroup (RHEL 6) for /sys/fs/cgroup (RHEL 7)

    HDP doc, probably incorrect

    options.yarn_site['yarn.nodemanager.container-executor.cgroups.hierarchy'] ?= options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.hierarchy']

    options.yarn_site['yarn.nodemanager.container-executor.cgroups.mount'] ?= options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.mount']

    options.yarn_site['yarn.nodemanager.container-executor.resources-handler.class'] ?= options.yarn_site['yarn.nodemanager.container-executor.resources-handler.class']

    options.yarn_site['yarn.nodemanager.container-executor.group'] ?= 'hadoop'

    if options.yarn_site['yarn.nodemanager.linux-container-executor.cgroups.mount'] is 'false' # default values options.cgroup ?= "#{options.user.name}": perm: task: 'uid': "#{options.user.name}" 'gid': "#{options.user.name}" admin: 'uid': "#{options.user.name}" 'gid': "#{options.user.name}" cpu: 'cpu.rt_period_us': ""1000000"" 'cpu.rt_runtime_us': ""0"" 'cpu.cfs_period_us': ""100000"" 'cpu.cfs_quota_us': ""-1"" 'cpu.shares': '"1024"'

SSL

  options.ssl = merge {}, service.deps.hadoop_core.options.ssl, options.ssl
  options.ssl_server = merge {}, service.deps.hadoop_core.options.ssl_server, options.ssl_server or {},
    'ssl.server.keystore.location': "#{options.conf_dir}/keystore"
    'ssl.server.truststore.location': "#{options.conf_dir}/truststore"
  options.ssl_client = merge {}, service.deps.hadoop_core.options.ssl_client, options.ssl_client or {},
    'ssl.client.truststore.location': "#{options.conf_dir}/truststore"

Metrics

  options.metrics = merge {}, service.deps.metrics?.options, options.metrics

  options.metrics.config ?= {}
  options.metrics.sinks ?= {}
  options.metrics.sinks.file_enabled ?= true
  options.metrics.sinks.ganglia_enabled ?= false
  options.metrics.sinks.graphite_enabled ?= false
  # File sink
  if options.metrics.sinks.file_enabled
    options.metrics.config["*.sink.file.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.file.config if service.deps.metrics?.options?.sinks?.file_enabled
    options.metrics.config['maptask.sink.file.class'] ?= 'org.apache.hadoop.metrics2.sink.FileSink'
    options.metrics.config['maptask.sink.file.filename'] ?= 'maptask-metrics.out'
    options.metrics.config['nodemanager.sink.file.class'] ?= 'org.apache.hadoop.metrics2.sink.FileSink'
    options.metrics.config['nodemanager.sink.file.filename'] ?= 'nodemanager-metrics.out'
    options.metrics.config['reducetask.sink.file.class'] ?= 'org.apache.hadoop.metrics2.sink.FileSink'
    options.metrics.config['reducetask.sink.file.filename'] ?= 'reducetask-metrics.out'
  # Ganglia sink, accepted properties are "servers" and "supportsparse"
  if options.metrics.sinks.ganglia_enabled
    options.metrics.config['nodemanager.sink.ganglia.class'] ?= options.metrics.ganglia.class
    options.metrics.config['nodemanager.sink.ganglia.servers'] ?= "#{service.deps.ganglia.node.fqdn}:#{service.deps.ganglia.options.nn_port}"
    options.metrics.config['maptask.sink.ganglia.class'] ?= options.metrics.ganglia.class
    options.metrics.config['maptask.sink.ganglia.servers'] ?= "#{service.deps.ganglia.node.fqdn}:#{service.deps.ganglia.options.nn_port}"
    options.metrics.config['reducetask.sink.ganglia.class'] ?= options.metrics.ganglia.class
    options.metrics.config['reducetask.sink.ganglia.servers'] ?= "#{service.deps.ganglia.node.fqdn}:#{service.deps.ganglia.options.nn_port}"
    options.metrics.config["*.sink.ganglia.#{k}"] ?= v for k, v of options.sinks.ganglia.config if service.deps.metrics?.options?.sinks?.ganglia_enabled
  # Graphite Sink
  if options.metrics.sinks.graphite_enabled
    throw Error 'Missing remote_host ryba.yarn.nm.metrics.sinks.graphite.config.server_host' unless options.metrics.sinks.graphite.config.server_host?
    throw Error 'Missing remote_port ryba.yarn.nm.metrics.sinks.graphite.config.server_port' unless options.metrics.sinks.graphite.config.server_port?
    options.metrics.config["nodemanager.sink.graphite.class"] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
    options.metrics.config["maptask.sink.graphite.class"] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
    options.metrics.config["reducetask.sink.graphite.class"] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
    options.metrics.config["*.sink.graphite.#{k}"] ?= v for k, v of service.deps.metrics.options.sinks.graphite.config if service.deps.metrics?.options?.sinks?.graphite_enabled

List of Services

  options.yarn_site['yarn.nodemanager.aux-services'] ?= 'mapreduce_shuffle'
  options.yarn_site['yarn.nodemanager.aux-services.mapreduce_shuffle.class'] ?= 'org.apache.hadoop.mapred.ShuffleHandler'
  if service.deps.yarn_tr?.length
    if options.yarn_site['yarn.nodemanager.aux-services'].indexOf('timeline_collector') isnt -1
      options.yarn_site['yarn.nodemanager.aux-services'] = "#{options.yarn_site['yarn.nodemanager.aux-services']},timeline_collector" 
    options.yarn_site['yarn.nodemanager.aux-services.timeline_collector.class'] ?= 'org.apache.hadoop.yarn.server.timelineservice.collector.PerNodeTimelineCollectorsAuxService'

Import from Yarn TS/TR

  source_ts = if service.deps.yarn_tr?.length > 0 then service.deps.yarn_tr[0] else service.deps.yarn_ts

  for srv in service.deps.yarn_nm
    for property in [
      'yarn.timeline-service.enabled'
      'yarn.timeline-service.address'
      'yarn.timeline-service.reader.webapp.address'
      'yarn.timeline-service.reader.webapp.https.address'
      'yarn.timeline-service.principal'
      'yarn.timeline-service.http-authentication.type'
      'yarn.timeline-service.http-authentication.kerberos.principal'
    ]
      srv.options.yarn_site ?= {}
      srv.options.yarn_site[property] ?= options.yarn_site[property]

  if service.deps.yarn_tr?[0]?
    for property in [
      'yarn.timeline-service.reader.webapp.address'
      'yarn.timeline-service.reader.webapp.https.address'
    ] then options.yarn_site[property] ?= service.deps.yarn_tr[0].options.yarn_site[property]

Import/Export to Yarn RM

  #Import Yarn Global properties
  for property in [
    'yarn.nodemanager.remote-app-log-dir'
    'yarn.nodemanager.remote-app-log-dir-suffix'
    'yarn.log-aggregation-enable'
    'yarn.log-aggregation.retain-seconds'
    'yarn.log-aggregation.retain-check-interval-seconds'
    'yarn.generic-application-history.save-non-am-container-meta-info'
    'yarn.http.policy'
    'yarn.log.server.url'
    'yarn.resourcemanager.principal'
    'yarn.resourcemanager.cluster-id'
    'yarn.resourcemanager.ha.enabled'
    'yarn.resourcemanager.ha.rm-ids'
  ]
    options.yarn_site[property] ?= service.deps.yarn_rm[0].options.yarn_site[property]

  #Import Yarn RM specific properties
  for srv in service.deps.yarn_rm
    id = if srv.options.yarn_site['yarn.resourcemanager.ha.enabled'] is 'true' then ".#{srv.options.yarn_site['yarn.resourcemanager.ha.id']}" else ''
    for property in [
      'yarn.resourcemanager.webapp.delegation-token-auth-filter.enabled'
      "yarn.resourcemanager.address#{id}"
      "yarn.resourcemanager.scheduler.address#{id}"
      "yarn.resourcemanager.admin.address#{id}"
      "yarn.resourcemanager.webapp.address#{id}"
      "yarn.resourcemanager.webapp.https.address#{id}"
      "yarn.resourcemanager.resource-tracker.address#{id}"
    ]
      options.yarn_site[property] ?= srv.options.yarn_site[property]

Wait

  # Import Rules
  options.wait_krb5_client = service.deps.krb5_client.options.wait
  options.wait_zookeeper_server = service.deps.zookeeper_server[0].options.wait
  options.wait_hdfs_nn = service.deps.hdfs_nn[0].options.wait
  # Default configuration required by wait
  for srv in service.deps.yarn_nm
    srv.options.yarn_site ?= {}
    srv.options.yarn_site['yarn.nodemanager.address'] ?= "#{srv.node.fqdn}:45454"
    srv.options.yarn_site['yarn.nodemanager.localizer.address'] ?= "#{srv.node.fqdn}:8040"
    srv.options.yarn_site['yarn.nodemanager.webapp.address'] ?= "#{srv.node.fqdn}:8042"
    srv.options.yarn_site['yarn.nodemanager.webapp.https.address'] ?= "#{srv.node.fqdn}:8044"
  # Local Rules
  options.wait = {}
  options.wait.tcp = for srv in service.deps.yarn_nm
    port = srv.options.yarn_site['yarn.nodemanager.address'].split(':')[1]
    host: srv.node.fqdn, port: port
  options.wait.tcp_localiser = for srv in service.deps.yarn_nm
    port = srv.options.yarn_site['yarn.nodemanager.localizer.address'].split(':')[1]
    host: srv.node.fqdn, port: port
  options.wait.webapp = for srv in service.deps.yarn_nm
    protocol = if options.yarn_site['yarn.http.policy'] is 'HTTP_ONLY' then '' else 'https.'
    port = srv.options.yarn_site["yarn.nodemanager.webapp.#{protocol}address"].split(':')[1]
    host: srv.node.fqdn, port: port
  options.wait.tcp_local =
    host: options.yarn_site['yarn.nodemanager.address'].split(':')[0]
    port: options.yarn_site['yarn.nodemanager.address'].split(':')[1]
  options.wait.tcp_local_localiser =
    host: options.yarn_site['yarn.nodemanager.localizer.address'].split(':')[0]
    port: options.yarn_site['yarn.nodemanager.localizer.address'].split(':')[1]
  protocol = if options.yarn_site['yarn.http.policy'] is 'HTTP_ONLY' then '' else 'https.'
  options.wait.webapp_local =
    port: options.yarn_site["yarn.nodemanager.webapp.#{protocol}address"].split(':')[1]
    host: options.yarn_site["yarn.nodemanager.webapp.#{protocol}address"].split(':')[0]

Dependencies

{merge} = require '@nikitajs/core/lib/misc'