Spark History Server Configure

module.exports = (service) ->
  options = service.options

Configuration

  # Layout
  options.pid_dir ?= '/var/run/spark'
  options.conf_dir ?= '/etc/spark-history-server/conf'
  options.log_dir ?= '/var/log/spark'
  # spark-config
  options.heapsize ?= '2g'
  options.iptables = !!service.deps.iptables and service.deps.iptables.options.action is 'start'
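
These defaults can be overridden from the service options before this handler runs; a minimal sketch with hypothetical values:

  # Hypothetical overrides declared by the user of this service:
  # options.heapsize = '4g'              # larger heap for a busy cluster
  # options.log_dir = '/data/log/spark'  # relocate logs to a data disk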

Identities

  # User
  options.user ?= {}
  options.user = name: options.user if typeof options.user is 'string'
  options.user.name ?= 'spark'
  options.user.system ?= true
  options.user.comment ?= 'Spark User'
  options.user.home ?= '/var/lib/spark'
  options.user.groups ?= 'hadoop'
  # Group
  options.group ?= {}
  options.group = name: options.group if typeof options.group is 'string'
  options.group.name ?= 'spark'
  options.group.system ?= true
  options.user.gid ?= options.group.name
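
Assuming no user-provided overrides, the resolved identities are equivalent to the following sketch:

  # options.user  => { name: 'spark', system: true, comment: 'Spark User',
  #                    home: '/var/lib/spark', groups: 'hadoop', gid: 'spark' }
  # options.group => { name: 'spark', system: true }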

Kerberos

  options.krb5 ?= {}
  options.krb5.realm ?= service.deps.krb5_client.options.etc_krb5_conf?.libdefaults?.default_realm
  throw Error 'Required Option: "realm"' unless options.krb5.realm
  options.krb5.admin ?= service.deps.krb5_client.options.admin[options.krb5.realm]
  # Kerberos HDFS Admin
  options.hdfs_krb5_user = service.deps.hadoop_core.options.hdfs.krb5_user
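
For illustration, with a krb5_client dependency configured as below (hypothetical realm), the resolution above yields:

  # service.deps.krb5_client.options:
  #   etc_krb5_conf: libdefaults: default_realm: 'CLUSTER.LOCAL'
  # => options.krb5.realm is 'CLUSTER.LOCAL'
  # => options.krb5.admin is the admin entry registered for that realm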

Java Home

  options.java_home ?= service.deps.java.options.java_home

Configuration Spark Defaults

Inherits some of the defaults from the basic Spark yarn-cluster based installation.

  options.conf ?= {}
  options.conf['spark.history.provider'] ?= 'org.apache.spark.deploy.history.FsHistoryProvider'
  options.conf['spark.history.fs.update.interval'] ?= '10s'
  options.conf['spark.history.retainedApplications'] ?= '50'
  options.conf['spark.history.ui.port'] ?= '18080'

  options.conf['spark.history.ui.acls.enable'] ?= 'true'
  options.conf['spark.history.fs.cleaner.enabled'] ?= 'false'
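
Rendered into `spark-defaults.conf`, these defaults amount to the following sketch:

  # spark.history.provider              org.apache.spark.deploy.history.FsHistoryProvider
  # spark.history.fs.update.interval    10s
  # spark.history.retainedApplications  50
  # spark.history.ui.port               18080
  # spark.history.ui.acls.enable        true
  # spark.history.fs.cleaner.enabled    false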

Configuration Kerberos

The Spark History Server runs as the spark user.

  options.conf['spark.yarn.historyServer.address'] ?= "#{service.node.fqdn}:#{options.conf['spark.history.ui.port']}"
  options.conf['spark.history.kerberos.enabled'] ?= if service.deps.hadoop_core.options.core_site['hadoop.http.authentication.type'] is 'kerberos' then 'true' else 'false'
  options.conf['spark.history.kerberos.principal'] ?= "spark/#{service.node.fqdn}@#{options.krb5.realm}"
  options.conf['spark.history.kerberos.keytab'] ?= '/etc/security/keytabs/spark.service.keytab'
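
For example, on a node named `master1.cluster.local` in the realm `CLUSTER.LOCAL` (hypothetical values), the computed defaults are:

  # spark.yarn.historyServer.address  master1.cluster.local:18080
  # spark.history.kerberos.principal  spark/master1.cluster.local@CLUSTER.LOCAL
  # spark.history.kerberos.keytab     /etc/security/keytabs/spark.service.keytab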

Configuration UI SSL

Follow the official Spark 2.x documentation to configure SSL and use the Hadoop credential provider to store passwords.

  options.ssl = merge {}, service.deps.ssl?.options, options.ssl
  options.ssl.enabled ?= !!service.deps.ssl
  options.truststore ?= {}
  options.keystore ?= {}
  if options.ssl.enabled
    throw Error "Required Option: ssl.cert" if  not options.ssl.cert
    throw Error "Required Option: ssl.key" if not options.ssl.key
    throw Error "Required Option: ssl.cacert" if not options.ssl.cacert
    options.truststore.target ?= "#{options.conf_dir}/truststore"
    throw Error "Required Property: truststore.password" if not options.truststore.password
    options.truststore.caname ?= 'hadoop_root_ca'
    options.truststore.type ?= 'jks'
    throw Error "Invalid Truststore Type: #{truststore.type}" unless options.truststore.type in ['jks', 'jceks', 'pkcs12']
    options.keystore.target ?= "#{options.conf_dir}/keystore"
    throw Error "Required Property: keystore.password" if not options.keystore.password
    options.keystore.caname ?= 'hadoop_root_ca'
    options.keystore.type ?= 'jks'
    throw Error "Invalid KeyStore Type: #{keystore.type}" unless options.keystore.type in ['jks', 'jceks', 'pkcs12']
    options.conf['spark.ssl.historyServer.enabled'] ?= 'true'
    options.conf['spark.ssl.historyServer.port'] ?= '18080'
    # options.conf['spark.ssl.historyServer.keyPassword'] ?= options.keystore.password
    options.conf['spark.ssl.historyServer.keyStore'] ?= options.keystore.target
    # options.conf['spark.ssl.historyServer.keyStorePassword'] ?= options.keystore.password
    options.conf['spark.ssl.historyServer.keyStoreType'] ?= options.keystore.type
    options.conf['spark.ssl.historyServer.trustStore'] ?= options.truststore.target
    # options.conf['spark.ssl.historyServer.trustStorePassword'] ?= options.truststore.password
    options.conf['spark.ssl.historyServer.trustStoreType'] ?= options.truststore.type
    options.conf['hadoop.security.credential.provider.path'] ?= "jceks://file#{options.conf_dir}/history-ui-credential.jceks"
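
Note that the keystore and truststore passwords are voluntarily kept out of `spark-defaults.conf` (see the commented properties above); they are expected to live in the JCEKS store referenced by `hadoop.security.credential.provider.path`. A sketch of provisioning that store with the standard Hadoop CLI, assuming the default conf_dir and the alias names below:

  # hadoop credential create spark.ssl.historyServer.keyStorePassword \
  #   -provider jceks://file/etc/spark-history-server/conf/history-ui-credential.jceks
  # hadoop credential create spark.ssl.historyServer.trustStorePassword \
  #   -provider jceks://file/etc/spark-history-server/conf/history-ui-credential.jceks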

Inheritance

  for prop in [
    'spark.master'
    'spark.authenticate'
    'spark.authenticate.secret'
    'spark.eventLog.enabled'
    'spark.eventLog.dir'
    'spark.history.fs.logDirectory'
    'spark.yarn.services'
    'spark.ssl.enabledAlgorithms'
    'spark.eventLog.overwrite'
    'spark.yarn.jar'
    'spark.history.retainedApplications'
    'spark.yarn.applicationMaster.waitTries'
    'spark.yarn.am.waitTime'
    'spark.yarn.containerLauncherMaxThreads'
    'spark.yarn.driver.memoryOverhead'
    'spark.yarn.executor.memoryOverhead'
    'spark.yarn.max.executor.failures'
    'spark.yarn.preserve.staging.files'
    'spark.yarn.queue'
    'spark.yarn.scheduler.heartbeat.interval-ms'
    'spark.yarn.submit.file.replication'
  ] then options.conf[prop] ?= service.deps.spark_client[0].options.conf[prop]
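
Because of the `?=` conditional assignment, a property already set above wins over the client value; a sketch of the effect with a hypothetical client configuration:

  # spark_client conf:            'spark.eventLog.dir': 'hdfs:///spark-history'
  # history conf before the loop: (unset)
  # history conf after the loop:  'spark.eventLog.dir': 'hdfs:///spark-history'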

Configuration Client

  for srv in service.deps.spark_client
    srv.options.conf['spark.history.provider'] = options.conf['spark.history.provider']
    srv.options.conf['spark.history.ui.port'] = options.conf['spark.history.ui.port']
    srv.options.conf['spark.yarn.historyServer.address'] = options.conf['spark.yarn.historyServer.address']
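
Unlike the conditional defaults above, these assignments are unconditional: every declared spark_client is pointed at this history server. Continuing the hypothetical example, each client ends up with:

  # 'spark.history.provider':           'org.apache.spark.deploy.history.FsHistoryProvider'
  # 'spark.history.ui.port':            '18080'
  # 'spark.yarn.historyServer.address': 'master1.cluster.local:18080'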

Log4j Properties

  options.log4j ?= {}
  options.log4j['log4j.rootCategory'] ?= 'INFO, console'
  options.log4j['log4j.appender.console'] ?= 'org.apache.log4j.ConsoleAppender'
  options.log4j['log4j.appender.console.target'] ?= 'System.out'
  options.log4j['log4j.appender.console.layout'] ?= 'org.apache.log4j.PatternLayout'
  options.log4j['log4j.appender.console.layout.ConversionPattern'] ?= '%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n'
  # Settings to quiet third party logs that are too verbose
  options.log4j['log4j.logger.org.spark-project.jetty'] ?= 'WARN'
  options.log4j['log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle'] ?= 'ERROR'
  options.log4j['log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper'] ?= 'INFO'
  options.log4j['log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter'] ?= 'INFO'
  options.log4j['log4j.logger.org.apache.parquet'] ?= 'ERROR'
  options.log4j['log4j.logger.parquet'] ?= 'ERROR'
  # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
  options.log4j['log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler'] ?= 'FATAL'
  options.log4j['log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry'] ?= 'ERROR'
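
Rendered into `log4j.properties`, the console appender section of these settings looks like this sketch:

  # log4j.rootCategory=INFO, console
  # log4j.appender.console=org.apache.log4j.ConsoleAppender
  # log4j.appender.console.target=System.out
  # log4j.appender.console.layout=org.apache.log4j.PatternLayout
  # log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n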

Wait

  options.wait ?= {}
  options.wait.ui = for srv in service.deps.spark_history
    host: srv.node.fqdn
    port: srv.options?.conf?['spark.history.ui.port'] or options.conf['spark.history.ui.port'] or '18080'
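
With two declared history servers (hypothetical hosts), the resulting array is equivalent to:

  # options.wait.ui = [
  #   { host: 'master1.cluster.local', port: '18080' }
  #   { host: 'master2.cluster.local', port: '18080' }
  # ]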

Dependencies

{merge} = require 'nikita/lib/misc'