Menu

Apache Spark History Server

The history server ships with the spark-client package. The only difference is in the configuration of the Kerberos properties.

We do not recommend using the Spark web UI because it does not support SSL. Moreover, it makes YARN redirect the tracking URL to the web UI, which prevents the user from seeing the logs in the YARN ResourceManager web interface after the job has finished.

module.exports = header: 'Spark History Server Install', handler: ({options}) ->

  # Register the ryba helper actions under the names used by this handler;
  # each one is loaded from the module of the same name in "ryba/lib".
  for action in ['hdp_select', 'hdfs_mkdir', 'hconfigure']
    @registry.register action, "ryba/lib/#{action}"

Identities

  # Declare the group first, then the user, from the definitions carried by
  # `options.group` and `options.user`.
  @system.group {header: 'Group'}, options.group
  @system.user {header: 'User'}, options.user

Packages

  # Package and HDP component selection.
  @service name: 'spark2'
  @hdp_select name: 'spark2-historyserver'
  # Init script rendered from the local template shipped next to this module,
  # with the handler options as rendering context.
  init_template = "#{__dirname}/../resources/spark-history-server.j2"
  @service.init
    target: '/etc/init.d/spark2-history-server'
    source: init_template
    local: true
    context: options
    backup: true
    mode: 0o0755
  # tmpfs declaration for the PID directory, restricted by `if_os` to
  # RedHat/CentOS 7.
  pid_owner = options.user.name
  pid_group = options.group.name
  @system.tmpfs
    if_os: {name: ['redhat', 'centos'], version: '7'}
    mount: options.pid_dir
    uid: pid_owner
    gid: pid_group
    perm: '0750'

Layout

IPTables

| Service              | Port  | Proto | Info              |
|----------------------|-------|-------|-------------------|
| spark history server | 18080 | http  | Spark HTTP server |

IPTables rules are only inserted if the parameter "iptables.action" is set to "start" (default value).

  # Accept new TCP connections on the port configured under
  # "spark.ssl.historyServer.ui.port"; the action is conditioned by
  # `options.iptables`.
  history_ui_rule =
    chain: 'INPUT'
    jump: 'ACCEPT'
    dport: options.conf['spark.ssl.historyServer.ui.port']
    protocol: 'tcp'
    state: 'NEW'
    comment: "Spark HTTPS Server"
  @tools.iptables
    header: 'IPTables'
    rules: [history_ui_rule]
    if: options.iptables

  @call header: 'Layout', ->
    # The PID, log and configuration directories all share the same owner.
    uid = options.user.name
    gid = options.group.name
    @system.mkdir {target: options.pid_dir, uid: uid, gid: gid}
    @system.mkdir {target: options.log_dir, uid: uid, gid: gid}
    @system.mkdir {target: options.conf_dir, uid: uid, gid: gid}

Spark History Server Configure

  # Exports enforced in spark-env.sh, as [pattern, replacement line] pairs.
  # Every pair becomes a write rule with `append: true`.
  # See "/usr/hdp/current/spark-historyserver/sbin/spark-daemon.sh" for
  # additional environment variables.
  # NOTE(review): SPARK_CONF_DIR points at ".../spark-historyserver" while the
  # selected HDP component is "spark2-historyserver" - confirm the path.
  env_exports = [
    [/^export SPARK_PID_DIR=.*$/mg, "export SPARK_PID_DIR=#{options.pid_dir} # RYBA CONF \"ryba.options.pid_dir\", DONT OVERWRITE"]
    [/^export SPARK_CONF_DIR=.*$/mg, "export SPARK_CONF_DIR=${SPARK_HOME:-/usr/hdp/current/spark-historyserver}/conf # RYBA CONF \"ryba.spark.conf_dir\", DONT OVERWRITE"]
    [/^export SPARK_LOG_DIR=.*$/mg, "export SPARK_LOG_DIR=#{options.log_dir} # RYBA CONF \"ryba.spark.log_dir\", DONT OVERWRITE"]
    [/^export JAVA_HOME=.*$/mg, "export JAVA_HOME=#{options.java_home} # RYBA, DONT OVERWRITE"]
  ]
  @file
    header: 'Spark env'
    target: "#{options.conf_dir}/spark-env.sh"
    write: for [pattern, line] in env_exports
      match: pattern
      replace: line
      append: true
  # Single write rule setting SPARK_DAEMON_MEMORY from `options.heapsize` in
  # the spark-config.sh script.
  daemon_memory =
    match: /^export SPARK_DAEMON_MEMORY=.*$/mg
    replace: "export SPARK_DAEMON_MEMORY=#{options.heapsize} # RYBA CONF \"ryba.options.heapsize\", DONT OVERWRITE"
    append: true
  @file
    header: 'Spark-config'
    target: '/usr/hdp/current/spark-historyserver/sbin/spark-config.sh'
    write: [daemon_memory]
  # One write rule per entry of `options.conf`. A null value replaces the
  # matching "<key> ..." line with an empty string and is never appended; any
  # other value rewrites the line as "<key> <value>" with `append: true`.
  defaults = for key, value of options.conf
    match: ///^#{quote key}\ .*$///mg
    replace: if value isnt null then "#{key} #{value}" else ""
    append: value isnt null
  @file
    header: 'Spark Defaults'
    target: "#{options.conf_dir}/spark-defaults.conf"
    write: defaults
    backup: true
  # Link the HDP "conf" location to the managed configuration directory.
  @system.link source: options.conf_dir, target: '/usr/hdp/current/spark-historyserver/conf'

Clients Configuration

  # Site files merged into the Spark configuration directory, in order:
  # [header, file name, directory holding the source file].
  site_files = [
    ['Hive Site', 'hive-site.xml', '/etc/hive/conf']
    ['Core Site', 'core-site.xml', '/etc/hadoop/conf']
  ]
  for [title, file, origin] in site_files
    @hconfigure
      header: title
      target: "#{options.conf_dir}/#{file}"
      source: "#{origin}/#{file}"
      merge: true
      backup: true

  # hdfs-site.xml is copied as is rather than merged.
  @system.copy target: "#{options.conf_dir}/hdfs-site.xml", source: '/etc/hadoop/conf/hdfs-site.xml'

Kerberos

  # Principal and keytab come from the "spark.history.kerberos.*" properties;
  # the keytab is owned by the service user and group.
  history_principal =
    header: 'Kerberos'
    principal: options.conf['spark.history.kerberos.principal']
    keytab: options.conf['spark.history.kerberos.keytab']
    randkey: true
    uid: options.user.name
    gid: options.group.name
  @krb5.addprinc options.krb5.admin, history_principal

SSL

  # Import the server private key and its certificate into the keystore.
  @java.keystore_add
    keystore: options.keystore.target
    storepass: options.keystore.password
    key: options.ssl.key.source
    cert: options.ssl.cert.source
    # Fix: the key password was set to the keystore path
    # (`options.keystore.target`). It now uses the keystore password, which is
    # also the value stored below under "spark.ssl.historyServer.keyPassword".
    keypass: options.keystore.password
    name: options.ssl.key.name
    local: options.ssl.cert.local
  # Import the root CA certificate into the same keystore.
  @java.keystore_add
    keystore: options.keystore.target
    storepass: options.keystore.password
    caname: "hadoop_root_ca"
    cacert: options.ssl.cacert.source
    local: options.ssl.cacert.local
  # Import the hadoop_root_ca CA certificate into the Spark History Server
  # truststore.
  @java.keystore_add
    keystore: options.truststore.target
    # Fix: the store password was set to the truststore path
    # (`options.truststore.target`) instead of its password.
    storepass: options.truststore.password
    caname: "hadoop_root_ca"
    cacert: options.ssl.cacert.source
    local: options.ssl.cacert.local
  # Store the SSL passwords in a local JCEKS credential provider. Each command
  # is skipped when `hadoop credential list` already reports the alias, which
  # the `unless_exec` checks grep for in lowercase.
  @system.execute
    cmd: """
      hadoop credential create spark.ssl.historyServer.keyPassword -value #{options.keystore.password} \
      -provider jceks://file#{options.conf_dir}/history-ui-credential.jceks
    """
    unless_exec: """
      hadoop credential list -provider jceks://file#{options.conf_dir}/history-ui-credential.jceks | grep spark.ssl.historyserver.keypassword
    """
  # Fix: the keystore password was stored under the alias
  # "spark.ssl.historyServer.keyStore"; the alias is now
  # "spark.ssl.historyServer.keyStorePassword", consistent with the
  # keyPassword / trustStorePassword aliases created alongside it.
  @system.execute
    cmd: """
      hadoop credential create spark.ssl.historyServer.keyStorePassword -value #{options.keystore.password} \
      -provider jceks://file#{options.conf_dir}/history-ui-credential.jceks
    """
    unless_exec: """
      hadoop credential list -provider jceks://file#{options.conf_dir}/history-ui-credential.jceks | grep spark.ssl.historyserver.keystorepassword
    """
  @system.execute
    cmd: """
      hadoop credential create spark.ssl.historyServer.trustStorePassword -value #{options.truststore.password} \
      -provider jceks://file#{options.conf_dir}/history-ui-credential.jceks
    """
    unless_exec: """
      hadoop credential list -provider jceks://file#{options.conf_dir}/history-ui-credential.jceks | grep spark.ssl.historyserver.truststorepassword
    """

Dependencies

quote = require 'regexp-quote'