HiveServer2 Configuration

The following properties are required by Knox in secure mode:

  • hive.server2.enable.doAs
  • hive.server2.allow.user.substitution
  • hive.server2.transport.mode
  • hive.server2.thrift.http.port
  • hive.server2.thrift.http.path

Example:

{ "ryba": {
    "hive": {
      "server2": {
        "heapsize": "4096",
        "opts": "-Dcom.sun.management.jmxremote -Djava.rmi.server.hostname=130.98.196.54 -Dcom.sun.management.jmxremote.rmi.port=9526 -Dcom.sun.management.jmxremote.port=9526 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
      },
      "site": {
        "hive.server2.thrift.port": "10001"
      }
    }
} }
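
With the HTTP transport and the defaults set below (port 10001, path "cliservice"), a JDBC client can reach HiveServer2 directly with a URL of the form "jdbc:hive2://<hs2_host>:10001/;transportMode=http;httpPath=cliservice", where the host is a placeholder; depending on the rest of this configuration, additional parameters such as SSL or the Kerberos principal may be required, and Knox typically exposes its own gateway URL in front of this endpoint.
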
module.exports = (service) ->
  options = service.options

Environment

  # Layout
  options.conf_dir ?= '/etc/hive-server2/conf'
  options.log_dir ?= '/var/log/hive-server2'
  options.pid_dir ?= '/var/run/hive-server2'
  # Opts and Java
  options.java_home ?= service.deps.java.options.java_home
  options.opts ?= ''
  options.mode ?= 'local'
  throw Error 'Invalid Options mode: accepted values are "local" or "remote"' unless options.mode in ['local', 'remote']
  options.heapsize ?= if options.mode is 'local' then 1536 else 1024
  # Misc
  options.fqdn = service.node.fqdn
  options.hostname = service.node.hostname
  options.iptables ?= service.deps.iptables and service.deps.iptables.options.action is 'start'
  options.clean_logs ?= false
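
These defaults can be overridden from the configuration. Following the layout of the example above, and assuming each option maps to a key of the same name, a remote-mode server with a different heap size might be declared as follows (illustrative values):

{ "ryba": {
    "hive": {
      "server2": {
        "mode": "remote",
        "heapsize": "2048",
        "log_dir": "/var/log/hive-server2"
      }
    }
} }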

Kerberos

  options.krb5 ?= {}
  options.krb5.realm ?= service.deps.krb5_client.options.etc_krb5_conf?.libdefaults?.default_realm
  throw Error 'Required Options: "realm"' unless options.krb5.realm
  options.krb5.admin ?= service.deps.krb5_client.options.admin[options.krb5.realm]
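
If the realm cannot be derived from the Kerberos client dependency, it may be provided explicitly, assuming the same option-to-key mapping as the example above (hypothetical realm):

{ "ryba": {
    "hive": {
      "server2": {
        "krb5": {
          "realm": "EXAMPLE.COM"
        }
      }
    }
} }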

Identities

  options.group = merge {}, service.deps.hive_hcatalog[0].options.group, options.group
  options.user = merge {}, service.deps.hive_hcatalog[0].options.user, options.user

Configuration

  options.hive_site ?= {}
  properties = [ # Duplicate client, might remove
    'hive.metastore.sasl.enabled'
    'hive.security.authorization.enabled'
    # 'hive.security.authorization.manager'
    'hive.security.metastore.authorization.manager'
    'hive.security.authenticator.manager'
    'hive.optimize.mapjoin.mapreduce'
    'hive.enforce.bucketing'
    'hive.exec.dynamic.partition.mode'
    'hive.txn.manager'
    'hive.txn.timeout'
    'hive.txn.max.open.batch'
    # Transaction, read/write locks
    'hive.support.concurrency'
    'hive.cluster.delegation.token.store.zookeeper.connectString'
    # 'hive.cluster.delegation.token.store.zookeeper.znode'
    'hive.heapsize'
    'hive.exec.max.created.files'
    'hive.auto.convert.sortmerge.join.noconditionaltask'
    'hive.zookeeper.quorum'
    'hive.create.as.insert.only'
    'metastore.create.as.acid'
    'hive.metastore.warehouse.external.dir'
    'hive.hook.proto.base-directory'

  ]
  if options.mode is 'local'
    properties = properties.concat [
      'datanucleus.autoCreateTables'
      'hive.cluster.delegation.token.store.class'
      'hive.cluster.delegation.token.store.zookeeper.znode'
    ]
    # Embedded metastore in local mode: blank the remote URIs and do not
    # run the compactor initiator from this server
    options.hive_site['hive.metastore.uris'] = ' '
    options.hive_site['hive.compactor.initiator.on'] = 'false'
  else
    properties.push 'hive.metastore.uris'
  for property in properties
    options.hive_site[property] ?= service.deps.hive_hcatalog[0].options.hive_site[property]
  # Server2 specific properties
  options.hive_site['hive.server2.thrift.sasl.qop'] ?= 'auth'
  options.hive_site['hive.server2.enable.doAs'] ?= 'true'
  # options.hive_site['hive.server2.enable.impersonation'] ?= 'true' # Mentioned in CDH 5.3 but the HS2 logs complain it doesn't exist
  options.hive_site['hive.server2.allow.user.substitution'] ?= 'true'
  options.hive_site['hive.server2.transport.mode'] ?= 'http'
  options.hive_site['hive.server2.thrift.port'] ?= '10001'
  options.hive_site['hive.server2.thrift.http.port'] ?= '10001'
  options.hive_site['hive.server2.thrift.http.path'] ?= 'cliservice'
  # Bug fix: java properties are not interpolated
  # Default is "${system:java.io.tmpdir}/${system:user.name}/operation_logs"
  options.hive_site['hive.server2.logging.operation.log.location'] ?= "/tmp/#{options.user.name}/operation_logs"
  # Tez
  # https://streever.atlassian.net/wiki/pages/viewpage.action?pageId=4390918
  options.hive_site['hive.execution.engine'] ?= 'tez'
  throw Error 'Starting from HDP 3.0, only the Tez execution engine is supported' unless options.hive_site['hive.execution.engine'] is 'tez'
  options.hive_site['hive.server2.tez.default.queues'] ?= 'default'
  options.hive_site['hive.server2.tez.sessions.per.default.queue'] ?= '1'
  options.hive_site['hive.server2.tez.initialize.default.sessions'] ?= 'false'
  options.hive_site['hive.exec.post.hooks'] ?= 'org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook'
  # Permission inheritance
  # https://cwiki.apache.org/confluence/display/Hive/Permission+Inheritance+in+Hive
  # true unless ranger is the authorizer
  options.hive_site['hive.warehouse.subdir.inherit.perms'] ?= unless service.deps.ranger_admin then 'true' else 'false'
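
Most of the defaults above are assigned with the "?=" operator, so a value already present in the configuration takes precedence. Mirroring where the example at the top of the page places the "site" key, a couple of the Tez defaults could be overridden like this (illustrative values):

{ "ryba": {
    "hive": {
      "site": {
        "hive.server2.tez.default.queues": "default,hive",
        "hive.server2.tez.sessions.per.default.queue": "2"
      }
    }
} }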

Database

Import database information from the Hive Metastore

  merge options.hive_site, service.deps.hive_metastore.options.hive_site

Hive Server2 Environment

  options.env ?= {}
  #JMX Config
  options.env["JMX_OPTS"] ?= ''
  if options.env["JMXPORT"]? and options.env["JMX_OPTS"].indexOf('-Dcom.sun.management.jmxremote.rmi.port') is -1
    options.env["$JMXSSL"] ?= false
    options.env["$JMXAUTH"] ?= false
    options.env["JMX_OPTS"] += """
    -Dcom.sun.management.jmxremote \
    -Dcom.sun.management.jmxremote.authenticate=#{options.env["$JMXAUTH"]} \
    -Dcom.sun.management.jmxremote.ssl=#{options.env["$JMXSSL"]} \
    -Dcom.sun.management.jmxremote.port=#{options.env["JMXPORT"]} \
    -Dcom.sun.management.jmxremote.rmi.port=#{options.env["JMXPORT"]} \
    """
  # Fix a bug where phoenix-server and phoenix-client do not ship the
  # same version of the classes being used.
  options.aux_jars_paths ?= {}
  if service.deps.hbase_client
    options.aux_jars_paths['/usr/hdp/current/hbase-client/lib/hbase-server.jar'] ?= true
    options.aux_jars_paths['/usr/hdp/current/hbase-client/lib/hbase-client.jar'] ?= true
    options.aux_jars_paths['/usr/hdp/current/hbase-client/lib/hbase-common.jar'] ?= true
  if service.deps.phoenix_client
    options.aux_jars_paths['/usr/hdp/current/phoenix-client/phoenix-hive.jar'] ?= true
  for path, val of service.deps.hive_hcatalog[0].options.aux_jars_paths
    options.aux_jars_paths[path] ?= val
  # aux_jars is forced by ryba to guarantee consistency
  options.aux_jars = "#{Object.keys(options.aux_jars_paths).join ':'}"
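
Extra jars can be appended to this colon-separated list by declaring them under "aux_jars_paths"; the path below is purely hypothetical and assumes the same option-to-key mapping as the previous examples:

{ "ryba": {
    "hive": {
      "server2": {
        "aux_jars_paths": {
          "/usr/hdp/current/hive-server2/lib/custom-udfs.jar": true
        }
      }
    }
} }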

Kerberos

  # https://cwiki.apache.org/confluence/display/Hive/Setting+up+HiveServer2
  # Authentication type
  options.hive_site['hive.server2.authentication'] ?= 'KERBEROS'
  # The keytab for the HiveServer2 service principal
  # 'options.authentication.kerberos.keytab': "/etc/security/keytabs/hcat.service.keytab"
  options.hive_site['hive.server2.authentication.kerberos.keytab'] ?= '/etc/security/keytabs/hive.service.keytab'
  # The service principal for HiveServer2. If _HOST is used as the
  # hostname portion, it will be replaced with the actual hostname
  # of the running instance.
  options.hive_site['hive.server2.authentication.kerberos.principal'] ?= "hive/_HOST@#{options.krb5.realm}"
  # SPNEGO
  options.hive_site['hive.server2.authentication.spnego.principal'] ?= service.deps.hadoop_core.options.core_site['hadoop.http.authentication.kerberos.principal']
  options.hive_site['hive.server2.authentication.spnego.keytab'] ?= service.deps.hadoop_core.options.core_site['hadoop.http.authentication.kerberos.keytab']
  # Ensure we don't create the same principal as Hive HCatalog or the kvno will be incremented
  hive_hcatalog_local_srv = service.deps.hive_hcatalog.filter((srv) -> srv.node.id is service.node.id)[0]
  options.principal_identical_to_hcatalog = hive_hcatalog_local_srv and hive_hcatalog_local_srv.options.hive_site['hive.metastore.kerberos.principal'] is options.hive_site['hive.server2.authentication.kerberos.principal']
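
As an illustration, with a hypothetical realm of "EXAMPLE.COM", the default principal "hive/_HOST@EXAMPLE.COM" resolves on a node named "worker01.example.com" to "hive/worker01.example.com@EXAMPLE.COM".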

SSL

  options.ssl = merge {}, service.deps.hadoop_core.options.ssl, options.ssl
  options.hive_site['hive.server2.use.SSL'] ?= 'true'
  options.hive_site['hive.server2.keystore.path'] ?= "#{options.conf_dir}/keystore"
  options.hive_site['hive.server2.keystore.password'] ?= service.deps.hadoop_core.options.ssl.keystore.password

HS2 High Availability & Rolling Upgrade

HS2 uses ZooKeeper to track registered servers. The znode address is "/<hs2_namespace>/serverUri=<host:port>;version=<versionInfo>;sequence=<sequence_number>" and its value is the server "host:port".
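
For example, an instance registered in the default "hiveserver2" namespace might appear as "/hiveserver2/serverUri=hs2-01.example.com:10001;version=3.1.0;sequence=0000000000", where the host and version are hypothetical.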

  zookeeper_quorum = for srv in service.deps.zookeeper_server
    continue unless srv.options.config['peerType'] is 'participant'
    "#{srv.node.fqdn}:#{srv.options.config['clientPort']}"
  options.hive_site['hive.zookeeper.quorum'] ?= zookeeper_quorum.join ','
  options.hive_site['hive.server2.support.dynamic.service.discovery'] ?= if service.deps.hive_server2.length > 1 then 'true' else 'false'
  options.hive_site['hive.zookeeper.session.timeout'] ?= '600000' # Default is "600000"
  options.hive_site['hive.server2.zookeeper.namespace'] ?= 'hiveserver2' # Default is "hiveserver2"
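
When dynamic service discovery is enabled, clients typically connect through the ZooKeeper quorum rather than a fixed host, with a URL such as "jdbc:hive2://<zk1>:2181,<zk2>:2181,<zk3>:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2", where the quorum hosts are placeholders and the namespace must match "hive.server2.zookeeper.namespace".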

Configuration for Proxy users

  for srv in service.deps.hdfs_client
    srv.options.core_site["hadoop.proxyuser.#{options.user.name}.groups"] ?= '*'
    srv.options.core_site["hadoop.proxyuser.#{options.user.name}.hosts"] ?= '*'
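
On those nodes, this results in core-site entries such as "hadoop.proxyuser.hive.hosts" and "hadoop.proxyuser.hive.groups" both set to "*", assuming the Hive user is named "hive".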

Configure Log4J

  options.log4j = merge {}, service.deps.log4j?.options, options.log4j
  options.log4j.properties ?= {}
  options.log4j.properties['hive.log.file'] ?= 'hiveserver2.log'
  options.log4j.properties['hive.log.dir'] ?= "#{options.log_dir}"
  options.log4j.properties['log4j.appender.EventCounter'] ?= 'org.apache.hadoop.hive.shims.HiveEventCounter'
  options.log4j.properties['log4j.appender.console'] ?= 'org.apache.log4j.ConsoleAppender'
  options.log4j.properties['log4j.appender.console.target'] ?= 'System.err'
  options.log4j.properties['log4j.appender.console.layout'] ?= 'org.apache.log4j.PatternLayout'
  options.log4j.properties['log4j.appender.console.layout.ConversionPattern'] ?= '%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n'
  options.log4j.properties['log4j.appender.console.encoding'] ?= 'UTF-8'
  options.log4j.properties['log4j.appender.RFAS'] ?= 'org.apache.log4j.RollingFileAppender'
  options.log4j.properties['log4j.appender.RFAS.File'] ?= '${hive.log.dir}/${hive.log.file}'
  options.log4j.properties['log4j.appender.RFAS.MaxFileSize'] ?= '20MB'
  options.log4j.properties['log4j.appender.RFAS.MaxBackupIndex'] ?= '10'
  options.log4j.properties['log4j.appender.RFAS.layout'] ?= 'org.apache.log4j.PatternLayout'
  options.log4j.properties['log4j.appender.RFAS.layout.ConversionPattern'] ?= '%d{ISO8601} %-5p %c{2} - %m%n'
  options.log4j.properties['log4j.appender.DRFA'] ?= 'org.apache.log4j.DailyRollingFileAppender'
  options.log4j.properties['log4j.appender.DRFA.File'] ?= '${hive.log.dir}/${hive.log.file}'
  options.log4j.properties['log4j.appender.DRFA.DatePattern'] ?= '.yyyy-MM-dd'
  options.log4j.properties['log4j.appender.DRFA.layout'] ?= 'org.apache.log4j.PatternLayout'
  options.log4j.properties['log4j.appender.DRFA.layout.ConversionPattern'] ?= '%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n'
  options.log4j.properties['log4j.appender.DAILY'] ?= 'org.apache.log4j.rolling.RollingFileAppender'
  options.log4j.properties['log4j.appender.DAILY.rollingPolicy'] ?= 'org.apache.log4j.rolling.TimeBasedRollingPolicy'
  options.log4j.properties['log4j.appender.DAILY.rollingPolicy.ActiveFileName'] ?= '${hive.log.dir}/${hive.log.file}'
  options.log4j.properties['log4j.appender.DAILY.rollingPolicy.FileNamePattern'] ?= '${hive.log.dir}/${hive.log.file}.%d{yyyy-MM-dd}'
  options.log4j.properties['log4j.appender.DAILY.layout'] ?= 'org.apache.log4j.PatternLayout'
  options.log4j.properties['log4j.appender.DAILY.layout.ConversionPattern'] ?= '%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n'
  options.log4j.properties['log4j.appender.AUDIT'] ?= 'org.apache.log4j.RollingFileAppender'
  options.log4j.properties['log4j.appender.AUDIT.File'] ?= '${hive.log.dir}/hiveserver2_audit.log'
  options.log4j.properties['log4j.appender.AUDIT.MaxFileSize'] ?= '20MB'
  options.log4j.properties['log4j.appender.AUDIT.MaxBackupIndex'] ?= '10'
  options.log4j.properties['log4j.appender.AUDIT.layout'] ?= 'org.apache.log4j.PatternLayout'
  options.log4j.properties['log4j.appender.AUDIT.layout.ConversionPattern'] ?= '%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n'

  options.log4j.appenders = ',RFAS'
  options.log4j.audit_appenders = ',AUDIT'
  if options.log4j.remote_host and options.log4j.remote_port
    options.log4j.appenders = options.log4j.appenders + ',SOCKET'
    options.log4j.audit_appenders = options.log4j.audit_appenders + ',SOCKET'
    options.log4j.properties['log4j.appender.SOCKET'] ?= 'org.apache.log4j.net.SocketAppender'
    options.log4j.properties['log4j.appender.SOCKET.Application'] ?= 'hiveserver2'
    options.log4j.properties['log4j.appender.SOCKET.RemoteHost'] ?= options.log4j.remote_host
    options.log4j.properties['log4j.appender.SOCKET.Port'] ?= options.log4j.remote_port

  options.log4j.properties['log4j.category.DataNucleus'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.category.Datastore'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.category.Datastore.Schema'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.category.JPOX.Datastore'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.category.JPOX.Plugin'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.category.JPOX.MetaData'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.category.JPOX.Query'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.category.JPOX.General'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.category.JPOX.Enhancer'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.hadoop.conf.Configuration'] ?= 'ERROR' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.zookeeper'] ?= 'INFO' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.zookeeper.server.ServerCnxn'] ?= 'WARN' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.zookeeper.server.NIOServerCnxn'] ?= 'WARN' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.zookeeper.ClientCnxn'] ?= 'WARN' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.zookeeper.ClientCnxnSocket'] ?= 'WARN' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO'] ?= 'WARN' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.hadoop.hive.ql.log.PerfLogger'] ?= '${hive.ql.log.PerfLogger.level}'
  options.log4j.properties['log4j.logger.org.apache.hadoop.hive.ql.exec.Operator'] ?= 'INFO' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.hadoop.hive.serde2.lazy'] ?= 'INFO' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.hadoop.hive.metastore.ObjectStore'] ?= 'INFO' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.hadoop.hive.metastore.MetaStore'] ?= 'INFO' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.hadoop.hive.metastore.HiveMetaStore'] ?= 'INFO' + options.log4j.appenders
  options.log4j.properties['log4j.logger.org.apache.hadoop.hive.metastore.HiveMetaStore.audit'] ?= 'INFO' + options.log4j.audit_appenders
  options.log4j.properties['log4j.additivity.org.apache.hadoop.hive.metastore.HiveMetaStore.audit'] ?= false
  options.log4j.properties['log4j.logger.server.AsyncHttpConnection'] ?= 'OFF'
  options.log4j.properties['hive.log.threshold'] ?= 'ALL'
  options.log4j.properties['hive.root.logger'] ?= 'INFO' + options.log4j.appenders
  options.log4j.properties['log4j.rootLogger'] ?= '${hive.root.logger}, EventCounter'
  options.log4j.properties['log4j.threshold'] ?= '${hive.log.threshold}'
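
If a remote host and port are configured under "log4j" (illustrative values below, assuming the same option-to-key mapping as the other examples), the SOCKET appender above is added to the regular and audit appender lists:

{ "ryba": {
    "hive": {
      "server2": {
        "log4j": {
          "remote_host": "logcollector.example.com",
          "remote_port": "4560"
        }
      }
    }
} }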

Add Hive user as proxyuser

  for srv in service.deps.hbase_thrift
    # migration: wdavidw 170906, in a future version, we could give access
    # to parent services, eg: srv.use.hadoop_core.options.core_site
    hsrv = service.deps.hdfs_client.filter((hsrv) -> hsrv.node.fqdn is srv.node.fqdn)[0]
    hsrv.options.core_site ?= {}
    hsrv.options.core_site["hadoop.proxyuser.#{options.user.name}.hosts"] ?= '*'
    hsrv.options.core_site["hadoop.proxyuser.#{options.user.name}.groups"] ?= '*'

Export To HCatalog

  for srv in service.deps.hive_hcatalog
    srv.options.hive_site ?= {}
    for prop in [
      'hive.execution.engine'
    ]
      srv.options.hive_site[prop] ?= options.hive_site[prop]

Wait

  options.wait_krb5_client ?= service.deps.krb5_client.options.wait
  options.wait_zookeeper_server ?= service.deps.zookeeper_server[0].options.wait
  options.wait_hive_hcatalog ?= service.deps.hive_hcatalog[0].options.wait
  options.wait = {}
  options.wait.thrift = for srv in service.deps.hive_server2
    srv.options.hive_site ?= {}
    srv.options.hive_site['hive.server2.transport.mode'] ?= 'http'
    srv.options.hive_site['hive.server2.thrift.http.port'] ?= '10001'
    srv.options.hive_site['hive.server2.thrift.port'] ?= '10001'
    host: srv.node.fqdn
    port: if srv.options.hive_site['hive.server2.transport.mode'] is 'http'
    then srv.options.hive_site['hive.server2.thrift.http.port']
    else srv.options.hive_site['hive.server2.thrift.port']

Dependencies

{merge} = require '@nikitajs/core/lib/misc'