HBase Master Configuration
module.exports = (service) ->
options = service.options
Kerberos
options.krb5 ?= {}
options.krb5.realm ?= service.deps.krb5_client.options.etc_krb5_conf?.libdefaults?.default_realm
throw Error 'Required Options: "realm"' unless options.krb5.realm
options.krb5.admin ?= service.deps.krb5_client.options.admin[options.krb5.realm]
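For example, when no "krb5_client" dependency provides a default realm, it can be set explicitly (the realm below is a placeholder):
{ "krb5": {
  "realm": "EXAMPLE.COM"
} }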
Identities
- admin (object|string)
  The Kerberos HBase principal.
- group (object|string)
  The Unix HBase group name or a group object (see Nikita Group documentation).
- user (object|string)
  The Unix HBase login name or a user object (see Nikita User documentation).
Example
{
"user": {
"name": "hbase", "system": true, "gid": "hbase", groups: "hadoop",
"comment": "HBase User", "home": "/var/run/hbase"
},
"group": {
"name": "HBase", "system": true
},
"admin": {
"password": "hbase123"
}
}
# Hadoop Group
options.hadoop_group = merge {}, service.deps.hadoop_core.options.hadoop_group, options.hadoop_group
# Group
options.group ?= {}
options.group = name: options.group if typeof options.group is 'string'
options.group.name ?= 'hbase'
options.group.system ?= true
# User
options.user ?= {}
options.user = name: options.user if typeof options.user is 'string'
options.user.name ?= 'hbase'
options.user.system ?= true
options.user.gid = options.group.name
options.user.comment ?= 'HBase User'
options.user.home ?= '/var/run/hbase'
options.user.groups ?= 'hadoop'
options.user.limits ?= {}
options.user.limits.nofile ?= 64000
options.user.limits.nproc ?= true
# Kerberos HBase Admin Principal
options.admin ?= {}
options.admin.name ?= options.user.name
options.admin.principal ?= "#{options.admin.name}@#{options.krb5.realm}"
throw Error 'Required Option: admin.password' unless options.admin.password
Kerberos
# Kerberos HDFS Admin
options.hdfs_krb5_user = service.deps.hadoop_core.options.hdfs.krb5_user
# Kerberos Test Principal
options.test_krb5_user ?= service.deps.test_user.options.krb5.user
Environment
# Layout
options.conf_dir ?= '/etc/hbase-master/conf'
options.log_dir ?= '/var/log/hbase'
options.pid_dir ?= '/var/run/hbase'
# Env
options.env ?= {}
options.env['HBASE_LOG_DIR'] ?= "#{options.log_dir}"
options.env['HBASE_OPTS'] ?= '-XX:+UseConcMarkSweepGC ' # -XX:+CMSIncrementalMode is deprecated
# Java
# 'HBASE_MASTER_OPTS' ?= '-Xmx2048m' # Default in HDP companion file
options.java_home ?= "#{service.deps.java.options.java_home}"
options.heapsize ?= '1024m'
options.newsize ?= '200m'
# Misc
options.fqdn ?= service.node.fqdn
options.hostname = service.node.hostname
options.iptables ?= service.deps.iptables and service.deps.iptables.options.action is 'start'
options.clean_logs ?= false
# HDFS
options.hdfs_conf_dir ?= service.deps.hadoop_core.options.conf_dir
options.hdfs_krb5_user ?= service.deps.hadoop_core.options.hdfs.krb5_user
System Options
options.opts ?= {}
options.opts.base ?= ''
options.opts.java_properties ?= {}
options.opts.jvm ?= {}
options.opts.jvm['-Xms'] ?= options.heapsize
options.opts.jvm['-Xmx'] ?= options.heapsize
options.opts.jvm['-XX:NewSize='] ?= options.newsize # should be 1/8 of hbase master heapsize
options.opts.jvm['-XX:MaxNewSize='] ?= options.newsize # should be 1/8 of hbase master heapsize
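For example, keeping the 1/8 ratio mentioned in the comments above, a hypothetical override for a 4 GB master heap could look like:
{
  "heapsize": "4096m",
  "newsize": "512m"
}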
RegionServers
Each RegionServer must register with the Master: the key is the RegionServer FQDN while the value activates or deactivates it, as illustrated below.
options.regionservers ?= {}
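For example, a hypothetical layout registering two RegionServers and deactivating a third (FQDNs are placeholders):
{ "regionservers": {
  "rs1.cluster.local": true,
  "rs2.cluster.local": true,
  "rs3.cluster.local": false
} }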
Configuration
# HBase "hbase-site.xml"
options.hbase_site ?= {}
options.hbase_site['hbase.master.port'] ?= '60000'
options.hbase_site['hbase.master.info.port'] ?= '60010'
options.hbase_site['hbase.master.info.bindAddress'] ?= '0.0.0.0'
options.hbase_site['hbase.ssl.enabled'] ?= 'true'
Configuration for Distributed Mode
options.hbase_site['zookeeper.znode.parent'] ?= '/hbase'
# The mode the cluster will be in. Possible values are
# false: standalone and pseudo-distributed setups with managed Zookeeper
# true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh)
options.hbase_site['hbase.cluster.distributed'] = 'true'
options.hbase_site['zookeeper.session.timeout'] ?= "#{20 * parseInt service.deps.zookeeper_server[0].options.config['tickTime']}"
# Enter the HBase NameNode server hostname
# http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/latest/CDH4-High-Availability-Guide/cdh4hag_topic_2_6.html
options.hbase_site['hbase.rootdir'] ?= "#{service.deps.hdfs_nn[0].options.core_site['fs.defaultFS']}/apps/hbase/data"
# Comma-separated list of ZooKeeper servers (matching what
# is specified in zoo.cfg but without port numbers)
options.hbase_site['hbase.zookeeper.quorum'] ?= service.deps.zookeeper_server.map( (srv) -> srv.node.fqdn ).join ','
options.hbase_site['hbase.zookeeper.property.clientPort'] ?= service.deps.zookeeper_server[0].options.config['clientPort']
throw Error "Required Option: hbase_site['hbase.zookeeper.quorum']" unless options.hbase_site['hbase.zookeeper.quorum']
throw Error "Required Option: hbase_site['hbase.zookeeper.property.clientPort']" unless options.hbase_site['hbase.zookeeper.property.clientPort']
# Short-circuit reads are enabled but socket.path isn't defined for hbase, only for hdfs, see http://osdir.com/ml/hbase-user-hadoop-apache/2013-03/msg00007.html
# options.hbase_site['dfs.domain.socket.path'] ?= hdfs.site['dfs.domain.socket.path']
options.hbase_site['dfs.domain.socket.path'] ?= '/var/lib/hadoop-hdfs/dn_socket'
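As an illustration, with three hypothetical ZooKeeper nodes the derived properties would resemble the following (hostnames and the HDFS URI are placeholders):
{
  "hbase.rootdir": "hdfs://namenode.cluster.local:8020/apps/hbase/data",
  "hbase.zookeeper.quorum": "zk1.cluster.local,zk2.cluster.local,zk3.cluster.local",
  "hbase.zookeeper.property.clientPort": "2181"
}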
Configuration for Kerberos
options.hbase_site['hbase.security.authentication'] ?= 'kerberos' # Required by HM, RS and client
if options.hbase_site['hbase.security.authentication'] is 'kerberos'
options.hbase_site['hbase.master.keytab.file'] ?= '/etc/security/keytabs/hm.service.keytab'
options.hbase_site['hbase.master.kerberos.principal'] ?= "hbase/_HOST@#{options.krb5.realm}" # "hm/_HOST@#{realm}" <-- need zookeeper auth_to_local
options.hbase_site['hbase.regionserver.kerberos.principal'] ?= "hbase/_HOST@#{options.krb5.realm}" # "rs/_HOST@#{realm}" <-- need zookeeper auth_to_local
options.hbase_site['hbase.security.authentication.ui'] ?= 'kerberos'
options.hbase_site['hbase.security.authentication.spnego.kerberos.principal'] ?= "HTTP/_HOST@#{options.krb5.realm}"
options.hbase_site['hbase.security.authentication.spnego.kerberos.keytab'] ?= service.deps.hadoop_core.options.core_site['hadoop.http.authentication.kerberos.keytab']
options.hbase_site['hbase.coprocessor.master.classes'] ?= [
'org.apache.hadoop.hbase.security.access.AccessController'
]
# the master must be able to communicate with the regionservers
options.hbase_site['hbase.coprocessor.region.classes'] ?= [
'org.apache.hadoop.hbase.security.token.TokenProvider'
'org.apache.hadoop.hbase.security.access.SecureBulkLoadEndpoint'
'org.apache.hadoop.hbase.security.access.AccessController'
]
Configuration for Security
Bulk loading in secure mode is a bit more involved than the normal setup, since the client has to transfer the ownership of the files generated by the mapreduce job to HBase. Secure bulk loading is implemented by a coprocessor named SecureBulkLoadEndpoint and uses an HDFS staging directory which is world traversable (-rwx--x--x, 711).
options.hbase_site['hbase.security.authorization'] ?= 'true'
options.hbase_site['hbase.rpc.engine'] ?= 'org.apache.hadoop.hbase.ipc.SecureRpcEngine'
options.hbase_site['hbase.superuser'] ?= options.admin.name
options.hbase_site['hbase.bulkload.staging.dir'] ?= '/apps/hbase/staging'
# Jaas file
options.opts.java_properties['java.security.auth.login.config'] ?= "#{options.conf_dir}/hbase-master.jaas"
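For example, the superuser and the staging directory can be overridden through "hbase_site" (the values below are purely illustrative):
{ "hbase_site": {
  "hbase.superuser": "hbase_admin",
  "hbase.bulkload.staging.dir": "/apps/hbase/staging"
} }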
Configuration for Local Access
# migration: wdavidw 170902, shouldn't this only apply to the RegionServer?
# # HDFS NN
# for srv in service.deps.hdfs_nn
# srv.options.hdfs_site ?= {}
# srv.options.hdfs_site['dfs.block.local-path-access.user'] ?= ''
# users = srv.options.hdfs_site['dfs.block.local-path-access.user'].split(',').filter (str) -> str isnt ''
# users.push 'hbase' unless options.user.name in users
# srv.options.hdfs_site['dfs.block.local-path-access.user'] = users.sort().join ','
# # HDFS DN
# srv = service.deps.hdfs_dn
# srv.options.hdfs_site['dfs.block.local-path-access.user'] ?= ''
# users = srv.options.hdfs_site['dfs.block.local-path-access.user'].split(',').filter (str) -> str isnt ''
# users.push 'hbase' unless options.user.name in users
# srv.options.hdfs_site['dfs.block.local-path-access.user'] = users.sort().join ','
Configuration for High Availability Reads (HA Reads)
- Hortonworks presentation of HBase HA
- HDP 2.5 Read HA instruction
- Bring quorum based write ahead log (write HA)
Async WAL Replication
WAL Replication is enabled by default but should be discovered based on the number of RegionServers (>2). However, this would introduce a circular dependency between the Master and the RegionServers.
TODO migration: wdavidw 170829, disable 'hbase.meta.replicas.use' from the RS if RS count < 3.
# enable hbase:meta region replication
options.hbase_site['hbase.meta.replicas.use'] ?= 'true'
options.hbase_site['hbase.meta.replica.count'] ?= '3' # Default to '1'
# enable replication for every region
options.hbase_site['hbase.region.replica.replication.enabled'] ?= 'true'
# increase default time when 'hbase.region.replica.replication.enabled' is true
options.hbase_site['hbase.region.replica.wait.for.primary.flush'] ?= 'true'
options.hbase_site['hbase.master.loadbalancer.class'] = 'org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer' # Default value
# StoreFile Refresher
options.hbase_site['hbase.regionserver.storefile.refresh.period'] ?= '30000' # Default to '0'
options.hbase_site['hbase.regionserver.meta.storefile.refresh.period'] ?= '30000' # Default to '0'
options.hbase_site['hbase.region.replica.storefile.refresh.memstore.multiplier'] ?= '4'
# HFile TTL must be greater than refresher period
options.hbase_site['hbase.master.hfilecleaner.ttl'] ?= '3600000' # 1 hour
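Until such discovery is implemented, a cluster with fewer than 3 RegionServers could for instance disable meta replica usage and lower the replica count with overrides such as:
{ "hbase_site": {
  "hbase.meta.replicas.use": "false",
  "hbase.meta.replica.count": "1"
} }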
Configuration for Region Server Groups
# see https://hbase.apache.org/book.html#rsgroup
options.rsgroups_enabled ?= false
if options.rsgroups_enabled
options.hbase_site['hbase.master.loadbalancer.class'] = 'org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer'
options.hbase_site['hbase.coprocessor.master.classes'].push 'org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint' unless 'org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint' in options.hbase_site['hbase.coprocessor.master.classes']
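Enabling RegionServer groups is then a matter of switching the flag on, for example:
{ "rsgroups_enabled": true }
The balancer class and the RSGroupAdminEndpoint coprocessor are adjusted automatically as shown above.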
Configuration for Cluster Replication
options.hbase_site['hbase.replication'] ?= 'true' if options.replicated_clusters
Configuration for Quota
options.hbase_site['hbase.quota.enabled'] ?= 'false'
options.hbase_site['hbase.quota.refresh.period'] ?= 300000
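Quotas stay disabled unless requested; a hypothetical override enabling them with the 5 minute refresh period used above would be:
{ "hbase_site": {
  "hbase.quota.enabled": "true",
  "hbase.quota.refresh.period": 300000
} }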
Configuration for Log4J
options.log4j = merge {}, service.deps.log4j?.options, options.log4j
options.log4j.properties ?= {}
options.opts.java_properties['hbase.security.log.file'] ?= 'SecurityAuth-master.audit'
# The HBase bin scripts directly use environment variables
options.env['HBASE_ROOT_LOGGER'] ?= 'INFO,RFA'
options.env['HBASE_SECURITY_LOGGER'] ?= 'INFO,RFAS'
if options.log4j.remote_host? and options.log4j.remote_port?
# adding SOCKET appender
options.log4j.socket_client ?= "SOCKET"
# Root logger
if options.env['HBASE_ROOT_LOGGER'].indexOf(options.log4j.socket_client) is -1
then options.env['HBASE_ROOT_LOGGER'] += ",#{options.log4j.socket_client}"
# Security Logger
if options.env['HBASE_SECURITY_LOGGER'].indexOf(options.log4j.socket_client) is -1
then options.env['HBASE_SECURITY_LOGGER'] += ",#{options.log4j.socket_client}"
options.opts.java_properties['hbase.log.application'] = 'hbase-master'
options.opts.java_properties['hbase.log.remote_host'] = options.log4j.remote_host
options.opts.java_properties['hbase.log.remote_port'] = options.log4j.remote_port
options.log4j.socket_opts ?=
Application: '${hbase.log.application}'
RemoteHost: '${hbase.log.remote_host}'
Port: '${hbase.log.remote_port}'
ReconnectionDelay: '10000'
options.log4j.properties = merge options.log4j.properties, appender
type: 'org.apache.log4j.net.SocketAppender'
name: options.log4j.socket_client
logj4: options.log4j.properties
properties: options.log4j.socket_opts
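The socket appender is only wired in when both a remote host and port are provided, for example with placeholder values:
{ "log4j": {
  "remote_host": "logcollector.cluster.local",
  "remote_port": "4560"
} }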
Configuration for Metrics
Configuration of the HBase metrics system.
The File sink is activated by default. The Ganglia and Graphite sinks are automatically activated if the "ryba/retired/ganglia/collector" and "ryba/graphite/collector" services are respectively registered on one of the nodes of the cluster. You can disable any of those sinks by setting its class to "false", as shown here:
{ "ryba": { hbase: {"metrics":
"*.sink.file.class": false,
"*.sink.ganglia.class": false,
"*.sink.graphite.class": false
} } }
Metrics can be filtered by context (in this example "master", "regionserver", "jvm" and "ugi"). The list of available contexts can be obtained over HTTP, read the HBase documentation for additional information.
{ "ryba": { hbase: {"metrics":
"hbase.sink.file-all.filename": "hbase-metrics-all.out",
"hbase.sink.file-master.filename": "hbase-metrics-master.out",
"hbase.sink.file-master.context": "mastert",
"hbase.sink.file-regionserver.filename": "hbase-metrics-regionserver.outt",
"hbase.sink.file-regionserver.context": "regionservert",
"hbase.sink.file-jvm.filename": "hbase-metrics-jvm.outt",
"hbase.sink.file-jvm.context": "jvmt",
"hbase.sink.file-ugi.filename": "hbase-metrics-ugi.outt",
"hbase.sink.file-ugi.context": "ugit"
} } }
According to the default "hadoop-metrics-hbase.properties", the supported contexts are "hbase", "jvm" and "rpc".
options.metrics = merge {}, service.deps.metrics?.options, options.metrics
options.metrics.sinks ?= {}
options.metrics.sinks.file_enabled ?= true
options.metrics.sinks.ganglia_enabled ?= !!service.deps.ganglia_collector
options.metrics.sinks.graphite_enabled ?= false
options.metrics.config ?= {}
options.metrics.config['*.period'] ?= '60'
options.metrics.config['*.source.filter.class'] ?= 'org.apache.hadoop.metrics2.filter.GlobFilter'
options.metrics.config['hbase.*.source.filter.exclude'] ?= '*Regions*|*Namespace*|*User*'
options.metrics.config['hbase.extendedperiod'] ?= '3600'
# File sink
if options.metrics.sinks.file_enabled
options.metrics.config["*.sink.file.#{k}"] ?= v for k, v of options.metrics.sinks.file.config if service.deps.metrics?.options?.sinks?.file_enabled
options.metrics.config['hbase.sink.file.filename'] ?= 'hbase-metrics.out'
# Ganglia sink, accepted properties are "servers" and "supportsparse"
if options.metrics.sinks.ganglia_enabled
options.metrics.config["*.sink.ganglia.#{k}"] ?= v for k, v of options.metrics.sinks.ganglia.config
options.metrics.config['hbase.sink.ganglia.class'] ?= 'org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31'
options.metrics.config['jvm.sink.ganglia.class'] ?= 'org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31'
options.metrics.config['rpc.sink.ganglia.class'] ?= 'org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31'
options.metrics.config['hbase.sink.ganglia.servers'] ?= "#{service.deps.ganglia_collector.node.fqdn}:#{service.deps.ganglia_collector.options.nn_port}"
options.metrics.config['jvm.sink.ganglia.servers'] ?= "#{service.deps.ganglia_collector.node.fqdn}:#{service.deps.ganglia_collector.options.nn_port}"
options.metrics.config['rpc.sink.ganglia.servers'] ?= "#{service.deps.ganglia_collector.node.fqdn}:#{service.deps.ganglia_collector.options.nn_port}"
# Graphite sink
if options.metrics.sinks.graphite_enabled
throw Error 'Missing remote_host ryba.hbase.master.metrics.sinks.graphite.config.server_host' unless options.metrics.sinks.graphite.config.server_host?
throw Error 'Missing remote_port ryba.hbase.master.metrics.sinks.graphite.config.server_port' unless options.metrics.sinks.graphite.config.server_port?
options.metrics.config['*.sink.graphite.metrics_prefix'] ?= if options.metrics.sinks.graphite.config.metrics_prefix? then "#{options.metrics.sinks.graphite.config.metrics_prefix}.hbase" else "hbase"
options.metrics.config["*.sink.graphite.#{k}"] ?= v for k, v of options.metrics.sinks.graphite.config if service.deps.metrics?.options?.sinks?.graphite_enabled
options.metrics.config['hbase.sink.graphite.class'] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
options.metrics.config['jvm.sink.graphite.class'] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
options.metrics.config['rpc.sink.graphite.class'] ?= 'org.apache.hadoop.metrics2.sink.GraphiteSink'
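For example, a hypothetical override activating the Graphite sink (host, port and prefix are placeholders):
{ "metrics": { "sinks": {
  "graphite_enabled": true,
  "graphite": { "config": {
    "server_host": "graphite.cluster.local",
    "server_port": "2003",
    "metrics_prefix": "prod"
  } }
} } }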
Wait
options.wait_krb5_client = service.deps.krb5_client.options.wait
options.wait_zookeeper_server = service.deps.zookeeper_server[0].options.wait
options.wait_hdfs_nn = service.deps.hdfs_nn[0].options.wait
for srv in service.deps.hbase_master
srv.options.master_site ?= {}
srv.options.master_site['hbase.master.port'] ?= '60000'
srv.options.master_site['hbase.master.info.port'] ?= '60010'
options.wait = {}
options.wait.rpc = for srv in service.deps.hbase_master
host: srv.node.fqdn
port: srv.options.master_site['hbase.master.port']
options.wait.http = for srv in service.deps.hbase_master
host: srv.node.fqdn
port: srv.options.master_site['hbase.master.info.port']
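For a cluster with two hypothetical masters, the published wait information would resemble:
{ "wait": {
  "rpc": [
    { "host": "hm1.cluster.local", "port": "60000" },
    { "host": "hm2.cluster.local", "port": "60000" }
  ],
  "http": [
    { "host": "hm1.cluster.local", "port": "60010" },
    { "host": "hm2.cluster.local", "port": "60010" }
  ]
} }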
Dependencies
appender = require '../../lib/appender'
{merge} = require '@nikitajs/core/lib/misc'