Knox Configure
Configure default
This configuration is applied only when no topology configuration is provided. The function declares services when their modules are present in the configuration context. Settings such as addresses and ports are then enriched by the configure function, which loops over the topologies to fill in missing values.
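For example, a hypothetical user configuration (the "my_cluster" name and URLs are illustrative, not defaults) could pin some services explicitly and leave the rest to discovery; values set by the user survive because the defaults below rely on the "?=" existential assignment:

# Hypothetical user input, merged before the defaults below apply
topologies:
  my_cluster:
    services:
      webhdfs: true                                  # discovered from the hdfs_nn dependency
      hive: 'https://hive01.ryba:10001/cliservice'   # explicit URL, discovery skipped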
Configure
module.exports = (service) ->
options = service.options
Environment
# Layout
options.conf_dir ?= '/etc/knox/conf'
options.log_dir ?= '/var/log/knox'
options.pid_dir ?= '/var/run/knox'
options.bin_dir ?= '/usr/hdp/current/knox-server/bin'
# Misc
options.fqdn = service.node.fqdn
options.hostname = service.node.hostname
options.iptables ?= service.deps.iptables and service.deps.iptables.options.action is 'start'
Identities
# Group
options.group = name: options.group if typeof options.group is 'string'
options.group ?= {}
options.group.name ?= 'knox'
options.group.system ?= true
# User
options.user = name: options.user if typeof options.user is 'string'
options.user ?= {}
options.user.name ?= 'knox'
options.user.gid = options.group.name
options.user.system ?= true
options.user.comment ?= 'Knox Gateway User'
options.user.home ?= '/var/lib/knox'
options.user.limits ?= {}
options.user.limits.nofile ?= 64000
options.user.limits.nproc ?= true
Kerberos
options.krb5 ?= {}
options.krb5.realm ?= service.deps.krb5_client.options.etc_krb5_conf?.libdefaults?.default_realm
throw Error 'Required Option: "realm"' unless options.krb5.realm
options.krb5.admin ?= service.deps.krb5_client.options.admin[options.krb5.realm]
options.krb5_user ?= {}
options.krb5_user.principal ?= "#{options.user.name}/#{options.fqdn}@#{options.krb5.realm}"
options.krb5_user.keytab ?= "/etc/security/keytabs/#{options.user.name}.service.keytab"
Test
options.ranger_admin ?= service.deps.ranger_admin.options.admin if service.deps.ranger_admin
options.test = merge {}, service.deps.test_user.options, options.test
if service.deps.ranger_admin?
service.deps.ranger_admin.options.users ?= {}
service.deps.ranger_admin.options.users[options.test.user.name] ?=
"name": options.test.user.name
"firstName": options.test.user.name
"lastName": 'hadoop'
"emailAddress": "#{options.test.user.name}@hadoop.ryba"
"password": options.test.user.password
'userSource': 1
'userRoleList': ['ROLE_USER']
'groups': []
'status': 1
Env
Knox reads its own environment variables to retrieve its configuration.
options.env ?= {}
options.env.app_mem_opts ?= '-Xmx8192m'
options.env.app_log_dir ?= "#{options.log_dir}"
options.env.app_log_opts ?= ''
options.env.app_dbg_opts ?= ''
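As a hedged sketch, a deployment could override these defaults in its user configuration, for example to shrink the gateway heap; user-provided keys win since the assignments above use "?=":

# Hypothetical override supplied in the user configuration
env:
  app_mem_opts: '-Xmx2048m'     # smaller heap than the 8 GiB default
  app_log_opts: '-Dlog4j.debug' # verbose log4j startup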
Java
options.java_home = service.deps.java.options.java_home
options.jre_home = service.deps.java.options.jre_home
SSL
options.ssl = merge {}, service.deps.ssl?.options, options.ssl
options.ssl.enabled ?= !!service.deps.ssl
# options.truststore ?= {}
if options.ssl.enabled
throw Error "Required Option: ssl.cert" if not options.ssl.cert
throw Error "Required Option: ssl.key" if not options.ssl.key
throw Error "Required Option: ssl.cacert" if not options.ssl.cacert
options.ssl.keystore ?= {}
options.ssl.keystore.target = '/usr/hdp/current/knox-server/data/security/keystores/gateway.jks'
# migration: lucasbak 16102017
# knox search by default gateway-identity as default keystore
options.ssl.key.name = 'gateway-identity'
options.ssl.truststore ?= {}
throw Error "Required Property: truststore.password" if not options.ssl.truststore.password
options.ssl.truststore.caname ?= 'hadoop_root_ca'
# options.ssl.storepass ?= 'knox_master_secret_123'
# options.ssl.cacert ?= @config.ryba.ssl?.cacert
# options.ssl.cert ?= @config.ryba.ssl?.cert
# options.ssl.key ?= @config.ryba.ssl?.key
# options.ssl.keypass ?= 'knox_master_secret_123'
# Knox SSL
# throw Error 'Required Options: ssl.cacert' unless options.ssl.cacert?
# throw Error 'Required Options: ssl.cert' unless options.ssl.cert?
# throw Error 'Required Options ssl.key' unless options.ssl.key?
Configuration
# Configuration
options.gateway_site ?= {}
options.gateway_site['gateway.port'] ?= '8443'
options.gateway_site['gateway.path'] ?= 'gateway'
options.gateway_site['java.security.krb5.conf'] ?= '/etc/krb5.conf'
options.gateway_site['java.security.auth.login.config'] ?= "#{options.conf_dir}/knox.jaas"
options.gateway_site['gateway.hadoop.kerberos.secured'] ?= 'true'
options.gateway_site['sun.security.krb5.debug'] ?= 'true'
options.realm_passwords = {}
options.config ?= {}
Proxy Users
enrich_proxy_user = (srv) ->
srv.options.core_site["hadoop.proxyuser.#{options.user.name}.groups"] ?= '*'
hosts = srv.options.core_site["hadoop.proxyuser.#{options.user.name}.hosts"] or []
hosts = hosts.split ',' unless Array.isArray hosts
for instance in service.instances
hosts.push instance.node.fqdn unless instance.node.fqdn in hosts
hosts = hosts.join ','
srv.options.core_site["hadoop.proxyuser.#{options.user.name}.hosts"] ?= hosts
enrich_proxy_user srv for srv in service.deps.hdfs_nn
enrich_proxy_user srv for srv in service.deps.hdfs_dn
enrich_proxy_user srv for srv in service.deps.yarn_rm
enrich_proxy_user srv for srv in service.deps.yarn_nm
enrich_proxy_user srv for srv in service.deps.yarn_ts
enrich_proxy_user srv for srv in service.deps.mapred_jhs
# Probably hbase rest is reading "core-site.xml" from "/etc/hadoop/conf"
# enrich_proxy_user srv, 'hbase_rest' for srv in service.deps.hbase_rest
enrich_proxy_user srv for srv in service.deps.hdfs_client
for srv in service.deps.httpfs
srv.options.httpfs_site["httpfs.proxyuser.#{options.user.name}.groups"] ?= '*'
hosts = srv.options.httpfs_site["httpfs.proxyuser.#{options.user.name}.hosts"] or []
hosts = hosts.split ',' unless Array.isArray hosts
for instance in service.instances
hosts.push instance.node.fqdn unless instance.node.fqdn in hosts
hosts = hosts.join ','
srv.options.httpfs_site["httpfs.proxyuser.#{options.user.name}.hosts"] ?= hosts
for srv in service.deps.oozie_server
srv.options.oozie_site["oozie.service.ProxyUserService.proxyuser.#{options.user.name}.groups"] ?= '*'
hosts = srv.options.oozie_site["oozie.service.ProxyUserService.proxyuser.#{options.user.name}.hosts"] or []
hosts = hosts.split ',' unless Array.isArray hosts
for instance in service.instances
hosts.push instance.node.fqdn unless instance.node.fqdn in hosts
hosts = hosts.join ','
srv.options.oozie_site["oozie.service.ProxyUserService.proxyuser.#{options.user.name}.hosts"] ?= hosts
Configure topology
LDAP authentication is configured by adding a "ShiroProvider" authentication
provider to the cluster's topology file. When enabled, the Knox Gateway uses
Apache Shiro (org.apache.shiro.realm.ldap.JndiLdapRealm) to authenticate users
against the configured LDAP store.
Administrators can use myrealm.sssd_lookup to read the LDAP configuration from the masson/core/sssd module. If sssd is not used, administrators must instead target the LDAP server directly by setting the properties myrealm.ldap_uri, myrealm.ldap_default_bind_dn and myrealm.ldap_default_authtok. By default, sssd_lookup is false.
Inspired by knox-repo.
Example:
realms:
  ldapRealm:
    ldap_search_base: 'ou=users,dc=ryba'
    ldap_group_search_base: 'ou=groups,dc=ryba'
    ldap_uri: 'ldaps://master03.metal.ryba:636'
    ldap_tls_cacertdir: '/etc/openldap/cacerts'
    ldap_default_bind_dn: 'cn=ldapadm,dc=ryba'
    ldap_default_authtok: 'test'
    groupSearchBase: 'ou=groups,dc=ryba'
    groupIdAttribute: 'cn'
    groupObjectClass: 'posixGroup'
    memberAttribute: 'memberUid'
    memberAttributeValueTemplate: 'uid={0},ou=users,dc=ryba'
    userDnTemplate: 'cn={0},ou=users,dc=ryba'
    userSearchAttributeName: 'cn'
    userObjectClass: 'person'
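Alternatively, when sssd manages the LDAP connection, a realm can reference an sssd domain instead (a sketch; 'ryba' is assumed to be a domain declared in masson/core/sssd):

realms:
  ldapRealm:
    sssd_lookup: 'ryba'   # ldap_uri, bind dn and authtok are merged from the sssd domain
    userDnTemplate: 'cn={0},ou=users,dc=ryba'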
nameservice = service.deps.hdfs_nn[0].options.nameservice
options.topologies ?= {}
options.topologies[nameservice] ?= {}
for nameservice, topology of options.topologies
topology.services ?= {}
topology.services['namenode'] ?= !!service.deps.hdfs_nn
topology.services['webhdfs'] ?= !!service.deps.hdfs_nn
topology.services['jobtracker'] ?= !!service.deps.yarn_rm
topology.services['hive'] ?= !!service.deps.hive_server2
topology.services['webhcat'] ?= !!service.deps.hive_webhcat
topology.services['oozie'] ?= !!service.deps.oozie_server
topology.services['webhbase'] ?= !!service.deps.hbase_rest
# Configure providers
topology.providers ?= {}
topology.providers['authentication'] ?= {}
topology.providers['authentication'].name ?= 'ShiroProvider'
topology.providers['authentication'].config ?= {}
topology.providers['authentication'].config['sessionTimeout'] ?= 30
# By default, we only configure a simple LDAP Binding (user only)
# migration: wdavidw 170922, this used to be:
# realms = 'ldapRealm': topology
# migration: lucasbak 10102017 change how realms are configured
throw Error 'Need One Realm when ShiroProvider is used' unless topology.realms?
for realm, realm_config of topology.realms
if realm_config.sssd_lookup
throw Error 'masson/core/sssd must be used when realm.sssd_lookup is set' unless service.deps.sssd?
throw Error "masson/core/sssd ldap domain #{realm_config.sssd_lookup} does not exist" unless service.deps.sssd.options.config[realm_config.sssd_lookup]?
realm_config = merge {}, realm_config, service.deps.sssd.options.config[realm_config.sssd_lookup]
else
throw Error 'Required property ldap_uri' unless realm_config['ldap_uri']?
throw Error 'Required property ldap_default_bind_dn' unless realm_config['ldap_default_bind_dn']?
throw Error 'Required property ldap_default_authtok' unless realm_config['ldap_default_authtok']?
for property in [
'groupSearchBase'
'groupIdAttribute'
'groupObjectClass'
'memberAttribute'
'memberAttributeValueTemplate'
'userDnTemplate'
'userSearchAttributeName'
'userObjectClass'
'userSearchBase'
]
  topology.providers['authentication'].config["main.#{realm}.#{property}"] ?= realm_config[property] if realm_config[property]?
# Configure LDAP authentication
topology.providers['authentication'].config["main.#{realm}"] ?= 'org.apache.hadoop.gateway.shirorealm.KnoxLdapRealm' # OpenLDAP implementation
# topology.providers['authentication'].config['main.ldapRealm'] ?= 'org.apache.shiro.realm.ldap.JndiLdapRealm' # AD implementation
topology.providers['authentication'].config["main.#{realm}".replace('Realm','')+"ContextFactory"] ?= 'org.apache.hadoop.gateway.shirorealm.KnoxLdapContextFactory'
topology.providers['authentication'].config["main.#{realm}.contextFactory"] ?= '$'+"#{realm}".replace('Realm','')+'ContextFactory'
topology.providers['authentication'].config["main.#{realm}.userDnTemplate"] = realm_config['userDnTemplate'] if realm_config['userDnTemplate']?
topology.providers['authentication'].config["main.#{realm}.contextFactory.url"] = realm_config['ldap_uri'].split(',')[0]
topology.providers['authentication'].config["main.#{realm}.contextFactory.systemUsername"] = realm_config['ldap_default_bind_dn']
topology.providers['authentication'].config["main.#{realm}.contextFactory.systemPassword"] = "${ALIAS=#{nameservice}-#{realm}-password}"
options.realm_passwords["#{nameservice}-#{realm}-password"] = realm_config['ldap_default_authtok']
topology.providers['authentication'].config["main.#{realm}.searchBase"] = realm_config["ldap#{if realm == 'ldapGroupRealm' then '_group' else ''}_search_base"]
topology.providers['authentication'].config["main.#{realm}.contextFactory.authenticationMechanism"] ?= 'simple'
topology.providers['authentication'].config["main.#{realm}.authorizationEnabled"] ?= 'true'
topology.providers['authentication'].config['urls./**'] ?= 'authcBasic'
topology.providers['authentication'].config['main.securityManager.realms'] = ("$#{r}" for r of topology.realms).join ','
# LDAP Authentication Caching
topology.providers['authentication'].config['main.cacheManager'] = "org.apache.shiro.cache.ehcache.EhCacheManager"
topology.providers['authentication'].config['main.securityManager.cacheManager'] = "$cacheManager"
topology.providers['authentication'].config['main.ldapRealm.authenticationCachingEnabled'] = true
topology.providers['authentication'].config['main.cacheManager.cacheManagerConfigFile'] = "classpath:#{nameservice}-ehcache.xml"
The Knox Gateway identity-assertion provider maps an authenticated user to an internal cluster user and/or group. This allows the Knox Gateway to accept requests from external users without exposing internal cluster user names.
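For instance (a sketch based on the Knox documentation; the mapping values are illustrative), the provider accepts a principal.mapping parameter to translate external logins into cluster users:

providers:
  'identity-assertion':
    name: 'Pseudo'
    config:
      # Requests authenticated as "guest" reach the cluster as "hdfs"
      'principal.mapping': 'guest=hdfs'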
topology.providers['identity-assertion'] ?= name: 'Pseudo'
topology.providers['authorization'] ?= if service.deps.ranger_admin? then name: 'XASecurePDPKnox' else name: 'AclsAuthz'
Services
topology.services ?= {}
topology.services.knox ?= ''
Services are auto-configured in discovery mode when they are activated (services[module] = true). This mechanism can be used to configure a specific gateway without having to declare addresses and ports (which may change over time).
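For instance (host names and ports are illustrative), with two NameNodes in HA the webhdfs flag expands into an ordered URL list, active NameNode first, as implemented below:

# Input:  topology.services['webhdfs'] = true
# Result for a hypothetical two-NameNode cluster:
topology.services['webhdfs'] = [
  'https://master01.ryba:50470/webhdfs'   # active NameNode comes first
  'https://master02.ryba:50470/webhdfs'
]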
# Namenode & WebHDFS
if topology.services['namenode'] is true
if service.deps.hdfs_nn
topology.services['namenode'] = service.deps.hdfs_nn[0].options.core_site['fs.defaultFS']
else throw Error 'Cannot autoconfigure KNOX namenode service, no namenode declared'
if topology.services['webhdfs'] is true
throw Error 'Cannot autoconfigure KNOX webhdfs service, no namenode declared' unless service.deps.hdfs_nn
# WebHDFS auto configuration rules:
# By default we expose the NameNode's embedded WebHDFS (the default implementation) instead of HTTPFS: HTTPFS PUT requests routed through Knox create empty files.
# We also configure HA for WebHDFS if the NameNodes are in HA mode.
# if service.deps.httpfs
# if service.deps.httpfs.length > 1
# topology.providers['ha'] ?= name: 'HaProvider'
# topology.providers['ha'].config ?= {}
# topology.providers['ha'].config['WEBHDFS'] ?= 'maxFailoverAttempts=3;failoverSleep=1000;maxRetryAttempts=300;retrySleep=1000;enabled=true'
# topology.services['webhdfs'] = service.deps.httpfs.map (srv) -> "http#{if srv.options.env.HTTPFS_SSL_ENABLED is 'true' then 's' else ''}://#{ctx.config.host}:#{ctx.config.ryba.httpfs.http_port}/webhdfs/v1"
if service.deps.hdfs_nn.length > 1
topology.providers['ha'] ?= name: 'HaProvider'
topology.providers['ha'].config ?= {}
topology.providers['ha'].config['WEBHDFS'] ?= 'maxFailoverAttempts=3;failoverSleep=1000;maxRetryAttempts=300;retrySleep=1000;enabled=true'
topology.services['webhdfs'] = []
for srv in service.deps.hdfs_nn
protocol = if srv.options.hdfs_site['dfs.http.policy'] is 'HTTP_ONLY' then 'http' else 'https'
port = srv.options.hdfs_site["dfs.namenode.#{protocol}-address.#{srv.options.nameservice}.#{srv.node.hostname}"].split(':')[1]
# We ensure that the default active namenode is first in the list !
action = if srv.node.fqdn is srv.options.active_nn_host then 'unshift' else 'push'
topology.services['webhdfs'][action] "#{protocol}://#{srv.node.fqdn}:#{port}/webhdfs"
else
protocol = if service.deps.hdfs_nn[0].options.hdfs_site['dfs.http.policy'] is 'HTTP_ONLY' then 'http' else 'https'
port = service.deps.hdfs_nn[0].options.hdfs_site["dfs.namenode.#{protocol}-address"].split(':')[1]
topology.services['webhdfs'] = "#{protocol}://#{service.deps.hdfs_nn[0].node.fqdn}:#{port}/webhdfs"
Yarn ResourceManager
if topology.services['jobtracker'] is true
if service.deps.yarn_rm
rm_shortname = if service.deps.yarn_rm.length > 1 then ".#{service.deps.yarn_rm[0].node.hostname}" else ''
rm_address = service.deps.yarn_rm[0].options.yarn_site["yarn.resourcemanager.address#{rm_shortname}"]
rm_ws_address = service.deps.yarn_rm[0].options.yarn_site["yarn.resourcemanager.webapp.https.address#{rm_shortname}"]
topology.services['jobtracker'] = "rpc://#{rm_address}"
topology.services['RESOURCEMANAGER'] = "https://#{rm_ws_address}/ws"
else throw Error 'Cannot autoconfigure KNOX jobtracker service, no resourcemanager declared'
Hive Server2
if topology.services['hive'] is true
if service.deps.hive_server2.length is 1
host = service.deps.hive_server2[0].node.fqdn
port = service.deps.hive_server2[0].options.hive_site['hive.server2.thrift.http.port']
protocol = if service.deps.hive_server2[0].options.hive_site['hive.server2.use.SSL'] is 'true' then 'https' else 'http'
topology.services['hive'] = "#{protocol}://#{host}:#{port}/cliservice"
else if service.deps.hive_server2.length > 1
topology.providers['ha'] ?= name: 'HaProvider'
topology.providers['ha'].config ?= {}
topology.providers['ha'].config['HIVE'] ?= 'maxFailoverAttempts=3;failoverSleep=1000;enabled=true;' +
"zookeeperEnsemble=#{service.deps.hive_server2[0].options.hive_site['hive.zookeeper.quorum']};zookeeperNamespace=#{service.deps.hive_server2[0].options.hive_site['hive.server2.zookeeper.namespace']}"
topology.services.hive = ''
else
throw Error 'Cannot autoconfigure KNOX hive service, no hiveserver2 declared'
Hive WebHCat
if topology.services['webhcat'] is true
throw Error 'Cannot autoconfigure KNOX webhcat service, no webhcat declared' unless service.deps.hive_webhcat
topology.services['webhcat'] = []
for srv in service.deps.hive_webhcat
fqdn = srv.node.fqdn
port = srv.options.webhcat_site['templeton.port']
topology.services['webhcat'].push "http://#{fqdn}:#{port}/templeton"
if service.deps.hive_webhcat.length > 1
topology.providers['ha'] ?= name: 'HaProvider'
topology.providers['ha'].config ?= {}
topology.providers['ha'].config['WEBHCAT'] ?= 'maxFailoverAttempts=3;failoverSleep=1000;enabled=true'
Oozie
if topology.services['oozie'] is true
throw Error 'Cannot autoconfigure KNOX oozie service, no oozie declared' unless service.deps.oozie_server
topology.services['oozie'] = []
for srv in service.deps.oozie_server
topology.services['oozie'].push srv.options.oozie_site['oozie.base.url']
if service.deps.oozie_server.length > 1
topology.providers['ha'] ?= name: 'HaProvider'
topology.providers['ha'].config ?= {}
topology.providers['ha'].config['OOZIE'] ?= 'maxFailoverAttempts=3;failoverSleep=1000;enabled=true'
WebHBase
if topology.services['webhbase'] is true
throw Error 'Cannot autoconfigure KNOX webhbase service, no webhbase declared' unless service.deps.hbase_rest
topology.services['webhbase'] = []
for srv in service.deps.hbase_rest
protocol = if srv.options.hbase_site['hbase.rest.ssl.enabled'] is 'true' then 'https' else 'http'
port = srv.options.hbase_site['hbase.rest.port']
if options.config.webhbase?
topology.services['webhbase'] =
url: "#{protocol}://#{srv.node.fqdn}:#{port}"
params: options.config.webhbase
else
topology.services['webhbase'].push "#{protocol}://#{srv.node.fqdn}:#{port}"
if service.deps.hbase_rest.length > 1
topology.providers['ha'] ?= name: 'HaProvider'
topology.providers['ha'].config ?= {}
topology.providers['ha'].config['WEBHBASE'] ?= 'maxFailoverAttempts=3;failoverSleep=1000;enabled=true'
HBase UI
if topology.services['hbaseui'] is true
throw Error 'Cannot autoconfigure KNOX hbaseui service, no hbaseui declared' unless service.deps.hbase_master
topology.services['hbaseui'] = []
for srv in service.deps.hbase_master
protocol = if srv.options.hbase_site['hbase.ssl.enabled'] is 'true' then 'https' else 'http'
port = srv.options.hbase_site['hbase.master.info.port']
topology.services['hbaseui'].push "#{protocol}://#{srv.node.fqdn}:#{port}"
Configuration for Log4J
options.log4j ?= merge {}, service.deps.log4j?.options, options.log4j
options.log4j.properties ?= {}
options.log4j.properties['app.log.dir'] ?= "#{options.log_dir}"
options.log4j.properties['log4j.rootLogger'] ?= 'ERROR,rfa'
if options.log4j.remote_host? and options.log4j.remote_port?
options.log4j.socket_client ?= 'SOCKET'
# Root logger
if options.log4j.properties['log4j.rootLogger'].indexOf(options.log4j.socket_client) is -1
  options.log4j.properties['log4j.rootLogger'] += ",#{options.log4j.socket_client}"
# Socket appender properties
options.log4j.properties['app.log.application'] ?= 'knox'
options.log4j.properties['app.log.remote_host'] ?= options.log4j.remote_host
options.log4j.properties['app.log.remote_port'] ?= options.log4j.remote_port
options.log4j.socket_opts ?=
Application: '${app.log.application}'
RemoteHost: '${app.log.remote_host}'
Port: '${app.log.remote_port}'
ReconnectionDelay: '10000'
appender
type: 'org.apache.log4j.net.SocketAppender'
name: options.log4j.socket_client
log4j: options.log4j.properties
properties: options.log4j.socket_opts
Wait
options.wait_ranger_admin = service.deps.ranger_admin.options.wait if service.deps.ranger_admin
options.wait ?= {}
options.wait.tcp = for srv in service.deps.knox_server
host: srv.node.fqdn
port: options.gateway_site['gateway.port']
Dependencies
appender = require '../../lib/appender'
{merge} = require '@nikitajs/core/lib/misc'