Falcon Server Configure
Apache Falcon is a data processing and management solution for Hadoop designed for data motion, coordination of data pipelines, lifecycle management, and data discovery. Falcon enables end consumers to quickly onboard their data and its associated processing and management tasks on Hadoop clusters.
module.exports = ->
# Resolve, in a fixed order, the contexts of every service referenced below.
ctx_modules = ['ryba/hadoop/hdfs_nn', 'ryba/hadoop/hdfs_dn', 'ryba/hive/hcatalog', 'ryba/falcon/server', 'ryba/oozie/server']
[nn_ctxs, dn_ctxs, hcat_ctxs, falcon_ctxs, oozie_ctxs] = (@contexts ctx_module for ctx_module in ctx_modules)
# HDFS contexts: NameNodes first, then DataNodes.
hadoop_ctxs = [nn_ctxs..., dn_ctxs...]
# Realm used further down to build the Kerberos principals.
realm = @config.ryba.realm
# The Falcon server options live under `ryba.falcon.server`; create the
# intermediate objects when they are missing.
@config.ryba.falcon ?= {}
@config.ryba.falcon.server ?= {}
falcon = @config.ryba.falcon.server
Identities
# Group: a plain string is shorthand for an object holding only that name.
if typeof falcon.group is 'string'
  falcon.group = name: falcon.group
falcon.group ?= {}
# Only fill in the properties which were not provided.
for own property, fallback of {name: 'falcon', system: true}
  falcon.group[property] ?= fallback
# User: same string shorthand as for the group.
if typeof falcon.user is 'string'
  falcon.user = name: falcon.user
falcon.user ?= {}
for own property, fallback of {name: 'falcon', system: true, comment: 'Falcon User', home: '/var/lib/falcon', groups: ['hadoop']}
  falcon.user[property] ?= fallback
# The gid is always set to the Falcon group name, replacing any provided value.
falcon.user.gid = falcon.group.name
Kerberos
# Kerberos HDFS Admin
# Copies the HDFS Kerberos user exposed by the `hadoop_core` dependency.
# NOTE(review): neither `options` nor `service` is declared in the visible part
# of this function (it takes no parameter and only defines `falcon`), so this
# line looks like it would throw a ReferenceError. It presumably belongs to a
# `(service) ->` / `options = service.options` style of configure handler;
# confirm the intended source and target before relying on `hdfs_krb5_user`.
options.hdfs_krb5_user = service.deps.hadoop_core.options.hdfs.krb5_user
Environment
# Locations and JVM settings, applied only where no value was provided.
# `server_opts` and `server_heap` default to empty strings.
for own property, fallback of {conf_dir: '/etc/falcon/conf', log_dir: '/var/log/falcon', pid_dir: '/var/run/falcon', server_opts: '', server_heap: ''}
  falcon[property] ?= fallback
Runtime
# Runtime properties, user-provided values are preserved.
falcon.runtime ?= {}
# Note, the prism server starts on port 16443, see https://falcon.apache.org/Distributed-mode.html
falcon.runtime['prism.falcon.local.endpoint'] ?= "https://#{@config.host}:15443/"
# Startup properties (http://falcon.incubator.apache.org/Security.html)
# nn_rcp = nn_ctxs[0].config.ryba.core_site['fs.defaultFS']
# nn_protocol = if nn_ctxs[0].config.ryba.hdfs.site['HTTP_ONLY'] then 'http' else 'https'
# nn_nameservice = if nn_ctxs[0].config.ryba.hdfs.site['dfs.nameservices'] then ".#{nn_ctxs[0].config.ryba.hdfs.site['dfs.nameservices']}" else ''
# nn_shortname = if nn_ctxs.length then ".#{nn_ctxs[0].config.shortname}" else ''
# nn_http = ctx.config.ryba.hdfs.site["dfs.namenode.#{nn_protocol}-address#{nn_nameservice}#{nn_shortname}"]
# The NameNode principal is read from the first NameNode context.
nn_principal = nn_ctxs[0].config.ryba.hdfs.site['dfs.namenode.kerberos.principal']
falcon.startup ?= {}
# Service authentication
falcon.startup['*.falcon.authentication.type'] ?= 'kerberos'
falcon.startup['*.falcon.service.authentication.kerberos.principal'] ?= "#{falcon.user.name}/#{@config.host}@#{realm}"
falcon.startup['*.falcon.service.authentication.kerberos.keytab'] ?= '/etc/security/keytabs/falcon.service.keytab'
falcon.startup['*.dfs.namenode.kerberos.principal'] ?= "#{nn_principal}"
# HTTP authentication
# The property name must not embed its value: a key ending in `=kerberos`
# would leave `*.falcon.http.authentication.type` itself undefined.
falcon.startup['*.falcon.http.authentication.type'] ?= 'kerberos'
falcon.startup['*.falcon.http.authentication.token.validity'] ?= '36000'
falcon.startup['*.falcon.http.authentication.signature.secret'] ?= 'falcon' # Change this
falcon.startup['*.falcon.http.authentication.cookie.domain'] ?= ''
falcon.startup['*.falcon.http.authentication.kerberos.principal'] ?= "HTTP/#{@config.host}@#{realm}"
falcon.startup['*.falcon.http.authentication.kerberos.keytab'] ?= '/etc/security/keytabs/spnego.service.keytab'
falcon.startup['*.falcon.http.authentication.kerberos.name.rules'] ?= 'DEFAULT'
falcon.startup['*.falcon.http.authentication.blacklisted.users'] ?= ''
# Authorization Configuration (currently disabled)
# falcon.startup['*.falcon.security.authorization.enabled'] ?= 'true'
# falcon.startup['*.falcon.security.authorization.provider'] ?= 'org.apache.falcon.security.DefaultAuthorizationProvider'
# falcon.startup['*.falcon.security.authorization.superusergroup'] ?= 'falcon'
# falcon.startup['*.falcon.security.authorization.admin.users'] ?= "#{falcon.user.name}"
# falcon.startup['*.falcon.security.authorization.admin.groups'] ?= "#{falcon.group.name}"
# falcon.startup['*.falcon.enableTLS'] ?= 'true'
# falcon.startup['*.keystore.file'] ?= '/path/to/keystore/file'
# falcon.startup['*.keystore.password'] ?= 'password'
# falcon.startup[''] ?= ''
# Cluster values in check
# falcon.cluster['hadoop.rpc.protection'] ?= nn_ctxs[0].config.ryba.core_site['hadoop.rpc.protection']
# falcon.cluster['dfs.namenode.kerberos.principal'] ?= nn_ctxs[0].config.ryba.hdfs.site['dfs.namenode.kerberos.principal']
# falcon.cluster['hive.metastore.kerberos.principal'] ?= hcat_ctxs[0].config.ryba.hive.site['hive.metastore.kerberos.principal']
# falcon.cluster['hive.metastore.sasl.enabled'] ?= hcat_ctxs[0].config.ryba.hive.site['hive.metastore.sasl.enabled']
# falcon.cluster['hive.metastore.uris'] ?= hcat_ctxs[0].config.ryba.hive.site['hive.metastore.uris']
# Entity values in check
# falcon.entity['dfs.namenode.kerberos.principal'] ?= nn_ctxs[0].config.ryba.hdfs.site['dfs.namenode.kerberos.principal']
# falcon.entity['hive.metastore.kerberos.principal'] ?= hcat_ctxs[0].config.ryba.hive.site['hive.metastore.kerberos.principal']
# falcon.entity['hive.metastore.sasl.enabled'] ?= hcat_ctxs[0].config.ryba.hive.site['hive.metastore.sasl.enabled']
# falcon.entity['hive.metastore.uris'] ?= hcat_ctxs[0].config.ryba.hive.site['hive.metastore.uris']
Configuration for Proxy Users
# Comma-separated list of the hosts of every Falcon server context.
falcon_hosts = falcon_ctxs.map((ctx) -> ctx.config.host).join ','
# Hadoop: declare the Falcon user as a proxy user in the `core_site` of every
# NameNode and DataNode context, unless the properties are already defined.
for hadoop_ctx in hadoop_ctxs
  hadoop_ctx.config.ryba ?= {}
  hadoop_ctx.config.ryba.core_site ?= {}
  hadoop_ctx.config.ryba.core_site["hadoop.proxyuser.#{falcon.user.name}.groups"] ?= '*'
  hadoop_ctx.config.ryba.core_site["hadoop.proxyuser.#{falcon.user.name}.hosts"] ?= falcon_hosts
# Oozie: enrich the configuration of every Oozie server context.
for oozie_ctx in oozie_ctxs
  oozie_ctx.config.ryba ?= {}
  # Create the intermediate `oozie` object as well, otherwise reading
  # `oozie.server` throws when Oozie has not been configured yet.
  oozie_ctx.config.ryba.oozie ?= {}
  oozie = oozie_ctx.config.ryba.oozie.server ?= {}
  oozie.falcon ?= {}
  # The Falcon integration is enabled unless explicitly turned off.
  oozie.falcon.enabled ?= true
  # NOTE(review): every Falcon-specific Oozie property below is assumed to be
  # guarded by `oozie.falcon.enabled` - confirm against the intended nesting.
  if oozie.falcon.enabled
    oozie.site ?= {}
    # Falcon user as an Oozie proxy user
    oozie.site["oozie.service.ProxyUserService.proxyuser.#{falcon.user.name}.hosts"] ?= falcon_hosts
    oozie.site["oozie.service.ProxyUserService.proxyuser.#{falcon.user.name}.groups"] ?= '*'
    oozie.site['oozie.service.URIHandlerService.uri.handlers'] ?= "org.apache.oozie.dependency.FSURIHandler,org.apache.oozie.dependency.HCatURIHandler"
    # EL extension functions, user-provided values are preserved
    oozie.site['oozie.service.ELService.ext.functions.coord-job-submit-instances'] ?= """
      now=org.apache.oozie.extensions.OozieELExtensions#ph1_now_echo,
      today=org.apache.oozie.extensions.OozieELExtensions#ph1_today_echo,
      yesterday=org.apache.oozie.extensions.OozieELExtensions#ph1_yesterday_echo,
      currentMonth=org.apache.oozie.extensions.OozieELExtensions#ph1_currentMonth_echo,
      lastMonth=org.apache.oozie.extensions.OozieELExtensions#ph1_lastMonth_echo,
      currentYear=org.apache.oozie.extensions.OozieELExtensions#ph1_currentYear_echo,
      lastYear=org.apache.oozie.extensions.OozieELExtensions#ph1_lastYear_echo,
      formatTime=org.apache.oozie.coord.CoordELFunctions#ph1_coord_formatTime_echo,
      latest=org.apache.oozie.coord.CoordELFunctions#ph2_coord_latest_echo,
      future=org.apache.oozie.coord.CoordELFunctions#ph2_coord_future_echo
      """
    oozie.site['oozie.service.ELService.ext.functions.coord-action-create-inst'] ?= """
      now=org.apache.oozie.extensions.OozieELExtensions#ph2_now_inst,
      today=org.apache.oozie.extensions.OozieELExtensions#ph2_today_inst,
      yesterday=org.apache.oozie.extensions.OozieELExtensions#ph2_yesterday_inst,
      currentMonth=org.apache.oozie.extensions.OozieELExtensions#ph2_currentMonth_inst,
      lastMonth=org.apache.oozie.extensions.OozieELExtensions#ph2_lastMonth_inst,
      currentYear=org.apache.oozie.extensions.OozieELExtensions#ph2_currentYear_inst,
      lastYear=org.apache.oozie.extensions.OozieELExtensions#ph2_lastYear_inst,
      latest=org.apache.oozie.coord.CoordELFunctions#ph2_coord_latest_echo,
      future=org.apache.oozie.coord.CoordELFunctions#ph2_coord_future_echo,
      formatTime=org.apache.oozie.coord.CoordELFunctions#ph2_coord_formatTime,
      user=org.apache.oozie.coord.CoordELFunctions#coord_user
      """
    oozie.site['oozie.service.ELService.ext.functions.coord-action-start'] ?= """
      now=org.apache.oozie.extensions.OozieELExtensions#ph2_now,
      today=org.apache.oozie.extensions.OozieELExtensions#ph2_today,
      yesterday=org.apache.oozie.extensions.OozieELExtensions#ph2_yesterday,
      currentMonth=org.apache.oozie.extensions.OozieELExtensions#ph2_currentMonth,
      lastMonth=org.apache.oozie.extensions.OozieELExtensions#ph2_lastMonth,
      currentYear=org.apache.oozie.extensions.OozieELExtensions#ph2_currentYear,
      lastYear=org.apache.oozie.extensions.OozieELExtensions#ph2_lastYear,
      latest=org.apache.oozie.coord.CoordELFunctions#ph3_coord_latest,
      future=org.apache.oozie.coord.CoordELFunctions#ph3_coord_future,
      dataIn=org.apache.oozie.extensions.OozieELExtensions#ph3_dataIn,
      instanceTime=org.apache.oozie.coord.CoordELFunctions#ph3_coord_nominalTime,
      dateOffset=org.apache.oozie.coord.CoordELFunctions#ph3_coord_dateOffset,
      formatTime=org.apache.oozie.coord.CoordELFunctions#ph3_coord_formatTime,
      user=org.apache.oozie.coord.CoordELFunctions#coord_user
      """
    # Unlike the properties above, the two SLA properties are assigned
    # unconditionally and replace any value already present.
    oozie.site['oozie.service.ELService.ext.functions.coord-sla-submit'] = """
      instanceTime=org.apache.oozie.coord.CoordELFunctions#ph1_coord_nominalTime_echo_fixed,
      user=org.apache.oozie.coord.CoordELFunctions#coord_user
      """
    oozie.site['oozie.service.ELService.ext.functions.coord-sla-create'] = """
      instanceTime=org.apache.oozie.coord.CoordELFunctions#ph2_coord_nominalTime,
      user=org.apache.oozie.coord.CoordELFunctions#coord_user
      """
Graph Database Properties
See https://github.com/apache/falcon/blob/master/src/conf/startup.properties