# Benchmark Configuration

This module normalizes the benchmark configuration: it applies defaults, derives the DataNode JMX URLs, and resolves the MapReduce examples JAR and the Kerberos credentials used to run the jobs.
Example:

```json
{
  "ryba": {
    "benchmark": {
      "iterations": 10,
      "datanodes": [
        "https://worker1.ryba:50475/jmx",
        "https://worker2.ryba:50475/jmx"
      ],
      "output": "path/to/benchmarks/output/dir"
    }
  }
}
```
```coffee
module.exports = ->
  benchmark = @config.ryba.benchmark ?= {}
  benchmark.iterations ?= 10
  benchmark.output ?= "benchmark_results"
  # Suffix the output directory with a timestamp so each run gets its own folder
  benchmark.output += "/#{moment().format 'YYYYMMDDHHmmss'}"
```
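For illustration, here is how the run directory is derived from the defaults; a minimal sketch, assuming `moment` is installed:

```coffee
moment = require 'moment'
output = "benchmark_results"
output += "/#{moment().format 'YYYYMMDDHHmmss'}"
console.log output # e.g. 'benchmark_results/20170428174520'
```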
## JMX URLs
```coffee
  benchmark.datanodes ?= @contexts('ryba/hadoop/hdfs_dn').map (ctx) ->
    {hdfs} = ctx.config.ryba
    # Pick the HTTP or HTTPS address depending on the HDFS policy
    address =
      if hdfs.hdfs_site['dfs.http.policy'] is 'HTTP_ONLY'
        "http://#{hdfs.hdfs_site['dfs.datanode.http.address']}"
      else
        "https://#{hdfs.hdfs_site['dfs.datanode.https.address']}"
    "#{address.replace '0.0.0.0', ctx.config.host}/jmx"
  for datanode, i in benchmark.datanodes
    # Accept plain URL strings and normalize them into objects
    datanode = benchmark.datanodes[i] = url: datanode if typeof datanode is 'string'
    datanode.name ?= datanode.url.split("/")[2].split(":")[0]
    datanode.urls ?= {}
    datanode.urls.system ?= "#{datanode.url}?qry=java.lang:type=OperatingSystem"
    datanode.urls.disks ?= "#{datanode.url}?qry=Hadoop:service=DataNode,name=DataNodeInfo"
    datanode.urls.metrics ?= "#{datanode.url}?qry=Hadoop:service=DataNode,name=DataNodeActivity-#{datanode.url.split("/")[2].split(":")[0]}-1004"
```
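The normalized URLs target the Hadoop `/jmx` servlet, which answers with a JSON document wrapping the queried MBeans in a `beans` array. A minimal sketch of polling them, assuming the `request` package is installed and the DataNode certificates are trusted; `print_load` is an illustrative helper, not part of this module:

```coffee
request = require 'request'

# Print the system load average reported by each DataNode
print_load = (datanodes) ->
  for datanode in datanodes
    do (datanode) -> # capture `datanode` for the async callback
      request datanode.urls.system, json: true, (err, res, body) ->
        throw err if err
        # The `java.lang:type=OperatingSystem` MBean exposes SystemLoadAverage
        console.log "#{datanode.name}:", body.beans[0].SystemLoadAverage
```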
## Job JAR Paths
```coffee
  benchmark.jars ?= {}
  benchmark.jars.cloudera ?= mapreduce: "/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar"
  benchmark.jars.hortonworks ?= mapreduce: "/usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-examples-2*.jar"
  benchmark.jars.current ?= {}
```
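These JARs ship the standard MapReduce examples, including the `teragen` and `terasort` jobs used below. A hypothetical launch through the shell; the row count and HDFS output path are made-up values for the example:

```coffee
{exec} = require 'child_process'

jar = "/usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-examples-2*.jar"
# Generate 1,000,000 rows of 100 bytes (~100 MB); the HDFS output
# directory must not already exist
exec "hadoop jar #{jar} teragen 1000000 /benchmarks/teragen", (err, stdout, stderr) ->
  throw err if err
  console.log stdout
```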
## TeraGen / TeraSort Output Values
```coffee
  benchmark.terasort =
    stdout_value_names: [
      "HDFS: Number of bytes read"
      "HDFS: Number of bytes written"
      "HDFS: Number of large read operations"
      "HDFS: Number of write operations"
      "CPU time spent (ms)"
      "Physical memory (bytes)"
      "Virtual memory (bytes)"
    ]
```
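On completion, TeraGen and TeraSort print their counters to stdout as `Name=value` lines; the names above select which counters to collect. A hypothetical parser, not part of this module, sketching how those lines could be extracted:

```coffee
# Extract the listed counters from a job's stdout; returns a map of
# counter name to integer value
parse_counters = (stdout, names) ->
  values = {}
  for name in names
    # Escape the parentheses in names such as 'CPU time spent (ms)'
    pattern = name.replace /[()]/g, '\\$&'
    match = stdout.match new RegExp "#{pattern}=(\\d+)"
    values[name] = parseInt match[1], 10 if match
  values
```

Called as `parse_counters stdout, benchmark.terasort.stdout_value_names`, it returns one entry per counter found in the output.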
## Kerberos
```coffee
  # Run the benchmark jobs as the ryba Kerberos user
  benchmark.kerberos = @config.ryba.krb5.user ?= {}
```
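The benchmark jobs authenticate as this user, so a ticket must exist before submission. A hedged sketch, assuming password-based `kinit` is acceptable in your environment; `obtain_ticket` is an illustrative helper, not part of this module:

```coffee
{exec} = require 'child_process'

# Obtain a TGT for the configured principal by piping the password to kinit
obtain_ticket = (kerberos, callback) ->
  exec "echo #{kerberos.password} | kinit #{kerberos.principal}", callback
```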
## Normalization
Once normalized, the `benchmark` property looks like:

```js
{ iterations: 1,
  output: './benchmark/20170428174520',
  datanodes:
   [ { name: 'worker1.ryba',
       urls:
        { system: 'https://worker1.ryba:50475/jmx?qry=java.lang:type=OperatingSystem',
          disks: 'https://worker1.ryba:50475/jmx?qry=Hadoop:service=DataNode,name=DataNodeInfo',
          metrics: 'https://worker1.ryba:50475/jmx?qry=Hadoop:service=DataNode,name=DataNodeActivity-worker1.ryba-1004' } },
     { name: 'worker2.ryba',
       urls:
        { system: 'https://worker2.ryba:50475/jmx?qry=java.lang:type=OperatingSystem',
          disks: 'https://worker2.ryba:50475/jmx?qry=Hadoop:service=DataNode,name=DataNodeInfo',
          metrics: 'https://worker2.ryba:50475/jmx?qry=Hadoop:service=DataNode,name=DataNodeActivity-worker2.ryba-1004' } } ],
  jars:
   { cloudera: { mapreduce: '/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' },
     hortonworks: { mapreduce: '/usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-examples-2*.jar' },
     current: {} },
  terasort:
   { stdout_value_names:
      [ 'HDFS: Number of bytes read',
        'HDFS: Number of bytes written',
        'HDFS: Number of large read operations',
        'HDFS: Number of write operations',
        'CPU time spent (ms)',
        'Physical memory (bytes)',
        'Virtual memory (bytes)' ] },
  kerberos:
   { password: 'test123',
     password_sync: true,
     principal: 'ryba@HADOOP.RYBA' } }
```
## Imports
```coffee
moment = require 'moment'
```
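For reference, a toy invocation outside the ryba runtime; the context below is a hand-built assumption mimicking the two properties the function reads, `config` and `contexts`, and it assumes the module is saved as `benchmark.coffee`:

```coffee
normalize = require './benchmark'

ctx =
  config:
    ryba:
      benchmark:
        iterations: 3
        datanodes: ['https://worker1.ryba:50475/jmx']
      krb5:
        user: {}
  # Never called here since `datanodes` is provided explicitly
  contexts: -> []

normalize.call ctx
console.log ctx.config.ryba.benchmark.datanodes[0].name # 'worker1.ryba'
```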