Menu

Benchmark discovery

For each given datanode, discover the number of CPUs, the total amount of RAM and the number of disks.

module.exports = header: 'Benchmark - Discovery', handler: ->
  # Benchmark settings are read from the `ryba` section of the configuration.
  {benchmark} = @config.ryba

  # Visit every entry of `benchmark.datanodes`; the entry being visited is
  # taken from `options.key` and enriched in place by the steps below.
  @each benchmark.datanodes, (options) ->
    datanode = options.key

Discover CPU & RAM

    # Authenticate with Kerberos, fetch the JSON document served at
    # `datanode.urls.system` and store the reported processor count and
    # physical memory size on the datanode entry (`cpus`, `ram`).
    # NOTE(review): the Kerberos password and principal are interpolated
    # unquoted into the shell command — confirm they can never contain shell
    # metacharacters.
    @system.execute
      header: 'JMX System'
      cmd: """
      echo #{benchmark.kerberos.password} | kinit #{benchmark.kerberos.principal} >/dev/null 2>&1
      curl --fail -k --negotiate -u: \
        -H "Content-Type: application/json" \
        -X GET #{datanode.urls.system}
      """
      trap: true
    , (err, execute, stdout) ->
      throw err if err
      data = JSON.parse stdout
      # `beans?[0]` makes a payload without a `beans` array fail with
      # "Invalid Response" instead of an unrelated TypeError.
      bean = data?.beans?[0]
      throw Error "Invalid Response" unless /^java.lang:type=OperatingSystem/.test bean?.name
      {AvailableProcessors, TotalPhysicalMemorySize} = bean
      datanode.cpus = AvailableProcessors
      datanode.ram = TotalPhysicalMemorySize

Discover Disks count

    # Authenticate with Kerberos, fetch the JSON document served at
    # `datanode.urls.disks`, store the number of volumes on the datanode entry
    # (`disks`) and select which set of benchmark jars to use.
    # NOTE(review): the Kerberos password and principal are interpolated
    # unquoted into the shell command — confirm they can never contain shell
    # metacharacters.
    @system.execute
      header: 'JMX Disks'
      cmd: """
      echo #{benchmark.kerberos.password} | kinit #{benchmark.kerberos.principal} >/dev/null 2>&1
      curl --fail -k --negotiate -u: \
        -H "Content-Type: application/json" \
        -X GET #{datanode.urls.disks}
      """
      trap: true
    , (err, execute, stdout) ->
      throw err if err
      data = JSON.parse stdout
      # `beans?[0]` makes a payload without a `beans` array fail with
      # "Invalid Response" instead of an unrelated TypeError.
      bean = data?.beans?[0]
      throw Error "Invalid Response" unless /^Hadoop:service=DataNode,name=DataNodeInfo/.test bean?.name
      {VolumeInfo, Version} = bean
      # `VolumeInfo` is a JSON-encoded string; its top-level keys are counted
      # (presumably one key per volume — confirm against the JMX output).
      datanode.disks = Object.keys(JSON.parse VolumeInfo).length
      # A version string containing "cdh" selects the Cloudera jars, anything
      # else falls back to the Hortonworks jars.
      benchmark.jars.current =
        if Version.indexOf("cdh") != -1
          benchmark.jars.cloudera
        else
          benchmark.jars.hortonworks

Prepare TeraSort benchmarks

Generate the official GraySort input data set. The user specifies the number of rows and the output directory and this class runs a map/reduce program to generate the data. The format of the data is:

  • 100 bytes: (10 bytes key) (constant 2 bytes) (32 bytes rowid) (constant 4 bytes) (48 bytes filler) (constant 4 bytes)
  • The rowid is the right justified row id as a hex number.

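Tests are run with half the total number of disks, the total number of disks and 5 times the total number of disks. Candidate generated data sizes are 1GB, 10GB, 100GB and 1TB; in the code below these sizes are currently commented out and the active runs use a row count equivalent to 128 MiB of 100-byte rows.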

  # Build `benchmark.terasort.parameters`: for each of three map-count bases
  # (half, exactly, and five times the total number of disks) push two
  # entries, one with `maps` equal to the base and one with ten times the base.
  # Every entry uses the same `rows` value (128 MiB worth of 100-byte rows).
  @call ->
    # Sum the disks discovered on every datanode rather than extrapolating
    # from the first one, so heterogeneous nodes are counted correctly.
    total_disks = 0
    total_disks += node.disks for node in benchmark.datanodes
    # An empty datanode list or a missing `disks` value (NaN sum) would
    # otherwise yield meaningless map counts.
    throw Error "No disks discovered on the datanodes" unless total_disks > 0
    benchmark.terasort.parameters = []
    # Round the half-count up: the map count must be a whole number even when
    # the total number of disks is odd.
    for disks_count in [Math.ceil(total_disks / 2), total_disks, total_disks * 5]
      # 1 block / disk
      benchmark.terasort.parameters.push
        maps: disks_count
        rows: Math.floor 128 * Math.pow(1024, 2) / 100
      # 10 blocks / disk
      benchmark.terasort.parameters.push
        maps: disks_count * 10
        rows: Math.floor 128 * Math.pow(1024, 2) / 100
      # # 10 blocks / disk
      # benchmark.terasort.parameters.push
      #   maps: disks_count
      #   rows: Math.floor 128 * 10 * Math.pow(1024, 2) / 100

      # # 100MB
      # benchmark.terasort.parameters.push
      #   maps: disks_count
      #   rows: Math.pow(1024, 2) / 100 / 100
      # # 1GB
      # benchmark.terasort.parameters.push
      #   maps: disks_count
      #   rows: Math.pow(1024, 3) / 100
      # # 10GB
      # benchmark.terasort.parameters.push
      #   maps: disks_count
      #   rows: Math.pow(1024, 3) * 10 / 100
      # # 100GB
      # benchmark.terasort.parameters.push
      #   maps: disks_count
      #   rows: Math.pow(1024, 3) * 100 / 100
      # # 1TB
      # benchmark.terasort.parameters.push
      #   maps: disks_count
      #   rows: Math.pow(1024, 4) / 100

Create output directory

  # Make sure the directory configured as `benchmark.output` exists.
  # `ssh: false` is passed through as-is (presumably to run on the local
  # machine rather than over SSH — confirm against the framework docs).
  @system.mkdir header: 'Output Dir', ssh: false, target: benchmark.output

Imports

each = require 'each'