Menu

MapReduce Client Check

module.exports = header: 'MapReduce Client Check', handler: ({options}) ->
  # Exports an object made of a `header` label and a `handler` function.
  # The handler destructures `options` from its single argument; its body is
  # made of the indented code sections that follow.

Wait

Wait for the MapReduce History Server as well as all YARN services to be started.

  # Call the `wait` module of each service in turn, passing the matching
  # `wait_<service>` entry of the options; `once: true` is set on every call.
  for service in ['mapred_jhs', 'yarn_ts', 'yarn_nm', 'yarn_rm']
    @call "ryba/hadoop/#{service}/wait", once: true, options["wait_#{service}"]

Check Distributed Shell

The distributed shell is a YARN client application which submits a command or a shell script to be executed inside one or multiple YARN containers.

  # Note: yarn functionality moved to mapred since it requires mapred-site with memory settings
  # Writes a small shell script, submits it through the YARN distributed shell
  # client, waits for the application to reach a final state and then looks for
  # the script's output in the application logs returned by `yarn logs`.
  @call header: 'Distributed Shell', handler: ->
    # Name used to find the application in `yarn application -list`.
    appname = "ryba_check_#{options.hostname}_distributed_cache_#{Date.now()}"
    # Location of the script submitted to the distributed shell.
    scriptpath = "#{options.user.home}/check_distributed_shell.sh"
    @file
      target: "#{scriptpath}"
      content: """
      #!/usr/bin/env bash
      echo Ryba Ryba NM hostname: `hostname`
      """
      mode: 0o0640
    # NOTE(review): `unless_exists` below points at the script written by the
    # `@file` action just above; if the condition is evaluated after that file
    # is written, the command only runs when `force_check` is set - confirm
    # this is intended.
    @system.execute
      cmd: mkcmd.test options.test_krb5_user, """
      yarn org.apache.hadoop.yarn.applications.distributedshell.Client \
        -jar /usr/hdp/current/hadoop-yarn-client/hadoop-yarn-applications-distributedshell.jar \
        -shell_script #{scriptpath} \
        -appname #{appname} \
        -num_containers 1
      # Valid states: ALL, NEW, NEW_SAVING, SUBMITTED, ACCEPTED, RUNNING, FINISHED, FAILED, KILLED 
      # Wait for application to run
      done_cmd="yarn application -list -appStates ALL | grep #{appname} | egrep 'FINISHED|FAILED|KILLED'"
      # eval is required: a plain $done_cmd expansion would pass the pipes as literal arguments to yarn
      i=0; while [[ $i -lt 1000 ]] && [[ ! `eval "$done_cmd"` ]]; do ((i++)); sleep 1; done
      # Get application id
      application=`yarn application -list -appStates ALL | grep #{appname} | sed -e 's/^\\(application_[0-9_]\\+\\).*/\\1/'`
      if [ ! "$application" ]; then exit 1; fi
      rm=`yarn logs -applicationId $application 2>/dev/null | grep 'Ryba NM hostname' | sed 's/Ryba NM hostname: \\(.*\\)/\\1/'`
      [ "$rm" ]
      """
      unless_exists: unless options.force_check then scriptpath

Check

Run the "teragen" and "terasort" Hadoop examples. Will only be executed if the directory "check-{hostname}-mapred/output" generated by this action is not present on HDFS. Delete this directory, or set the "force_check" option, to re-execute the check.

  # teragen takes a number of rows; per the Hadoop TeraSort documentation each
  # row is 100 bytes, so the 100 rows generated here amount to about 10 KB
  # (10 000 000 000 rows would be 1 TB).
  # The check is skipped when the output directory already exists, unless
  # `force_check` is set.
  workdir = "check-#{options.hostname}-mapred"
  examples_jar = '/usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-examples-3*.jar'
  @system.execute
    header: 'Teragen & Terasort'
    trap: true
    cmd: mkcmd.test options.test_krb5_user, """
    hdfs dfs -rm -r #{workdir} || true
    hdfs dfs -mkdir -p #{workdir}
    hadoop jar #{examples_jar} teragen 100 #{workdir}/input
    hadoop jar #{examples_jar} terasort #{workdir}/input #{workdir}/output
    """
    unless_exec: if not options.force_check then mkcmd.test options.test_krb5_user, "hdfs dfs -test -d #{workdir}/output"

Dependencies

mkcmd = require '../../lib/mkcmd'