Menu

HDFS NameNode Layout

module.exports = header: 'HDFS NN layout', handler: ({options}) -> # Handler laying out the HDFS filesystem from the NameNode side

Register

  # Expose the project action "ryba/lib/hdfs_mkdir" as `@hdfs_mkdir`,
  # used further down to create the test user's home directory.
  @registry.register 'hdfs_mkdir', 'ryba/lib/hdfs_mkdir'

Wait

Wait for the DataNodes and NameNodes to be started.

  # Block until the DataNode and NameNode services respond before touching HDFS.
  # NOTE(review): `once: true` presumably dedupes repeated wait invocations
  # across handlers — confirm against the framework's `call` semantics.
  @call 'ryba/hadoop/hdfs_dn/wait', once: true, options.wait_hdfs_dn
  @call 'ryba/hadoop/hdfs_nn/wait', once: true, conf_dir: options.conf_dir, options.wait

HDFS layout

Set up the directories and permissions inside the HDFS filesystem. The layout is inspired by the Hadoop recommendations on the official Apache website. The following folders are created:

drwxr-xr-x   - hdfs   hadoop      /
drwxr-xr-x   - hdfs   hadoop      /apps
drwxrwxrwt   - hdfs   hadoop      /tmp
drwxr-xr-x   - hdfs   hadoop      /user
drwxr-xr-x   - hdfs   hadoop      /user/hdfs
  @call header: 'HDFS layout', (opts)->
    # Wait until the NameNode answers a metadata query on the root directory.
    @wait.execute
      cmd: mkcmd.hdfs options.hdfs_krb5_user, "hdfs --config '#{options.conf_dir}' dfs -test -d /"
    # Root directory: world-readable, writable by the HDFS superuser only.
    @system.execute
      cmd: mkcmd.hdfs options.hdfs_krb5_user, """
      hdfs --config '#{options.conf_dir}' dfs -chmod 755 /
      """
    # "/tmp": sticky bit (1777) lets every user create files while protecting
    # them from deletion by others. Exit code 2 flags "already exists" and is
    # declared in `code_skipped` so the action reports skipped, not failed.
    @system.execute
      cmd: mkcmd.hdfs options.hdfs_krb5_user, """
      if hdfs --config '#{options.conf_dir}' dfs -test -d /tmp; then exit 2; fi
      hdfs --config '#{options.conf_dir}' dfs -mkdir /tmp
      hdfs --config '#{options.conf_dir}' dfs -chown #{options.user.name}:#{options.hadoop_group.name} /tmp
      hdfs --config '#{options.conf_dir}' dfs -chmod 1777 /tmp
      """
      code_skipped: 2
    , (err, obj) ->
      # NOTE(review): `err` is ignored in these callbacks — presumably the
      # framework surfaces execution errors itself; confirm.
      @log 'Directory "/tmp" prepared' if obj.status
    # "/user" plus the home directory of the HDFS superuser (options.user.name).
    @system.execute
      cmd: mkcmd.hdfs options.hdfs_krb5_user, """
      if hdfs --config '#{options.conf_dir}' dfs -test -d /user; then exit 2; fi
      hdfs --config '#{options.conf_dir}' dfs -mkdir /user
      hdfs --config '#{options.conf_dir}' dfs -chown #{options.user.name}:#{options.hadoop_group.name} /user
      hdfs --config '#{options.conf_dir}' dfs -chmod 755 /user
      hdfs --config '#{options.conf_dir}' dfs -mkdir /user/#{options.user.name}
      hdfs --config '#{options.conf_dir}' dfs -chown #{options.user.name}:#{options.hadoop_group.name} /user/#{options.user.name}
      hdfs --config '#{options.conf_dir}' dfs -chmod 755 /user/#{options.user.name}
      """
      code_skipped: 2
    , (err, obj) ->
      # Fix: the original message hard-coded "{test_user}" while the directory
      # created above is the home of `options.user.name` (the HDFS superuser).
      @log "Directory \"/user/#{options.user.name}\" prepared" if obj.status
    # "/apps": shared location for application artifacts.
    @system.execute
      cmd: mkcmd.hdfs options.hdfs_krb5_user, """
      if hdfs --config '#{options.conf_dir}' dfs -test -d /apps; then exit 2; fi
      hdfs --config '#{options.conf_dir}' dfs -mkdir /apps
      hdfs --config '#{options.conf_dir}' dfs -chown #{options.user.name}:#{options.hadoop_group.name} /apps
      hdfs --config '#{options.conf_dir}' dfs -chmod 755 /apps
      """
      code_skipped: 2
    , (err, obj) ->
      @log 'Directory "/apps" prepared' if obj.status

HDP Layout

  # Create the HDP versioned application tree "/hdp/apps/<version>". The
  # version is derived from the local "/usr/hdp/current/hadoop-client" symlink.
  # Directories are left read/execute only (555), owned by the HDFS superuser.
  @system.execute
    header: 'HDP Layout'
    cmd: mkcmd.hdfs options.hdfs_krb5_user, """
    version=`readlink /usr/hdp/current/hadoop-client | sed 's/.*\\/\\(.*\\)\\/hadoop/\\1/'`
    hdfs --config '#{options.conf_dir}' dfs -mkdir -p /hdp/apps/$version
    hdfs --config '#{options.conf_dir}' dfs -chown -R  #{options.user.name}:#{options.hadoop_group.name} /hdp
    hdfs --config '#{options.conf_dir}' dfs -chmod 555 /hdp
    hdfs --config '#{options.conf_dir}' dfs -chmod 555 /hdp/apps
    hdfs --config '#{options.conf_dir}' dfs -chmod -R 555 /hdp/apps/$version
    """
    trap: true # NOTE(review): presumably fail-fast on the first failing shell command — confirm against the execute API; `unless_exec` below skips the action when the versioned directory already exists
    unless_exec: mkcmd.hdfs options.hdfs_krb5_user, """
    version=`readlink /usr/hdp/current/hadoop-client | sed 's/.*\\/\\(.*\\)\\/hadoop/\\1/'`
    hdfs --config '#{options.conf_dir}' dfs -test -d /hdp/apps/$version
    """

Security Layout

With Hadoop YARN 3, many files have to be shared on a remote/distributed file system, and HDFS is a good place for this, including for keytabs. Storing them centrally avoids scattering keytabs across nodes and forgetting them. Create the "hdfs:///etc/security/keytabs" folder.

  # Central keytab store "/etc/security/keytabs" inside HDFS, mirroring the
  # conventional local path. The tree is read/execute only (555) and owned by
  # the HDFS superuser and Hadoop group.
  @system.execute
    header: 'Security Layout'
    cmd: mkcmd.hdfs options.hdfs_krb5_user, """
    hdfs --config '#{options.conf_dir}' dfs -mkdir -p /etc/security/keytabs
    hdfs --config '#{options.conf_dir}' dfs -chown -R  #{options.user.name}:#{options.hadoop_group.name} /etc/security/keytabs
    hdfs --config '#{options.conf_dir}' dfs -chmod 555 /etc
    hdfs --config '#{options.conf_dir}' dfs -chmod 555 /etc/security
    hdfs --config '#{options.conf_dir}' dfs -chmod -R 555 /etc/security/keytabs
    """
    trap: true # NOTE(review): presumably fail-fast on the first failing shell command — confirm; `unless_exec` below skips the action when the directory already exists
    unless_exec: mkcmd.hdfs options.hdfs_krb5_user, """
    hdfs --config '#{options.conf_dir}' dfs -test -d /etc/security/keytabs
    """

Test User

Create a Unix and Kerberos test user, named "test" by default, and execute simple HDFS commands to ensure the NameNode is working properly. Note that those commands are NameNode specific, meaning they only affect HDFS metadata.

  # Create the test user's HDFS home directory, authenticating as the NameNode
  # principal from hdfs-site ("_HOST" replaced by the local FQDN) with the
  # NameNode keytab.
  @hdfs_mkdir
    header: 'User Test'
    target: "/user/#{options.test.user.name}"
    user: options.test.user.name
    group: options.test.group.name
    mode: 0o0750 # rwxr-x--- for the test user and its group
    conf_dir: options.conf_dir
    krb5_user:
      principal: options.hdfs_site['dfs.namenode.kerberos.principal'].replace '_HOST', options.fqdn
      keytab: options.hdfs_site['dfs.namenode.keytab.file']

Dependencies

mkcmd = require '../../lib/mkcmd'