From 85a6f0d36f202c385368e63df0a9b1bd0caf763b Mon Sep 17 00:00:00 2001
From: hxlong <1141741507@qq.com>
Date: Fri, 16 Oct 2020 10:32:26 +0800
Subject: [PATCH 1/2] update assignment3

---
 Assignment3.md | 15 +-
 file/assignment3/hadoop/capacity-scheduler.xml | 146 ++++++++++
 file/assignment3/hadoop/configuration.xsl | 40 +++
 file/assignment3/hadoop/container-executor.cfg | 4 +
 file/assignment3/hadoop/core-site.xml | 65 +++++
 file/assignment3/hadoop/excludes | 0
 file/assignment3/hadoop/hadoop-env.cmd | 85 ++++++
 file/assignment3/hadoop/hadoop-env.sh | 126 ++++++++
 file/assignment3/hadoop/hadoop-metrics.properties | 75 +++++
 file/assignment3/hadoop/hadoop-metrics2.properties | 68 +++++
 file/assignment3/hadoop/hadoop-policy.xml | 226 ++++++++++++++
 file/assignment3/hadoop/hdfs-site.xml | 120 ++++++++
 file/assignment3/hadoop/httpfs-env.sh | 55 ++++
 file/assignment3/hadoop/httpfs-log4j.properties | 35 +++
 file/assignment3/hadoop/httpfs-signature.secret | 1 +
 file/assignment3/hadoop/httpfs-site.xml | 17 ++
 file/assignment3/hadoop/id_rsa | 27 ++
 file/assignment3/hadoop/kms-acls.xml | 135 +++++++++
 file/assignment3/hadoop/kms-env.sh | 59 ++++
 file/assignment3/hadoop/kms-log4j.properties | 41 +++
 file/assignment3/hadoop/kms-site.xml | 173 +++++++++++
 file/assignment3/hadoop/log4j.properties | 323 +++++++++++++++++++++
 file/assignment3/hadoop/mapred-env.cmd | 20 ++
 file/assignment3/hadoop/mapred-env.sh | 14 +
 file/assignment3/hadoop/mapred-queues.xml.template | 92 ++++++
 file/assignment3/hadoop/mapred-site.xml | 51 ++++
 file/assignment3/hadoop/mapred-site.xml.template | 21 ++
 file/assignment3/hadoop/slaves | 1 +
 file/assignment3/hadoop/ssl-client.xml.example | 80 +++++
 file/assignment3/hadoop/ssl-server.xml.example | 88 ++++++
 file/assignment3/hadoop/yarn-env.cmd | 60 ++++
 file/assignment3/hadoop/yarn-env.sh | 127 ++++++++
 file/assignment3/hadoop/yarn-excludes | 0
 file/assignment3/hadoop/yarn-site.xml | 276 ++++++++++++++++++
 34 files changed, 2661 insertions(+), 5 deletions(-)
 create mode 100644 file/assignment3/hadoop/capacity-scheduler.xml
 create mode 100644 file/assignment3/hadoop/configuration.xsl
 create mode 100644 file/assignment3/hadoop/container-executor.cfg
 create mode 100644 file/assignment3/hadoop/core-site.xml
 create mode 100644 file/assignment3/hadoop/excludes
 create mode 100644 file/assignment3/hadoop/hadoop-env.cmd
 create mode 100644 file/assignment3/hadoop/hadoop-env.sh
 create mode 100644 file/assignment3/hadoop/hadoop-metrics.properties
 create mode 100644 file/assignment3/hadoop/hadoop-metrics2.properties
 create mode 100644 file/assignment3/hadoop/hadoop-policy.xml
 create mode 100644 file/assignment3/hadoop/hdfs-site.xml
 create mode 100644 file/assignment3/hadoop/httpfs-env.sh
 create mode 100644 file/assignment3/hadoop/httpfs-log4j.properties
 create mode 100644 file/assignment3/hadoop/httpfs-signature.secret
 create mode 100644 file/assignment3/hadoop/httpfs-site.xml
 create mode 100644 file/assignment3/hadoop/id_rsa
 create mode 100644 file/assignment3/hadoop/kms-acls.xml
 create mode 100644 file/assignment3/hadoop/kms-env.sh
 create mode 100644 file/assignment3/hadoop/kms-log4j.properties
 create mode 100644 file/assignment3/hadoop/kms-site.xml
 create mode 100644 file/assignment3/hadoop/log4j.properties
 create mode 100644 file/assignment3/hadoop/mapred-env.cmd
 create mode 100644 file/assignment3/hadoop/mapred-env.sh
 create mode 100644 file/assignment3/hadoop/mapred-queues.xml.template
 create mode 100644 file/assignment3/hadoop/mapred-site.xml
 create mode 100644 file/assignment3/hadoop/mapred-site.xml.template
 create mode 100644 file/assignment3/hadoop/slaves
 create mode 100644 file/assignment3/hadoop/ssl-client.xml.example
 create mode 100644 file/assignment3/hadoop/ssl-server.xml.example
 create mode 100644 file/assignment3/hadoop/yarn-env.cmd
 create mode 100644 file/assignment3/hadoop/yarn-env.sh
 create mode 100644 file/assignment3/hadoop/yarn-excludes
 create mode 100644 file/assignment3/hadoop/yarn-site.xml

diff --git a/Assignment3.md b/Assignment3.md
index e26833d..00d5906 100644
--- a/Assignment3.md
+++ b/Assignment3.md
@@ -91,6 +91,7 @@
 1. su hadoop: switch to the hadoop user
 2. hdfs dfsadmin -report: view cluster information
+ 3. exit (log out of this user when done, to avoid permission problems in later operations)
@@ -100,7 +101,7 @@
 ### 2) Getting familiar with basic commands

-> Hadoop implements a distributed file system, the Hadoop Distributed File System (HDFS). We can log in to the Master node and work with files through a few basic commands, which are similar to the commands we use on a Linux system.
+> Hadoop implements a distributed file system, the Hadoop Distributed File System (HDFS). We can log in to the Master node and work with files through a few basic commands, which are similar to the commands we use on a Linux system. `Perform all operations as the root user, to avoid permission problems`

 #### Listing files

@@ -112,7 +113,7 @@
 > hadoop fs -mkdir takes the URIs given as paths as its arguments and creates those directories. It behaves like Unix mkdir -p, creating every missing parent directory along the path.
 >
-> eg: hadoop fs -mkdir /dir1 /dir2 (the directories are created in the HDFS file system, not in the local file system)
+> eg: hadoop fs -mkdir /dir1 /dir2 (the directories are created in the HDFS file system, not in the local file system)

 #### Uploading files

@@ -151,7 +152,7 @@
 >
 > 2. Create a folder test in the HDFS file system, then upload the file into that folder `use mkdir and put `
 >
-> 3. Use the cat command to view the file contents and take a screenshot `the screenshot must include the folder information`
+> 3. Use the cat command to view the file contents and take a screenshot `the screenshot must include the folder information` (view the info.txt in the HDFS file system, not the one in the local file system)
 >
 > `Mind which user and directory you use during these operations, to avoid permission denied problems`

@@ -180,7 +181,7 @@
 ``` sh
 /root/install_uhadoop_client_new.sh client_ip client_user password port
 client_ip: IP of the client machine (the UHost you created)
- client_user: username on the client machine that the client should be installed for
+ client_user: username on the client machine that the client should be installed for (root)
 password: root password of the client machine
 port: ssh port of the client machine (usually 22)
 ```

@@ -234,7 +235,11 @@ hadoop jar /home/hadoop/hadoop-examples.jar wordcount /input /output if /ou

 `**************Assignment 5: implement a single-threaded WordCount in any language, record the running time, then insert a screenshot of the running time into the lab report (optional)***************`

-
+ `Assignment requirements`
+
+> Folder whose words are to be counted: [/home/hadoop/etc/hadoop](file/assignment3/hadoop)
+>
+> Run it locally and record the execution time (a reference sketch is appended at the end of this patch)

 `******Please note: after the experiment is finished, delete the UHadoop cluster, EIP, UHost, and other resources******`

diff --git a/file/assignment3/hadoop/capacity-scheduler.xml b/file/assignment3/hadoop/capacity-scheduler.xml
new file mode 100644
index 0000000..6ac726e
--- /dev/null
+++ b/file/assignment3/hadoop/capacity-scheduler.xml
@@ -0,0 +1,146 @@
+
+
+
+
+ yarn.scheduler.capacity.maximum-applications
+ 10000
+
+ Maximum number of applications that can be pending and running.
+
+
+
+
+ yarn.scheduler.capacity.maximum-am-resource-percent
+ 0.1
+
+ Maximum percent of resources in the cluster which can be used to run
+ application masters i.e. controls number of concurrent running
+ applications.
+
+
+
+
+ yarn.scheduler.capacity.resource-calculator
+ org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator
+
+ The ResourceCalculator implementation to be used to compare
+ Resources in the scheduler.
+ The default i.e. DefaultResourceCalculator only uses Memory while
+ DominantResourceCalculator uses dominant-resource to compare
+ multi-dimensional resources such as Memory, CPU etc.
+
+
+
+
+ yarn.scheduler.capacity.root.queues
+ default
+
+ The queues at the this level (root is the root queue).
+
+
+
+
+ yarn.scheduler.capacity.root.default.capacity
+ 100
+ Default queue target capacity.
+
+
+
+ yarn.scheduler.capacity.root.default.user-limit-factor
+ 1
+
+ Default queue user limit a percentage from 0.0 to 1.0.
+
+
+
+
+ yarn.scheduler.capacity.root.default.maximum-capacity
+ 100
+
+ The maximum capacity of the default queue.
+ + + + + yarn.scheduler.capacity.root.default.state + RUNNING + + The state of the default queue. State can be one of RUNNING or STOPPED. + + + + + yarn.scheduler.capacity.root.default.acl_submit_applications + * + + The ACL of who can submit jobs to the default queue. + + + + + yarn.scheduler.capacity.root.default.acl_administer_queue + * + + The ACL of who can administer jobs on the default queue. + + + + + yarn.scheduler.capacity.node-locality-delay + 40 + + Number of missed scheduling opportunities after which the CapacityScheduler + attempts to schedule rack-local containers. + Typically this should be set to number of nodes in the cluster, By default is setting + approximately number of nodes in one rack which is 40. + + + + + yarn.scheduler.capacity.queue-mappings + + + A list of mappings that will be used to assign jobs to queues + The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]* + Typically this list will be used to map users to queues, + for example, u:%user:%user maps all users to queues with the same name + as the user. + + + + + yarn.scheduler.capacity.queue-mappings-override.enable + false + + If a queue mapping is present, will it override the value specified + by the user? This can be used by administrators to place jobs in queues + that are different than the one specified by the user. + The default is false. + + + + + yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments + 1 + + Controls the number of OFF_SWITCH assignments allowed + during a node's heartbeat. Increasing this value can improve + scheduling rate for OFF_SWITCH containers. Lower values reduce + "clumping" of applications on particular nodes. The default is 1. + Legal values are 1-MAX_INT. This config is refreshable. + + + + diff --git a/file/assignment3/hadoop/configuration.xsl b/file/assignment3/hadoop/configuration.xsl new file mode 100644 index 0000000..d50d80b --- /dev/null +++ b/file/assignment3/hadoop/configuration.xsl @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + +
namevaluedescription
+ + +
+
diff --git a/file/assignment3/hadoop/container-executor.cfg b/file/assignment3/hadoop/container-executor.cfg new file mode 100644 index 0000000..d68cee8 --- /dev/null +++ b/file/assignment3/hadoop/container-executor.cfg @@ -0,0 +1,4 @@ +yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group +banned.users=#comma separated list of users who can not run applications +min.user.id=1000#Prevent other super-users +allowed.system.users=##comma separated list of system users who CAN run applications diff --git a/file/assignment3/hadoop/core-site.xml b/file/assignment3/hadoop/core-site.xml new file mode 100644 index 0000000..e9b52fc --- /dev/null +++ b/file/assignment3/hadoop/core-site.xml @@ -0,0 +1,65 @@ + + + + + + + + fs.default.name + hdfs://Ucluster + + + ha.zookeeper.quorum + uhadoop-ia1nlbku-master1:2181,uhadoop-ia1nlbku-master2:2181,uhadoop-ia1nlbku-core1:2181 + + + hadoop.proxyuser.hive.hosts + * + + + hadoop.proxyuser.hive.groups + * + + + hadoop.proxyuser.hadoop.hosts + * + + + hadoop.proxyuser.hue.hosts + * + + + hadoop.proxyuser.hue.groups + * + + + hadoop.proxyuser.oozie.hosts + * + + + hadoop.proxyuser.oozie.groups + * + + + io.compression.codecs + org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.SnappyCodec + + + hadoop.http.staticuser.user + hadoop + + + ipc.maximum.data.length + 69415731 + + \ No newline at end of file diff --git a/file/assignment3/hadoop/excludes b/file/assignment3/hadoop/excludes new file mode 100644 index 0000000..e69de29 diff --git a/file/assignment3/hadoop/hadoop-env.cmd b/file/assignment3/hadoop/hadoop-env.cmd new file mode 100644 index 0000000..5aed07d --- /dev/null +++ b/file/assignment3/hadoop/hadoop-env.cmd @@ -0,0 +1,85 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@rem Set Hadoop-specific environment variables here. + +@rem The only required environment variable is JAVA_HOME. All others are +@rem optional. When running a distributed configuration it is best to +@rem set JAVA_HOME in this file, so that it is correctly defined on +@rem remote nodes. + +@rem The java implementation to use. Required. +set JAVA_HOME=%JAVA_HOME% + +@rem The jsvc implementation to use. Jsvc is required to run secure datanodes. +@rem set JSVC_HOME=%JSVC_HOME% + +@rem set HADOOP_CONF_DIR= + +@rem Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. 
+if exist %HADOOP_HOME%\contrib\capacity-scheduler ( + if not defined HADOOP_CLASSPATH ( + set HADOOP_CLASSPATH=%HADOOP_HOME%\contrib\capacity-scheduler\*.jar + ) else ( + set HADOOP_CLASSPATH=%HADOOP_CLASSPATH%;%HADOOP_HOME%\contrib\capacity-scheduler\*.jar + ) +) + +@rem The maximum amount of heap to use, in MB. Default is 1000. +@rem set HADOOP_HEAPSIZE= +@rem set HADOOP_NAMENODE_INIT_HEAPSIZE="" + +@rem Extra Java runtime options. Empty by default. +@rem set HADOOP_OPTS=%HADOOP_OPTS% -Djava.net.preferIPv4Stack=true + +@rem Command specific options appended to HADOOP_OPTS when specified +if not defined HADOOP_SECURITY_LOGGER ( + set HADOOP_SECURITY_LOGGER=INFO,RFAS +) +if not defined HDFS_AUDIT_LOGGER ( + set HDFS_AUDIT_LOGGER=INFO,NullAppender +) + +set HADOOP_NAMENODE_OPTS=-Dhadoop.security.logger=%HADOOP_SECURITY_LOGGER% -Dhdfs.audit.logger=%HDFS_AUDIT_LOGGER% %HADOOP_NAMENODE_OPTS% +set HADOOP_DATANODE_OPTS=-Dhadoop.security.logger=ERROR,RFAS %HADOOP_DATANODE_OPTS% +set HADOOP_SECONDARYNAMENODE_OPTS=-Dhadoop.security.logger=%HADOOP_SECURITY_LOGGER% -Dhdfs.audit.logger=%HDFS_AUDIT_LOGGER% %HADOOP_SECONDARYNAMENODE_OPTS% + +@rem The following applies to multiple commands (fs, dfs, fsck, distcp etc) +set HADOOP_CLIENT_OPTS=%HADOOP_CLIENT_OPTS% +@rem set heap args when HADOOP_HEAPSIZE is empty +if not defined HADOOP_HEAPSIZE ( + set HADOOP_CLIENT_OPTS=-Xmx512m %HADOOP_CLIENT_OPTS% +) +@rem set HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData %HADOOP_JAVA_PLATFORM_OPTS%" + +@rem On secure datanodes, user to run the datanode as after dropping privileges +set HADOOP_SECURE_DN_USER=%HADOOP_SECURE_DN_USER% + +@rem Where log files are stored. %HADOOP_HOME%/logs by default. +@rem set HADOOP_LOG_DIR=%HADOOP_LOG_DIR%\%USERNAME% + +@rem Where log files are stored in the secure data environment. +set HADOOP_SECURE_DN_LOG_DIR=%HADOOP_LOG_DIR%\%HADOOP_HDFS_USER% + +@rem The directory where pid files are stored. /tmp by default. +@rem NOTE: this should be set to a directory that can only be written to by +@rem the user that will run the hadoop daemons. Otherwise there is the +@rem potential for a symlink attack. +set HADOOP_PID_DIR=%HADOOP_PID_DIR% +set HADOOP_SECURE_DN_PID_DIR=%HADOOP_PID_DIR% + +@rem A string representing this instance of hadoop. %USERNAME% by default. +set HADOOP_IDENT_STRING=%USERNAME% diff --git a/file/assignment3/hadoop/hadoop-env.sh b/file/assignment3/hadoop/hadoop-env.sh new file mode 100644 index 0000000..7b4483f --- /dev/null +++ b/file/assignment3/hadoop/hadoop-env.sh @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Set Hadoop-specific environment variables here. + +# The only required environment variable is JAVA_HOME. All others are +# optional. 
When running a distributed configuration it is best to +# set JAVA_HOME in this file, so that it is correctly defined on +# remote nodes. + +# The java implementation to use. +export JAVA_HOME=${JAVA_HOME} + +# The jsvc implementation to use. Jsvc is required to run secure datanodes +# that bind to privileged ports to provide authentication of data transfer +# protocol. Jsvc is not required if SASL is configured for authentication of +# data transfer protocol using non-privileged ports. +#export JSVC_HOME=${JSVC_HOME} + +export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/home/hadoop/conf"} + +# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. +for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do + if [ "$HADOOP_CLASSPATH" ]; then + export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f + else + export HADOOP_CLASSPATH=$f + fi +done + + +export hadoop_pid_dir_prefix=/var/run/hadoop-hdfs +export hdfs_log_dir_prefix=/var/log/hadoop-hdfs + +# The maximum amount of heap to use, in MB. Default is 1000. +#export HADOOP_HEAPSIZE= +#export HADOOP_NAMENODE_INIT_HEAPSIZE="" +export HADOOP_HEAPSIZE=1024 +export HADOOP_USER_CLASSPATH_FIRST=true + +export HADOOP_NAMENODE_HEAPSIZE=3072 +export HADOOP_DATANODE_HEAPSIZE=3413 + + +export namenode_opt_newsize=128m +export namenode_opt_maxnewsize=128m +export namenode_heapsize=${HADOOP_NAMENODE_HEAPSIZE}m +export dtnode_heapsize=${HADOOP_DATANODE_HEAPSIZE}m + + +# Enable extra debugging of Hadoop's JAAS binding, used to set up +# Kerberos security. +# export HADOOP_JAAS_DEBUG=true + +# Extra Java runtime options. Empty by default. +# For Kerberos debugging, an extended option set logs more invormation +# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true -Dsun.security.spnego.debug" +export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true" + + +SHARED_HADOOP_NAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile=${hdfs_log_dir_prefix}/hs_err_pid%p.log -XX:NewSize=${namenode_opt_newsize} -XX:MaxNewSize=${namenode_opt_maxnewsize} -Xloggc:$hdfs_log_dir_prefix/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -Xms${namenode_heapsize} -Xmx${namenode_heapsize} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,RFAAUDIT" + +export HADOOP_NAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-namenode/bin/kill-name-node\" -Dorg.mortbay.jetty.Request.maxFormContentSize=-1 ${HADOOP_NAMENODE_OPTS}" + +export HADOOP_DATANODE_OPTS="-server -XX:ParallelGCThreads=4 -XX:+UseConcMarkSweepGC -XX:ErrorFile=${hdfs_log_dir_prefix}/hs_err_pid%p.log -XX:NewSize=200m -XX:MaxNewSize=200m -Xloggc:${hdfs_log_dir_prefix}/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms${dtnode_heapsize} -Xmx${dtnode_heapsize} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,RFAAUDIT ${HADOOP_DATANODE_OPTS} -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly" + + +export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS" +export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS" + +# The following applies to multiple commands (fs, dfs, fsck, distcp etc) +export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS" +# set heap args when HADOOP_HEAPSIZE is empty +if [ "$HADOOP_HEAPSIZE" = "" ]; then + export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS" +fi 
+#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS" + +# On secure datanodes, user to run the datanode as after dropping privileges. +# This **MUST** be uncommented to enable secure HDFS if using privileged ports +# to provide authentication of data transfer protocol. This **MUST NOT** be +# defined if SASL is configured for authentication of data transfer protocol +# using non-privileged ports. +export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER} + +# Where log files are stored. $HADOOP_HOME/logs by default. +export HADOOP_LOG_DIR=$hdfs_log_dir_prefix + +# Where log files are stored in the secure data environment. +#export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER} + +### +# HDFS Mover specific parameters +### +# Specify the JVM options to be used when starting the HDFS Mover. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HADOOP_MOVER_OPTS="" + +### +# Advanced Users Only! +### + +# The directory where pid files are stored. /tmp by default. +# NOTE: this should be set to a directory that can only be written to by +# the user that will run the hadoop daemons. Otherwise there is the +# potential for a symlink attack. +export HADOOP_PID_DIR=$hadoop_pid_dir_prefix +export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR} + +# A string representing this instance of hadoop. $USER by default. +export HADOOP_IDENT_STRING=$USER \ No newline at end of file diff --git a/file/assignment3/hadoop/hadoop-metrics.properties b/file/assignment3/hadoop/hadoop-metrics.properties new file mode 100644 index 0000000..c1b2eb7 --- /dev/null +++ b/file/assignment3/hadoop/hadoop-metrics.properties @@ -0,0 +1,75 @@ +# Configuration of the "dfs" context for null +dfs.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "dfs" context for file +#dfs.class=org.apache.hadoop.metrics.file.FileContext +#dfs.period=10 +#dfs.fileName=/tmp/dfsmetrics.log + +# Configuration of the "dfs" context for ganglia +# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) +# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# dfs.period=10 +# dfs.servers=localhost:8649 + + +# Configuration of the "mapred" context for null +mapred.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "mapred" context for file +#mapred.class=org.apache.hadoop.metrics.file.FileContext +#mapred.period=10 +#mapred.fileName=/tmp/mrmetrics.log + +# Configuration of the "mapred" context for ganglia +# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter) +# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# mapred.period=10 +# mapred.servers=localhost:8649 + + +# Configuration of the "jvm" context for null +#jvm.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "jvm" context for file +#jvm.class=org.apache.hadoop.metrics.file.FileContext +#jvm.period=10 +#jvm.fileName=/tmp/jvmmetrics.log + +# Configuration of the "jvm" context for ganglia +# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# jvm.period=10 +# jvm.servers=localhost:8649 + +# Configuration of the "rpc" context for null +rpc.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "rpc" context for file 
+#rpc.class=org.apache.hadoop.metrics.file.FileContext +#rpc.period=10 +#rpc.fileName=/tmp/rpcmetrics.log + +# Configuration of the "rpc" context for ganglia +# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# rpc.period=10 +# rpc.servers=localhost:8649 + + +# Configuration of the "ugi" context for null +ugi.class=org.apache.hadoop.metrics.spi.NullContext + +# Configuration of the "ugi" context for file +#ugi.class=org.apache.hadoop.metrics.file.FileContext +#ugi.period=10 +#ugi.fileName=/tmp/ugimetrics.log + +# Configuration of the "ugi" context for ganglia +# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext +# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext31 +# ugi.period=10 +# ugi.servers=localhost:8649 + diff --git a/file/assignment3/hadoop/hadoop-metrics2.properties b/file/assignment3/hadoop/hadoop-metrics2.properties new file mode 100644 index 0000000..0c09228 --- /dev/null +++ b/file/assignment3/hadoop/hadoop-metrics2.properties @@ -0,0 +1,68 @@ +# syntax: [prefix].[source|sink].[instance].[options] +# See javadoc of package-info.java for org.apache.hadoop.metrics2 for details + +*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink +# default sampling period, in seconds +*.period=10 + +# The namenode-metrics.out will contain metrics from all context +#namenode.sink.file.filename=namenode-metrics.out +# Specifying a special sampling period for namenode: +#namenode.sink.*.period=8 + +#datanode.sink.file.filename=datanode-metrics.out + +#resourcemanager.sink.file.filename=resourcemanager-metrics.out + +#nodemanager.sink.file.filename=nodemanager-metrics.out + +#mrappmaster.sink.file.filename=mrappmaster-metrics.out + +#jobhistoryserver.sink.file.filename=jobhistoryserver-metrics.out + +# the following example split metrics of different +# context to different sinks (in this case files) +#nodemanager.sink.file_jvm.class=org.apache.hadoop.metrics2.sink.FileSink +#nodemanager.sink.file_jvm.context=jvm +#nodemanager.sink.file_jvm.filename=nodemanager-jvm-metrics.out +#nodemanager.sink.file_mapred.class=org.apache.hadoop.metrics2.sink.FileSink +#nodemanager.sink.file_mapred.context=mapred +#nodemanager.sink.file_mapred.filename=nodemanager-mapred-metrics.out + +# +# Below are for sending metrics to Ganglia +# +# for Ganglia 3.0 support +# *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink30 +# +# for Ganglia 3.1 support +# *.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31 + +# *.sink.ganglia.period=10 + +# default for supportsparse is false +# *.sink.ganglia.supportsparse=true + +#*.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both +#*.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40 + +# Tag values to use for the ganglia prefix. If not defined no tags are used. +# If '*' all tags are used. If specifiying multiple tags separate them with +# commas. Note that the last segment of the property name is the context name. 
+# +#*.sink.ganglia.tagsForPrefix.jvm=ProcesName +#*.sink.ganglia.tagsForPrefix.dfs= +#*.sink.ganglia.tagsForPrefix.rpc= +#*.sink.ganglia.tagsForPrefix.mapred= + +#namenode.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 + +#datanode.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 + +#resourcemanager.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 + +#nodemanager.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 + +#mrappmaster.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 + +#jobhistoryserver.sink.ganglia.servers=yourgangliahost_1:8649,yourgangliahost_2:8649 diff --git a/file/assignment3/hadoop/hadoop-policy.xml b/file/assignment3/hadoop/hadoop-policy.xml new file mode 100644 index 0000000..2bf5c02 --- /dev/null +++ b/file/assignment3/hadoop/hadoop-policy.xml @@ -0,0 +1,226 @@ + + + + + + + + + security.client.protocol.acl + * + ACL for ClientProtocol, which is used by user code + via the DistributedFileSystem. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.client.datanode.protocol.acl + * + ACL for ClientDatanodeProtocol, the client-to-datanode protocol + for block recovery. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.datanode.protocol.acl + * + ACL for DatanodeProtocol, which is used by datanodes to + communicate with the namenode. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.inter.datanode.protocol.acl + * + ACL for InterDatanodeProtocol, the inter-datanode protocol + for updating generation timestamp. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.namenode.protocol.acl + * + ACL for NamenodeProtocol, the protocol used by the secondary + namenode to communicate with the namenode. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.admin.operations.protocol.acl + * + ACL for AdminOperationsProtocol. Used for admin commands. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.refresh.user.mappings.protocol.acl + * + ACL for RefreshUserMappingsProtocol. Used to refresh + users mappings. The ACL is a comma-separated list of user and + group names. The user and group list is separated by a blank. For + e.g. "alice,bob users,wheel". A special value of "*" means all + users are allowed. + + + + security.refresh.policy.protocol.acl + * + ACL for RefreshAuthorizationPolicyProtocol, used by the + dfsadmin and mradmin commands to refresh the security policy in-effect. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. 
"alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.ha.service.protocol.acl + * + ACL for HAService protocol used by HAAdmin to manage the + active and stand-by states of namenode. + + + + security.zkfc.protocol.acl + * + ACL for access to the ZK Failover Controller + + + + + security.qjournal.service.protocol.acl + * + ACL for QJournalProtocol, used by the NN to communicate with + JNs when using the QuorumJournalManager for edit logs. + + + + security.mrhs.client.protocol.acl + * + ACL for HSClientProtocol, used by job clients to + communciate with the MR History Server job status etc. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + + + security.resourcetracker.protocol.acl + * + ACL for ResourceTrackerProtocol, used by the + ResourceManager and NodeManager to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.resourcemanager-administration.protocol.acl + * + ACL for ResourceManagerAdministrationProtocol, for admin commands. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.applicationclient.protocol.acl + * + ACL for ApplicationClientProtocol, used by the ResourceManager + and applications submission clients to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.applicationmaster.protocol.acl + * + ACL for ApplicationMasterProtocol, used by the ResourceManager + and ApplicationMasters to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.containermanagement.protocol.acl + * + ACL for ContainerManagementProtocol protocol, used by the NodeManager + and ApplicationMasters to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.resourcelocalizer.protocol.acl + * + ACL for ResourceLocalizer protocol, used by the NodeManager + and ResourceLocalizer to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.job.task.protocol.acl + * + ACL for TaskUmbilicalProtocol, used by the map and reduce + tasks to communicate with the parent tasktracker. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. 
+ + + + security.job.client.protocol.acl + * + ACL for MRClientProtocol, used by job clients to + communciate with the MR ApplicationMaster to query job status etc. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.applicationhistory.protocol.acl + * + ACL for ApplicationHistoryProtocol, used by the timeline + server and the generic history service client to communicate with each other. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + diff --git a/file/assignment3/hadoop/hdfs-site.xml b/file/assignment3/hadoop/hdfs-site.xml new file mode 100644 index 0000000..6ee08e3 --- /dev/null +++ b/file/assignment3/hadoop/hdfs-site.xml @@ -0,0 +1,120 @@ + + + + dfs.replication + 3 + + + dfs.name.dir + /data/dfs/nn + + + dfs.data.dir + /data/dfs/dn + + + dfs.nameservices + Ucluster + + + dfs.ha.namenodes.Ucluster + nn1,nn2 + + + dfs.namenode.rpc-address.Ucluster.nn1 + uhadoop-ia1nlbku-master1:8020 + + + dfs.namenode.rpc-address.Ucluster.nn2 + uhadoop-ia1nlbku-master2:8020 + + + dfs.namenode.http-address.Ucluster.nn1 + uhadoop-ia1nlbku-master1:50070 + + + dfs.namenode.http-address.Ucluster.nn2 + uhadoop-ia1nlbku-master2:50070 + + + dfs.namenode.shared.edits.dir + qjournal://uhadoop-ia1nlbku-master1:8485;uhadoop-ia1nlbku-master2:8485;uhadoop-ia1nlbku-core1:8485/Ucluster + + + dfs.journalnode.edits.dir + /data/dfs/jn + + + dfs.client.failover.proxy.provider.Ucluster + org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider + + + dfs.ha.fencing.methods + sshfence(hadoop:22) + + + dfs.ha.fencing.ssh.connect-timeout + 30000 + SSH connection timeout, in milliseconds, to use with the builtin shfence fencer. + + + dfs.ha.fencing.ssh.private-key-files + /home/hadoop/conf/id_rsa + + + dfs.ha.automatic-failover.enabled + true + + + dfs.datanode.max.xcievers + 4096 + + + dfs.webhdfs.enabled + true + + + dfs.namenode.heartbeat.recheck-interval + 45000 + + + fs.trash.interval + 7320 + + + dfs.datanode.max.transfer.threads + 8192 + + + dfs.image.compress + true + + + dfs.namenode.num.checkpoints.retained + 12 + + + dfs.datanode.handler.count + 20 + + + dfs.namenode.handler.count + 20 + + + dfs.socket.timeout + 900000 + + + dfs.hosts.exclude + /home/hadoop/conf/excludes + + + dfs.client.read.shortcircuit + true + + + dfs.domain.socket.path + /var/lib/hadoop-hdfs/dn_socket + + \ No newline at end of file diff --git a/file/assignment3/hadoop/httpfs-env.sh b/file/assignment3/hadoop/httpfs-env.sh new file mode 100644 index 0000000..a0941e9 --- /dev/null +++ b/file/assignment3/hadoop/httpfs-env.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. +# + +# Set httpfs specific environment variables here. 
+ +# Settings for the Embedded Tomcat that runs HttpFS +# Java System properties for HttpFS should be specified in this variable +# +# export CATALINA_OPTS= + +# HttpFS logs directory +# +# export HTTPFS_LOG=${HTTPFS_HOME}/logs +export HTTPFS_LOG=/var/log/hadoop-hdfs + +# HttpFS temporary directory +# +# export HTTPFS_TEMP=${HTTPFS_HOME}/temp + +# The HTTP port used by HttpFS +# +# export HTTPFS_HTTP_PORT=14000 +export HTTPFS_HTTP_PORT=14000 + +# The Admin port used by HttpFS +# +# export HTTPFS_ADMIN_PORT=`expr ${HTTPFS_HTTP_PORT} + 1` + +# The hostname HttpFS server runs on +# +# export HTTPFS_HTTP_HOSTNAME=`hostname -f` + +# Indicates if HttpFS is using SSL +# +# export HTTPFS_SSL_ENABLED=false + +# The location of the SSL keystore if using SSL +# +# export HTTPFS_SSL_KEYSTORE_FILE=${HOME}/.keystore + +# The password of the SSL keystore if using SSL +# +# export HTTPFS_SSL_KEYSTORE_PASS=password \ No newline at end of file diff --git a/file/assignment3/hadoop/httpfs-log4j.properties b/file/assignment3/hadoop/httpfs-log4j.properties new file mode 100644 index 0000000..284a819 --- /dev/null +++ b/file/assignment3/hadoop/httpfs-log4j.properties @@ -0,0 +1,35 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. +# + +# If the Java System property 'httpfs.log.dir' is not defined at HttpFSServer start up time +# Setup sets its value to '${httpfs.home}/logs' + +log4j.appender.httpfs=org.apache.log4j.DailyRollingFileAppender +log4j.appender.httpfs.DatePattern='.'yyyy-MM-dd +log4j.appender.httpfs.File=${httpfs.log.dir}/httpfs.log +log4j.appender.httpfs.Append=true +log4j.appender.httpfs.layout=org.apache.log4j.PatternLayout +log4j.appender.httpfs.layout.ConversionPattern=%d{ISO8601} %5p %c{1} [%X{hostname}][%X{user}:%X{doAs}] %X{op} %m%n + +log4j.appender.httpfsaudit=org.apache.log4j.DailyRollingFileAppender +log4j.appender.httpfsaudit.DatePattern='.'yyyy-MM-dd +log4j.appender.httpfsaudit.File=${httpfs.log.dir}/httpfs-audit.log +log4j.appender.httpfsaudit.Append=true +log4j.appender.httpfsaudit.layout=org.apache.log4j.PatternLayout +log4j.appender.httpfsaudit.layout.ConversionPattern=%d{ISO8601} %5p [%X{hostname}][%X{user}:%X{doAs}] %X{op} %m%n + +log4j.logger.httpfsaudit=INFO, httpfsaudit + +log4j.logger.org.apache.hadoop.fs.http.server=INFO, httpfs +log4j.logger.org.apache.hadoop.lib=INFO, httpfs diff --git a/file/assignment3/hadoop/httpfs-signature.secret b/file/assignment3/hadoop/httpfs-signature.secret new file mode 100644 index 0000000..56466e9 --- /dev/null +++ b/file/assignment3/hadoop/httpfs-signature.secret @@ -0,0 +1 @@ +hadoop httpfs secret diff --git a/file/assignment3/hadoop/httpfs-site.xml b/file/assignment3/hadoop/httpfs-site.xml new file mode 100644 index 0000000..4a718e1 --- /dev/null +++ b/file/assignment3/hadoop/httpfs-site.xml @@ -0,0 +1,17 @@ + + + + + diff --git a/file/assignment3/hadoop/id_rsa b/file/assignment3/hadoop/id_rsa new file mode 100644 index 0000000..be2f8e0 --- /dev/null +++ b/file/assignment3/hadoop/id_rsa @@ -0,0 +1,27 @@ +-----BEGIN 
RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEAtbn8y8LombFy9dmv5efbOJSOEMpYe4lWOYIMKb9k2X/lJOrN +CRhHzWJa8n2xn6zyCWZ7ZZEBn7DY/YAmATyZ5NVRGj/XNQ9RwjokqWmMMA9u0anC +j7JMn5gaApufsEsNp3aokaBQZQqdZONijAtEb/62GcbOoq8h/tg8x7Uk+3Fu7X6d +hrUck3dTSQq2qHdi24sSq9zvu4oGF8guL5Eqxijre+eYylzaJC2LUwco/UInluf9 +Vzx0ch2A5qxyH5DzmpItZFMg2Z80alwQxx+1LnHfednbT1606LJkgair29h6R72i +svrztasrPawTq84MMCxiar32PRmIrKipKPfiCwIDAQABAoIBACUMauZbsTIMRESt +AbhcYYwSdTglGI7u+94zjilAtN3Gvj+dgvmUsqbDo4kGaR0FlD6oXwXg3zTgSAy+ +gIEGCtXlS2iPlV9i5Sc01V6YfxUZQF2MP3cuQYLT7pGTiqXVV05J2an+xgUjed0k +omWssmImypdMubne/I5JJXMNkiGUsSMMg5BzXMwrTwoSDfak5Bj8xVMEMLXm4XVJ +Gnar1EvSXa3jfVUSxzhN20bpUXol11Y0x04ssJWcHyFO9DHOJmjR/ycIq0AG5g15 +ue2XhJAQFV/vhq2yzxvB5tzfS1Hu7eDLSOL3Eke170MPOqetzjgbW5j5Ty6GoYSn +BI87jbkCgYEA8evYw9W3w7oNah8Rt4jysreX0dw+Uy+yG9AjKw6gA6oTU+Ck1HTV +yD+jsINinLVTCQBE6LrlO79N8CcXaj1dhL1KPMI5Ku+mpatPLkNEC2s9/lUEszpX +blsd7/WB8tMURV6WR5wqnFmj5npUxhwoxTmd/h6qogx7rxjRzyD2vAUCgYEAwE1b +TiXjPv3uXI8PbrzAA1sOvf0sOudmHZZ6bH7DnAg9pYzeRGjWykBB0fuQ2AMzPnhc +lIZ/hSRhEsLz5xX8BHy+clQLxs4DZpkxpUXY0w+VCF1Rpec0+fpVT3UjBWv5MDjH +1DcjUc6kDrFlyyiEMH1lG2ymUsGMWxN+YoE/ks8CgYEApmZI5PrduX0BuRrBvvIt +rYvmm2zYWbOW2NajOfyHR732KV19Qr1SRrivSLw2Wf/Gq4xJ2aKkBiKh4yugSW0I +JENnCr+1Prk0cQOSJQoThZ8wNv4Xi4f3l2qI/wJpbbKOYOCckYjzLjPiLqe6I8I+ +sNneuGozh976PAfgWI4d6FkCgYAPpHs/4R8aGafRCaYUuO/Zlged9sEpTXdmAr6U +or8gqx7wn4PZBroqG43/GbqPh7scYsgNIN+woePvlcInUwd8CfWn8SRAGLP4HZAH +RKY9jO/vjT++Ag+yIeXcn8eogj7z6DqBDbcmyWtY8p84JmSSWTDnSTBCXRIgunY2 +ZxMXywKBgQCfF5n1OwOwByXwuCfhYfs2B+t6WvkGqdQQuYW+LJGcJU1AkO2NvYxp +YWRybrlLvUz3crzJAxHO2If1jdpffDVY781HBYoeZwKfTg0Q+ts1zOW5PkR3rVZM +TCfMhcjgB1chwsR6Wf9N0NPRIc3+QSZStNOajCn/ATVTGliYObtb7w== +-----END RSA PRIVATE KEY----- diff --git a/file/assignment3/hadoop/kms-acls.xml b/file/assignment3/hadoop/kms-acls.xml new file mode 100644 index 0000000..cba69f4 --- /dev/null +++ b/file/assignment3/hadoop/kms-acls.xml @@ -0,0 +1,135 @@ + + + + + + + + + + hadoop.kms.acl.CREATE + * + + ACL for create-key operations. + If the user is not in the GET ACL, the key material is not returned + as part of the response. + + + + + hadoop.kms.acl.DELETE + * + + ACL for delete-key operations. + + + + + hadoop.kms.acl.ROLLOVER + * + + ACL for rollover-key operations. + If the user is not in the GET ACL, the key material is not returned + as part of the response. + + + + + hadoop.kms.acl.GET + * + + ACL for get-key-version and get-current-key operations. + + + + + hadoop.kms.acl.GET_KEYS + * + + ACL for get-keys operations. + + + + + hadoop.kms.acl.GET_METADATA + * + + ACL for get-key-metadata and get-keys-metadata operations. + + + + + hadoop.kms.acl.SET_KEY_MATERIAL + * + + Complementary ACL for CREATE and ROLLOVER operations to allow the client + to provide the key material when creating or rolling a key. + + + + + hadoop.kms.acl.GENERATE_EEK + * + + ACL for generateEncryptedKey CryptoExtension operations. + + + + + hadoop.kms.acl.DECRYPT_EEK + * + + ACL for decryptEncryptedKey CryptoExtension operations. + + + + + default.key.acl.MANAGEMENT + * + + default ACL for MANAGEMENT operations for all key acls that are not + explicitly defined. + + + + + default.key.acl.GENERATE_EEK + * + + default ACL for GENERATE_EEK operations for all key acls that are not + explicitly defined. + + + + + default.key.acl.DECRYPT_EEK + * + + default ACL for DECRYPT_EEK operations for all key acls that are not + explicitly defined. + + + + + default.key.acl.READ + * + + default ACL for READ operations for all key acls that are not + explicitly defined. 
+ + + + + diff --git a/file/assignment3/hadoop/kms-env.sh b/file/assignment3/hadoop/kms-env.sh new file mode 100644 index 0000000..7ac74bb --- /dev/null +++ b/file/assignment3/hadoop/kms-env.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. +# + +# Set kms specific environment variables here. + +# Settings for the Embedded Tomcat that runs KMS +# Java System properties for KMS should be specified in this variable +# +# export CATALINA_OPTS= + +# KMS logs directory +# +# export KMS_LOG=${KMS_HOME}/logs + +# KMS temporary directory +# +# export KMS_TEMP=${KMS_HOME}/temp + +# The HTTP port used by KMS +# +# export KMS_HTTP_PORT=16000 + +# The Admin port used by KMS +# +# export KMS_ADMIN_PORT=`expr ${KMS_HTTP_PORT} + 1` + +# The maximum number of Tomcat handler threads +# +# export KMS_MAX_THREADS=1000 + +# The maximum size of Tomcat HTTP header +# +# export KMS_MAX_HTTP_HEADER_SIZE=65536 + +# The location of the SSL keystore if using SSL +# +# export KMS_SSL_KEYSTORE_FILE=${HOME}/.keystore + +# The password of the SSL keystore if using SSL +# +# export KMS_SSL_KEYSTORE_PASS=password + +# The full path to any native libraries that need to be loaded +# (For eg. location of natively compiled tomcat Apache portable +# runtime (APR) libraries +# +# export JAVA_LIBRARY_PATH=${HOME}/lib/native diff --git a/file/assignment3/hadoop/kms-log4j.properties b/file/assignment3/hadoop/kms-log4j.properties new file mode 100644 index 0000000..408317b --- /dev/null +++ b/file/assignment3/hadoop/kms-log4j.properties @@ -0,0 +1,41 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. 
+# + +# If the Java System property 'kms.log.dir' is not defined at KMS start up time +# Setup sets its value to '${kms.home}/logs' + +log4j.appender.kms=org.apache.log4j.DailyRollingFileAppender +log4j.appender.kms.DatePattern='.'yyyy-MM-dd +log4j.appender.kms.File=${kms.log.dir}/kms.log +log4j.appender.kms.Append=true +log4j.appender.kms.layout=org.apache.log4j.PatternLayout +log4j.appender.kms.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n + +log4j.appender.kms-audit=org.apache.log4j.DailyRollingFileAppender +log4j.appender.kms-audit.DatePattern='.'yyyy-MM-dd +log4j.appender.kms-audit.File=${kms.log.dir}/kms-audit.log +log4j.appender.kms-audit.Append=true +log4j.appender.kms-audit.layout=org.apache.log4j.PatternLayout +log4j.appender.kms-audit.layout.ConversionPattern=%d{ISO8601} %m%n + +log4j.logger.kms-audit=INFO, kms-audit +log4j.additivity.kms-audit=false + +log4j.rootLogger=ALL, kms +log4j.logger.org.apache.hadoop.conf=ERROR +log4j.logger.org.apache.hadoop=INFO +log4j.logger.com.sun.jersey.server.wadl.generators.WadlGeneratorJAXBGrammarGenerator=OFF +# make zookeeper log level an explicit config, and not changing with rootLogger. +log4j.logger.org.apache.zookeeper=INFO +log4j.logger.org.apache.curator=INFO \ No newline at end of file diff --git a/file/assignment3/hadoop/kms-site.xml b/file/assignment3/hadoop/kms-site.xml new file mode 100644 index 0000000..c3c9a1c --- /dev/null +++ b/file/assignment3/hadoop/kms-site.xml @@ -0,0 +1,173 @@ + + + + + + + + hadoop.kms.key.provider.uri + jceks://file@/${user.home}/kms.keystore + + URI of the backing KeyProvider for the KMS. + + + + + hadoop.security.keystore.java-keystore-provider.password-file + kms.keystore.password + + If using the JavaKeyStoreProvider, the file name for the keystore password. + + + + + + + hadoop.kms.cache.enable + true + + Whether the KMS will act as a cache for the backing KeyProvider. + When the cache is enabled, operations like getKeyVersion, getMetadata, + and getCurrentKey will sometimes return cached data without consulting + the backing KeyProvider. Cached values are flushed when keys are deleted + or modified. + + + + + hadoop.kms.cache.timeout.ms + 600000 + + Expiry time for the KMS key version and key metadata cache, in + milliseconds. This affects getKeyVersion and getMetadata. + + + + + hadoop.kms.current.key.cache.timeout.ms + 30000 + + Expiry time for the KMS current key cache, in milliseconds. This + affects getCurrentKey operations. + + + + + + + hadoop.kms.audit.aggregation.window.ms + 10000 + + Duplicate audit log events within the aggregation window (specified in + ms) are quashed to reduce log traffic. A single message for aggregated + events is printed at the end of the window, along with a count of the + number of aggregated events. + + + + + + + hadoop.kms.authentication.type + simple + + Authentication type for the KMS. Can be either "simple" + or "kerberos". + + + + + hadoop.kms.authentication.kerberos.keytab + ${user.home}/kms.keytab + + Path to the keytab with credentials for the configured Kerberos principal. + + + + + hadoop.kms.authentication.kerberos.principal + HTTP/localhost + + The Kerberos principal to use for the HTTP endpoint. + The principal must start with 'HTTP/' as per the Kerberos HTTP SPNEGO specification. + + + + + hadoop.kms.authentication.kerberos.name.rules + DEFAULT + + Rules used to resolve Kerberos principal names. 
+ + + + + + + hadoop.kms.authentication.signer.secret.provider + random + + Indicates how the secret to sign the authentication cookies will be + stored. Options are 'random' (default), 'string' and 'zookeeper'. + If using a setup with multiple KMS instances, 'zookeeper' should be used. + + + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.path + /hadoop-kms/hadoop-auth-signature-secret + + The Zookeeper ZNode path where the KMS instances will store and retrieve + the secret from. + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.connection.string + #HOSTNAME#:#PORT#,... + + The Zookeeper connection string, a list of hostnames and port comma + separated. + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.auth.type + none + + The Zookeeper authentication type, 'none' (default) or 'sasl' (Kerberos). + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.keytab + /etc/hadoop/conf/kms.keytab + + The absolute path for the Kerberos keytab with the credentials to + connect to Zookeeper. + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.principal + kms/#HOSTNAME# + + The Kerberos service principal used to connect to Zookeeper. + + + + diff --git a/file/assignment3/hadoop/log4j.properties b/file/assignment3/hadoop/log4j.properties new file mode 100644 index 0000000..95afc61 --- /dev/null +++ b/file/assignment3/hadoop/log4j.properties @@ -0,0 +1,323 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Define some default values that can be overridden by system properties +hadoop.root.logger=INFO,console +hadoop.log.dir=. +hadoop.log.file=hadoop.log + +# Define the root logger to the system property "hadoop.root.logger". +log4j.rootLogger=${hadoop.root.logger}, EventCounter + +# Logging Threshold +log4j.threshold=ALL + +# Null Appender +log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender + +# +# Rolling File Appender - cap space usage at 5gb. 
+# +hadoop.log.maxfilesize=256MB +hadoop.log.maxbackupindex=20 +log4j.appender.RFA=org.apache.log4j.RollingFileAppender +log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} + +log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize} +log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex} + +log4j.appender.RFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +# Debugging Pattern format +#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# Daily Rolling File Appender +# + +log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} + +# Rollover at midnight +log4j.appender.DRFA.DatePattern=.yyyy-MM-dd + +log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +# Debugging Pattern format +#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# console +# Add "console" to rootlogger above if you want to use this +# + +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n + +# +# TaskLog Appender +# + +#Default values +hadoop.tasklog.taskid=null +hadoop.tasklog.iscleanup=false +hadoop.tasklog.noKeepSplits=4 +hadoop.tasklog.totalLogFileSize=100 +hadoop.tasklog.purgeLogSplits=true +hadoop.tasklog.logsRetainHours=12 + +log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender +log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} +log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup} +log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} + +log4j.appender.TLA.layout=org.apache.log4j.PatternLayout +log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n + +# +# HDFS block state change log from block manager +# +# Uncomment the following to log normal block state change +# messages from BlockManager in NameNode. +#log4j.logger.BlockStateChange=DEBUG + +# +#Security appender +# +hadoop.security.logger=INFO,NullAppender +hadoop.security.log.maxfilesize=256MB +hadoop.security.log.maxbackupindex=20 +log4j.category.SecurityLogger=${hadoop.security.logger} +hadoop.security.log.file=SecurityAuth-${user.name}.audit +log4j.appender.RFAS=org.apache.log4j.RollingFileAppender +log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} +log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout +log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize} +log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} + +# +# Daily Rolling Security appender +# +log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} +log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout +log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd + +# +# hadoop configuration logging +# + +# Uncomment the following line to turn off configuration deprecation warnings. 
+# log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN + +# +# hdfs audit logging +# +hdfs.audit.logger=INFO,NullAppender +hdfs.audit.log.maxfilesize=256MB +hdfs.audit.log.maxbackupindex=20 +log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} +log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false +log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender +log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log +log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout +log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n +log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize} +log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex} + +# +# NameNode metrics logging. +# The default is to retain two namenode-metrics.log files up to 64MB each. +# +namenode.metrics.logger=INFO,NullAppender +log4j.logger.NameNodeMetricsLog=${namenode.metrics.logger} +log4j.additivity.NameNodeMetricsLog=false +log4j.appender.NNMETRICSRFA=org.apache.log4j.RollingFileAppender +log4j.appender.NNMETRICSRFA.File=${hadoop.log.dir}/namenode-metrics.log +log4j.appender.NNMETRICSRFA.layout=org.apache.log4j.PatternLayout +log4j.appender.NNMETRICSRFA.layout.ConversionPattern=%d{ISO8601} %m%n +log4j.appender.NNMETRICSRFA.MaxBackupIndex=1 +log4j.appender.NNMETRICSRFA.MaxFileSize=64MB + +# +# DataNode metrics logging. +# The default is to retain two datanode-metrics.log files up to 64MB each. +# +datanode.metrics.logger=INFO,NullAppender +log4j.logger.DataNodeMetricsLog=${datanode.metrics.logger} +log4j.additivity.DataNodeMetricsLog=false +log4j.appender.DNMETRICSRFA=org.apache.log4j.RollingFileAppender +log4j.appender.DNMETRICSRFA.File=${hadoop.log.dir}/datanode-metrics.log +log4j.appender.DNMETRICSRFA.layout=org.apache.log4j.PatternLayout +log4j.appender.DNMETRICSRFA.layout.ConversionPattern=%d{ISO8601} %m%n +log4j.appender.DNMETRICSRFA.MaxBackupIndex=1 +log4j.appender.DNMETRICSRFA.MaxFileSize=64MB + +# +# mapred audit logging +# +mapred.audit.logger=INFO,NullAppender +mapred.audit.log.maxfilesize=256MB +mapred.audit.log.maxbackupindex=20 +log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} +log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false +log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender +log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log +log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout +log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n +log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize} +log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex} + +# Custom Logging levels + +#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG +#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG +#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG + +# Jets3t library +log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR + +# AWS SDK & S3A FileSystem +log4j.logger.com.amazonaws=ERROR +log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR +log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN + +# +# Event Counter Appender +# Sends counts of logging messages at different severity levels to Hadoop Metrics. 
+# +log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter + +# +# Job Summary Appender +# +# Use following logger to send summary to separate file defined by +# hadoop.mapreduce.jobsummary.log.file : +# hadoop.mapreduce.jobsummary.logger=INFO,JSA +# +hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} +hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log +hadoop.mapreduce.jobsummary.log.maxfilesize=256MB +hadoop.mapreduce.jobsummary.log.maxbackupindex=20 +log4j.appender.JSA=org.apache.log4j.RollingFileAppender +log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} +log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize} +log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex} +log4j.appender.JSA.layout=org.apache.log4j.PatternLayout +log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n +log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} +log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false + +# +# shuffle connection log from shuffleHandler +# Uncomment the following line to enable logging of shuffle connections +# log4j.logger.org.apache.hadoop.mapred.ShuffleHandler.audit=DEBUG + +# +# Yarn ResourceManager Application Summary Log +# +# Set the ResourceManager summary log filename +yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log +# Set the ResourceManager summary log level and appender +yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger} +#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY + +# To enable AppSummaryLogging for the RM, +# set yarn.server.resourcemanager.appsummary.logger to +# <LEVEL>,RMSUMMARY in hadoop-env.sh + +# Appender for ResourceManager Application Summary Log +# Requires the following properties to be set +# - hadoop.log.dir (Hadoop Log directory) +# - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename) +# - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender) + +log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger} +log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false +log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender +log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file} +log4j.appender.RMSUMMARY.MaxFileSize=256MB +log4j.appender.RMSUMMARY.MaxBackupIndex=20 +log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout +log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n + +# HS audit log configs +#mapreduce.hs.audit.logger=INFO,HSAUDIT +#log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger} +#log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false +#log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender +#log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log +#log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout +#log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n +#log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd + +# Http Server Request Logs +#log4j.logger.http.requests.namenode=INFO,namenoderequestlog +#log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log +#log4j.appender.namenoderequestlog.RetainDays=3 + +#log4j.logger.http.requests.datanode=INFO,datanoderequestlog +#log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender +#log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log +#log4j.appender.datanoderequestlog.RetainDays=3 + +#log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog +#log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender +#log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log +#log4j.appender.resourcemanagerrequestlog.RetainDays=3 + +#log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog +#log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender +#log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log +#log4j.appender.jobhistoryrequestlog.RetainDays=3 + +#log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog +#log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender +#log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log +#log4j.appender.nodemanagerrequestlog.RetainDays=3 + + +# WebHdfs request log on datanodes +# Specify -Ddatanode.webhdfs.logger=INFO,HTTPDRFA on datanode startup to +# direct the log to a separate file. +#datanode.webhdfs.logger=INFO,console +#log4j.logger.datanode.webhdfs=${datanode.webhdfs.logger} +#log4j.appender.HTTPDRFA=org.apache.log4j.DailyRollingFileAppender +#log4j.appender.HTTPDRFA.File=${hadoop.log.dir}/hadoop-datanode-webhdfs.log +#log4j.appender.HTTPDRFA.layout=org.apache.log4j.PatternLayout +#log4j.appender.HTTPDRFA.layout.ConversionPattern=%d{ISO8601} %m%n +#log4j.appender.HTTPDRFA.DatePattern=.yyyy-MM-dd + + +# Appender for viewing information for errors and warnings +yarn.ewma.cleanupInterval=300 +yarn.ewma.messageAgeLimitSeconds=86400 +yarn.ewma.maxUniqueMessages=250 +log4j.appender.EWMA=org.apache.hadoop.yarn.util.Log4jWarningErrorMetricsAppender +log4j.appender.EWMA.cleanupInterval=${yarn.ewma.cleanupInterval} +log4j.appender.EWMA.messageAgeLimitSeconds=${yarn.ewma.messageAgeLimitSeconds} +log4j.appender.EWMA.maxUniqueMessages=${yarn.ewma.maxUniqueMessages} diff --git a/file/assignment3/hadoop/mapred-env.cmd b/file/assignment3/hadoop/mapred-env.cmd new file mode 100644 index 0000000..f27943f --- /dev/null +++ b/file/assignment3/hadoop/mapred-env.cmd @@ -0,0 +1,20 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+ +set HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000 + +set HADOOP_MAPRED_ROOT_LOGGER=%HADOOP_LOGLEVEL%,RFA + diff --git a/file/assignment3/hadoop/mapred-env.sh b/file/assignment3/hadoop/mapred-env.sh new file mode 100644 index 0000000..a803ff3 --- /dev/null +++ b/file/assignment3/hadoop/mapred-env.sh @@ -0,0 +1,14 @@ +export JAVA_HOME=/usr/java/latest +export HADOOP_MAPRED_LOG_DIR=/var/log/hadoop-mapreduce +export HADOOP_MAPRED_PID_DIR=/var/run/hadoop-mapreduce + +export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000 + +export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA + +#export HADOOP_JOB_HISTORYSERVER_OPTS= +#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default. +#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger. +#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default. +#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default +#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0. \ No newline at end of file diff --git a/file/assignment3/hadoop/mapred-queues.xml.template b/file/assignment3/hadoop/mapred-queues.xml.template new file mode 100644 index 0000000..ce6cd20 --- /dev/null +++ b/file/assignment3/hadoop/mapred-queues.xml.template @@ -0,0 +1,92 @@ + + + + + + + + + + default + + + + + + + running + + + + + + + + + + diff --git a/file/assignment3/hadoop/mapred-site.xml b/file/assignment3/hadoop/mapred-site.xml new file mode 100644 index 0000000..c3e01c8 --- /dev/null +++ b/file/assignment3/hadoop/mapred-site.xml @@ -0,0 +1,51 @@ + + + + mapreduce.framework.name + yarn + + + yarn.app.mapreduce.am.staging-dir + /user + + + mapreduce.jobhistory.address + uhadoop-ia1nlbku-master2:10020 + + + mapreduce.job.reduce.slowstart.completedmaps + 0.95 + + + mapreduce.map.memory.mb + 2048 + + + mapreduce.reduce.memory.mb + 2048 + + + mapreduce.map.java.opts + -Xmx1843M + + + mapreduce.reduce.java.opts + -Xmx1843M + + + mapreduce.task.io.sort.mb + 512 + + + mapreduce.task.io.sort.factor + 100 + + + mapreduce.reduce.shuffle.parallelcopies + 50 + + + mapreduce.jobhistory.webapp.address + uhadoop-ia1nlbku-master2:19888 + + \ No newline at end of file diff --git a/file/assignment3/hadoop/mapred-site.xml.template b/file/assignment3/hadoop/mapred-site.xml.template new file mode 100644 index 0000000..761c352 --- /dev/null +++ b/file/assignment3/hadoop/mapred-site.xml.template @@ -0,0 +1,21 @@ + + + + + + + + + diff --git a/file/assignment3/hadoop/slaves b/file/assignment3/hadoop/slaves new file mode 100644 index 0000000..2fbb50c --- /dev/null +++ b/file/assignment3/hadoop/slaves @@ -0,0 +1 @@ +localhost diff --git a/file/assignment3/hadoop/ssl-client.xml.example b/file/assignment3/hadoop/ssl-client.xml.example new file mode 100644 index 0000000..a50dce4 --- /dev/null +++ b/file/assignment3/hadoop/ssl-client.xml.example @@ -0,0 +1,80 @@ + + + + + + + ssl.client.truststore.location + + Truststore to be used by clients like distcp. Must be + specified. + + + + + ssl.client.truststore.password + + Optional. Default value is "". + + + + + ssl.client.truststore.type + jks + Optional. The keystore file format, default value is "jks". + + + + + ssl.client.truststore.reload.interval + 10000 + Truststore reload check interval, in milliseconds. + Default value is 10000 (10 seconds). + + + + + ssl.client.keystore.location + + Keystore to be used by clients like distcp. Must be + specified. + + + + + ssl.client.keystore.password + + Optional. Default value is "". 
+ + + + + ssl.client.keystore.keypassword + + Optional. Default value is "". + + + + + ssl.client.keystore.type + jks + Optional. The keystore file format, default value is "jks". + + + + diff --git a/file/assignment3/hadoop/ssl-server.xml.example b/file/assignment3/hadoop/ssl-server.xml.example new file mode 100644 index 0000000..a6820e9 --- /dev/null +++ b/file/assignment3/hadoop/ssl-server.xml.example @@ -0,0 +1,88 @@ + + + + + + + ssl.server.truststore.location + + Truststore to be used by NN and DN. Must be specified. + + + + + ssl.server.truststore.password + + Optional. Default value is "". + + + + + ssl.server.truststore.type + jks + Optional. The keystore file format, default value is "jks". + + + + + ssl.server.truststore.reload.interval + 10000 + Truststore reload check interval, in milliseconds. + Default value is 10000 (10 seconds). + + + + + ssl.server.keystore.location + + Keystore to be used by NN and DN. Must be specified. + + + + + ssl.server.keystore.password + + Must be specified. + + + + + ssl.server.keystore.keypassword + + Must be specified. + + + + + ssl.server.keystore.type + jks + Optional. The keystore file format, default value is "jks". + + + + + ssl.server.exclude.cipher.list + TLS_ECDHE_RSA_WITH_RC4_128_SHA,SSL_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA, + SSL_RSA_WITH_DES_CBC_SHA,SSL_DHE_RSA_WITH_DES_CBC_SHA, + SSL_RSA_EXPORT_WITH_RC4_40_MD5,SSL_RSA_EXPORT_WITH_DES40_CBC_SHA, + SSL_RSA_WITH_RC4_128_MD5 + Optional. The weak security cipher suites that you want excluded + from SSL communication. + + + diff --git a/file/assignment3/hadoop/yarn-env.cmd b/file/assignment3/hadoop/yarn-env.cmd new file mode 100644 index 0000000..d863c1e --- /dev/null +++ b/file/assignment3/hadoop/yarn-env.cmd @@ -0,0 +1,60 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+ +@rem User for YARN daemons +if not defined HADOOP_YARN_USER ( + set HADOOP_YARN_USER=%yarn% +) + +if not defined YARN_CONF_DIR ( + set YARN_CONF_DIR=%HADOOP_YARN_HOME%\conf +) + +if defined YARN_HEAPSIZE ( + @rem echo run with Java heapsize %YARN_HEAPSIZE% + set JAVA_HEAP_MAX=-Xmx%YARN_HEAPSIZE%m +) + +if not defined YARN_LOG_DIR ( + set YARN_LOG_DIR=%HADOOP_YARN_HOME%\logs +) + +if not defined YARN_LOGFILE ( + set YARN_LOGFILE=yarn.log +) + +@rem default policy file for service-level authorization +if not defined YARN_POLICYFILE ( + set YARN_POLICYFILE=hadoop-policy.xml +) + +if not defined YARN_ROOT_LOGGER ( + set YARN_ROOT_LOGGER=%HADOOP_LOGLEVEL%,console +) + +set YARN_OPTS=%YARN_OPTS% -Dhadoop.log.dir=%YARN_LOG_DIR% +set YARN_OPTS=%YARN_OPTS% -Dyarn.log.dir=%YARN_LOG_DIR% +set YARN_OPTS=%YARN_OPTS% -Dhadoop.log.file=%YARN_LOGFILE% +set YARN_OPTS=%YARN_OPTS% -Dyarn.log.file=%YARN_LOGFILE% +set YARN_OPTS=%YARN_OPTS% -Dyarn.home.dir=%HADOOP_YARN_HOME% +set YARN_OPTS=%YARN_OPTS% -Dyarn.id.str=%YARN_IDENT_STRING% +set YARN_OPTS=%YARN_OPTS% -Dhadoop.home.dir=%HADOOP_YARN_HOME% +set YARN_OPTS=%YARN_OPTS% -Dhadoop.root.logger=%YARN_ROOT_LOGGER% +set YARN_OPTS=%YARN_OPTS% -Dyarn.root.logger=%YARN_ROOT_LOGGER% +if defined JAVA_LIBRARY_PATH ( + set YARN_OPTS=%YARN_OPTS% -Djava.library.path=%JAVA_LIBRARY_PATH% +) +set YARN_OPTS=%YARN_OPTS% -Dyarn.policy.file=%YARN_POLICYFILE% \ No newline at end of file diff --git a/file/assignment3/hadoop/yarn-env.sh b/file/assignment3/hadoop/yarn-env.sh new file mode 100644 index 0000000..5736d95 --- /dev/null +++ b/file/assignment3/hadoop/yarn-env.sh @@ -0,0 +1,127 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +export HADOOP_YARN_HOME=/home/hadoop +export YARN_LOG_DIR=/var/log/hadoop-yarn +export YARN_PID_DIR=/var/run/hadoop-yarn +export HADOOP_LIBEXEC_DIR=/home/hadoop/libexec +export JAVA_HOME=/usr/java/latest + + +# User for YARN daemons +export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn} + +# resolve links - $0 may be a softlink +export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}" + +# some Java parameters +# export JAVA_HOME=/home/y/libexec/jdk1.6.0/ +if [ "$JAVA_HOME" != "" ]; then + #echo "run java in $JAVA_HOME" + JAVA_HOME=$JAVA_HOME +fi + +if [ "$JAVA_HOME" = "" ]; then + echo "Error: JAVA_HOME is not set." + exit 1 +fi + +JAVA=$JAVA_HOME/bin/java +JAVA_HEAP_MAX=-Xmx1000m + +# For setting YARN specific HEAP sizes please use this +# Parameter and set appropriately +# YARN_HEAPSIZE=1000 + +# check envvars which might override default args +if [ "$YARN_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m" +fi + +# Resource Manager specific parameters + +# Specify the max Heapsize for the ResourceManager using a numerical value +# in the scale of MB. 
For example, to specify a JVM option of -Xmx1000m, set +# the value to 1000. +# This value will be overridden by an Xmx setting specified in either YARN_OPTS +# and/or YARN_RESOURCEMANAGER_OPTS. +# If not specified, the default value will be picked from either YARN_HEAPMAX +# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. +export YARN_RESOURCEMANAGER_HEAPSIZE=1024 + +# Specify the max Heapsize for the timeline server using a numerical value +# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set +# the value to 1000. +# This value will be overridden by an Xmx setting specified in either YARN_OPTS +# and/or YARN_TIMELINESERVER_OPTS. +# If not specified, the default value will be picked from either YARN_HEAPMAX +# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. +export YARN_TIMELINESERVER_HEAPSIZE=1000 + +# Specify the JVM options to be used when starting the ResourceManager. +# These options will be appended to the options specified as YARN_OPTS +# and therefore may override any similar flags set in YARN_OPTS +#export YARN_RESOURCEMANAGER_OPTS= + +# Node Manager specific parameters + +# Specify the max Heapsize for the NodeManager using a numerical value +# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set +# the value to 1000. +# This value will be overridden by an Xmx setting specified in either YARN_OPTS +# and/or YARN_NODEMANAGER_OPTS. +# If not specified, the default value will be picked from either YARN_HEAPMAX +# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. +export YARN_NODEMANAGER_HEAPSIZE=1024 + +# Specify the JVM options to be used when starting the NodeManager. +# These options will be appended to the options specified as YARN_OPTS +# and therefore may override any similar flags set in YARN_OPTS +#export YARN_NODEMANAGER_OPTS= + +# so that filenames w/ spaces are handled correctly in loops below +IFS= + + +# default log directory & file +if [ "$YARN_LOG_DIR" = "" ]; then + YARN_LOG_DIR="$HADOOP_YARN_HOME/logs" +fi +if [ "$YARN_LOGFILE" = "" ]; then + YARN_LOGFILE='yarn.log' +fi + +# default policy file for service-level authorization +if [ "$YARN_POLICYFILE" = "" ]; then + YARN_POLICYFILE="hadoop-policy.xml" +fi + +# restore ordinary behaviour +unset IFS + + +YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR" +YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR" +YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE" +YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE" +YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME" +YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING" +YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" +YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" +if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then + YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" +fi +YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE" +YARN_OPTS="$YARN_OPTS -Dfile.encoding=UTF-8" diff --git a/file/assignment3/hadoop/yarn-excludes b/file/assignment3/hadoop/yarn-excludes new file mode 100644 index 0000000..e69de29 diff --git a/file/assignment3/hadoop/yarn-site.xml b/file/assignment3/hadoop/yarn-site.xml new file mode 100644 index 0000000..817ed1f --- /dev/null +++ b/file/assignment3/hadoop/yarn-site.xml @@ -0,0 +1,276 @@ + + + + yarn.resourcemanager.connect.retry-interval.ms + 2000 + + + yarn.resourcemanager.ha.enabled + true + + + yarn.resourcemanager.ha.automatic-failover.enabled + true + 
+ + yarn.resourcemanager.ha.automatic-failover.embedded + true + + + yarn.resourcemanager.cluster-id + ucloud-yarn-rm-cluster + + + yarn.resourcemanager.ha.rm-ids + rm1,rm2 + + + yarn.resourcemanager.ha.id + rm1 + + + yarn.resourcemanager.recovery.enabled + true + + + yarn.resourcemanager.store.class + org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore + + + yarn.resourcemanager.zk-address + uhadoop-ia1nlbku-master1:2181,uhadoop-ia1nlbku-master2:2181,uhadoop-ia1nlbku-core1:2181 + + + yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms + 5000 + + + yarn.resourcemanager.scheduler.class + org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler + + + yarn.scheduler.fair.user-as-default-queue + true + + + yarn.scheduler.fair.allow-undeclared-pools + true + + + yarn.acl.enable + true + + + yarn.admin.acl + yarn,mapred,hdfs,hadoop + + + yarn.resourcemanager.address.rm1 + uhadoop-ia1nlbku-master1:23140 + + + yarn.resourcemanager.scheduler.address.rm1 + uhadoop-ia1nlbku-master1:23130 + + + yarn.resourcemanager.webapp.https.address.rm1 + uhadoop-ia1nlbku-master1:23189 + + + yarn.resourcemanager.webapp.address.rm1 + uhadoop-ia1nlbku-master1:23188 + + + yarn.resourcemanager.resource-tracker.address.rm1 + uhadoop-ia1nlbku-master1:23125 + + + yarn.resourcemanager.admin.address.rm1 + uhadoop-ia1nlbku-master1:23141 + + + yarn.resourcemanager.address.rm2 + uhadoop-ia1nlbku-master2:23140 + + + yarn.resourcemanager.scheduler.address.rm2 + uhadoop-ia1nlbku-master2:23130 + + + yarn.resourcemanager.webapp.https.address.rm2 + uhadoop-ia1nlbku-master2:23189 + + + yarn.resourcemanager.webapp.address.rm2 + uhadoop-ia1nlbku-master2:23188 + + + yarn.resourcemanager.resource-tracker.address.rm2 + uhadoop-ia1nlbku-master2:23125 + + + yarn.resourcemanager.admin.address.rm2 + uhadoop-ia1nlbku-master2:23141 + + + yarn.nodemanager.resource.memory-mb + 5460 + Physical memory, in MB, to be made available to running containers. + + + yarn.nodemanager.resource.cpu-vcores + 4 + Number of CPU cores that can be allocated for containers. + + + yarn.scheduler.maximum-allocation-mb + 65535 + + + yarn.nodemanager.local-dirs + /data/yarn/local + + + yarn.nodemanager.log-dirs + /data/yarn/logs + + + yarn.nodemanager.localizer.address + 0.0.0.0:23344 + + + yarn.nodemanager.webapp.address + 0.0.0.0:23999 + + + yarn.nodemanager.aux-services + mapreduce_shuffle,spark_shuffle + + + mapreduce.shuffle.port + 23080 + + + yarn.log-aggregation-enable + true + + + yarn.log-aggregation.retain-seconds + 5184000 + + + yarn.log-aggregation.retain-check-interval-seconds + 86400 + + + yarn.nodemanager.remote-app-log-dir + hdfs://Ucluster/var/log/hadoop-yarn/apps + + + yarn.nodemanager.aux-services.mapreduce_shuffle.class + org.apache.hadoop.mapred.ShuffleHandler + + + Classpath for typical applications. 
+ yarn.application.classpath + + $HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/* + + + yarn.nodemanager.container-executor.class + org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor + + + yarn.nodemanager.linux-container-executor.group + hadoop + + + hadoop.proxyuser.mapred.groups + * + + + hadoop.proxyuser.mapred.hosts + * + + + hadoop.proxyuser.hadoop.groups + * + + + hadoop.proxyuser.hadoop.hosts + * + + + yarn.scheduler.maximum-allocation-mb + 65535 + + + yarn.nodemanager.address + 0.0.0.0:23333 + + + yarn.resourcemanager.nodes.exclude-path + /home/hadoop/conf/yarn-excludes + + + yarn.scheduler.maximum-allocation-vcores + 32 + + + yarn.nodemanager.vmem-check-enabled + false + + + yarn.log.server.url + http://uhadoop-ia1nlbku-master2:19888/jobhistory/logs + + + yarn.timeline-service.hostname + uhadoop-ia1nlbku-master2 + + + yarn.timeline-service.enabled + true + + + yarn.resourcemanager.system-metrics-publisher.enabled + true + + + yarn.timeline-service.generic-application-history.enabled + true + + + yarn.timeline-service.leveldb-timeline-store.path + /data/yarn/timeline + + + yarn.timeline-service.leveldb-state-store.path + /data/yarn/timeline + + + yarn.timeline-service.address + ${yarn.timeline-service.hostname}:10200 + + + yarn.timeline-service.webapp.address + ${yarn.timeline-service.hostname}:8188 + + + yarn.timeline-service.webapp.https.address + ${yarn.timeline-service.hostname}:8190 + + + yarn.timeline-service.http-cross-origin.enabled + true + + + yarn.timeline-service.handler-thread-count + 10 + + + yarn.nodemanager.aux-services.spark_shuffle.class + org.apache.spark.network.yarn.YarnShuffleService + + \ No newline at end of file From 224b30e29a1c1733e401e7c8e040206bfc7b85e6 Mon Sep 17 00:00:00 2001 From: hxlong <1141741507@qq.com> Date: Fri, 16 Oct 2020 10:36:41 +0800 Subject: [PATCH 2/2] update assignment3 --- Assignment3.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Assignment3.md b/Assignment3.md index 00d5906..5c617aa 100644 --- a/Assignment3.md +++ b/Assignment3.md @@ -154,7 +154,7 @@ > > 3. Use the cat command to view the file contents and take a screenshot `the screenshot must include the folder information` (this views info.txt on the HDFS file system, not on the local file system) > -> `The user and directory mainly used during operations, avoid permission denied problems` +> `Mind which user and directory you use during operations, to avoid permission denied problems` ### 3) Client setup @@ -227,6 +227,10 @@ hadoop jar /home/hadoop/hadoop-examples.jar wordcount /input /output if /ou `**************Assignment 4: count the word frequency of every file under the /home/hadoop/etc/hadoop directory, take a screenshot, and insert it into the lab report***************` + + +`******Special note: once Assignment 4 is done, you may delete the UHadoop cluster, EIP, UHost, and other resources******` + #### How WordCount works > MapReduce consists of two main steps, a Map step and a Reduce step. To borrow a story that circulates widely online: suppose you need to count how many books a library holds. You can assign Xiao Ming to count shelf 1 and Xiao Hong to count shelf 2; this delegation is the Map step. Finally, once everyone has finished counting the shelves they are responsible for, the per-person results are summed up; that is the Reduce step. The figure below illustrates how WordCount is implemented: [WordCount implementation](https://hadoop.apache.org/docs/r1.0.4/cn/mapred_tutorial.html#用法). @@ -237,9 +241,9 @@ hadoop jar /home/hadoop/hadoop-examples.jar wordcount /input /output if /ou `Assignment requirements` -> Folder to analyze: [/home/hadoop/etc/hadoop](file/assginment3/hadoop) +> Folder to analyze: [/home/hadoop/etc/hadoop](file/assignment3/hadoop/) > > Run it locally and record the execution time -`******Special note: after the experiment is finished, please delete the UHadoop cluster, EIP, UHost, and other resources******` +
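
For the local single-threaded WordCount required above, a minimal sketch in Python (the assignment allows any language). Tokenizing on non-word characters and timing with `time.perf_counter` are illustrative choices here, not requirements; Hadoop's own wordcount example splits on whitespace instead:

```python
import os
import re
import time
from collections import Counter


def wordcount(directory):
    """Count word frequencies across every regular file in `directory`, single-threaded."""
    counts = Counter()
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not os.path.isfile(path):
            continue  # skip anything that is not a regular file
        with open(path, encoding="utf-8", errors="ignore") as f:
            for line in f:
                # "Map": tokenize the line; "Reduce": the Counter accumulates totals.
                counts.update(word for word in re.split(r"\W+", line) if word)
    return counts


if __name__ == "__main__":
    start = time.perf_counter()
    counts = wordcount("/home/hadoop/etc/hadoop")  # the folder named in the assignment
    elapsed = time.perf_counter() - start
    for word, freq in counts.most_common(10):
        print(f"{word}\t{freq}")
    print(f"total words: {sum(counts.values())}, elapsed: {elapsed:.3f}s")
```

Run it with `python3 wordcount.py`; it prints the ten most frequent words plus the elapsed time, which you can record in the lab report and compare with the runtime of the hadoop-examples wordcount job on the cluster.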