主机配置: 192.168.88.101 test1 192.168.88.102 test2 192.168.88.103 test3
Hadoop 上传压缩包并且解压
hadoop百度云:链接:https://pan.baidu.com/s/1DRV_x7Q_ZTUO4KMkr2-6Qg?pwd=not3
# 先配置NameNode主机:
# 上传 hadoop文件压缩包 到NameNode主机,并且解压到/export/server/
[test@test1 ~]$ su - # 切换至ROOT用户
[root@test1 ~]# rz # 上传hadoop文件包
[root@test1 ~]# tar -zxvf hadoop-3.3.4.tar.gz -C /export/server # 解压
[root@test1 ~]# cd /export/server/ # 进入配置文件夹
HDFS 四个文件配置
# (1): 配置workers文件
[root@test1 server]# cd hadoop-3.3.4/etc/hadoop/ # 此时hadoop软链接尚未创建,需使用解压后的目录名
[root@test1 hadoop]# vim workers
test1
test2
test3
# (2): 配置hadoop-env.sh文件
[root@test1 hadoop]# vim hadoop-env.sh
export JAVA_HOME=/export/server/jdk # 配置了 Java 的安装目录
export HADOOP_HOME=/export/server/hadoop # 配置了 Hadoop 的安装目录
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop # 配置了 Hadoop 的配置文件所在目录。
export HADOOP_LOG_DIR=$HADOOP_HOME/logs # 配置了 Hadoop 日志文件的存放目录。
# (3): 配置core-site.xml文件 test1 -> 自定义
[root@test1 hadoop]# vim core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://test1:8020</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
</configuration>
# (4): 配置hdfs-site.xml文件 test1,test2,test3 -> 自定义
[root@test1 hadoop]# vim hdfs-site.xml
<configuration>
<property>
<name>dfs.datanode.data.dir.perm</name>
<value>700</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/nn</value>
</property>
<property>
<name>dfs.namenode.hosts</name>
<value>test1,test2,test3</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>268435456</value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>100</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/dn</value>
</property>
</configuration>
HDFS 配置文件分发节点
# 将配置好的hadoop分发 到 每个主机上
[root@test1 server]# scp -r /export/server/hadoop-3.3.4 test2:/export/server/
[root@test1 server]# scp -r /export/server/hadoop-3.3.4 test3:/export/server/
...
HDFS 数据存储文件夹
# NameNode主机添加 /data/dn /data/nn
# DataNode主机创建 /data/dn
[root@test1 hadoop]# mkdir -p /data/nn # test1
[root@test1 hadoop]# mkdir -p /data/dn # test1
[root@test2 hadoop]# mkdir -p /data/dn # test2
[root@test3 hadoop]# mkdir -p /data/dn # test3
...
Hadoop 软链接
# 所有主机 创建 hadoop软链接 - 用 ll 查看是否创建成功
[root@test1 server]# ln -s /export/server/hadoop-3.3.4 hadoop
[root@test2 server]# ln -s /export/server/hadoop-3.3.4 hadoop
...
Hadoop 变量环境
# 所有主机 添加 hadoop 变量环境 并且激活
[root@test1 server]# vim /etc/profile
export HADOOP_HOME=/export/server/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
[root@test1 server]# source /etc/profile
...
Hadoop 授权用户
# 所有主机 相关文件夹授权给hadoop用户
[root@test1 server]# chown -R hadoop:hadoop /data
[root@test1 server]# chown -R hadoop:hadoop /export
...
HDFS 启动
# NameNode主机-初始化系统-开启HDFS集群
[root@test1 server]# su - hadoop
[hadoop@test1 ~]$ cd /export/server/hadoop/etc/hadoop/ # 切换为hadoop用户后提示符变为 $
[hadoop@test1 ~]$ hdfs namenode -format # 格式化NameNode(hadoop namenode -format 在Hadoop 3中已过时)
[hadoop@test1 ~]$ start-dfs.sh # 启动全部hdfs集群
Starting namenodes on [test1]
Starting datanodes
Starting secondary namenodes [test1]
[hadoop@test1 server]$ jps
19824 DataNode
20118 SecondaryNameNode
20279 Jps
19694 NameNode
# WEB管理地址 点击 Live Nodes 查看集群台数
http://192.168.88.101:9870
#单独控制:
$HADOOP_HOME/bin/hdfs --daemon (start | status | stop) (namenode | secondarynamenode | datanode) # Hadoop 3推荐写法;旧版脚本 hadoop-daemon.sh 位于 sbin 且已过时
MapReduce 文件配置
# (1) 配置 mapred-env.sh
[hadoop@test1 server]$ cd /export/server/hadoop/etc/hadoop/
[hadoop@test1 hadoop]$ vim mapred-env.sh
export JAVA_HOME=/export/server/jdk
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000 # JobHistoryServer进程内存为1G
export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA # 日志级别为INFO
# (2) 配置 mapred-site.xml test1-自定义
[hadoop@test1 hadoop]$ vim mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description></description>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>test1:10020</value>
<description></description>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>test1:19888</value>
<description></description>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>/data/mr-history/tmp</value>
<description></description>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>/data/mr-history/done</value>
<description></description>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
</configuration>
YARN 文件配置
# (1) 配置yarn-env.sh
[hadoop@test1 hadoop]$ vim yarn-env.sh
export JAVA_HOME=/export/server/jdk
export HADOOP_HOME=/export/server/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_LOG_DIR=$HADOOP_HOME/logs
# (2) 配置 yarn-site.xml test1-自定义
[hadoop@test1 hadoop]$ vim yarn-site.xml
<configuration>
<property>
<name>yarn.log.server.url</name>
<value>http://test1:19888/jobhistory/logs</value>
<description></description>
</property>
<property>
<name>yarn.web-proxy.address</name>
<value>test1:8089</value>
<description>proxy server hostname and port</description>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
<description>Configuration to enable or disable log aggregation</description>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/tmp/logs</value>
<description>Configuration to enable or disable log aggregation</description>
</property>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>test1</value>
<description></description>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
<description></description>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/data/nm-local</value>
<description>Comma-separated list of paths on the local filesystem where intermediate data is written.</description>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/data/nm-log</value>
<description>Comma-separated list of paths on the local filesystem where logs are written.</description>
</property>
<property>
<name>yarn.nodemanager.log.retain-seconds</name>
<value>10800</value>
<description>Default time (in seconds) to retain log files on the NodeManager Only applicable if log-aggregation is disabled.</description>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>Shuffle service that needs to be set for Map Reduce applications.</description>
</property>
</configuration>
YARN&MapReduce 配置文件分发节点
# 将MapReduce配置文件与yarn配置文件分发到其他节点
# test2:
scp /export/server/hadoop/etc/hadoop/mapred-env.sh test2:/export/server/hadoop/etc/hadoop/
scp /export/server/hadoop/etc/hadoop/mapred-site.xml test2:/export/server/hadoop/etc/hadoop/
scp /export/server/hadoop/etc/hadoop/yarn-env.sh test2:/export/server/hadoop/etc/hadoop/
scp /export/server/hadoop/etc/hadoop/yarn-site.xml test2:/export/server/hadoop/etc/hadoop/
# test3:
scp /export/server/hadoop/etc/hadoop/mapred-env.sh test3:/export/server/hadoop/etc/hadoop/
scp /export/server/hadoop/etc/hadoop/mapred-site.xml test3:/export/server/hadoop/etc/hadoop/
scp /export/server/hadoop/etc/hadoop/yarn-env.sh test3:/export/server/hadoop/etc/hadoop/
scp /export/server/hadoop/etc/hadoop/yarn-site.xml test3:/export/server/hadoop/etc/hadoop/
...分发N台
YARN 启动
# 启动YARN(MapReduce作为运行在YARN上的计算框架,无需单独启动)
[hadoop@test1 hadoop]$ $HADOOP_HOME/sbin/start-yarn.sh # 停止stop
Starting resourcemanager
Starting nodemanagers
[hadoop@test1 hadoop]$ jps
25338 ResourceManager
25456 NodeManager
25668 WebAppProxyServer
10481 NameNode
10897 SecondaryNameNode
10610 DataNode
25916 Jps
# 启动历史服务器(mapred 命令位于 $HADOOP_HOME/bin 目录,与下方"单独控制"一致)
[hadoop@test1 bin]$ $HADOOP_HOME/bin/mapred --daemon start historyserver # 停止换成stop
[hadoop@test1 bin]$ jps
25456 NodeManager
10481 NameNode
10897 SecondaryNameNode
10610 DataNode
25668 WebAppProxyServer
26230 Jps
25338 ResourceManager
26175 JobHistoryServer
# WEB UI控制台地址 点击nodes查看节点
http://192.168.88.101:8088
# 单独控制:
$HADOOP_HOME/bin/yarn --daemon (start|stop) (resourcemanager|nodemanager|proxyserver)
$HADOOP_HOME/bin/mapred --daemon (start|stop) historyserver