Hadoop安装部署

发布时间 2023-03-31 10:59:01作者: 我是真的想笑


 

解压安装包

tar -zxvf hadoop-2.8.5.tar.gz -C /opt/app/

删除文档

位置:$HADOOP_HOME/share

rm -rf doc

修改配置文件

HDFS配置

位置:$HADOOP_HOME/etc/hadoop/

vi hadoop-env.sh

# The java implementation to use.
export JAVA_HOME=/opt/app/jdk1.8.0_202/

vi hdfs-site.xml

<configuration>
<!--namenode的rpc通信地址-->
<property>
<name>dfs.namenode.rpc-address</name>
<value>linux01:9000</value>
</property>
<!--secondary namenode的http地址-->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>linux02:50090</value>
</property>
<!--namenode的元数据持久化存储目录-->
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/hdpdata/name/</value>
</property>
<!--secondarynamenode的存储目录-->
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>/opt/hdpdata/secondayname/</value>
</property>
<!--data的块存储目录-->
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/hdpdata/data/</value>
</property>
</configuration>

vi core-site.xml

<!--hdfs客户端默认访问的文件系统-->
<property>
<name>fs.defaultFS</name>
<value>hdfs://linux01:9000/</value>
</property>

Yarn配置

vi yarn-site.xml

<!--主节点所在机器-->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>linux01</value>
</property>

<!--为mr程序提供shuffle服务-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>

MapReduce配置

cp mapred-site.xml.template mapred-site.xml

vi mapred-site.xml

<!--mr运行模型,默认为local-->
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>

拷贝到其他机器

for i in {2..3}

> do

> scp -r /opt/app/hadoop-2.8.5 linux0${i}:/opt/app/

> done

配置环境变量

vi /etc/profile

export JAVA_HOME=/opt/app/jdk1.8.0_202
export HADOOP_HOME=/opt/app/hadoop-2.8.5
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin

source /etc/profile

初始化元数据目录

hadoop namenode -format

启动组件

启动HDFS

  • 单独启动

hadoop-daemon.sh start namenode

hadoop-daemon.sh start datanode

hadoop-daemon.sh start secondarynamenode

hadoop-daemon.sh stop namenode

hadoop-daemon.sh stop datanode

hadoop-daemon.sh stop secondarynamenode

  • 批量启动

vi slaves

linux01
linux02
linux03

start-dfs.sh

stop-dfs.sh

启动Yarn

  • 单独启动

yarn-daemon.sh start resourcemanager

yarn-daemon.sh stop resourcemanager

yarn-daemon.sh start nodemanager

yarn-daemon.sh stop nodemanager

  • 批量启动

start-yarn.sh

stop-yarn.sh

测试MapReduce

cd /opt/app/hadoop-2.8.5/share/hadoop/mapreduce

hadoop jar hadoop-mapreduce-examples-2.8.5.jar

hadoop jar hadoop-mapreduce-examples-2.8.5.jar pi 10 10

注意事项

  • 元数据目录不会像DateNode块存储目录那样根据配置文件自动创建

cat /opt/hdpdata/name/current/VERSION

#Mon Jan 11 17:34:40 CST 2021
namespaceID=1710038
clusterID=CID-e944a738-e42e-4f76-add1-74180a7a7ed1 #集群ID
cTime=1610357680824 #创建时间
storageType=NAME_NODE
blockpoolID=BP-1113767523-192.168.1.101-1610357680824 #块池ID,NameNode的IP,创建时间
layoutVersion=-63
  • 新的DateNode会去找NameNode注册,NameNode会返回集群ID、块池ID给DateNode,DateNode创建集群存储目录

ll /opt/hdpdata/data/current

drwx------. 4 root root 4096 Jan 11 18:42 BP-1113767523-192.168.1.101-1610357680824
-rw-r--r--. 1 root root  229 Jan 11 18:42 VERSION

cat /opt/hdpdata/data/current/VERSION

#Mon Jan 11 18:42:04 CST 2021
storageID=DS-cb0fb282-8c4a-46f9-b52a-9fce1cc1f7e4
clusterID=CID-e944a738-e42e-4f76-add1-74180a7a7ed1
cTime=0
datanodeUuid=99f887b8-fb83-4dd5-8ce8-f4411029f7cc
storageType=DATA_NODE
layoutVersion=-57
  • 监听端口
    • 9000:hdfs的rpc端口,DataNode、Client与NameNode通信的端口
    • 50070:hdfs的http服务端口
    • 8088:yarn的http服务端口

jps

5765 NameNode
7350 NodeManager
5897 DataNode
7257 ResourceManager
8767 Jps

netstat -nltp | grep 5765

tcp        0      0 0.0.0.0:50070               0.0.0.0:*                   LISTEN      5765/java           
tcp        0      0 192.168.1.101:9000          0.0.0.0:*                   LISTEN      5765/java          

netstat -nltp | grep 7257

tcp        0      0 ::ffff:192.168.1.101:8088   :::*                        LISTEN      7257/java           
tcp        0      0 ::ffff:192.168.1.101:8030   :::*                        LISTEN      7257/java           
tcp        0      0 ::ffff:192.168.1.101:8031   :::*                        LISTEN      7257/java           
tcp        0      0 ::ffff:192.168.1.101:8032   :::*                        LISTEN      7257/java           
tcp        0      0 ::ffff:192.168.1.101:8033   :::*                        LISTEN      7257/java           
  • 查看启动命令

ps -ef | grep java

root       5765      1  0 Jan11 ?        00:01:00 /opt/app/jdk1.8.0_202//bin/java -Dproc_namenode -Xmx1000m -Djava.net.preferIPv4Stack=true -Dhadoop.log.dir=/opt/app/hadoop-2.8.5/logs -Dhadoop.log.file=hadoop.log -Dhadoop.home.dir=/opt/app/hadoop-2.8.5 -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,console -Djava.library.path=/opt/app/hadoop-2.8.5/lib/native -Dhadoop.policy.file=hadoop-policy.xml -Djava.net.preferIPv4Stack=true -Djava.net.preferIPv4Stack=true -Djava.net.preferIPv4Stack=true -Dhadoop.log.dir=/opt/app/hadoop-2.8.5/logs -Dhadoop.log.file=hadoop-root-namenode-linux01.log -Dhadoop.home.dir=/opt/app/hadoop-2.8.5 -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,RFA -Djava.library.path=/opt/app/hadoop-2.8.5/lib/native -Dhadoop.policy.file=hadoop-policy.xml -Djava.net.preferIPv4Stack=true -Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender -Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender -Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender -Dhadoop.security.logger=INFO,RFAS org.apache.hadoop.hdfs.server.namenode.NameNode
root       5897      1  0 Jan11 ?        00:00:43 /opt/app/jdk1.8.0_202//bin/java -Dproc_datanode -Xmx1000m -Djava.net.preferIPv4Stack=true -Dhadoop.log.dir=/opt/app/hadoop-2.8.5/logs -Dhadoop.log.file=hadoop.log -Dhadoop.home.dir=/opt/app/hadoop-2.8.5 -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,console -Djava.library.path=/opt/app/hadoop-2.8.5/lib/native -Dhadoop.policy.file=hadoop-policy.xml -Djava.net.preferIPv4Stack=true -Djava.net.preferIPv4Stack=true -Djava.net.preferIPv4Stack=true -Dhadoop.log.dir=/opt/app/hadoop-2.8.5/logs -Dhadoop.log.file=hadoop-root-datanode-linux01.log -Dhadoop.home.dir=/opt/app/hadoop-2.8.5 -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,RFA -Djava.library.path=/opt/app/hadoop-2.8.5/lib/native -Dhadoop.policy.file=hadoop-policy.xml -Djava.net.preferIPv4Stack=true -server -Dhadoop.security.logger=ERROR,RFAS -Dhadoop.security.logger=ERROR,RFAS -Dhadoop.security.logger=ERROR,RFAS -Dhadoop.security.logger=INFO,RFAS org.apache.hadoop.hdfs.server.datanode.DataNode
root       7257      1  0 01:22 pts/0    00:00:43 /opt/app/jdk1.8.0_202//bin/java -Dproc_resourcemanager -Xmx1000m -Dhadoop.log.dir=/opt/app/hadoop-2.8.5/logs -Dyarn.log.dir=/opt/app/hadoop-2.8.5/logs -Dhadoop.log.file=yarn-root-resourcemanager-linux01.log -Dyarn.log.file=yarn-root-resourcemanager-linux01.log -Dyarn.home.dir= -Dyarn.id.str=root -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/opt/app/hadoop-2.8.5/lib/native -Dyarn.policy.file=hadoop-policy.xml -Dhadoop.log.dir=/opt/app/hadoop-2.8.5/logs -Dyarn.log.dir=/opt/app/hadoop-2.8.5/logs -Dhadoop.log.file=yarn-root-resourcemanager-linux01.log -Dyarn.log.file=yarn-root-resourcemanager-linux01.log -Dyarn.home.dir=/opt/app/hadoop-2.8.5 -Dhadoop.home.dir=/opt/app/hadoop-2.8.5 -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/opt/app/hadoop-2.8.5/lib/native -classpath /opt/app/hadoop-2.8.5/etc/hadoop:/opt/app/hadoop-2.8.5/etc/hadoop:/opt/app/hadoop-2.8.5/etc/hadoop:/opt/app/hadoop-2.8.5/share/hadoop/common/lib/*:/opt/app/hadoop-2.8.5/share/hadoop/common/*:/opt/app/hadoop-2.8.5/share/hadoop/hdfs:/opt/app/hadoop-2.8.5/share/hadoop/hdfs/lib/*:/opt/app/hadoop-2.8.5/share/hadoop/hdfs/*:/opt/app/hadoop-2.8.5/share/hadoop/yarn/lib/*:/opt/app/hadoop-2.8.5/share/hadoop/yarn/*:/opt/app/hadoop-2.8.5/share/hadoop/mapreduce/lib/*:/opt/app/hadoop-2.8.5/share/hadoop/mapreduce/*:/opt/app/hadoop-2.8.5/contrib/capacity-scheduler/*.jar:/opt/app/hadoop-2.8.5/contrib/capacity-scheduler/*.jar:/opt/app/hadoop-2.8.5/contrib/capacity-scheduler/*.jar:/opt/app/hadoop-2.8.5/share/hadoop/yarn/*:/opt/app/hadoop-2.8.5/share/hadoop/yarn/lib/*:/opt/app/hadoop-2.8.5/etc/hadoop/rm-config/log4j.properties org.apache.hadoop.yarn.server.resourcemanager.ResourceManager
root       7350      1  0 01:22 ?        00:00:25 /opt/app/jdk1.8.0_202//bin/java -Dproc_nodemanager -Xmx1000m -Dhadoop.log.dir=/opt/app/hadoop-2.8.5/logs -Dyarn.log.dir=/opt/app/hadoop-2.8.5/logs -Dhadoop.log.file=yarn-root-nodemanager-linux01.log -Dyarn.log.file=yarn-root-nodemanager-linux01.log -Dyarn.home.dir= -Dyarn.id.str=root -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/opt/app/hadoop-2.8.5/lib/native -Dyarn.policy.file=hadoop-policy.xml -server -Dhadoop.log.dir=/opt/app/hadoop-2.8.5/logs -Dyarn.log.dir=/opt/app/hadoop-2.8.5/logs -Dhadoop.log.file=yarn-root-nodemanager-linux01.log -Dyarn.log.file=yarn-root-nodemanager-linux01.log -Dyarn.home.dir=/opt/app/hadoop-2.8.5 -Dhadoop.home.dir=/opt/app/hadoop-2.8.5 -Dhadoop.root.logger=INFO,RFA -Dyarn.root.logger=INFO,RFA -Djava.library.path=/opt/app/hadoop-2.8.5/lib/native -classpath /opt/app/hadoop-2.8.5/etc/hadoop:/opt/app/hadoop-2.8.5/etc/hadoop:/opt/app/hadoop-2.8.5/etc/hadoop:/opt/app/hadoop-2.8.5/share/hadoop/common/lib/*:/opt/app/hadoop-2.8.5/share/hadoop/common/*:/opt/app/hadoop-2.8.5/share/hadoop/hdfs:/opt/app/hadoop-2.8.5/share/hadoop/hdfs/lib/*:/opt/app/hadoop-2.8.5/share/hadoop/hdfs/*:/opt/app/hadoop-2.8.5/share/hadoop/yarn/lib/*:/opt/app/hadoop-2.8.5/share/hadoop/yarn/*:/opt/app/hadoop-2.8.5/share/hadoop/mapreduce/lib/*:/opt/app/hadoop-2.8.5/share/hadoop/mapreduce/*:/contrib/capacity-scheduler/*.jar:/contrib/capacity-scheduler/*.jar:/opt/app/hadoop-2.8.5/share/hadoop/yarn/*:/opt/app/hadoop-2.8.5/share/hadoop/yarn/lib/*:/opt/app/hadoop-2.8.5/etc/hadoop/nm-config/log4j.properties org.apache.hadoop.yarn.server.nodemanager.NodeManager
root       8795   8754  0 02:50 pts/1    00:00:00 grep java