Node (role) | IP | Hostname |
---|---|---|
NTP (clock) server | 192.168.100.101 | seactive
Active-Namenode+zkfc | 192.168.100.101 | seactive
Standby-Namenode+zkfc | 192.168.100.102 | sestandby
Datanode | 192.168.100.101 | seactive |
Datanode | 192.168.100.102 | sestandby |
Datanode | 192.168.100.103 | senode |
Zookeeper-1 | 192.168.100.101 | seactive |
Zookeeper-2 | 192.168.100.102 | sestandby |
Zookeeper-3 | 192.168.100.103 | senode |
Journalnode-1 | 192.168.100.101 | seactive |
Journalnode-2 | 192.168.100.102 | sestandby |
Journalnode-3 | 192.168.100.103 | senode |
Active-Resourcemanager | 192.168.100.101 | seactive |
Standby-Resourcemanager | 192.168.100.102 | sestandby |
NodeManager | 192.168.100.101 | seactive |
NodeManager | 192.168.100.102 | sestandby |
NodeManager | 192.168.100.103 | senode |
- User
useradd hadoop
passwd hadoop
vim /etc/sudoers            # as root; add the line below
hadoop ALL=(ALL) NOPASSWD: ALL
- Static IP
su hadoop
sudo vim /etc/sysconfig/network-scripts/ifcfg-ens33
# delete the UUID line
IPADDR="192.168.100.101"    # 192.168.100.102 / 192.168.100.103 on the other nodes
GATEWAY="192.168.100.2"
DNS1="192.168.100.2"
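For reference, a complete ifcfg-ens33 on seactive could look like the sketch below; TYPE, BOOTPROTO, ONBOOT and the restart command are assumptions typical of a CentOS 7 VM, not part of the original notes:
TYPE="Ethernet"
BOOTPROTO="static"
NAME="ens33"
DEVICE="ens33"
ONBOOT="yes"
IPADDR="192.168.100.101"
GATEWAY="192.168.100.2"
DNS1="192.168.100.2"
sudo systemctl restart network     # or rely on the reboot below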
- Hostname
sudo vim /etc/hostname
seactive                    # sestandby / senode on the other nodes
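Alternatively, hostnamectl writes /etc/hostname and applies the new name immediately:
sudo hostnamectl set-hostname seactive     # sestandby / senode on the other nodes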
- hosts
sudo vim /etc/hosts
192.168.100.101 seactive
192.168.100.102 sestandby
192.168.100.103 senode
sudo reboot
- Passwordless SSH login
ssh-keygen                  # on each Linux node, as the hadoop user
ssh-copy-id localhost
ssh-keygen.exe              # on the Windows workstation (PowerShell)
scp $env:UserProfile\.ssh\id_rsa.pub hadoop@192.168.100.101:~/.ssh/keys
cd ~/.ssh                   # back on seactive
cat keys >> authorized_keys
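If the nodes should also log in to each other without a password (used later by xsync, the ssh checks below, and sshfence), a minimal sketch run as hadoop on every node is:
for h in seactive sestandby senode
do
    ssh-copy-id $h
done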
- xsync
mkdir ~/bin
vim ~/bin/xsync.sh
#!/bin/bash
if [ $# -lt 1 ]
then
    echo "Not enough arguments"
    exit
fi
for host in seactive sestandby senode
do
    echo "================ $host ================"
    for file in $@
    do
        if [ -e $file ]
        then
            pdir=$(cd -P $(dirname $file); pwd)
            fname=$(basename $file)
            ssh $host "sudo mkdir -p $pdir"
            rsync -av $pdir/$fname $USER@$host:$pdir
        else
            echo "$file does not exist"
        fi
    done
done
chmod 700 ~/bin/xsync.sh
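A usage sketch: push a file or directory to all three hosts, e.g. the bin directory itself. Note that rsync runs as $USER, so the remote destination directory must be writable by hadoop.
~/bin/xsync.sh ~/bin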
- Remove the bundled Java
rpm -qa | grep java-1.8.0
rpm -qa | grep java-1.8.0 | xargs sudo rpm -e --nodeps
- Disable the firewall
sudo systemctl stop firewalld.service
sudo systemctl disable firewalld.service
sudo systemctl status firewalld.service
- Upload the installation packages
cd /opt
sudo mkdir software module
sudo chown hadoop:hadoop module software
# from the Windows workstation (PowerShell)
scp .\jdk-8u321-linux-x64.tar.gz hadoop@192.168.100.101:/opt/software
scp .\hadoop-3.3.1.tar.gz hadoop@192.168.100.101:/opt/software
scp .\apache-zookeeper-3.6.3-bin.tar.gz hadoop@192.168.100.101:/opt/software
- NTP server (chrony)
# same configuration on all nodes
sudo vim /etc/chrony.conf
# comment out the default server 0~3 lines, then add:
server seactive iburst
# node-specific configuration
# edit again on seactive only
sudo vim /etc/chrony.conf
allow 192.168.100.0/24
local stratum 10
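After editing chrony.conf, restart chronyd on every node so the new server takes effect (and enable it at boot); the chronyc check is repeated in the verification section below:
sudo systemctl enable chronyd
sudo systemctl restart chronyd
chronyc sources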
- Extract the archives
cd /opt/
tar -zxf software/jdk-8u321-linux-x64.tar.gz -C module/
tar -zxf software/hadoop-3.3.1.tar.gz -C module/
tar -zxf software/apache-zookeeper-3.6.3-bin.tar.gz -C module/
- Configure environment variables
sudo vim /etc/profile.d/my_env.sh
# jdk1.8.0_321
export JAVA_HOME=/opt/module/jdk1.8.0_321
export PATH=$PATH:$JAVA_HOME/bin
# hadoop-3.3.1
export HADOOP_HOME=/opt/module/hadoop-3.3.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# zookeeper-3.6.3
export ZOOKEEPER_HOME=/opt/module/apache-zookeeper-3.6.3-bin
export PATH=$PATH:$ZOOKEEPER_HOME/bin
source /etc/profile
- Same configuration on all nodes (ZooKeeper)
cd /opt/module/apache-zookeeper-3.6.3-bin/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
dataDir=/opt/module/apache-zookeeper-3.6.3-bin/data
server.1=192.168.100.101:2888:3888
server.2=192.168.100.102:2888:3888
server.3=192.168.100.103:2888:3888
- Node-specific configuration
mkdir /opt/module/apache-zookeeper-3.6.3-bin/data
echo 1 > /opt/module/apache-zookeeper-3.6.3-bin/data/myid # seactive
echo 2 > /opt/module/apache-zookeeper-3.6.3-bin/data/myid # sestandby
echo 3 > /opt/module/apache-zookeeper-3.6.3-bin/data/myid # senode
- Hadoop configuration (same on all nodes)
cd /opt/module/hadoop-3.3.1/etc/hadoop
vim core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<name>hadoop.security.authorization</name>
<value>false</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-3.3.1/data/tmp</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://cluster</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>seactive:2181,sestandby:2181,senode:2181</value>
</property>
<property>
<name>ha.zookeeper.session-timeout.ms</name>
<value>10000</value>
</property>
<property>
<name>net.topology.node.switch.mapping.impl</name>
<value>org.apache.hadoop.net.TableMapping</value>
</property>
<property>
<name>net.topology.table.file.name</name>
<value>/opt/module/hadoop-3.3.1/etc/hadoop/topology.data</value>
</property>
</configuration>
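core-site.xml above points net.topology.table.file.name at topology.data, which must exist for TableMapping to resolve racks. A minimal sketch, one "node rack" pair per line; the rack names here are placeholders, not from the original notes:
vim /opt/module/hadoop-3.3.1/etc/hadoop/topology.data
192.168.100.101 /rack1
192.168.100.102 /rack1
192.168.100.103 /rack2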
vim hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>cluster</value>
</property>
<property>
<name>dfs.ha.namenodes.cluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.cluster.nn1</name>
<value>seactive:9820</value>
</property>
<property>
<name>dfs.namenode.rpc-address.cluster.nn2</name>
<value>sestandby:9820</value>
</property>
<property>
<name>dfs.namenode.http-address.cluster.nn1</name>
<value>seactive:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.cluster.nn2</name>
<value>sestandby:9870</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled.cluster</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://seactive:8485;sestandby:8485;senode:8485/cluster</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/module/hadoop-3.3.1/data/tmp/journal</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/module/hadoop-3.3.1/data/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/module/hadoop-3.3.1/data/datanode</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.cluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/opt/module/hadoop-3.3.1/etc/hadoop/excludes</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>1440</value>
</property>
<property>
<name>fs.trash.checkpoint.interval</name>
<value>0</value>
</property>
<property>
<name>dfs.namenode.heartbeat.recheck-interval</name>
<value>2000</value>
</property>
<property>
<name>dfs.heartbeat.interval</name>
<value>1</value>
</property>
<property>
<name>dfs.datanode.failed.volumes.tolerated</name>
<value>0</value>
</property>
</configuration>
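hdfs-site.xml above references an excludes file via dfs.hosts.exclude; the NameNode typically refuses to start if that path is missing, so create it (it can stay empty until a DataNode needs to be decommissioned):
touch /opt/module/hadoop-3.3.1/etc/hadoop/excludes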
vim mapred-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>0.0.0.0:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>0.0.0.0:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/opt/module/hadoop-3.3.1</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/opt/module/hadoop-3.3.1</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/opt/module/hadoop-3.3.1</value>
</property>
</configuration>
vim yarn-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>seactive:2181,sestandby:2181,senode:2181</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-ha</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>seactive</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>sestandby</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>seactive:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>sestandby:8088</value>
</property>
</configuration>
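Everything so far was edited on seactive; before any daemons are started, the extracted directories have to exist on sestandby and senode too. A sketch using the xsync script from above (the mkdir/chown on the other nodes is an assumption so that rsync, which runs as hadoop, can write there); /etc/profile.d/my_env.sh is root-owned and is easiest to recreate by hand on each node, and each node keeps its own ZooKeeper myid:
# on sestandby and senode
sudo mkdir -p /opt/module
sudo chown hadoop:hadoop /opt/module
# on seactive
~/bin/xsync.sh /opt/module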
- Start the cluster
# on all nodes
zkServer.sh start
zkServer.sh status
hdfs --daemon start journalnode
# on seactive: format the ZKFC znode and the NameNode, then start
hdfs zkfc -formatZK
hdfs namenode -format
hdfs --daemon start zkfc
hdfs --daemon start namenode
yarn --daemon start resourcemanager
# on sestandby: pull the NameNode metadata from the active one, then start
hdfs namenode -bootstrapStandby
hdfs --daemon start zkfc
hdfs --daemon start namenode
yarn --daemon start resourcemanager
# on all nodes
yarn --daemon start nodemanager
hdfs --daemon start datanode
mapred --daemon start historyserver
- Command-line verification
ifconfig
ping seactive
ping sestandby
ping senode
ssh sestandby
ssh senode
sudo systemctl restart chronyd
chronyc sources
java -version
hadoop version
jps
# seactive
# Jps
# DataNode
# NameNode
# DFSZKFailoverController
# ResourceManager
# JournalNode
# JobHistoryServer
# QuorumPeerMain
# NodeManager
# sestandby
# DataNode
# ResourceManager
# DFSZKFailoverController
# JobHistoryServer
# Jps
# NameNode
# NodeManager
# QuorumPeerMain
# JournalNode
# senode
# QuorumPeerMain
# Jps
# JournalNode
# DataNode
# NodeManager
# JobHistoryServer
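The HA state of both NameNodes and ResourceManagers can also be queried; nn1/nn2 and rm1/rm2 are the ids configured above:
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2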
- Web UI verification
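The addresses to check follow from the ports configured above:
http://seactive:9870      # NameNode web UI (nn1)
http://sestandby:9870     # NameNode web UI (nn2)
http://seactive:8088      # ResourceManager web UI (rm1)
http://sestandby:8088     # ResourceManager web UI (rm2)
http://seactive:19888     # JobHistoryServer web UI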
- Test case
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar pi 10 10   # prints the Pi estimate to the console
hdfs dfs -mkdir -p /input
hdfs dfs -put $HADOOP_HOME/etc/hadoop/core-site.xml /input
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar wordcount /input /output
hdfs dfs -ls /output
hdfs dfs -cat /output/part-r-00000