
Comprehensive Experiment


0. Planning

| Role | IP | Hostname |
| --- | --- | --- |
| NTP server | 192.168.100.101 | seactive |
| Active NameNode + ZKFC | 192.168.100.101 | seactive |
| Standby NameNode + ZKFC | 192.168.100.102 | sestandby |
| DataNode | 192.168.100.101 | seactive |
| DataNode | 192.168.100.102 | sestandby |
| DataNode | 192.168.100.103 | senode |
| ZooKeeper-1 | 192.168.100.101 | seactive |
| ZooKeeper-2 | 192.168.100.102 | sestandby |
| ZooKeeper-3 | 192.168.100.103 | senode |
| JournalNode-1 | 192.168.100.101 | seactive |
| JournalNode-2 | 192.168.100.102 | sestandby |
| JournalNode-3 | 192.168.100.103 | senode |
| Active ResourceManager | 192.168.100.101 | seactive |
| Standby ResourceManager | 192.168.100.102 | sestandby |
| NodeManager | 192.168.100.101 | seactive |
| NodeManager | 192.168.100.102 | sestandby |
| NodeManager | 192.168.100.103 | senode |

1. Clone three VMs and configure each one

  • User
useradd hadoop
passwd hadoop
vim /etc/sudoers
hadoop  ALL=(ALL)       NOPASSWD: ALL
  • Static IP
su hadoop
sudo vim /etc/sysconfig/network-scripts/ifcfg-ens33
# delete the UUID line
IPADDR="192.168.100.101" # 102 / 103 on the other nodes
GATEWAY="192.168.100.2"
DNS1="192.168.100.2"
  • Hostname
sudo vim /etc/hostname
seactive # sestandby senode
  • hosts
sudo vim /etc/hosts
192.168.100.101 seactive
192.168.100.102 sestandby
192.168.100.103 senode
sudo reboot
  • Passwordless SSH login
# on each node, as the hadoop user
ssh-keygen
ssh-copy-id localhost
# on the Windows host (PowerShell)
ssh-keygen.exe
scp $env:UserProfile\.ssh\id_rsa.pub hadoop@192.168.100.101:~/.ssh/keys
# back on seactive, append the Windows key to authorized_keys
cd ~/.ssh
cat keys >> authorized_keys
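The sshfence fencing method configured later and the xsync script below both require the hadoop user to SSH between all three nodes without a password. A minimal sketch, run as hadoop on each node once all hosts are up (it prompts for the hadoop password of each target):
for host in seactive sestandby senode
do
    ssh-copy-id hadoop@$host
done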
  • xsync
mkdir ~/bin
vim ~/bin/xsync.sh
#!/bin/bash
if [ $# -lt 1 ]
then
    echo "参数不足"
    exit
fi
for host in seactive sestandby senode
do
    echo "================ $host ================"
    for file in $@
    do
        if [ -e $file ]
        then
            pdir=$(cd -P $(dirname $file); pwd)
            fname=$(basename $file)
            ssh $host "sudo mkdir -p $pdir"
            rsync -av $pdir/$fname $USER@$host:$pdir
        else
            echo "$file 不存在"
        fi
    done
done
chmod 700 ~/bin/xsync.sh
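A quick usage sketch (it assumes ~/bin is on the PATH, as in the default CentOS ~/.bash_profile, that rsync is installed on every node, and that passwordless SSH is already set up):
# push the bin directory itself to the other nodes
xsync.sh ~/bin
# after section 2, the same script can distribute the extracted software, e.g.
# xsync.sh /opt/module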
  • Remove the bundled Java
rpm -qa | grep java-1.8.0
rpm -qa | grep java-1.8.0 | xargs sudo rpm -e --nodeps
  • Disable the firewall
sudo systemctl stop firewalld.service
sudo systemctl disable firewalld.service
sudo systemctl status firewalld.service
  • Upload the installation packages
cd /opt
sudo mkdir software module
sudo chown hadoop:hadoop module software
# from the Windows host (PowerShell)
scp .\jdk-8u321-linux-x64.tar.gz hadoop@192.168.100.101:/opt/software
scp .\hadoop-3.3.1.tar.gz hadoop@192.168.100.101:/opt/software
scp .\apache-zookeeper-3.6.3-bin.tar.gz hadoop@192.168.100.101:/opt/software
  • NTP server
# same configuration on all nodes
sudo vim /etc/chrony.conf
# comment out the original server 0~3 entries, then add:
server seactive iburst
# node-specific configuration
# edit /etc/chrony.conf again, on seactive only
sudo vim /etc/chrony.conf
allow 192.168.100.0/24
local stratum 10
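chronyd has to be restarted on every node for these changes to take effect; a quick check (the same commands reappear in the verification section):
sudo systemctl restart chronyd
chronyc sources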

2. Extract and install

  • Extract
cd /opt/
tar -zxf software/jdk-8u321-linux-x64.tar.gz -C module/
tar -zxf software/hadoop-3.3.1.tar.gz -C module/
tar -zxf software/apache-zookeeper-3.6.3-bin.tar.gz -C module/
  • Configure environment variables
sudo vim /etc/profile.d/my_env.sh
# jdk1.8.0_321
export JAVA_HOME=/opt/module/jdk1.8.0_321
export PATH=$PATH:$JAVA_HOME/bin

# hadoop-3.3.1
export HADOOP_HOME=/opt/module/hadoop-3.3.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# zookeeper-3.6.3
export ZOOKEEPER_HOME=/opt/module/apache-zookeeper-3.6.3-bin
export PATH=$PATH:$ZOOKEEPER_HOME/bin
source /etc/profile
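If xsync.sh from section 1 is used, the extraction only has to happen once on seactive; a sketch assuming passwordless SSH, rsync on every node, and /opt/module owned by hadoop everywhere. /etc/profile.d/my_env.sh is root-owned, so it is simplest to repeat that edit (and the source) on sestandby and senode by hand:
xsync.sh /opt/module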

3. ZooKeeper configuration

  • Same configuration on all nodes
cd /opt/module/apache-zookeeper-3.6.3-bin/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
dataDir=/opt/module/apache-zookeeper-3.6.3-bin/data

server.1=192.168.100.101:2888:3888
server.2=192.168.100.102:2888:3888
server.3=192.168.100.103:2888:3888
  • Node-specific configuration
mkdir /opt/module/apache-zookeeper-3.6.3-bin/data

echo 1 > /opt/module/apache-zookeeper-3.6.3-bin/data/myid # seactive
echo 2 > /opt/module/apache-zookeeper-3.6.3-bin/data/myid # sestandby
echo 3 > /opt/module/apache-zookeeper-3.6.3-bin/data/myid # senode
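If the ZooKeeper directory was distributed with xsync.sh, the per-node myid can also be written from seactive in one loop (a sketch assuming passwordless SSH and that server.1/2/3 in zoo.cfg match the host order below):
i=1
for host in seactive sestandby senode
do
    ssh $host "mkdir -p /opt/module/apache-zookeeper-3.6.3-bin/data && echo $i > /opt/module/apache-zookeeper-3.6.3-bin/data/myid"
    i=$((i+1))
done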

4. Hadoop configuration

cd /opt/module/hadoop-3.3.1/etc/hadoop
vim core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <property>
        <name>hadoop.security.authorization</name>
        <value>false</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/module/hadoop-3.3.1/data/tmp</value>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <!-- for HA, point clients at the nameservice defined in hdfs-site.xml, not a single NameNode -->
        <value>hdfs://cluster</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>seactive:2181,sestandby:2181,senode:2181</value>
    </property>
    <property>
        <name>ha.zookeeper.session-timeout.ms</name>
        <value>10000</value>
    </property>
    <property>
        <name>net.topology.node.switch.mapping.impl</name>
        <value>org.apache.hadoop.net.TableMapping</value>
    </property>
    <property>
        <name>net.topology.table.file.name</name>
        <value>/opt/module/hadoop-3.3.1/etc/hadoop/topology.data</value>
    </property>
</configuration>
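core-site.xml points net.topology.table.file.name at a topology.data file that is never created elsewhere in this lab. TableMapping expects a two-column, whitespace-separated file: host name or IP, then rack path. A minimal sketch with made-up rack names (listing both hostnames and IPs to be safe):
seactive        /rack1
sestandby       /rack1
senode          /rack2
192.168.100.101 /rack1
192.168.100.102 /rack1
192.168.100.103 /rack2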
vim hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <name>dfs.nameservices</name>
        <value>cluster</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.cluster</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.cluster.nn1</name>
        <value>seactive:9820</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.cluster.nn2</name>
        <value>sestandby:9820</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.cluster.nn1</name>
        <value>seactive:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.cluster.nn2</name>
        <value>sestandby:9870</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled.cluster</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://seactive:8485;sestandby:8485;senode:8485/cluster</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/opt/module/hadoop-3.3.1/data/tmp/journal</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/module/hadoop-3.3.1/data/namenode</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/module/hadoop-3.3.1/data/datanode</value>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence(:22)</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hadoop/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.cluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.hosts.exclude</name>
        <value>/opt/module/hadoop-3.3.1/etc/hadoop/excludes</value>
    </property>
    <property>
        <name>fs.trash.interval</name>
        <value>1440</value>
    </property>
    <property>
        <name>fs.trash.checkpoint.interval</name>
        <value>0</value>
    </property>
    <property>
        <name>dfs.namenode.heartbeat.recheck-interval</name>
        <value>2000</value>
    </property>
    <property>
        <name>dfs.heartbeat.interval</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.datanode.failed.volumes.tolerated</name>
        <value>0</value>
    </property>
</configuration>
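dfs.hosts.exclude above points at an excludes file that is never created in this lab, and the NameNode may refuse to start if the file is missing, so it is safest to create an empty one (path taken from the config):
touch /opt/module/hadoop-3.3.1/etc/hadoop/excludes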
vim mapred-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>0.0.0.0:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>0.0.0.0:19888</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/module/hadoop-3.3.1</value>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/module/hadoop-3.3.1</value>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/module/hadoop-3.3.1</value>
    </property>
</configuration>
vim yarn-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>seactive:2181,sestandby:2181,senode:2181</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yarn-ha</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>seactive</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>sestandby</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>seactive:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>sestandby:8088</value>
    </property>
</configuration>
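None of these files pins JAVA_HOME inside Hadoop's own environment script; the --daemon commands below usually inherit it from /etc/profile.d, but setting it in hadoop-env.sh is safer (an extra, optional step not in the original), and the finished configuration still has to reach the other two nodes:
echo 'export JAVA_HOME=/opt/module/jdk1.8.0_321' >> /opt/module/hadoop-3.3.1/etc/hadoop/hadoop-env.sh
xsync.sh /opt/module/hadoop-3.3.1/etc/hadoop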

5. Start the services

# on all nodes
zkServer.sh start
zkServer.sh status
hdfs --daemon start journalnode


# on one NameNode (seactive): format the ZooKeeper HA state and the NameNode, then start
hdfs zkfc -formatZK
hdfs namenode -format

hdfs --daemon start zkfc
hdfs --daemon start namenode
yarn --daemon start resourcemanager


# on the other NameNode (sestandby): sync the metadata from the active NameNode, then start
hdfs namenode -bootstrapStandby

hdfs --daemon start zkfc
hdfs --daemon start namenode
yarn --daemon start resourcemanager

# on all nodes
yarn --daemon start nodemanager
hdfs --daemon start datanode
mapred --daemon start historyserver
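Once everything is up, the HA roles can be checked from any node; nn1/nn2 and rm1/rm2 are the IDs defined in hdfs-site.xml and yarn-site.xml:
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2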

6. Verification

  • Command-line verification
ifconfig

ping seactive
ping sestandby
ping senode

ssh sestandby
ssh senode

sudo systemctl restart chronyd
chronyc sources

java -version
hadoop version

jps
# seactive
# Jps
# DataNode
# NameNode
# DFSZKFailoverController
# ResourceManager
# JournalNode
# JobHistoryServer
# QuorumPeerMain
# NodeManager

# sestandby
# DataNode
# ResourceManager
# DFSZKFailoverController
# JobHistoryServer
# Jps
# NameNode
# NodeManager
# QuorumPeerMain
# JournalNode

# senode
# QuorumPeerMain
# Jps
# JournalNode
# DataNode
# NodeManager
# JobHistoryServer
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar pi 10 10
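The pi job prints its estimate straight to the console; the /output check below matches a job that writes to HDFS, for example a wordcount run (a sketch; it assumes some text file such as the Hadoop README.txt is uploaded to /input first, and that /output does not exist yet):
hdfs dfs -mkdir -p /input
hdfs dfs -put $HADOOP_HOME/README.txt /input
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar wordcount /input /output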

hdfs dfs -ls /output
hdfs dfs -cat /output/part-r-00000