1.建用户
groupadd hadoop
useradd -g hadoop hadoop
配置sudo权限
visudo
增加
hadoop ALL=(ALL) NOPASSWD: ALL
2.修改主机名
临时
sudo hostname hadoop01
永久
hostnamectl set-hostname hadoop01
3.关闭防火墙
systemctl stop firewalld
systemctl disable firewalld
4.安装openssh服务
yum -y install openssh-clients
5.配置hosts
192.168.2.30 hadoop01
192.168.2.31 hadoop02
192.168.2.32 hadoop03
6.免密登录
su - hadoop
ssh-keygen -t rsa
cp id_rsa.pub authorized_keys
chmod 700 /home/hadoop/.ssh/
chmod 600 /home/hadoop/.ssh/*
ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub hadoop@192.168.2.32
7.统一时区
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
yum -y install ntp
ntpdate pool.ntp.org
9.编写配置文件
mkdir -p /home/hadoop/tools
deploy.conf
#规划集群角色
hadoop01,master,all,zookeeper,namenode,datanode,
hadoop02,slave,all,zookeeper,namenode,datanode,
hadoop03,slave,all,zookeeper,datanode,
vim deploy.sh
#!/bin/bash
# deploy.sh — copy a file or directory via scp to every host whose line in
# deploy.conf carries the given MachineTag.
#
# Usage:   ./deploy.sh srcFile(or Dir) destFile(or Dir) MachineTag [confFile]
# Config:  CSV lines "host,tag1,tag2,..."; lines starting with '#' are ignored.
if [ $# -lt 3 ]; then
  echo "Usage: ./deploy.sh srcFile(or Dir) descFile(or Dir) MachineTag"
  echo "Usage: ./deploy.sh srcFile(or Dir) descFile(or Dir) MachineTag confFile"
  # Bug fix: the original fell through here and kept running with missing args.
  exit 1
fi
src=$1
dest=$2
tag=$3
# Optional 4th argument overrides the default config file location.
if [ -z "$4" ]; then
  confFile=/home/hadoop/tools/deploy.conf
else
  confFile=$4
fi
if [ -f "$confFile" ]; then
  # Target hosts: first CSV field of every non-comment line containing ",$tag,".
  # (Config lines end with a trailing comma, so ",$tag," also matches the last tag.)
  servers=$(grep -v '^#' "$confFile" | grep ",$tag," | awk -F',' '{print $1}')
  if [ -f "$src" ]; then
    for server in $servers; do
      scp "$src" "$server:$dest"
    done
  elif [ -d "$src" ]; then
    # Directories need a recursive copy.
    for server in $servers; do
      scp -r "$src" "$server:$dest"
    done
  else
    echo "Error: No source file exist"
  fi
else
  echo "Error: Please assign config file or run deploy.sh command with deploy.conf in same directory"
fi
vim runRemoteCmd.sh
#!/bin/bash
# runRemoteCmd.sh — run a shell command over ssh on every host whose line in
# deploy.conf carries the given MachineTag.
#
# Usage:   ./runRemoteCmd.sh Command MachineTag [confFile]
# Config:  CSV lines "host,tag1,tag2,..."; lines starting with '#' are ignored.
if [ $# -lt 2 ]; then
  echo "Usage: ./runRemoteCmd.sh Command MachineTag"
  echo "Usage: ./runRemoteCmd.sh Command MachineTag confFile"
  # Bug fix: bare "exit" returned status 0 on misuse; report failure instead.
  exit 1
fi
cmd=$1
tag=$2
# Optional 3rd argument overrides the default config file location.
if [ -z "$3" ]; then
  confFile=/home/hadoop/tools/deploy.conf
else
  confFile=$3
fi
if [ -f "$confFile" ]; then
  # Target hosts: first CSV field of every non-comment line containing ",$tag,".
  for server in $(grep -v '^#' "$confFile" | grep ",$tag," | awk -F',' '{print $1}'); do
    echo "***********$server*************"
    # Source ~/.bashrc first so JAVA_HOME/PATH are set in the non-login shell.
    ssh "$server" "source ~/.bashrc;$cmd"
  done
else
  echo "Error: Please assign config file or run deploy.sh command with deploy.conf in same directory"
fi
10.建目录
/home/hadoop/tools/runRemoteCmd.sh "mkdir /home/hadoop/data" all
/home/hadoop/app/
tar -zxvf jdk-8u51-linux-x64.tar.gz
ln -s jdk1.8.0_51 jdk
vim /etc/profile 全部用户
vim ~/.bashrc 本用户
JAVA_HOME=/home/hadoop/app/jdk
CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
PATH=$JAVA_HOME/bin:/home/hadoop/tools:$PATH
export JAVA_HOME CLASSPATH PATH
source ~/.bashrc
11.配置zookeeper
cd /home/hadoop/app/
tar -zxvf zookeeper-3.4.5-cdh5.10.0.tar.gz
ln -s zookeeper-3.4.5-cdh5.10.0 zookeeper
修改配置文件添加到/home/hadoop/app/zookeeper/conf
zoo.cfg
cd zookeeper/conf
mkdir -p /home/hadoop/data/zookeeper/zkdata
cd /home/hadoop/data/zookeeper/zkdata/
vim myid
1,2,3
12.启动zookeeper
mkdir -p /home/hadoop/data/zookeeper/zkdata
mkdir -p /home/hadoop/data/zookeeper/zkdatalog
/home/hadoop/tools/runRemoteCmd.sh "/home/hadoop/app/zookeeper/bin/zkServer.sh start" all
/home/hadoop/tools/runRemoteCmd.sh "jps" all
/home/hadoop/tools/runRemoteCmd.sh "/home/hadoop/app/zookeeper/bin/zkServer.sh status" all
13.配置hadoop
cd /home/hadoop/app/
tar -zxvf hadoop-2.6.0-cdh5.10.0.tar.gz
ln -s hadoop-2.6.0-cdh5.10.0 hadoop
修改配置文件复制到/home/hadoop/app/hadoop/etc/hadoop
core-site.xml
hadoop-env.sh
hdfs-site.xml
slaves
同步至其他两台服务器
/home/hadoop/tools/deploy.sh ../app/hadoop-2.6.0-cdh5.10.0 /home/hadoop/app/ slave
14.启动
/home/hadoop/tools/runRemoteCmd.sh "/home/hadoop/app/hadoop-2.6.0-cdh5.10.0/sbin/hadoop-daemon.sh start journalnode" all
nn1节点格式化namenode,hadoop01执行
/home/hadoop/app/hadoop/bin/hdfs namenode -format
nn1节点格式化zkfc,hadoop01执行
/home/hadoop/app/hadoop/bin/hdfs zkfc -formatZK
nn1节点启动namenode,启动后到hadoop02执行同步,执行完后可关闭
/home/hadoop/app/hadoop/bin/hdfs namenode
nn2节点同步nn1节点元数据信息,hadoop02执行
/home/hadoop/app/hadoop/bin/hdfs namenode -bootstrapStandby
关闭所有节点的journalnode,hadoop01执行
/home/hadoop/tools/runRemoteCmd.sh "/home/hadoop/app/hadoop/sbin/hadoop-daemon.sh stop journalnode" all
15.启动hdfs
#mkdir -p /home/hadoop/data/tmp/dfs/name
/home/hadoop/app/hadoop/sbin/start-dfs.sh
/home/hadoop/app/hadoop/sbin/stop-dfs.sh
查看状态
/home/hadoop/app/hadoop/bin/hdfs haadmin -getServiceState nn1
/home/hadoop/app/hadoop/bin/hdfs haadmin -getServiceState nn2
192.168.2.30:50070
16.测试
创建文件夹,在hadoop01执行
/home/hadoop/app/hadoop/bin/hdfs dfs -mkdir /test
查看文件夹
/home/hadoop/app/hadoop/bin/hdfs dfs -ls /
导入,要写入东西
/home/hadoop/app/hadoop/bin/hdfs dfs -put wd.txt /test
hadoop hadoop hadoop
hdfs hdfs hdfs
yarn yarn yarn
查看
/home/hadoop/app/hadoop/bin/hdfs dfs -cat /test/wd.txt
测试停止namenode
/home/hadoop/app/hadoop/sbin/hadoop-daemon.sh stop namenode
启动namenode
/home/hadoop/app/hadoop/sbin/hadoop-daemon.sh start namenode
17.yarn
cd /home/hadoop/app/hadoop/etc/hadoop
mv /home/hadoop/yarn-site.xml ./
mv /home/hadoop/mapred-site.xml ./
分发
cd /home/hadoop/tools
./deploy.sh /home/hadoop/app/hadoop/etc/hadoop/yarn-site.xml /home/hadoop/app/hadoop/etc/hadoop/ slave
./deploy.sh /home/hadoop/app/hadoop/etc/hadoop/mapred-site.xml /home/hadoop/app/hadoop/etc/hadoop/ slave
18.启动yarn集群
/home/hadoop/app/hadoop/sbin/start-yarn.sh
启动备用节点RM,hadoop02执行
/home/hadoop/app/hadoop/sbin/yarn-daemon.sh start resourcemanager
查看RM状态
/home/hadoop/app/hadoop/bin/yarn rmadmin -getServiceState rm1
/home/hadoop/app/hadoop/bin/yarn rmadmin -getServiceState rm2
页面查看
192.168.2.30:8088
修改访问主机hosts
192.168.2.30 hadoop01
192.168.2.31 hadoop02
192.168.2.32 hadoop03
访问hadoop01:8088
19.运行测试
cd /home/hadoop/app/hadoop/bin/
./hadoop jar share/hadoop/mapreduce2/hadoop-mapreduce-examples-2.6.0-cdh5.10.0.jar wordcount /test/wd.txt /test/out
查看结果
./hdfs dfs -ls /test/out
单词统计
./hdfs dfs -cat /test/out/*
20.配置win系统jdk环境变量
系统变量
JAVA_HOME
C:\Program Files\Java\jdk1.8.0_111
Path
%JAVA_HOME%\bin
21.配置win系统maven环境变量
MAVEN_HOME
D:\workspace\apache-maven-3.3.9
Path
%MAVEN_HOME%\bin