173.实战大数据一

发表于 linux 分类,标签:
1.建用户
groupadd hadoop
useradd -g hadoop hadoop
配置sudo权限
visudo
增加
hadoop        ALL=(ALL)       NOPASSWD: ALL

2.修改主机名
临时
sudo hostname hadoop01
永久
hostnamectl set-hostname hadoop01

3.关闭防火墙
systemctl stop firewalld
systemctl disable firewalld

4.安装openssh服务
yum -y install openssh-clients

5.配置hosts
192.168.2.30 hadoop01
192.168.2.31 hadoop02
192.168.2.32 hadoop03

6.免密登录
su - hadoop
ssh-keygen -t rsa
cp id_rsa.pub authorized_keys

chmod 700 /home/hadoop/.ssh/
chmod 600 /home/hadoop/.ssh/*
ssh-copy-id  -i  /home/hadoop/.ssh/id_rsa.pub hadoop@192.168.2.32

7.统一时区
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
yum -y install ntp
ntpdate pool.ntp.org

9.编写配置文件
mkdir -p /home/hadoop/tools

deploy.conf 
#规划集群角色
hadoop01,master,all,zookeeper,namenode,datanode,
hadoop02,slave,all,zookeeper,namenode,datanode,
hadoop03,slave,all,zookeeper,datanode,

vim deploy.sh
#!/bin/bash
# deploy.sh -- copy a file or directory to every cluster host whose role
# list in the config file contains MachineTag (e.g. all / master / slave).
#
# Usage: ./deploy.sh srcFile(orDir) destFile(orDir) MachineTag [confFile]
#   $1  source file or directory to copy
#   $2  destination path on each remote host
#   $3  role tag to match against the config lines
#   $4  optional config file; defaults to /home/hadoop/tools/deploy.conf
if [ $# -lt 3 ]
then
  echo "Usage: ./deploy.sh srcFile(or Dir) descFile(or Dir) MachineTag"
  echo "Usage: ./deploy.sh srcFile(or Dir) descFile(or Dir) MachineTag confFile"
  exit 1  # BUG FIX: original fell through and kept running with missing args
fi
src=$1
dest=$2
tag=$3
# Default config file when the 4th argument is unset or empty
# (same effect as the original 'a'$4'a' == 'aa' test, but robust).
confFile=${4:-/home/hadoop/tools/deploy.conf}
if [ ! -f "$confFile" ]
then
  echo "Error: Please assign config file or run deploy.sh command with deploy.conf in same directory"
  exit 1
fi
# Pick scp mode up front instead of duplicating the host loop.
if [ -f "$src" ]
then
  recurse=""
elif [ -d "$src" ]
then
  recurse="-r"
else
  echo "Error: No source file exist"
  exit 1
fi
# Config lines look like "host,role1,role2,...," -- field 1 is the hostname.
# Matching ",$tag," (with both commas) prevents a tag from matching a
# substring of another role name.
servers=$(grep -v '^#' "$confFile" | grep ",$tag," | awk -F',' '{print $1}')
for server in $servers
do
  # $recurse intentionally unquoted: empty for files, -r for directories.
  scp $recurse "$src" "$server:$dest"
done

vim runRemoteCmd.sh
#!/bin/bash
# runRemoteCmd.sh -- run a shell command on every cluster host whose role
# list in the config file contains MachineTag (e.g. all / master / slave).
#
# Usage: ./runRemoteCmd.sh Command MachineTag [confFile]
#   $1  command line to execute on each remote host
#   $2  role tag to match against the config lines
#   $3  optional config file; defaults to /home/hadoop/tools/deploy.conf
if [ $# -lt 2 ]
then
  echo "Usage: ./runRemoteCmd.sh Command MachineTag"
  echo "Usage: ./runRemoteCmd.sh Command MachineTag confFile"
  exit 1  # non-zero so callers can detect a usage error
fi
cmd=$1
tag=$2
# Default config file when the 3rd argument is unset or empty
# (same effect as the original 'a'$3'a' == 'aa' test, but robust).
confFile=${3:-/home/hadoop/tools/deploy.conf}
if [ ! -f "$confFile" ]
then
  echo "Error: Please assign config file or run deploy.sh command with deploy.conf in same directory"
  exit 1
fi
# Config lines look like "host,role1,role2,...," -- field 1 is the hostname.
# Iterating over a word-split variable (not a piped while-read loop) keeps
# ssh from consuming the remaining host list via stdin.
servers=$(grep -v '^#' "$confFile" | grep ",$tag," | awk -F',' '{print $1}')
for server in $servers
do
  echo "***********$server*************"
  # source ~/.bashrc so PATH/JAVA_HOME configured in the setup steps apply
  ssh "$server" "source ~/.bashrc;$cmd"
done

10.建目录
/home/hadoop/tools/runRemoteCmd.sh "mkdir /home/hadoop/data" all
/home/hadoop/app/
tar -zxvf jdk-8u51-linux-x64.tar.gz
ln -s jdk1.8.0_51  jdk

vim /etc/profile    全部用户
vim ~/.bashrc    本用户
JAVA_HOME=/home/hadoop/app/jdk
CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
PATH=$JAVA_HOME/bin:/home/hadoop/tools:$PATH
export JAVA_HOME CLASSPATH  PATH

source ~/.bashrc

11.配置zookeeper
cd /home/hadoop/app/
tar -zxvf zookeeper-3.4.5-cdh5.10.0.tar.gz
ln -s zookeeper-3.4.5-cdh5.10.0 zookeeper
修改配置文件添加到/home/hadoop/app/zookeeper/conf
zoo.cfg

cd zookeeper/conf
mkdir -p /home/hadoop/data/zookeeper/zkdata
cd /home/hadoop/data/zookeeper/zkdata/
vim myid
1,2,3

12.启动zookeeper
mkdir  -p /home/hadoop/data/zookeeper/zkdata
mkdir  -p /home/hadoop/data/zookeeper/zkdatalog
/home/hadoop/tools/runRemoteCmd.sh "/home/hadoop/app/zookeeper/bin/zkServer.sh start" all
/home/hadoop/tools/runRemoteCmd.sh "jps" all
/home/hadoop/tools/runRemoteCmd.sh "/home/hadoop/app/zookeeper/bin/zkServer.sh status" all

13.配置hadoop
cd /home/hadoop/app/
tar -zxvf hadoop-2.6.0-cdh5.10.0.tar.gz
ln -s hadoop-2.6.0-cdh5.10.0 hadoop
修改配置文件复制到/home/hadoop/app/hadoop/etc/hadoop
core-site.xml
hadoop-env.sh
hdfs-site.xml
slaves
同步至其他两台服务器
/home/hadoop/tools/deploy.sh ../app/hadoop-2.6.0-cdh5.10.0 /home/hadoop/app/ slave

14.启动
/home/hadoop/tools/runRemoteCmd.sh "/home/hadoop/app/hadoop-2.6.0-cdh5.10.0/sbin/hadoop-daemon.sh start journalnode" all
nn1节点格式化namenode,hadoop01执行
/home/hadoop/app/hadoop/bin/hdfs namenode -format
nn1节点格式化zkfc,hadoop01执行
/home/hadoop/app/hadoop/bin/hdfs zkfc -formatZK
nn1节点启动namenode,启动后到hadoop02执行同步,执行完后可关闭
/home/hadoop/app/hadoop/bin/hdfs namenode

nn2节点同步nn1节点元数据信息,hadoop02执行
/home/hadoop/app/hadoop/bin/hdfs namenode -bootstrapStandby
关闭所有节点的journalnode,hadoop01执行
/home/hadoop/tools/runRemoteCmd.sh "/home/hadoop/app/hadoop/sbin/hadoop-daemon.sh stop journalnode" all

15.启动hdfs
#mkdir -p /home/hadoop/data/tmp/dfs/name
/home/hadoop/app/hadoop/sbin/start-dfs.sh
/home/hadoop/app/hadoop/sbin/stop-dfs.sh
查看状态
/home/hadoop/app/hadoop/bin/hdfs haadmin -getServiceState nn1
/home/hadoop/app/hadoop/bin/hdfs haadmin -getServiceState nn2

192.168.2.30:50070 

16.测试
创建文件夹,在hadoop01执行
/home/hadoop/app/hadoop/bin/hdfs dfs -mkdir /test
查看文件夹
/home/hadoop/app/hadoop/bin/hdfs dfs -ls /
导入,要写入东西
/home/hadoop/app/hadoop/bin/hdfs dfs -put wd.txt /test
hadoop    hadoop    hadoop
hdfs      hdfs      hdfs
yarn      yarn      yarn
查看
/home/hadoop/app/hadoop/bin/hdfs dfs -cat /test/wd.txt

测试停止namenode
/home/hadoop/app/hadoop/sbin/hadoop-daemon.sh stop namenode
启动namenode
/home/hadoop/app/hadoop/sbin/hadoop-daemon.sh start namenode

17.yarn
cd /home/hadoop/app/hadoop/etc/hadoop
mv /home/hadoop/yarn-site.xml ./
mv /home/hadoop/mapred-site.xml ./
分发
cd /home/hadoop/tools
./deploy.sh /home/hadoop/app/hadoop/etc/hadoop/yarn-site.xml /home/hadoop/app/hadoop/etc/hadoop/ slave
./deploy.sh /home/hadoop/app/hadoop/etc/hadoop/mapred-site.xml /home/hadoop/app/hadoop/etc/hadoop/ slave

18.启动yarn集群
/home/hadoop/app/hadoop/sbin/start-yarn.sh
启动备用节点RM,hadoop02执行
/home/hadoop/app/hadoop/sbin/yarn-daemon.sh start resourcemanager
查看RM状态
/home/hadoop/app/hadoop/bin/yarn rmadmin -getServiceState rm1
/home/hadoop/app/hadoop/bin/yarn rmadmin -getServiceState rm2

页面查看
192.168.2.30:8088
修改访问主机hosts
192.168.2.30 hadoop01
192.168.2.31 hadoop02
192.168.2.32 hadoop03
访问hadoop01:8088

19.运行测试
cd /home/hadoop/app/hadoop/bin/
./hadoop jar ../share/hadoop/mapreduce2/hadoop-mapreduce-examples-2.6.0-cdh5.10.0.jar wordcount /test/wd.txt /test/out
查看结果
./hdfs dfs -ls /test/out
单词统计
./hdfs dfs -cat /test/out/*

20.配置win系统jdk环境变量
系统变量
JAVA_HOME
C:\Program Files\Java\jdk1.8.0_111
Path
%JAVA_HOME%\bin

21.
MAVEN_HOME
D:\workspace\apache-maven-3.3.9
Path
%MAVEN_HOME%\bin


0 篇评论

发表我的评论