CDH6安装


Requirements

  1. Seven nodes, two masters and five workers; root / Roottest_1124
  2. Intranet environment, CentOS 7, with JDK and MySQL 5.7 already installed
  3. All packages are placed under /opt/local/ by default;
# ip
10.0.75.225
10.0.75.226
10.0.75.227
10.0.75.228
10.0.75.232
10.0.75.233
10.0.75.234

# CM packages: /opt/cm6
allkeys.asc
cloudera-manager-agent-6.2.0-968826.el7.x86_64.rpm
cloudera-manager-daemons-6.2.0-968826.el7.x86_64.rpm
cloudera-manager-server-6.2.0-968826.el7.x86_64.rpm
cloudera-manager-server-db-2-6.2.0-968826.el7.x86_64.rpm
enterprise-debuginfo-6.2.0-968826.el7.x86_64.rpm
RPM-GPG-KEY-cloudera

# CDH parcel files: /opt/cdh6
CDH-6.2.0-1.cdh6.2.0.p0.967373-el7.parcel
CDH-6.2.0-1.cdh6.2.0.p0.967373-el7.parcel.sha
manifest.json

Address

# The NIC name may differ on your host
vim /etc/sysconfig/network-scripts/ifcfg-ens192

# After writing the config below, restart the network
systemctl restart network
TYPE=Ethernet
PROXY_METHOD=none
BROWSER_ONLY=no
BOOTPROTO=static
DEFROUTE=yes
IPV4_FAILURE_FATAL=no
IPV6INIT=no
IPV6_AUTOCONF=no
IPV6_DEFROUTE=no
IPV6_FAILURE_FATAL=no
IPV6_ADDR_GEN_MODE=stable-privacy
NAME=ens192
DEVICE=ens192
UUID=cd1f8bca-f811-401f-982e-ed2811d06346
IPADDR=10.0.75.225
NETMASK=255.255.255.0
GATEWAY=10.0.75.255
DNS1=8.8.8.8
DNS2=8.8.4.4
ONBOOT=yes
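
A quick check that the static address took effect (a sketch; the NIC name ens192 is assumed from above):
# Verify the address on the NIC
ip addr show ens192 | grep "inet "
# Verify the default route and that the gateway answers
gw=$(ip route | awk '/^default/ {print $3}')
ping -c 3 "$gw"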

Hostname & Tools

touch hostname.sh
chmod +x hostname.sh
vim hostname.sh

# Finally, run it
sudo ./hostname.sh
# Enter the password Roottest_1124 at each login prompt
#! /bin/bash
# Change hostnames, update /etc/hosts, and install common tools

# Set the host name list
declare -a hostnames=("zt-cdh-master01-test" "zt-cdh-master02-test" "zt-cdh-worker01-test" "zt-cdh-worker02-test" "zt-cdh-worker03-test" "zt-cdh-worker04-test" "zt-cdh-worker05-test")
# Set the IP address list
declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
# Set user and password
user=root
ssh_key=Roottest_1124
# Current time
now=$(date +"%Y-%m-%d %H:%M:%S")

echo "Start change hostnames and installing common tools" $now
for i in "${!nodes[@]}"
do
  echo "load ssh ${nodes[$i]} , hostname is ${hostnames[$i]}"
  ssh $user@${nodes[$i]} << EOF
  
  sudo yum install -y wget yum-utils vim tree lsof sshpass openssh-clients expect epel-release || exit 1
  echo "Installing tools on ${nodes[$i]} is done"
  
  sudo hostnamectl set-hostname ${hostnames[$i]} || exit 1
  hostname
  echo "Change ${hostnames[$i]} is ready" 

  hostname -I
  # Escape with \ so these expand on the remote node, not on the local machine
  ip_addr=\$(hostname -I | awk '{print \$1}')
  if grep -q "\$ip_addr" /etc/hosts; then
    # Comment out the existing /etc/hosts entry for this IP
    sed -i "s/^\$ip_addr/#&/" /etc/hosts
  fi
  # Write to new hosts
  echo "zt-cdh-master01-test 10.0.75.225" >> /etc/hosts
  echo "zt-cdh-master02-test 10.0.75.226" >> /etc/hosts
  echo "zt-cdh-worker01-test 10.0.75.227" >> /etc/hosts
  echo "zt-cdh-worker02-test 10.0.75.228" >> /etc/hosts
  echo "zt-cdh-worker03-test 10.0.75.232" >> /etc/hosts
  echo "zt-cdh-worker04-test 10.0.75.233" >> /etc/hosts
  echo "zt-cdh-worker05-test 10.0.75.234" >> /etc/hosts
  cat /etc/hosts
  echo "Change ${hostnames[$i]} hosts is ready" 

  exit
EOF
done
echo "change hostname hosts and install tools is success" $now

SSH

touch set_ssh.sh
chmod +x set_ssh.sh
ll
vim set_ssh.sh

# Finally, run it
sudo ./set_ssh.sh
#! /bin/bash
# Delete old SSH keys on every node

# Set the IP address list
declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
# Set user and password
username=root
password=Roottest_1124
# Current time
now=$(date +"%Y-%m-%d %H:%M:%S")

for node in "${nodes[@]}"
do
  sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$username@$node" << EOF
  rm -f ~/.ssh/*
  ls -l ~/.ssh/
EOF
done
#! /bin/bash
# Set up passwordless SSH

# known_hosts and authorized_keys are both used by SSH remote connections, but they serve different purposes.
# known_hosts stores the public keys of known remote hosts; on every connection SSH checks that the host's key matches, to keep the connection safe.
# authorized_keys stores the public keys allowed to log in as the local user; when a user authenticates with an SSH key, SSH checks whether that key is listed in authorized_keys and allows the connection if it is

# Set the IP address list
declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")

# Set user and password
username=root
password=Roottest_1124
# Current time
now=$(date +"%Y-%m-%d %H:%M:%S")

# Loop over the nodes and run commands
for node in "${nodes[@]}"
do
  # Run commands on the remote node
  sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$username@$node" << EOF
  hostname
  cd ~/.ssh/
  # Generate an SSH key pair (no passphrase)
  ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
EOF
done

echo "Changing hostname is success"
echo "----------------------------"
ssh 10.0.75.225
cd ~/.ssh/
cat id_rsa.pub >> authorized_keys
chmod 644 authorized_keys
cat authorized_keys
sshpass -p "Roottest_1124" ssh-copy-id -f -i ~/.ssh/id_rsa.pub root@10.0.75.226
scp authorized_keys root@10.0.75.226:~/.ssh/
ssh 10.0.75.226
cd ~/.ssh/
cat id_rsa.pub >> authorized_keys
chmod 644 authorized_keys
cat authorized_keys
sshpass -p "Roottest_1124" ssh-copy-id -f -i ~/.ssh/id_rsa.pub root@10.0.75.227
scp authorized_keys root@10.0.75.227:~/.ssh/
ssh 10.0.75.227
cd ~/.ssh/
cat id_rsa.pub >> authorized_keys
chmod 644 authorized_keys
cat authorized_keys
sshpass -p "Roottest_1124" ssh-copy-id -f -i ~/.ssh/id_rsa.pub root@10.0.75.228
scp authorized_keys root@10.0.75.228:~/.ssh/
ssh 10.0.75.228
cd ~/.ssh/
cat id_rsa.pub >> authorized_keys
chmod 644 authorized_keys
cat authorized_keys
sshpass -p "Roottest_1124" ssh-copy-id -f -i ~/.ssh/id_rsa.pub root@10.0.75.232
scp authorized_keys root@10.0.75.232:~/.ssh/
ssh 10.0.75.232
cd ~/.ssh/
cat id_rsa.pub >> authorized_keys
chmod 644 authorized_keys
cat authorized_keys
sshpass -p "Roottest_1124" ssh-copy-id -f -i ~/.ssh/id_rsa.pub root@10.0.75.233
scp authorized_keys root@10.0.75.233:~/.ssh/
ssh 10.0.75.233
cd ~/.ssh/
cat id_rsa.pub >> authorized_keys
chmod 644 authorized_keys
cat authorized_keys
sshpass -p "Roottest_1124" ssh-copy-id -f -i ~/.ssh/id_rsa.pub root@10.0.75.234
scp authorized_keys root@10.0.75.234:~/.ssh/
ssh 10.0.75.234
cd ~/.ssh/
cat id_rsa.pub >> authorized_keys
chmod 644 authorized_keys
cat authorized_keys
scp authorized_keys root@10.0.75.225:~/.ssh/
scp authorized_keys root@10.0.75.226:~/.ssh/
scp authorized_keys root@10.0.75.227:~/.ssh/
scp authorized_keys root@10.0.75.228:~/.ssh/
scp authorized_keys root@10.0.75.232:~/.ssh/
scp authorized_keys root@10.0.75.233:~/.ssh/
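
The hop-by-hop chain above can also be automated. A minimal sketch under the same assumptions (sshpass installed everywhere, one shared root password): gather every node's public key into one file, push it back out, then verify passwordless login.
#!/bin/bash
# Collect every node's public key into a merged authorized_keys
declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
password=Roottest_1124
merged=/tmp/authorized_keys.all

: > "$merged"
for node in "${nodes[@]}"; do
  sshpass -p "$password" ssh -o StrictHostKeyChecking=no root@$node "cat ~/.ssh/id_rsa.pub" >> "$merged"
done
# Distribute the merged file and set the permissions used above
for node in "${nodes[@]}"; do
  sshpass -p "$password" scp "$merged" root@$node:~/.ssh/authorized_keys
  sshpass -p "$password" ssh root@$node "chmod 644 ~/.ssh/authorized_keys"
done
# Verify: each node should print its hostname without a password prompt
for node in "${nodes[@]}"; do ssh -o BatchMode=yes root@$node hostname; done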

Environment

firewalld

touch set_firewalld.sh
chmod +x set_firewalld.sh
vim set_firewalld.sh
#!/bin/bash
    
# Checking firewall status
firewall_status=$(systemctl status firewalld | grep "Active:" | awk '{print $2}')
if [ "$firewall_status" = "active" ]; then
    sudo systemctl stop firewalld
    sudo systemctl disable firewalld
    echo "firewall disabled"
else
    echo "firewall already closed"
fi
echo "Checking firewalld is ready"

# Configures the SELinux security module's settings file, specifying its working mode
# SELINUX= enforcing, permissive (log only), or disabled
# Check if SELINUX is already disabled
if grep -q "^SELINUX=disabled" /etc/sysconfig/selinux; then
  echo "SELINUX is already disabled"
  exit 0
fi
# Check if SELINUX is enforcing or permissive
if grep -Eq "^SELINUX=(enforcing|permissive)" /etc/sysconfig/selinux; then
  sed -i 's/^SELINUX=\(enforcing\|permissive\)/SELINUX=disabled/' /etc/sysconfig/selinux
  echo "SELINUX is enforcing or permissive , now is disable"
else
  echo "SELINUX=disabled" >> /etc/sysconfig/selinux
  echo "SELINUX add set to disabled"
fi
echo "Checking SELINUX is ready"

limits.conf

touch set_limits.sh
chmod +x set_limits.sh
vim set_limits.sh
#!/bin/bash

# Set the number of open files and the maximum number of user processes
# grep -n prints the matching line number; grep -i matches case-insensitively
# Config file path
limits_file="/etc/security/limits.conf"
# Find the line containing #END or # End and comment out everything from it onward
end_line_num=$(grep -i -n "#END\|# END" $limits_file | head -n 1 | cut -d ":" -f 1)
if [ ! -z "$end_line_num" ]
then
    sed -i "${end_line_num},\$s/^/#/" $limits_file 
fi
# sh -c runs the quoted string as a command; useful when the redirection itself must run under sudo, e.g.:
# sudo sh -c "echo '* soft nofile 65536' >> /etc/security/limits.conf"
echo '* soft nofile 65536' >> /etc/security/limits.conf
echo '* hard nofile 1048576' >> /etc/security/limits.conf
echo '* soft nproc 65536' >> /etc/security/limits.conf
echo '* hard nproc unlimited' >> /etc/security/limits.conf
echo '* soft memlock unlimited' >> /etc/security/limits.conf
echo '* hard memlock unlimited' >> /etc/security/limits.conf
cat /etc/security/limits.conf
echo "Set the number of open files and the maximum number of user processes is ready"

sysctl.conf

touch set_sysctl.sh
chmod +x set_sysctl.sh
vim set_sysctl.sh
#!/bin/bash

# vm.swappiness controls how aggressively pages are swapped out to the disk swap partition when physical memory runs low
# A higher swappiness value swaps more pages to disk and can degrade performance, so it is kept low here
# Check if vm.swappiness exists in /etc/sysctl.conf
if grep -q "^vm\.swappiness" /etc/sysctl.conf; then
  sed -i "s/^vm\.swappiness.*/vm.swappiness=10/" /etc/sysctl.conf
else
  echo "vm.swappiness=10" >> /etc/sysctl.conf
fi
echo "Checking swappiness is ready"

# Disable ipv6 (guarded so repeated runs do not append duplicate entries)
if ! grep -q "^net\.ipv6\.conf\.all\.disable_ipv6" /etc/sysctl.conf; then
  echo 'net.ipv6.conf.all.disable_ipv6=1' >> /etc/sysctl.conf
  echo 'net.ipv6.conf.default.disable_ipv6=1' >> /etc/sysctl.conf
  echo 'net.ipv6.conf.lo.disable_ipv6=1' >> /etc/sysctl.conf
fi
echo "Disabling ipv6 is ready"

cat /etc/sysctl.conf
# Reload sysctl configuration to apply changes
sysctl -p
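
A quick check that the new values are live:
# Should print vm.swappiness = 10 and 1 respectively
sysctl vm.swappiness
cat /proc/sys/net/ipv6/conf/all/disable_ipv6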

THP and lib

touch set_thp.sh
chmod +x set_thp.sh
vim set_thp.sh
#!/bin/bash

# Install common dependency libraries and tools
yum install -y ntp bind-utils psmisc libxslt cyrus-sasl-plain cyrus-sasl-gssapi portmap mod_ssl openssl-devel chkconfig zlib sqlite fuse fuse-libs redhat-lsb
echo "Installing common dependency libraries succeeded"
# Disable transparent huge pages (THP)
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
echo "THP is ready"

time

touch set_time.sh
chmod +x set_time.sh
vim set_time.sh
#!/bin/bash

# Check if timezone is set to Shanghai
if [[ "$(timedatectl | grep 'Time zone' | awk '{print $3}')" != "Asia/Shanghai" ]]; then
    # Set timezone to Shanghai
    sudo cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
    echo "Time zone set to Asia/Shanghai"

    # Sync the hardware clock to the system clock so the time survives a reboot
    # (hwclock -w is an alias of --systohc, so a single call is enough)
    sudo hwclock --systohc
    echo "BIOS clock updated"
else
    # Sync the hardware clock to the system clock
    sudo hwclock --systohc
    echo "Time zone is already set to Asia/Shanghai"
fi
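
On systemd hosts the same result can be achieved in one step, which also keeps /etc/localtime consistent (a sketch):
# Set the zone and verify
sudo timedatectl set-timezone Asia/Shanghai
timedatectl | grep "Time zone"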

Integration script

touch integration_set.sh
chmod +x integration_set.sh
vim integration_set.sh
#!/bin/bash

sudo /root/set_firewalld.sh
sudo /root/set_limits.sh
sudo /root/set_sysctl.sh
sudo /root/set_thp.sh
sudo /root/set_time.sh

Function1

touch scp_file.sh
chmod +x scp_file.sh
vim scp_file.sh
#!/bin/bash
# Node file transfer

declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
# Set user and password
username=root
password=Roottest_1124
# Current time
now=$(date +"%Y-%m-%d %H:%M:%S")
# Get the local IP
local_ip=$(hostname -I | awk '{print $1}')

# Loop over the nodes in the array
for node in "${nodes[@]}"
do
  if [ "$node" != "$local_ip" ]
  then
    echo "Processing ${node}..."
    scp /root/set_firewalld.sh $username@${node}:/root/set_firewalld.sh
    scp /root/set_limits.sh $username@${node}:/root/set_limits.sh
    scp /root/set_sysctl.sh $username@${node}:/root/set_sysctl.sh
    scp /root/set_thp.sh $username@${node}:/root/set_thp.sh
    scp /root/set_time.sh $username@${node}:/root/set_time.sh
    scp /root/integration_set.sh $username@${node}:/root/integration_set.sh
  fi
done
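
The section copies the scripts but does not show running them remotely; a minimal runner sketch (same node list, scripts assumed to be in /root as above):
#!/bin/bash
# Run the integration script on every node
declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
for node in "${nodes[@]}"; do
  echo "Running integration_set.sh on ${node}"
  ssh root@$node "sudo /root/integration_set.sh"
done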

Function2

touch log_nodes.sh
chmod +x log_nodes.sh
vim log_nodes.sh
#!/bin/bash
# Log in to each node

declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
# Set user and password
username=root
password=Roottest_1124
# Current time
now=$(date +"%Y-%m-%d %H:%M:%S")
# Get the local IP
local_ip=$(hostname -I | awk '{print $1}')

# Log in to each node in turn
for node in "${nodes[@]}"
do
	ssh $node << EOF
	hostname
	hostname -I 
	ls -l /root/
	java -version
	ls -l /usr/share/java/
	exit
EOF
done

NTP

install and client config

touch set_ntp.sh
chmod +x set_ntp.sh
vim set_ntp.sh
#!/bin/bash

declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
# Set user and password
username=root
password=Roottest_1124
# Current time
now=$(date +"%Y-%m-%d %H:%M:%S")
# Get the local IP
local_ip=$(hostname -I | awk '{print $1}')

# Loop over the nodes in the array
for node in "${nodes[@]}"
do
    ssh $node << EOF
    sudo yum install -y ntp 
	# Comment out the default CentOS NTP servers
    sed -i 's/^server 0.centos.pool.ntp.org iburst/#&/' /etc/ntp.conf
    sed -i 's/^server 1.centos.pool.ntp.org iburst/#&/' /etc/ntp.conf
    sed -i 's/^server 2.centos.pool.ntp.org iburst/#&/' /etc/ntp.conf
    sed -i 's/^server 3.centos.pool.ntp.org iburst/#&/' /etc/ntp.conf
    # Point every node at the master
    echo "server 10.0.75.225" >> /etc/ntp.conf
    sudo systemctl start ntpd
    sudo systemctl enable ntpd
    ntpq -p
    exit
EOF
done

master config

ssh 10.0.75.225
# On the master node, append the following
vim /etc/ntp.conf
server  127.127.1.0     # local clock
fudge   127.127.1.0 stratum 10
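
After editing ntp.conf, the daemon must be restarted for the local-clock entry to load; a quick check afterwards:
# Restart ntpd on the master and confirm its peers
sudo systemctl restart ntpd
ntpq -p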

Preparation

create metadata

  • The metadata databases are set up as follows
  • Setting the hive database's character set to utf8 can garble table comments; common advice online is to use latin1;
Service                              Database    User    Password
Cloudera Manager Server              scm         scm     Pa55W0rd
Activity Monitor                     amon        amon    Pa55W0rd
Reports Manager                      rman        rman    Pa55W0rd
Hue                                  hue         hue     Pa55W0rd
Hive Metastore Server                metastore   hive    Pa55W0rd
Sentry Server                        sentry      sentry  Pa55W0rd
Cloudera Navigator Audit Server      nav         nav     Pa55W0rd
Cloudera Navigator Metadata Server   navms       navms   Pa55W0rd
Oozie                                oozie       oozie   Pa55W0rd
# Create the databases and users
mysql -uroot -pPa55W0rd
CREATE DATABASE scm DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON scm.* TO 'scm'@'%' IDENTIFIED BY 'Pa55W0rd';

CREATE DATABASE amon DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON amon.* TO 'amon'@'%' IDENTIFIED BY 'Pa55W0rd';

CREATE DATABASE rman DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON rman.* TO 'rman'@'%' IDENTIFIED BY 'Pa55W0rd';

CREATE DATABASE hue DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON hue.* TO 'hue'@'%' IDENTIFIED BY 'Pa55W0rd';

CREATE DATABASE metastore DEFAULT CHARACTER SET latin1;
GRANT ALL ON metastore.* TO 'hive'@'%' IDENTIFIED BY 'Pa55W0rd';

CREATE DATABASE sentry DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON sentry.* TO 'sentry'@'%' IDENTIFIED BY 'Pa55W0rd';

CREATE DATABASE nav DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON nav.* TO 'nav'@'%' IDENTIFIED BY 'Pa55W0rd';

CREATE DATABASE navms DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON navms.* TO 'navms'@'%' IDENTIFIED BY 'Pa55W0rd';

CREATE DATABASE oozie DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON oozie.* TO 'oozie'@'%' IDENTIFIED BY 'Pa55W0rd';

show databases;

-- Spot-check that the grants are correct
show grants for 'scm'@'%';
-- Expected output:
-- GRANT USAGE ON *.* TO 'scm'@'%'
-- GRANT ALL PRIVILEGES ON `scm`.* TO 'scm'@'%'

http

# Master node only
yum install -y httpd
service httpd start
# Start at boot
chkconfig httpd on
# Test via the public IP
http://39.98.209.75/

createrepo

# Generate the repodata metadata directory
yum -y install createrepo
cd /opt/cm6/
createrepo .

# Link the repos into the httpd document root
ln -s /opt/cm6/ /var/www/html/
ln -s /opt/cdh6/ /var/www/html/

# yum
vim /etc/yum.repos.d/cloudera-manager.repo
[cloudera-manager]
name=cloudera_repo
enabled=1
failovermethod=priority 
baseurl=http://10.0.75.225/cm6/
gpgcheck=1
gpgkey=http://10.0.75.225/cm6/allkeys.asc
# Options in a yum repo file:
# [] holds the repo's unique ID, any string; name is the repo's display name, any string
# baseurl is the URL of the repo
# mirrorlist points at a mirror list (defaults to the official source); it can be commented out in favor of baseurl (e.g. http://mirrors.aliyun.com/repo/Centos-6.repo)
# enabled activates the repo: 0 disables it, 1 enables it
# gpgcheck controls signature checking at install time: 0 disables, 1 enables
# gpgkey names the key file used to verify package signatures when gpgcheck is on
# failovermethod=priority => if downloading from the highest-priority mirror fails, automatically retry the next one
# Refresh and inspect the yum configuration
yum clean all && yum makecache
yum repolist | grep cloudera
yum repolist
# Test the repo URLs in a browser
http://10.0.75.225/cm6/
http://10.0.75.225/cdh6/
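
On a terminal-only intranet host the same test can be done with curl (a sketch); both should return HTTP 200:
curl -sI http://10.0.75.225/cm6/ | head -n 1
curl -sI http://10.0.75.225/cdh6/ | head -n 1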

user

# (If needed) remove an existing user and group first
#userdel cloudera-scm
#groupdel cloudera-scm

# Create the group and a system user
groupadd cloudera-scm
useradd -r -g cloudera-scm cloudera-scm
id cloudera-scm
# Change ownership of the package directories
chown cloudera-scm.cloudera-scm -R /opt/cm6/
chown cloudera-scm.cloudera-scm -R /opt/cdh6/
# Create the CDH data directory
mkdir -p /opt/cdh_data/
chown cloudera-scm.cloudera-scm -R /opt/cdh_data/
ls -l /opt/
# As root, make the sudoers file writable, then grant passwordless sudo
chmod u+w /etc/sudoers
echo "cloudera-scm       ALL=(ALL)       NOPASSWD: ALL" >> /etc/sudoers
cat /etc/sudoers
# Import the RPM-GPG-KEY-cloudera key:
sudo rpm --import /opt/cm6/RPM-GPG-KEY-cloudera
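
The sudoers edit is worth validating; visudo -c parses the file and reports syntax errors, and the write bit can then be removed again:
# Validate sudoers syntax, then restore the original mode
visudo -c
chmod u-w /etc/sudoers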

function1

touch scp_repo.sh
chmod +x scp_repo.sh
vim scp_repo.sh
#!/bin/bash
# Node file transfer

declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
# Set user and password
username=root
password=Roottest_1124
# Current time
now=$(date +"%Y-%m-%d %H:%M:%S")
# Get the local IP
local_ip=$(hostname -I | awk '{print $1}')

# Loop over the cluster nodes
for node in "${nodes[@]}"
do
  if [ "$node" != "$local_ip" ]
  then
    scp /etc/yum.repos.d/cloudera-manager.repo $username@${node}:/etc/yum.repos.d/ || exit 1
    echo "Processing ${node}: cloudera-manager.repo copied"
    # The original hardcoded 10.0.75.226 and /opt/ here; copy the key to every node,
    # into /opt/cm6/ where the later rpm --import expects it
    ssh $username@${node} "mkdir -p /opt/cm6" && scp /opt/cm6/RPM-GPG-KEY-cloudera $username@${node}:/opt/cm6/ || exit 1
    echo "Processing ${node}: RPM-GPG-KEY-cloudera copied"
  fi
done

function2

touch ssh_repo.sh
chmod +x ssh_repo.sh
vim ssh_repo.sh
#!/bin/bash
# Log in to each node

declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
# Set user and password
username=root
password=Roottest_1124
# Current time
now=$(date +"%Y-%m-%d %H:%M:%S")
# Get the local IP
local_ip=$(hostname -I | awk '{print $1}')

# Log in to each node in turn
for node in "${nodes[@]}"
do
	echo "ssh $node"
	ssh $node << EOF
    # Create the group and user
    groupadd cloudera-scm
    useradd -r -g cloudera-scm cloudera-scm
    id cloudera-scm
    echo "creating the user succeeded"
    # Change ownership of the package directories
    chown cloudera-scm.cloudera-scm -R /opt/cm6/
    chown cloudera-scm.cloudera-scm -R /opt/cdh6/
    # Create the CDH data directory
    mkdir -p /opt/cdh_data/
    chown cloudera-scm.cloudera-scm -R /opt/cdh_data/
    ls -l /opt/
    echo "creating directories succeeded"
    # Grant passwordless sudo (the interactive vim step from the manual session is
    # dropped here: vim would hang inside a non-interactive heredoc)
    chmod u+w /etc/sudoers
    echo "cloudera-scm       ALL=(ALL)       NOPASSWD: ALL" >> /etc/sudoers
    cat /etc/sudoers
    echo "granting sudo succeeded"
    # Refresh the yum cache from the new repo
    yum clean all && yum makecache || exit 1
    echo "loading the cloudera repo succeeded"
    # Import the RPM-GPG-KEY-cloudera key:
    sudo rpm --import /opt/cm6/RPM-GPG-KEY-cloudera || exit 1
    echo "importing the cloudera key succeeded"
    exit
EOF
done

Install server

install CDH

sudo yum -y install cloudera-manager-daemons cloudera-manager-agent cloudera-manager-server
# Copy the CDH parcels into the local parcel repo
cp /opt/cdh6/* /opt/cloudera/parcel-repo
chown cloudera-scm.cloudera-scm -R /opt/cloudera/

config CM metadata

# CM and MySQL on the same server
sudo /opt/cloudera/cm/schema/scm_prepare_database.sh mysql scm scm 'Pa55W0rd'
# CM and MySQL on different servers
# sudo /opt/cloudera/cm/schema/scm_prepare_database.sh mysql -h192.168.0.242 --scm-host scm_host scm scm 'Pa55W0rd'
  • The following output indicates success
JAVA_HOME=/usr/java/jdk1.8.0_191-amd64
Verifying that we can write to /etc/cloudera-scm-server
Creating SCM configuration file in /etc/cloudera-scm-server
Executing:  /usr/java/jdk1.8.0_191-amd64/bin/java -cp /usr/share/java/mysql-connector-java.jar:/usr/share/java/oracle-connector-java.jar:/usr/share/java/postgresql-connector-java.jar:/opt/cloudera/cm/schema/../lib/* com.cloudera.enterprise.dbutil.DbCommandExecutor /etc/cloudera-scm-server/db.properties com.cloudera.cmf.db.
[main] DbCommandExecutor              INFO  Successfully connected to database.
All done, your SCM database is configured correctly!

function2

touch ssh_install.sh
chmod +x ssh_install.sh
vim ssh_install.sh
#!/bin/bash
# Log in to each node

declare -a nodes=("10.0.75.225" "10.0.75.226" "10.0.75.227" "10.0.75.228" "10.0.75.232" "10.0.75.233" "10.0.75.234")
# Set user and password
username=root
password=Roottest_1124
# Current time
now=$(date +"%Y-%m-%d %H:%M:%S")
# Get the local IP
local_ip=$(hostname -I | awk '{print $1}')

# Log in to each node in turn
for node in "${nodes[@]}"
do
	echo "ssh $node"
	ssh $node << EOF
    sudo yum -y install cloudera-manager-daemons cloudera-manager-agent || exit 1
    chown cloudera-scm.cloudera-scm -R /opt/cloudera/
    echo "$node cloudera agent installed successfully"
    exit
EOF
done

start CM

# Start the server
sudo systemctl start cloudera-scm-server
# Watch the startup log on the master node
tail -f /var/log/cloudera-scm-server/cloudera-scm-server.log
# Check whether port 7180 is listening yet
sudo netstat -tuln | grep 7180

# Check status
sudo systemctl status cloudera-scm-server
# Stop
sudo systemctl stop cloudera-scm-server
# Start at boot
sudo systemctl enable cloudera-scm-server
  • Startup has succeeded when the following log lines appear
INFO WebServerImpl:org.eclipse.jetty.server.Server: Started @58667ms
INFO WebServerImpl:com.cloudera.server.cmf.WebServerImpl: Started Jetty server

install Web

service configuration

# Open the web UI
http://10.0.75.225:7180/
  1. Log in
    1. The default username and password are both admin
  2. WELCOME page
  3. Accept License
  4. Select Edition
  5. WELCOME page
  6. Cluster Basics
    1. Cluster name: Cloudera 1
  7. Specify Hosts
    1. zt-cdh-master[01-02]-test,zt-cdh-worker[01-05]-test
    2. Click Search to discover the hosts, then click Continue;
  8. Select Repository
    1. Repository Location -> Custom Repository ->
      1. Enter http://10.0.75.225/cm6/
    2. CDH and other software -> Install Method -> Use Parcels -> More Options ->
      1. Remote Parcel Repository URLs
        1. Enter http://10.0.75.225/cdh6/
      2. Local Parcel Repository Path
        1. Enter /opt/parcel-repo
  9. Skip the JDK installation option, since JDK is already installed on every node
  10. Provide SSH login credentials
    1. Enter the root account password for the servers
  11. Install Agents
  12. Install Parcels
  13. Install Cluster

Directory configuration

  1. Pay attention to where data files are stored
    1. The settings shown during each service's installation are only the main ones,
    2. e.g. the Impala log location keeps its default, and Impala produces a large volume of logs, so the default location may run out of space;
    3. Note: Impala computes in memory, so the Impala Daemon memory limit needs adjusting
  2. Custom Services: install only the services you need
    1. Install HDFS Hive Impala Kudu Yarn Zookeeper;
  3. Custom role assignment
    1. Kudu must be assigned manually
    2. master: cdh01
    3. tablet server: cdh[02-03]
  4. Database setup
    1. Hive metastore database name / user / password: metastore hive Pa55W0rd
    2. Review Changes

Possible errors

If SCM initialization fails

# Do this before rerunning the database initialization command
rm -rf /etc/cloudera-scm-server/db.mgmt.properties

If the MySQL driver cannot be found

  1. The default directory for the MySQL connector driver is /usr/share/java;
  2. If the mysql-connector-java.jar file name carries version info, the version must be removed by renaming the file;
  3. otherwise, creating the metadata fails with the following error (see the rename sketch after the log)
Creating SCM configuration file in /etc/cloudera-scm-server
Executing:  /usr/java/jdk1.8.0_141-cloudera/bin/java -cp /usr/share/java/mysql-connector-java.jar:/usr/share/java/oracle-connector-java.jar:/usr/share/java/postgresql-connector-java.jar:/opt/cloudera/cm/schema/../lib/* com.cloudera.enterprise.dbutil.DbCommandExecutor /etc/cloudera-scm-server/db.properties com.cloudera.cmf.db.
[                          main] DbCommandExecutor              INFO  Unable to find JDBC driver for database type: MySQL
[                          main] DbCommandExecutor              ERROR JDBC Driver com.mysql.jdbc.Driver not found.
[                          main] DbCommandExecutor              ERROR Exiting with exit code 3
--> Error 3, giving up (use --force if you wish to ignore the error)
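
A minimal fix sketch; the versioned jar name and source path below are hypothetical, substitute whatever version is actually present on the host:
# Install the connector under the unversioned name that CM expects
cp /opt/local/mysql-connector-java-5.1.47.jar /usr/share/java/mysql-connector-java.jar
ls -l /usr/share/java/mysql-connector-java.jar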

UI not responding

  1. In the step just before installation, set the number of simultaneous installations to 3;
  2. The browser has an ad-blocking extension; disable it or switch browsers;

No parcels found

  • Error details:
  1. No parcels were found in the configured repositories. Try adding a custom repository under More Options.
  2. Otherwise, you may only be able to proceed with packages.
  • Fix
  1. The CDH files cannot be read, because the local CDH directory defaults to /opt/cloudera/parcel-repo
  2. Verify that the hash in CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel.sha is correct (see the check sketch below);
    1. Open manifest.json and find the hash recorded for CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel;
    2. Compare it against the hash in CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel.sha,
    3. If they differ, empty the CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel.sha256 file and write in the hash from manifest.json;
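  • The check sketch, using the 6.2.0 parcel names from this install (the .sha file holds the parcel's expected SHA-1):
cd /opt/cloudera/parcel-repo
cat CDH-6.2.0-1.cdh6.2.0.p0.967373-el7.parcel.sha
# Compute the actual hash and compare the two
sha1sum CDH-6.2.0-1.cdh6.2.0.p0.967373-el7.parcel
# The same hash is recorded in manifest.json
grep -o '"hash": *"[0-9a-f]*"' manifest.json | head -n 5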

NameNode format failure

  • Error details:
  1. Formatting the current NameNode's name directories; the operation fails if a name directory is not empty.
  2. Command (Format (29)) failed: Failed to format NameNode.
  • Fix
  1. Move the NameNode's data directory out of the way, then retry (a sketch follows);
  2. Delete the moved copy after the fix;
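  • A sketch of the workaround; the path /dfs/nn below is hypothetical, use the dfs.namenode.name.dir configured for your cluster:
# Move the non-empty name directory aside, then rerun Format in the CM UI
mv /dfs/nn /dfs/nn.bak
# Only after the cluster is healthy:
# rm -rf /dfs/nn.bak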

Hive startup error

  • Error details
ERROR org.apache.hadoop.hive.metastore.HiveMetaStore: [main]: Metastore Thrift Server threw an exception...
javax.jdo.JDODataStoreException: Required table missing : "`DBS`" in Catalog "" Schema "". DataNucleus requires this table to perform its persistence operations. Either your MetaData is incorrect, or you need to enable "datanucleus.schema.autoCreateTables"
  • Fix: change the Hive configuration as follows
hive.metastore.schema.verification = false
datanucleus.metadata.validate = false
datanucleus.schema.autoCreateAll = true
  • Check the privileges
use mysql;
select host,user from user;
  • Has the Hive metastore database been initialized?
  1. Check whether the hive metastore database contains any tables
  2. If it was never initialized, the metadata can be initialized by command
  3. Drop the hive database, recreate it, re-grant the privileges, then initialize once more
  4. Run in the hive bin directory:
./schematool -initSchema -dbType mysql --verbose
./schematool -dbType mysql -

NTP service of the host cannot be found

  • Error details
  1. CDH cluster: the host's NTP service cannot be found, or the service did not respond to a clock offset request
  • Fix
  1. Generally this error appears because the NTP daemon itself is unhealthy;
  2. To recover:
    1. Run systemctl status ntpd to inspect the NTP process,
    2. Run systemctl restart ntpd to restart it.
  3. ntpq -p column reference
st     : stratum; the lower, the more accurate the time source;
when   : how long ago the last sync happened;
poll   : the sync interval;
reach  : how many of the recent sync attempts succeeded (an octal bitmask);
delay  : network-induced delay;
offset : time difference from the server;
jitter : variance of the offset measurements