cdh node扩容

发布时间 2023-07-25 11:51:17作者: 大川哥
参考:https://blog.csdn.net/xiaoweite1/article/details/123919740
    
# hostname修改
    hostnamectl set-hostname bpit64
# ssh免密认证
    。。。略
# /etc/hosts修改
    10.5.250.56 bpit56.tal.com bpit56
    10.5.250.57 bpit57.tal.com bpit57
    10.5.250.58 bpit58.tal.com bpit58
    10.5.250.59 bpit59.tal.com bpit59
    10.5.250.60 bpit60.tal.com bpit60
    10.5.250.64 bpit64.tal.com bpit64

# cloudera manager 安装
    # 进入56
    cd /data/server/CDH/CDH6.3.2
    # 启动8080端口
    python -m http.server 8080
    
    # 复制yum源(repo文件)到64。64上执行yum安装时,依赖56上已启动的上述http服务;
    # 若8080端口冲突,可改用8081,此时配置文件(应为cloudera-manager.repo中的端口)也要同步修改
    scp cloudera-manager.repo  10.5.250.64:/etc/yum.repos.d/
    yum install -y cloudera-manager-agent cloudera-manager-daemons
# 时间同步
    yum -y install chrony
    systemctl start chronyd
    chronyc sources -v
    # 将系统时钟的时间写入硬件时钟(RTC)
    hwclock --systohc
    # 检测时间
    timedatectl
# 安装jdk
    yum install java-1.8.0-openjdk-devel -y
    ls -l /etc/alternatives/java
    # 输出: /etc/alternatives/java -> /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.372.b07-1.el7_9.x86_64/jre/bin/java
    ln -s /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.362.b08-1.el7_9.x86_64/jre/bin/java  java
    # 小版本有问题,所以直接scp就行
    # scp原来机器的java  到新机器,做软链即可
# 交换分区和大页设置
    sysctl -w vm.swappiness=0
    echo "vm.swappiness=0" >> /etc/sysctl.conf
    echo never > /sys/kernel/mm/transparent_hugepage/defrag
    echo never >/sys/kernel/mm/transparent_hugepage/enabled
    echo "echo never > /sys/kernel/mm/transparent_hugepage/defrag" >> /etc/rc.d/rc.local
    echo "echo never > /sys/kernel/mm/transparent_hugepage/enabled" >> /etc/rc.d/rc.local

# 修改server_host,用于agent与CM的心跳检测,根据自己的主机名来修改
    sed -i '/server_host=/cserver_host=bpit56' /etc/cloudera-scm-agent/config.ini
# 启动agent
    systemctl start cloudera-scm-agent
    systemctl status cloudera-scm-agent


airflow:
    # 59有安装包 /opt/software
    scp Anaconda3-2019.10-Linux-x86_64.sh bpit64:/root/
    
    yum -y install mysql-devel gcc gcc-devel python-devel gcc-c++ cyrus-sasl cyrus-sasl-devel cyrus-sasl-lib
    sh Anaconda3-2019.10-Linux-x86_64.sh 
    # 一路回车
    
    # 在 /etc/profile中加入以下语句:
        export PATH=$PATH:/root/anaconda3/bin
    source /etc/profile
    # source activate     【初始化conda,必须执行,执行之后可以使用conda命令激活环境】
    # conda deactivate     【退出当前base环境】
    # conda activate python37【激活使用python37环境】
    # conda deactivate 【退出当前使用python37环境】
    # conda remove -n python37 --all 【删除python37环境】    
    # 参考:https://blog.csdn.net/wr_java/article/details/130196116
    
    conda create -n airflow213 python=3.7.5
    source activate
    conda activate airflow213
    vi /etc/profile
    # 修改
    export AIRFLOW_HOME=/data/airflow
    source /etc/profile
    pip install apache-airflow==2.1.3 -i https://pypi.tuna.tsinghua.edu.cn/simple

    mv airflow.cfg bak.airflow.cfg
    # 别的机器拷贝一个配置
    scp airflow.cfg bpit64:/data/airflow/
    
    rpm --import https://repo.mysql.com/RPM-GPG-KEY-mysql-2022
    yum install mysql-devel
    pip install mysqlclient -i https://pypi.tuna.tsinghua.edu.cn/simple
    
    (python37) [root@node3 ~]# airflow celery worker -D
    crontab -e 
        */5 10-23 * * * /usr/bin/bash /data/airflow/dags/deploy.sh >> /data/airflow/dags/deploy.log
        00 23 * * * /usr/bin/bash /data/airflow/log-clean.sh >> /data/airflow/log-clean.log
     scp -r dags bpit64:/data/airflow/
     scp log-clean.sh  bpit64:/data/airflow/
     chmod 777 logs
     ps -ef | grep celery
    
datax:
    DATAX_CMD = "/data/server/datax/bin/datax.py"
    /data/server/datax
    JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.362.b08-1.el7_9.x86_64/"
    
参考:
August 23, 2021
We’ve just released Apache Airflow 2.1.3.

- PyPI: https://pypi.org/project/apache-airflow/2.1.3/
- Docs: https://airflow.apache.org/docs/apache-airflow/2.1.3/
- Changelog: https://airflow.apache.org/docs/apache-airflow/2.1.3/changelog.html
- Sources: https://airflow.apache.org/docs/apache-airflow/2.1.3/installation.html#installing-airflow-from-released-sources-and-packages