CDH集群日志清理

发布时间 2023-10-19 22:31:06作者: a-tao必须奥利给

CDH集群日志清理

一、查看磁盘占用情况

df -h

二、查看日志占用情况

cdh的各种组件日志一般在 /var/log 目录下,因此需要关注“/”根目录,

查看 /var/log 下使用空间较大的文件夹,并由大到小排列

# List every entry in /var/log with its disk usage, largest first
# (sort -n compares numerically, -r reverses the order).
cd /var/log/
du -s ./* |sort -nr

还有一个是 Cloudera Management Service 服务产生的监控数据,存放在 /var/lib/ 下的 cloudera-service-monitor、cloudera-host-monitor 等目录(本文集群的数据目录位于 /data 下,即 /data/var/lib/...)

# List every entry in the cloudera-service-monitor directory with its
# disk usage, largest first.
cd /data/var/lib/cloudera-service-monitor

du -s ./* | sort -nr

三、清理日志

清理cm、cdh组件的日志数据

# Cloudera Manager components: remove files whose names continue past
# ".out." / ".log." (for example numbered rotations).
rm -rf /data/var/log/cloudera-scm-eventserver/*.out.*
rm -rf /data/var/log/cloudera-scm-firehose/*.out.*
rm -rf /data/var/log/cloudera-scm-agent/*.log.*
rm -rf /data/var/log/cloudera-scm-agent/*.out.*
rm -rf /data/var/log/cloudera-scm-server/*.out.*
rm -rf /data/var/log/cloudera-scm-server/*.log.*

# Hadoop, Flume and Solr service logs (each directory listed once).
rm -rf /data/var/log/hadoop-hdfs/*.out.*
rm -rf /data/var/log/hadoop-hdfs/*.audit.*
rm -rf /data/var/log/hadoop-httpfs/*.out.*
rm -rf /data/var/log/hadoop-kms/*.out.*
rm -rf /data/var/log/hadoop-mapreduce/*.out.*
rm -rf /data/var/log/hadoop-yarn/*.out.*
rm -rf /data/var/log/flume-ng/*.out.*
rm -rf /data/var/log/solr/*.out.*
rm -rf /data/var/log/solr/solr_gc.log.*

# ZooKeeper and Impala logs.
rm -rf /data/var/log/zookeeper/*.log.*
rm -rf /data/var/log/impalad/*.log.*

# YARN NodeManager per-user file cache.
rm -rf /data/yarn/nm/usercache/*/filecache/*

# NOTE(review): this removes everything under /data/azkaban/projects,
# not only log files - confirm that is intended before running.
rm -rf /data/azkaban/projects/*

清理监控服务的数据

# Empty every partition* directory under ts/*/ for both the host monitor
# and the service monitor.
for monitor in cloudera-host-monitor cloudera-service-monitor; do
  rm -rf /data/var/lib/"$monitor"/ts/*/partition*/*
done

清理hdfs回收站数据

# Show how much space the contents of each user's HDFS trash take up.
hadoop fs -du -h -s /user/*/.Trash/*
# Empty the trash in two steps (as described by the original author):
#   step 1 moves the other accounts' trash contents into root's trash,
#   step 2 removes the contents of root's trash.
hadoop fs -rm -r /user/*/.Trash/*
hadoop fs -rm -r /user/root/.Trash/Current

四、自动化脚本

vim cleanLog.sh

#!/bin/bash
#
# cleanLog.sh - delete CDH component log files, Cloudera monitor
# time-series partitions, the YARN NodeManager file cache and Azkaban
# project files.
#
# Usage: cleanLog.sh [DATA_ROOT]
#   DATA_ROOT  directory containing var/, yarn/ and azkaban/
#              (optional; defaults to /data)
#
# Only POSIX sh syntax is used, so the script behaves the same whether it
# is started through its shebang or as `sh cleanLog.sh`.

#######################################
# Delete everything matching the known cleanup patterns below a root.
# Arguments: $1 - data root (optional, defaults to /data)
# Returns:   exit status of the last rm invocation
# The body runs in a subshell so its variables do not leak to the caller.
#######################################
clean_logs() (
  root=${1:-/data}

  # Patterns stay quoted here so they are expanded exactly once, after
  # being joined to $root. Each directory/pattern pair appears only once.
  for pattern in \
    'var/lib/cloudera-host-monitor/ts/*/partition*/*' \
    'var/lib/cloudera-service-monitor/ts/*/partition*/*' \
    'var/log/cloudera-scm-eventserver/*.out.*' \
    'var/log/cloudera-scm-firehose/*.out.*' \
    'var/log/cloudera-scm-agent/*.log.*' \
    'var/log/cloudera-scm-agent/*.out.*' \
    'var/log/cloudera-scm-server/*.out.*' \
    'var/log/cloudera-scm-server/*.log.*' \
    'var/log/hadoop-hdfs/*.out.*' \
    'var/log/hadoop-hdfs/*.audit.*' \
    'var/log/hadoop-httpfs/*.out.*' \
    'var/log/hadoop-kms/*.out.*' \
    'var/log/hadoop-mapreduce/*.out.*' \
    'var/log/hadoop-yarn/*.out.*' \
    'var/log/flume-ng/*.out.*' \
    'var/log/solr/*.out.*' \
    'var/log/solr/solr_gc.log.*' \
    'var/log/zookeeper/*.log.*' \
    'var/log/impalad/*.log.*' \
    'yarn/nm/usercache/*/filecache/*' \
    'azkaban/projects/*'
  do
    # $pattern is deliberately unquoted so the shell glob-expands it;
    # "${root:?}" aborts instead of deleting from "/" if root is empty.
    # With -f, a pattern that matches nothing is not reported as an error.
    rm -rf -- "${root:?}"/$pattern
  done
)

clean_logs "$@"

crontab -e

# Run the cleanup script every Monday at 01:00.
# The file name must match the script created earlier (cleanLog.sh);
# this entry assumes it was saved as /root/cleanLog.sh.

00 01 * * 1 bash /root/cleanLog.sh