Python查询Prometheus API

发布时间 2023-08-28 17:26:14 作者: 風£飛

 

#!/bin/python
# -*- coding: utf-8 -*-

import pandas as pd
import requests, time, re


def getcolumn(status):
    """Classify a usage percentage as normal or abnormal.

    :param status: usage value; anything accepted by ``float()``
    :return: "正常" when the value is strictly below 80, otherwise "异常"
    """
    return "正常" if float(status) < 80 else "异常"


def query(url, promql, timeout=120):
    """Run an instant query against the Prometheus HTTP API.

    The expression is passed as a ``params`` dict so that ``requests``
    URL-encodes it; hand-building a ``'query=...'`` string is not recommended.

    :param url: base URL of the Prometheus server (``/api/v1/query`` is appended)
    :param promql: PromQL expression
    :param timeout: seconds passed to ``requests.get``; without it a stalled
        server would block this call forever
    :return: the list found under ``data.result`` when it is non-empty.
        On failure a message string is returned instead (the API's ``error``
        text, or '无法获取有效数据' when the result is empty), so callers must
        check the type before iterating.
    """
    queryurl = url + '/api/v1/query'
    queryres = requests.get(queryurl, params={'query': promql}, timeout=timeout)
    # Decode the body once instead of re-parsing it for every field access.
    payload = queryres.json()
    if payload.get('status') == 'error':
        error = payload.get('error')
        print(error)
        return error
    # "data" may be absent or null; treat that the same as an empty result.
    metrics = (payload.get("data") or {}).get("result")
    if metrics:
        return metrics
    print('无法获取有效数据')
    return '无法获取有效数据'


def queryrange(url, promql, starttime, endtime, step, timeout=120):
    """Run a range query against the Prometheus HTTP API.

    Example parameters::

        promql    = 'round(100-(avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance,nodename) * 100 ),0.01)'
        starttime = '2023-08-21T02:10:10.000+08:00'
        endtime   = '2023-08-24T02:13:00.000+08:00'
        step      = '60s'

    :param url: base URL of the Prometheus server (``/api/v1/query_range`` is appended)
    :param promql: PromQL expression
    :param starttime: RFC 3339 string or unix timestamp
    :param endtime: RFC 3339 string or unix timestamp
    :param step: query resolution step, as a duration string or a float
        number of seconds
    :param timeout: seconds passed to ``requests.get``; without it a stalled
        server would block this call forever
    :return: the list found under ``data.result`` when it is non-empty,
        otherwise ``None`` (after printing the reason)
    """
    rangeurl = url + '/api/v1/query_range'
    rangeparams = {'query': promql, 'start': starttime, 'end': endtime, 'step': step}
    rangeres = requests.get(rangeurl, params=rangeparams, timeout=timeout)
    payload = rangeres.json()
    # An error response has no "data" member; report it instead of failing
    # with AttributeError on None.
    if payload.get('status') == 'error':
        print(payload.get('error'))
        return None
    metrics = (payload.get("data") or {}).get("result")
    if metrics:
        return metrics
    print('无法获取有效数据')
    return None


def cpuresolv(result, nu):
    """Flatten instant-query samples into CPU usage rows.

    :param result: iterable of sample dicts, each holding a ``'metric'`` label
        dict (``instance``, ``nodename``) and a ``'value'`` pair of
        ``[unix_timestamp, value_string]``
    :param nu: text inserted into the usage column name, giving
        ``'CPU' + str(nu) + '分钟使用率(%)'``
    :return: list of dicts with keys ``instance``, ``服务器名``, the usage
        column and ``时间`` (local time)
    """
    # Chinese run + word characters + Chinese run. Written as a raw string
    # because "\w" is an invalid escape sequence in a normal literal; the re
    # module resolves the \uXXXX escapes itself.
    prog = re.compile(r'[\u4e00-\u9fa5]+\w+[\u4e00-\u9fa5]+')
    usagekey = 'CPU' + str(nu) + '分钟使用率(%)'
    cpulist = []
    for sample in result:
        labels = sample.get('metric')
        value = sample.get('value')
        instance = labels.get('instance') or ''
        matched = prog.findall(instance)
        cpulist.append({
            # Fall back to the raw label when it carries no Chinese name,
            # rather than aborting the whole report with IndexError.
            'instance': matched[0] if matched else instance,
            '服务器名': labels.get('nodename'),
            usagekey: "%.2f" % float(value[1]),
            '时间': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value[0])),
        })
    return cpulist


def cputosheet(url):
    """Build the CPU sheet: 1/5/15-minute usage per server plus a status flag.

    Three instant queries are run (irate over 1m, 5m and 15m windows), each
    flattened by ``cpuresolv`` and then joined on ``服务器名``.

    :param url: base URL of the Prometheus server
    :return: DataFrame indexed from 1 with columns ``instance``, ``服务器名``,
        the three usage columns, ``时间`` and ``状态``
    """
    cpu1m = 'abs(100-(avg(irate(node_cpu_seconds_total{mode="idle"}[1m])) by (instance,nodename) * 100 ))'
    cpu5m = 'abs(100-(avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance,nodename) * 100 ))'
    cpu15m = 'abs(100-(avg(irate(node_cpu_seconds_total{mode="idle"}[15m])) by (instance,nodename) * 100 ))'
    # The label given to cpuresolv must reproduce the column names selected
    # below ('CPU一分钟…', 'CPU五分钟…', 'CPU十五分钟…'); with an empty label the
    # rows are keyed 'CPU分钟使用率(%)' and the selected column is all NaN.
    res1m = cpuresolv(query(url, cpu1m), '一')
    res5m = cpuresolv(query(url, cpu5m), '五')
    res15m = cpuresolv(query(url, cpu15m), '十五')
    left = pd.DataFrame(res1m, columns=['instance', '服务器名', 'CPU一分钟使用率(%)'])
    middle = pd.DataFrame(res5m, columns=['服务器名', 'CPU五分钟使用率(%)'])
    right = pd.DataFrame(res15m, columns=['服务器名', 'CPU十五分钟使用率(%)', '时间'])
    cpuuse = pd.merge(pd.merge(left, middle, on='服务器名'), right, on='服务器名')
    # Add a '状态' column derived row by row (axis=1) from the 15-minute
    # usage: below 80 is normal, 80 and above is abnormal.
    cpuuse['状态'] = cpuuse.apply(lambda x: getcolumn(x['CPU十五分钟使用率(%)']), axis=1)
    # Number rows from 1 instead of pandas' default 0.
    cpuuse.index = list(range(1, len(cpuuse.index) + 1))
    return cpuuse


def memoryresolv(result):
    """Flatten instant-query samples into memory usage rows.

    :param result: iterable of sample dicts, each holding a ``'metric'`` label
        dict (``instance``, ``nodename``) and a ``'value'`` pair of
        ``[unix_timestamp, value_string]``
    :return: list of dicts with keys ``instance``, ``服务器名``,
        ``内存使用率(%)``, ``时间`` (local time) and ``状态`` ('正常' below 80,
        otherwise '异常')
    """
    # Chinese run + word characters + Chinese run. Written as a raw string
    # because "\w" is an invalid escape sequence in a normal literal; the re
    # module resolves the \uXXXX escapes itself.
    prog = re.compile(r'[\u4e00-\u9fa5]+\w+[\u4e00-\u9fa5]+')
    memorylist = []
    for sample in result:
        labels = sample.get('metric')
        value = sample.get('value')
        usage = float(value[1])
        instance = labels.get('instance') or ''
        matched = prog.findall(instance)
        memorylist.append({
            # Fall back to the raw label when it carries no Chinese name,
            # rather than aborting the whole report with IndexError.
            'instance': matched[0] if matched else instance,
            '服务器名': labels.get('nodename'),
            '内存使用率(%)': "%.2f" % usage,
            '时间': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value[0])),
            '状态': '正常' if usage < 80 else '异常',
        })
    return memorylist


def memorytosheet(url):
    """Build the memory sheet from a single instant query.

    :param url: base URL of the Prometheus server
    :return: DataFrame indexed from 1 with columns ``instance``, ``服务器名``,
        ``内存使用率(%)``, ``时间`` and ``状态``
    """
    expr = '(node_memory_MemTotal_bytes-node_memory_MemAvailable_bytes)/node_memory_MemTotal_bytes*100 > 1'
    columns = ['instance', '服务器名', '内存使用率(%)', '时间', '状态']
    rows = memoryresolv(query(url, expr))
    frame = pd.DataFrame(rows, columns=columns)
    # Number rows from 1 instead of pandas' default 0.
    frame.index = list(range(1, len(frame.index) + 1))
    return frame


def diskresolv(result):
    """Flatten instant-query samples into per-partition disk usage rows.

    :param result: iterable of sample dicts, each holding a ``'metric'`` label
        dict (``instance``, ``nodename``, ``mountpoint``) and a ``'value'``
        pair of ``[unix_timestamp, value_string]``
    :return: list of dicts with keys ``instance``, ``服务器名``, ``分区``,
        ``分区使用率(%)``, ``时间`` (local time) and ``状态`` ('正常' below 80,
        otherwise '异常')
    """
    # Chinese run + word characters + Chinese run. Written as a raw string
    # because "\w" is an invalid escape sequence in a normal literal; the re
    # module resolves the \uXXXX escapes itself.
    prog = re.compile(r'[\u4e00-\u9fa5]+\w+[\u4e00-\u9fa5]+')
    disklist = []
    for sample in result:
        labels = sample.get('metric')
        value = sample.get('value')
        usage = float(value[1])
        instance = labels.get('instance') or ''
        matched = prog.findall(instance)
        disklist.append({
            # Fall back to the raw label when it carries no Chinese name,
            # rather than aborting the whole report with IndexError.
            'instance': matched[0] if matched else instance,
            '服务器名': labels.get('nodename'),
            '分区': labels.get('mountpoint'),
            '分区使用率(%)': "%.2f" % usage,
            '时间': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value[0])),
            '状态': '正常' if usage < 80 else '异常',
        })
    return disklist


def disktosheet(url):
    """Build the disk sheet from a single instant query.

    :param url: base URL of the Prometheus server
    :return: DataFrame indexed from 1 with columns ``instance``, ``服务器名``,
        ``分区``, ``分区使用率(%)``, ``时间`` and ``状态``
    """
    expr = 'round((node_filesystem_size_bytes{fstype=~"xfs|ext4",mountpoint=~"/|/app"} - node_filesystem_free_bytes) / node_filesystem_size_bytes * 100 >1,0.01)'
    columns = ['instance', '服务器名', '分区', '分区使用率(%)', '时间', '状态']
    rows = diskresolv(query(url, expr))
    frame = pd.DataFrame(rows, columns=columns)
    # Number rows from 1 instead of pandas' default 0.
    frame.index = list(range(1, len(frame.index) + 1))
    return frame

if __name__ == '__main__':
    # Date-and-hour suffix for the workbook file name.
    stamp = time.strftime("-%Y%m%d-%H")
    url = 'http://55.163.17.110:30003'
    # Collect every sheet before opening the workbook, in the order they
    # are written.
    sheets = (
        ('cpu', cputosheet(url)),
        ('内存', memorytosheet(url)),
        ('磁盘', disktosheet(url)),
    )
    with pd.ExcelWriter('服务器状态{}.xlsx'.format(stamp)) as writer:
        for title, frame in sheets:
            frame.to_excel(writer, sheet_name=title)