jupyter进程管理

发布时间: 2023-08-24 17:15:49  作者: glowwormss

一、jupyter进程查看

import json
import os
import os.path
import posixpath
import subprocess
import numpy as np
import pandas as pd
import psutil
import requests
#import nvsmi

def get_running_notebooks(host, port, password=''):
    """
    Log in to a Jupyter server and list its running sessions.

    Args:
        host: Host name or IP address of the Jupyter server.
        port: Port the server listens on.
        password: Password submitted to the server's ``/login`` form.

    Returns:
        A list of dicts, one per session, shaped like
        ``{'kernel_id': ..., 'path': ..., 'last_activate': ...}``.
        ``last_activate`` carries the kernel's ``last_activity`` value exactly
        as the server reports it.

    Raises:
        KeyError: If the login page does not set an ``_xsrf`` cookie.
        requests.HTTPError: If ``/api/sessions`` answers with an error status
            (for example because the login was rejected).
    """
    base_url = f'http://{host}:{port}'
    login_url = f'{base_url}/login?next=%2F'

    # One Session so the login cookies are reused for the API call; the
    # context manager releases the pooled connections afterwards.
    with requests.Session() as session:
        # Fetching the login page yields the XSRF token the form expects.
        login_page = session.get(login_url)
        xsrf_cookie = login_page.cookies['_xsrf']

        # Password login; the resulting auth cookie is kept on the session.
        session.post(login_url, data={'_xsrf': xsrf_cookie, 'password': password})

        # Fail loudly on 4xx/5xx instead of trying to parse an error payload
        # as if it were the session list.
        response = session.get(f'{base_url}/api/sessions')
        response.raise_for_status()
        sessions = response.json()

    running = []
    for entry in sessions:
        # Sessions may lack the 'notebook' sub-object (presumably non-notebook
        # session types - verify against the server version in use); fall back
        # to the session-level 'path' in that case.
        notebook_info = entry.get('notebook') or {}
        running.append({
            'kernel_id': entry['kernel']['id'],
            'path': notebook_info.get('path', entry.get('path')),
            'last_activate': entry['kernel']['last_activity'],
        })
    return running

def get_process_id(name):
    """Return the PIDs printed by ``pgrep -f name`` as a list of strings.

    The list is empty when pgrep prints nothing.
    """
    completed = subprocess.run(['pgrep', '-f', name], stdout=subprocess.PIPE, shell=False)
    # pgrep prints one PID per line; decode once and split on whitespace.
    return completed.stdout.decode().split()

def memory_pct_psutil(pid=None):
    """Return the result of psutil's ``memory_percent()`` for process ``pid``.

    ``pid`` may be an int or a numeric string. Returns None when ``pid`` is
    falsy (None, 0, empty string).
    """
    if pid:
        return psutil.Process(int(pid)).memory_percent()
    return None

def memory_usage_psutil(pid=None):
    """Return the resident set size of process ``pid`` in GiB, rounded to 2 decimals.

    ``pid`` may be an int or a numeric string. Returns None when ``pid`` is
    falsy (None, 0, empty string).
    """
    if pid:
        rss_bytes = psutil.Process(int(pid)).memory_info().rss
        # bytes -> KiB -> MiB -> GiB
        return round(rss_bytes / 1024 / 1024 / 1024, 2)
    return None

def cpu_usage_psutil(pid=None, interval=0.1):
    """Return the CPU utilisation of process ``pid`` as a percentage.

    Args:
        pid: Process id as an int or numeric string. A falsy value
            (None, 0, empty string) yields None.
        interval: Seconds to block while sampling. psutil compares CPU times
            before and after this interval; with ``None`` the first call on a
            new ``Process`` object has nothing to compare against and returns
            a meaningless 0.0, which is what always happened here because a
            fresh ``Process`` is created on every call.

    Returns:
        The value of ``Process.cpu_percent(interval)``, or None for a falsy pid.
    """
    if not pid:
        return None
    process = psutil.Process(int(pid))
    return process.cpu_percent(interval=interval)


def show_notebooks_table(host, port, password=''):
    """
    Build a table of the OS processes behind the running Jupyter sessions.

    Args:
        host: Host name or IP address of the Jupyter server.
        port: Port the server listens on.
        password: Login password forwarded to ``get_running_notebooks``.

    Returns:
        A DataFrame with one row per (session, matching PID) and the columns:
        * kernel_id: kernel id of the session.
        * path: path reported for the session.
        * last_activate: last-activity value reported for the kernel.
        * pid: PID (string) found by ``get_process_id`` for the kernel id.
        * memory_used: value of ``memory_usage_psutil`` for the PID.
        * cpu_pct: value of ``cpu_usage_psutil`` for the PID.
        The frame is empty (but has all columns) when nothing is running.
    """
    notebooks = get_running_notebooks(host, port, password)

    # Naming the columns keeps the frame well-formed when there are no
    # sessions; an empty list would otherwise produce a column-less frame and
    # the kernel_id lookup below would fail.
    df = pd.DataFrame(notebooks, columns=['kernel_id', 'path', 'last_activate'])

    # Each kernel id may match zero, one or several PIDs: explode to one row
    # per PID, then drop the sessions for which no process was found.
    df['pid'] = df['kernel_id'].map(get_process_id)
    df = df.explode('pid', ignore_index=True)
    # copy() so the column assignments below act on an independent frame
    # rather than on a filtered view of the exploded one.
    df = df.dropna(subset=['pid']).copy()

    df['memory_used'] = df['pid'].map(memory_usage_psutil)
    df['cpu_pct'] = df['pid'].map(cpu_usage_psutil)
    return df

df = show_notebooks_table("localhost", 端口, password='密码')  # 可查看所有进程

二、杀死最后活动时间在一周之前的进程

# Kill every kernel process whose last activity is more than one week old.
#
# NOTE: the Jupyter configuration must set c.KernelManager.autorestart = False
# (presumably so that killed kernels are not restarted automatically).
# Replace the placeholders 端口 (port) and '密码' (password) before running.
import os
import signal
from datetime import datetime, timedelta, timezone

# Cutoff in UTC. NOTE(review): Jupyter reports last_activity as ISO 8601
# timestamps (e.g. '2023-08-24T09:15:49.123456Z'), so the cutoff must be
# time-zone aware and the comparison done on parsed timestamps; comparing
# against a local '%Y-%m-%d %H:%M:%S' string orders 'T' vs ' ' incorrectly.
now = datetime.now(timezone.utc)
one_week_ago = now - timedelta(days=7)

df = show_notebooks_table("localhost", 端口, password='密码')
pid_num = df.shape[0]
print("一共检测到进程数:" + str(pid_num))

last_active = pd.to_datetime(df['last_activate'], utc=True)
stale_pids = [int(pid) for pid in df.loc[last_active < one_week_ago, 'pid']]
print("超时进程一共: " + str(len(stale_pids)))

if stale_pids:
    print("开始清理超时进程")
    for stale_pid in stale_pids:
        try:
            # Same effect as `kill -9`, but valid outside IPython as well.
            os.kill(stale_pid, signal.SIGKILL)
        except ProcessLookupError:
            # The process exited between the listing and the kill.
            pass
        print(stale_pid)
else:
    print("无超时进程,无需清理")

# List the processes again to report how many remain after the cleanup.
df = show_notebooks_table("localhost", 端口, password='密码')
pid_num = df.shape[0]
print("清理后一共检测到进程数:" + str(pid_num))