A Python task manager with bounded capacity and bounded parallelism, built on stoppable threading-based threads

Published 2023-08-29 14:31:05  Author: 薄书

This is a job manager with stoppable threads, bounded capacity, and bounded parallelism.

Based on: GitHub - AlitaIcon/StopableThreadJob: a stoppable-thread job manager

Quick Start

Basic usage and effect

import time
import datetime
from loguru import logger

from StopableThreadJob.job_manager import JobManager

if __name__ == '__main__':
    def slow_func(name):
        for i in range(5):
            logger.info(f"{name} -- {datetime.datetime.now()}")
            time.sleep(1)


    job_manager = JobManager()
    # remove a job that was never added
    job_manager.remove_job('2')
    for pid in range(6):
        logger.info(f"adding job: {pid}")
        job_manager.add_job(target=slow_func, args=(pid,), job_id=f'{pid}')
    time.sleep(1)
    job_manager.start_job()
    # remove a job that has been added and is currently running
    job_manager.remove_job('1')
    # remove a job that has been added but is not computing yet
    job_manager.remove_job('4')
    time.sleep(5)
    # remove a job that has already finished
    job_manager.remove_job('0')
    job_manager.print_current_job()
    print(job_manager.job_store)
    for i in [0, 1, 2, 4]:
        logger.info(f"添加任务: {i}")
        job_manager.add_job(target=slow_func, args=(i,), job_id=f'{i}')
    job_manager.print_current_job()
    job_manager.start_job()
    time.sleep(6)
    print(job_manager.job_store)
    job_manager.print_current_job()
    time.sleep(30)
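The trailing time.sleep calls only keep the main thread around long enough for the daemon worker threads to finish. As a small variation, you could poll the manager instead of sleeping for a fixed time. This is just a sketch built on the job_store and print_current_job APIs shown below; wait_for_jobs is a hypothetical helper, not part of the library.

import time


def wait_for_jobs(job_manager, timeout=60, poll_interval=1):
    # Wait until every job has finished (job_store empties itself because
    # inner_job pops finished jobs) or until the timeout expires.
    deadline = time.time() + timeout
    while job_manager.job_store and time.time() < deadline:
        job_manager.print_current_job()  # logs {job_id: is_alive}
        time.sleep(poll_interval)
    return not job_manager.job_store  # True if everything finished in time


# e.g. replace the final time.sleep(30) with:
# wait_for_jobs(job_manager, timeout=60)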

File: job_manager

import ctypes
import threading
from loguru import logger


class TerminableThread(threading.Thread):
    """
    A thread that can be stopped by forcing an exception into its execution context.
    """

    def terminate(self, exception_cls, repeat_sec=2.0):
        # Already finished: nothing to stop.
        if not self.is_alive():
            return True
        killer = ThreadKiller(self, exception_cls, repeat_sec=repeat_sec)
        killer.start()


class ThreadKiller(threading.Thread):
    """
    Separate thread that keeps injecting an exception into a TerminableThread until it dies.
    """

    def __init__(self, target_thread, exception_cls, repeat_sec=2.0):
        threading.Thread.__init__(self)
        self.target_thread = target_thread
        self.exception_cls = exception_cls
        self.repeat_sec = repeat_sec
        self.daemon = True

    def run(self):
        """loop raising exception incase it's caught hopefully this breaks us far out"""
        while self.target_thread.is_alive():
            ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(self.target_thread.ident),
                                                       ctypes.py_object(self.exception_cls))
            self.target_thread.join(self.repeat_sec)

# Custom exception type used to signal a forced stop:
class StopRunningCommand(Exception):
    pass

class JobManagerID:
    """
    Pool of job IDs, used to initialize the list of job IDs.
    """
    def __init__(self, pool_size=5):
        self.pid_list = list(range(pool_size))

    def list_move(self):
        # Rotate pid_list one position to the left, i.e. move the first item to the end.
        self.pid_list = self.pid_list[1:] + self.pid_list[:1]

# Main job-scheduling class
class JobManager:

    def __init__(self, semaphore=2):
        """
        :param semaphore: number of jobs in the pool allowed to compute in parallel
        """
        self.job_store = {}
        self.job_lock = threading.RLock()
        self.semaphore = threading.Semaphore(semaphore)

    def add_job(self, job_id, target, *args, **kwargs):
        # Add a job with the given ID
        def inner_job(*args, **kwargs):
            try:
                self.semaphore.acquire()
                ret = target(*args, **kwargs)
                print(f"{job_id} is finished.")
                return ret
            except StopRunningCommand as e:
                print(f"{job_id} has been stopped.")
            except Exception:
                print(f"{job_id} failed.")
                raise
            finally:
                if job_id in self.job_store:
                    self.job_store.pop(job_id)  # drop the job from job_store once it is done
                self.semaphore.release()

        with self.job_lock:
            # Extra args/kwargs (e.g. args=(...)) are forwarded to the Thread constructor.
            t = TerminableThread(target=inner_job, *args, **kwargs)
            t.daemon = True
            # if job_id in self.job:
            #     self.job[job_id].terminate(StopRunningCommand)
            self.job_store[job_id] = t
        return self.job_store[job_id]

    def remove_job(self, job_id):
        # Remove (terminate) the job with the given ID
        with self.job_lock:
            if job_id in self.job_store:
                self.job_store[job_id].terminate(StopRunningCommand)

    def start_job(self):
        # Start every job in the pool. Iterate over a snapshot of the store:
        # a fast job may finish and pop itself from job_store before this loop
        # ends, and mutating the dict during iteration would raise an error.
        with self.job_lock:
            for j, t in list(self.job_store.items()):
                if not t.is_alive():
                    t.start()

    def start_job_id(self, pid):
        # Start the job with the given ID
        with self.job_lock:
            if not self.job_store[pid].is_alive():
                self.job_store[pid].start()

    def job_start(self, pid):
        # Return the current state of the job with the given ID; True means it is computing
        return self.job_store[pid].is_alive()
        
    def print_current_job(self):
        # Log the current state of every job in the pool; True means the job is computing
        info = {jid: t.is_alive() for jid, t in self.job_store.items()}
        logger.info(info)
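The stopping mechanism relies on ctypes.pythonapi.PyThreadState_SetAsyncExc, which schedules an exception to be raised the next time the target thread executes Python bytecode (it cannot interrupt a thread that is blocked inside a long C call). A minimal sketch of using TerminableThread directly, outside of JobManager, might look like this; count_forever is an illustrative function, not part of the library:

import time
from loguru import logger

from StopableThreadJob.job_manager import TerminableThread, StopRunningCommand


def count_forever(label):
    # Runs until an exception is injected into this thread.
    try:
        i = 0
        while True:
            logger.info(f"{label}: tick {i}")
            i += 1
            time.sleep(1)
    except StopRunningCommand:
        logger.info(f"{label}: stopped")


t = TerminableThread(target=count_forever, args=("demo",), daemon=True)
t.start()
time.sleep(3)
# Ask a ThreadKiller to keep injecting StopRunningCommand until the thread dies.
t.terminate(StopRunningCommand)
t.join(timeout=5)
logger.info(f"alive after terminate: {t.is_alive()}")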

To make job execution asynchronous and parallel, define the following helper function:

import time
import datetime

from StopableThreadJob.job_manager import *

job_manager = JobManager(semaphore=4)
job_manager_list = JobManagerID(pool_size=5)
# pool_size sets the capacity of the job pool
# semaphore sets the parallelism, i.e. the number of jobs in the pool that may compute at the same time


def job():
    def slow_func(name):
        for i in range(10):
            logger.info(f"{name} -- {datetime.datetime.now()}")
            time.sleep(1)
            
    pid = job_manager_list.pid_list[0]
    # If this slot's previous job is still in the pool, terminate it first (eviction).
    if pid in job_manager.job_store:
        job_manager.remove_job(pid)
    job_manager.add_job(target=slow_func, args=(pid,), job_id=pid)
    job_manager.start_job_id(pid)
    # Rotate the ID pool so the next call reuses the oldest slot.
    job_manager_list.list_move()

  • pool_size sets the capacity of the job pool
  • semaphore sets the parallelism, i.e. the number of jobs in the pool that may compute at the same time

This yields a compute module with a bounded job pool and bounded parallelism.

When the pool is full, a new job evicts the oldest job from the pool.

All jobs queue in the pool, and the parallelism setting decides how many compute at the same time. A usage sketch follows.
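Calling the helper more times than the pool has slots exercises the eviction behaviour. This is only a rough sketch; the loop count and sleep interval are illustrative, not from the original:

# With pool_size=5, the 6th call reuses slot 0: the old job 0 is terminated
# (evicted) and a fresh job 0 takes over that slot. At most semaphore=4 of the
# queued jobs hold the semaphore and actually compute at any moment.
for _ in range(8):
    job()
    time.sleep(1)

job_manager.print_current_job()  # shows which slots currently hold live jobs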