利用 Python 实现 pip 源软件包同步到本地

发布时间: 2023-08-28 11:47:57 作者: 海东青_伟

1、首先根据 pip 源的 URL,将全部软件包名爬取出来。

import os
import requests
import re

# Step 1: fetch the mirror's "simple" index page and write every project
# name it links to into requirement.txt, one name per line.

# The timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops an HTTP error page from being parsed as an index.
report = requests.get('https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple/', timeout=60)  # url
report.raise_for_status()

directory = 'D:\\python\\pip.download\\Downloaded\\'  # output directory

# Create the output directory if it does not exist yet.
os.makedirs(directory, exist_ok=True)

file_path = os.path.join(directory, 'requirement.txt')

# Scan the whole document at once so that every anchor is captured even when
# several of them share one line. [^"] keeps a match inside a single href
# attribute, and "+" skips an empty name (e.g. a bare href="/").
package_names = re.findall(r'<a href="([^"]+)/">', report.text)

with open(file_path, 'w', encoding='utf-8') as f:
    f.writelines(name + '\n' for name in package_names)

2、根据爬取到的软件包名,将各软件包及其依赖下载下来,并分别存放到以包名命名的目录下。

import logging
import os
import subprocess

# Step 2: download every package named in the list file (plus its
# dependencies) into a per-package sub-directory of download_dir.
# Packages already recorded in downloaded_log_file are skipped, so the
# script can be re-run to resume an interrupted session.

file = "D:\\python\\pip.download\\Downloaded\\requirement.txt"  # package list file
download_dir = "D:\\python\\pip.download\\Downloaded\\web\\packages\\"  # download root
downloaded_log_file = "D:\\python\\pip.download\\logs\\downloaded_packages.txt"  # names of completed packages, one per line
error_log_file = "D:\\python\\pip.download\\logs\\download_errors.txt"  # download failure records
run_log_file = "D:\\python\\pip.download\\logs\\download.log"  # destination for logging output

# The log directory must exist before any of the files above is opened.
for log_path in (downloaded_log_file, error_log_file, run_log_file):
    os.makedirs(os.path.dirname(log_path), exist_ok=True)

# Logging gets its own file: downloaded_log_file is read back as a plain list
# of package names, so log records must never be written into it.
logging.basicConfig(filename=run_log_file, level=logging.INFO)

# Load the names of packages that were already downloaded. A set gives
# constant-time membership tests for the (potentially very long) list.
downloaded_packages = set()
if os.path.exists(downloaded_log_file):
    with open(downloaded_log_file, 'r') as f:
        downloaded_packages = {line.strip() for line in f if line.strip()}

# Read the list up front so the file is not held open during the downloads.
with open(file, 'r') as f:
    text = f.readlines()
total_packages = len(text)

for index, package_name in enumerate(text):
    package_name = package_name.strip()
    if not package_name:
        # A blank line would otherwise run "pip download" without a package.
        continue
    if package_name in downloaded_packages:
        print(f"包 {package_name} 已经下载过,跳过此包 ({index+1}/{total_packages})")
        continue

    package_dir = os.path.join(download_dir, package_name)
    os.makedirs(package_dir, exist_ok=True)

    # An argument list (no shell) keeps names and paths containing spaces or
    # shell metacharacters from being misinterpreted.
    download_command = [
        'pip', 'download', package_name,
        '-i', 'https://pypi.tuna.tsinghua.edu.cn/simple',
        '-d', package_dir,
    ]

    print(f"正在下载包 {package_name} ({index+1}/{total_packages})")
    try:
        # check=True raises CalledProcessError when pip exits with a non-zero
        # status; os.system() only returned the status, so failures were
        # silently recorded as successes.
        subprocess.run(download_command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # Record the failure in the run log ...
        logging.error(f"下载包 {package_name} 时出错:{str(e)}")
        # ... and in the dedicated error file.
        with open(error_log_file, 'a') as f:
            f.write(f"包 {package_name} 下载失败!错误信息:{str(e)}\n")
        print(f"无法下载包 {package_name}。错误信息:{str(e)}")
        continue

    print(f"包 {package_name} 下载成功!")
    # Persist the name immediately so an interrupted run can resume here.
    with open(downloaded_log_file, 'a') as f:
        f.write(package_name + '\n')
    # Also remember it in memory in case the list contains duplicates.
    downloaded_packages.add(package_name)

欢迎点评!