Python 实现 PDF 表格转 Excel

发布时间:2023-04-20 15:16:05 作者:你若愿意,我一定去

pip install pdfplumber -i https://pypi.tuna.tsinghua.edu.cn/simple

pip install tqdm -i https://pypi.tuna.tsinghua.edu.cn/simple

pip install openpyxl -i https://pypi.tuna.tsinghua.edu.cn/simple

 

import pdfplumber
from openpyxl import Workbook
from tqdm import tqdm
# Input PDF to convert. The path can also be built from a data folder, e.g.
# file_name = data_folder + '<document>.pdf'
file_name = r'C:\Users\mm\Desktop\123.pdf'
# Output workbook. openpyxl writes the OOXML format, so the extension must be
# .xlsx; a .xls name makes Excel report a format/extension mismatch.
data_name = r'C:\Users\mm\Desktop\123.xlsx'

def analysis_table(pdf_file_path, excel_file_path=None):
    """Copy the table found on each page of a PDF into one Excel worksheet.

    Every page of the PDF is scanned with ``page.extract_table()``. Each
    extracted row is appended to the active worksheet with its first column
    dropped (``row[1:]``). Pages without a table are skipped, and a page that
    fails to process is reported and skipped, so one bad page does not discard
    the remaining pages.

    Args:
        pdf_file_path: Path of the PDF file to read.
        excel_file_path: Path of the workbook to write. When omitted, the
            module-level ``data_name`` is used.
    """
    if excel_file_path is None:
        excel_file_path = data_name

    # Create the output workbook and write into its default sheet.
    workbook = Workbook()
    sheet = workbook.active

    with pdfplumber.open(pdf_file_path) as pdf:
        for page_number, page in enumerate(tqdm(pdf.pages), start=1):
            try:
                table = page.extract_table()
                # extract_table() returns None when the page has no table.
                if not table:
                    continue
                for row in table:
                    sheet.append(row[1:])
            except Exception as exc:
                # Best effort: report the failing page and keep going.
                # tqdm.write prints without corrupting the progress bar.
                tqdm.write(f"Skipping page {page_number}: {exc!r}")
                continue

    workbook.save(filename=excel_file_path)

# Module-level call: converts the PDF at file_name as soon as this file is
# run (or imported).
analysis_table(file_name)