Python - PDF 简单操作

发布时间: 2023-03-28 23:52:28　作者: wstong

1. 安装PyPDF2

pip3 install PyPDF2

2. 分割

from PyPDF2 import PdfReader, PdfWriter
# Split a PDF into one file per page.
# Reads the source path from stdin; page k (0-based) is written to '<k>.pdf'
# in the current working directory.
source_path = input()
reader = PdfReader(source_path)
for index, page in enumerate(reader.pages):
    # A fresh writer per page so each output file holds exactly one page.
    single_page_writer = PdfWriter()
    single_page_writer.add_page(page)
    with open(f'{index}.pdf', 'wb') as target:
        single_page_writer.write(target)

3. 合并

from PyPDF2 import PdfReader, PdfWriter
# Merge '0.pdf', '1.pdf', ..., '<count-1>.pdf' (in that order) into one file.
# The number of input files is read from stdin as an integer.
merged_writer = PdfWriter()
file_count = int(input())
for index in range(file_count):
    # Append every page of the current input file, preserving page order.
    for page in PdfReader(f'{index}.pdf').pages:
        merged_writer.add_page(page)
with open('合并.pdf', 'wb') as target:
    merged_writer.write(target)

4. 删除指定页面

import os
from PyPDF2 import PdfReader, PdfWriter
# Remove the listed pages from a PDF and write the result to 'out.pdf'.
# stdin line 1: path of the source PDF.
# stdin line 2: 1-based page numbers to delete, separated by spaces.
file = input()
# Parse to integers up front: "01" and "1" then mean the same page, a
# non-numeric token raises ValueError before any file is touched, and the
# set gives constant-time membership tests in the loop below.
delete_pages = {int(token) for token in input().split()}
pdf_reader = PdfReader(file)
pdf_writer = PdfWriter()
for page_number, page in enumerate(pdf_reader.pages, start=1):
    if page_number in delete_pages:
        continue
    pdf_writer.add_page(page)
with open('out.pdf','wb') as out:
    pdf_writer.write(out)
# Delete the original file, unless the source *is* out.pdf: removing it in
# that case would throw away the result that was just written.
if not os.path.samefile(file, 'out.pdf'):
    os.remove(file)

5. 按照文件名中的数字大小排序后合并

import os
from PyPDF2 import PdfReader, PdfWriter
from functools import cmp_to_key

def cmp(x, y):
    """Compare two '<number>.pdf' file names by their integer stem.

    Returns a negative, zero, or positive number in the form expected by
    functools.cmp_to_key. Raises ValueError if either stem (the name without
    its last four characters) is not an integer.
    """
    return int(x[0:-4]) - int(y[0:-4])


def _is_numbered_pdf(name):
    """Return True if *name* ends with '.pdf' and its stem parses as an int."""
    if not name.endswith('.pdf'):
        return False
    try:
        int(name[0:-4])
    except ValueError:
        return False
    return True


# Merge every numerically named PDF in the current directory, ordered by the
# number in its file name, into 'out.pdf'.
pdf_writer = PdfWriter()
# Skip PDFs whose name is not a number (for example an 'out.pdf' left over
# from a previous run); cmp() would otherwise raise ValueError on them.
pdf_list = [x for x in os.listdir() if _is_numbered_pdf(x)]
pdf_list = sorted(pdf_list, key=cmp_to_key(cmp))

for name in pdf_list:
    pdf_reader = PdfReader(name)
    for page in pdf_reader.pages:
        pdf_writer.add_page(page)
with open('out.pdf','wb') as out:
    pdf_writer.write(out)