python - 批量压缩word图片

发布时间: 2023-12-19 21:22:36 作者: wstong

主要分为 3 个步骤:提取图片、压缩图片、替换图片。需要用到 python-docx 和 Pillow 两个库。

1. 提取图片

import docx

docName = "test.docx"
imageIndex = 0

# Step 1: dump every embedded image of the document to
# "<docName>-<index>.jpg" in the current directory. After the loop,
# imageIndex holds the number of files written.
document = docx.Document(docName)
for rel in document.part.rels.values():
    # Linked (external) images have no part inside the package: reading
    # rel.target_part on them raises ValueError, which hasattr() does not
    # swallow (it only catches AttributeError). Filter on is_external.
    if "image" not in rel.reltype or rel.is_external:
        continue
    with open(f"{docName}-{imageIndex}.jpg", "wb") as f:
        f.write(rel.target_part.blob)
    imageIndex += 1

2. 压缩图片

from PIL import Image


def compress_image(file_path, target_size=64, step=10, quality=80):
    """Re-encode the image at *file_path* in place until it is small enough.

    The file is rewritten with decreasing ``quality`` until its size drops
    below ``target_size`` or the quality cannot be lowered by another
    ``step`` without going negative. A file that is already below the target
    is left untouched.

    Args:
        file_path: Path of the image file; it is overwritten in place.
        target_size: Size threshold in KiB (file size in bytes / 1024).
        step: Amount subtracted from ``quality`` after each attempt that is
            still too large. Must be positive.
        quality: Quality passed to ``Image.save`` for the first attempt.

    Raises:
        ValueError: If ``step`` is not positive (the loop could never end).
    """
    # Imported here because this snippet only imports PIL at module level.
    import os

    if step <= 0:
        raise ValueError("step must be a positive number")

    if os.path.getsize(file_path) / 1024 < target_size:
        return

    # Decode once and detach from the file: every attempt below re-encodes
    # the original pixels rather than the previous lossy output, and the
    # file handle is released before the path is overwritten.
    with Image.open(file_path) as im:
        pixels = im.copy()

    # Image.save picks the output format from the extension. The JPEG writer
    # rejects modes such as RGBA or P (typical for PNG data stored under a
    # .jpg name), so convert those to RGB first; any alpha channel is dropped.
    extension = os.path.splitext(os.fspath(file_path))[1].lower()
    jpeg_modes = ("1", "L", "RGB", "CMYK", "YCbCr")
    if extension in (".jpg", ".jpeg", ".jpe", ".jfif") and pixels.mode not in jpeg_modes:
        pixels = pixels.convert("RGB")

    while True:
        pixels.save(file_path, quality=quality)
        if os.path.getsize(file_path) / 1024 < target_size:
            break
        if quality - step < 0:
            break
        quality -= step


# Step 2 driver: compress each file written by the extraction step, in the
# same order in which it was extracted.
extracted_paths = [f"{docName}-{index}.jpg" for index in range(imageIndex)]
for extracted_path in extracted_paths:
    compress_image(extracted_path)

3. 替换图片

import os

# Step 3: walk the image relationships in the same order as the extraction
# step, load each compressed file back into its image part, delete the
# temporary file, and save the result as a new document.
imageIndex = 0
for rel in document.part.rels.values():
    # Same filter as the extraction step so the indices line up. Linked
    # (external) images are skipped: rel.target_part raises ValueError for
    # them, which hasattr() does not swallow.
    if "image" not in rel.reltype or rel.is_external:
        continue
    image_path = f"{docName}-{imageIndex}.jpg"
    with open(image_path, "rb") as f:
        image_data = f.read()
    # NOTE(review): the part's public `blob` appears to be read-only, hence
    # the write through the private `_blob` attribute - confirm on upgrade.
    rel.target_part._blob = image_data
    os.remove(image_path)
    imageIndex += 1
document.save("test1.docx")

4. 完整代码

import docx
import os
from PIL import Image


def compress_image(file_path, target_size=64, step=10, quality=80):
    """Re-encode the image at *file_path* in place until it is small enough.

    The file is rewritten with decreasing ``quality`` until its size drops
    below ``target_size`` or the quality cannot be lowered by another
    ``step`` without going negative. A file that is already below the target
    is left untouched.

    Args:
        file_path: Path of the image file; it is overwritten in place.
        target_size: Size threshold in KiB (file size in bytes / 1024).
        step: Amount subtracted from ``quality`` after each attempt that is
            still too large. Must be positive.
        quality: Quality passed to ``Image.save`` for the first attempt.

    Raises:
        ValueError: If ``step`` is not positive (the loop could never end).
    """
    if step <= 0:
        raise ValueError("step must be a positive number")

    if os.path.getsize(file_path) / 1024 < target_size:
        return

    # Decode once and detach from the file: every attempt below re-encodes
    # the original pixels rather than the previous lossy output, and the
    # file handle is released before the path is overwritten.
    with Image.open(file_path) as im:
        pixels = im.copy()

    # Image.save picks the output format from the extension. The JPEG writer
    # rejects modes such as RGBA or P (typical for PNG data stored under a
    # .jpg name), so convert those to RGB first; any alpha channel is dropped.
    extension = os.path.splitext(os.fspath(file_path))[1].lower()
    jpeg_modes = ("1", "L", "RGB", "CMYK", "YCbCr")
    if extension in (".jpg", ".jpeg", ".jpe", ".jfif") and pixels.mode not in jpeg_modes:
        pixels = pixels.convert("RGB")

    while True:
        pixels.save(file_path, quality=quality)
        if os.path.getsize(file_path) / 1024 < target_size:
            break
        if quality - step < 0:
            break
        quality -= step


# Complete pipeline: for each embedded image, write it to a scratch file,
# compress that file in place, and load the result back into the image part.
# imageIndex counts the images written to the scratch file.
imageIndex = 0
docName = "test.docx"
document = docx.Document(docName)
try:
    for rel in document.part.rels.values():
        # Linked (external) images have no part inside the package:
        # rel.target_part raises ValueError for them, which hasattr() does
        # not swallow, so filter on is_external instead.
        if "image" not in rel.reltype or rel.is_external:
            continue

        with open("tmp.jpg", "wb") as f:
            f.write(rel.target_part.blob)
        imageIndex += 1

        compress_image("tmp.jpg")

        with open("tmp.jpg", "rb") as f:
            image_data = f.read()
        # NOTE(review): the part's public `blob` appears to be read-only,
        # hence the write through the private `_blob` - confirm on upgrade.
        rel.target_part._blob = image_data
finally:
    # Only remove the scratch file if this run created it; a document with
    # no embedded images never writes tmp.jpg, and an unconditional remove
    # would raise FileNotFoundError. Cleaning up here also covers failures
    # part-way through the loop.
    if imageIndex:
        os.remove("tmp.jpg")
document.save("test1.docx")