Python提取Word中图片的实现步骤

发布时间: 2023-09-06 15:28:29 作者: 祺琪
# Print the name of every picture found in the runs of the document body.
# `doc` is expected to be an already-opened python-docx Document.
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        # A run's XML starts with its own <w:r> tag, so checking whether it
        # starts with '<w:drawing' never matches, and Run objects have no
        # `inline_shapes` attribute. Search the run's XML for nested picture
        # elements instead.
        for pic in run.element.xpath('.//pic:pic'):
            image = pic.nvPicPr.cNvPr.name
            print(image)
获取图片二进制数据

from docx.shared import Inches

# Save the binary data of every embedded picture found in the document body.
# `doc` is expected to be an already-opened python-docx Document.
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        # A run's XML starts with its own <w:r> tag, so checking whether it
        # starts with '<w:drawing' never matches; search the run's XML for
        # nested picture elements instead.
        for pic in run.element.xpath('.//pic:pic'):
            image = pic.nvPicPr.cNvPr.name
            blip = pic.blipFill.blip
            # The <a:blip> element is only a reference. Pictures that are
            # linked rather than embedded carry no r:embed id, so there are
            # no bytes stored in the package for them.
            if blip is None or blip.embed is None:
                continue
            # Resolve the relationship id to the image part to get the bytes.
            image_data = doc.part.related_parts[blip.embed].blob
            # The bytes are written unchanged; the '.png' suffix is kept from
            # the original snippet regardless of the embedded image format.
            with open(f"{image}.png", 'wb') as f:
                f.write(image_data)
如果你需要从letter.docx文档中提取所有图片数据,可以使用以下代码实现。

import docx
from docx.shared import Inches

# Extract every embedded picture from letter.docx into the current directory.
doc = docx.Document('letter.docx')

for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        # A run's XML starts with its own <w:r> tag, so checking whether it
        # starts with '<w:drawing' never matches; search the run's XML for
        # nested picture elements instead.
        for pic in run.element.xpath('.//pic:pic'):
            image = pic.nvPicPr.cNvPr.name
            blip = pic.blipFill.blip
            # The <a:blip> element is only a reference. Pictures that are
            # linked rather than embedded carry no r:embed id, so there are
            # no bytes stored in the package for them.
            if blip is None or blip.embed is None:
                continue
            # Resolve the relationship id to the image part to get the bytes.
            image_data = doc.part.related_parts[blip.embed].blob
            # The bytes are written unchanged; the '.png' suffix is kept from
            # the original snippet regardless of the embedded image format.
            with open(f"{image}.png", 'wb') as f:
                f.write(image_data)
如果你只需要提取某一个特定的Word文档中的图片,可以通过修改文档名称和图片名称信息,使用以下代码解决。

import docx
from docx.shared import Inches

# Extract only the picture named 'image.png' from example.docx.
doc = docx.Document('example.docx')

for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        # A run's XML starts with its own <w:r> tag, so checking whether it
        # starts with '<w:drawing' never matches; search the run's XML for
        # nested picture elements instead.
        for pic in run.element.xpath('.//pic:pic'):
            image = pic.nvPicPr.cNvPr.name
            # Skip every picture except the one requested by name.
            if image != 'image.png':
                continue
            blip = pic.blipFill.blip
            # The <a:blip> element is only a reference. Pictures that are
            # linked rather than embedded carry no r:embed id, so there are
            # no bytes stored in the package for them.
            if blip is None or blip.embed is None:
                continue
            # Resolve the relationship id to the image part to get the bytes.
            image_data = doc.part.related_parts[blip.embed].blob
            # Output file name is kept exactly as in the original snippet.
            with open(f"{image}.png", 'wb') as f:
                f.write(image_data)