from docx.document import Document as _Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.table import _Cell, Table, _Row
from docx.text.paragraph import Paragraph
import docx

path = './test.docx'
doc = docx.Document(path)


def iter_block_items(parent):
    """Yield the paragraphs and tables found directly under *parent*.

    Args:
        parent: A ``_Document``, ``_Cell`` or ``_Row`` object. The XML element
            whose children are scanned is ``parent.element.body``,
            ``parent._tc`` or ``parent._tr`` respectively.

    Yields:
        A ``Paragraph`` for every ``CT_P`` child and a ``Table`` for every
        ``CT_Tbl`` child, in the order ``iterchildren()`` produces them.
        Children of any other element type are skipped.

    Raises:
        ValueError: If *parent* is none of the supported types. Because this
            is a generator, the error surfaces on first iteration, not at
            call time.
    """
    # Checked in this order; the first matching type decides which element
    # is used as the container.
    element_getters = (
        (_Document, lambda owner: owner.element.body),
        (_Cell, lambda owner: owner._tc),
        (_Row, lambda owner: owner._tr),
    )
    for owner_type, get_element in element_getters:
        if isinstance(parent, owner_type):
            container = get_element(parent)
            break
    else:
        raise ValueError("something's not right")

    # NOTE(review): for a _Row the direct children are presumably cell
    # elements rather than CT_P/CT_Tbl, in which case nothing is yielded --
    # confirm against python-docx before relying on the _Row branch.
    for element in container.iterchildren():
        if isinstance(element, CT_P):
            wrapper = Paragraph
        elif isinstance(element, CT_Tbl):
            wrapper = Table
        else:
            continue
        yield wrapper(element, parent)


# Walk the document body and print one line per block.
for block in iter_block_items(doc):
    if isinstance(block, Table):
        # Tables: print the name of the table's style.
        print(block.style.name)
    elif isinstance(block, Paragraph):
        # Paragraphs: print the paragraph text.
        print(block.text)
关于 Python - 如何使用 Python 迭代读取 Word 中的段落、表格和图片？我们在 Stack Overflow 上找到一个类似的问题： https://stackoverflow.com/questions/55451762/