python-docx删除文档部分内容

发布时间: 2023-12-20 14:21:50 作者: Swlymbcty
 1 from docx.document import Document as _Document
 2 from docx.oxml.text.paragraph import CT_P
 3 from docx.oxml.table import CT_Tbl
 4 from docx.table import _Cell, Table
 5 from docx.text.paragraph import Paragraph
 6 
 7 
 8 def word_cut(document):
 9     def iter_block_items(parent):
10         if isinstance(parent, _Document):
11             parent_elm = parent.element.body
12         elif isinstance(parent, _Cell):
13             parent_elm = parent._tc
14         else:
15             raise ValueError("something's not right")
16         for child in parent_elm.iterchildren():
17             if isinstance(child, CT_P):
18                 yield Paragraph(child, parent)
19             elif isinstance(child, CT_Tbl):
20                 yield Table(child, parent)
21     count = 0
22     flag = True
23     for block in iter_block_items(document):
24         count += 1
25         if isinstance(block, Paragraph):
26             if flag:
27                 if count == 84:
28                     flag = False
29             else:
30                 p = block._element
31                 p.getparent().remove(p)
32 
33         elif isinstance(block, Table):
34             if not flag:
35                 p = block._element
36                 # 删除表格
37                 p.getparent().remove(p)
38 
39     return document