python-docx的简单使用

发布时间:2023-05-03 08:32:04 作者:狂自私
def 设置表格网格线为黑色实线(table_object:object):
    """Give a table 0.5 pt solid black borders on every outer and inner edge.

    One ``w:tblBorders`` element is built and attached to the table's
    properties element (``table_object._tbl.tblPr``).

    Args:
        table_object: Object exposing ``_tbl.tblPr``; in this file it is the
            value returned by ``Document.add_table``.

    Returns:
        The same ``table_object``. Using the return value is optional.
    """
    qn = docx.oxml.ns.qn
    # Every edge shares the same attributes; sz=4 is the 0.5 pt width
    # (the unit is eighths of a point).
    # NOTE(review): the schema's hex colour type has no leading '#'; the value
    # is kept exactly as before - confirm target word processors accept it.
    edge_attrs = {"sz": 4, "val": "single", "color": "#000000"}

    tbl_pr = table_object._tbl.tblPr
    # Remove borders left by an earlier call so the table never ends up with
    # two w:tblBorders elements.
    for stale in tbl_pr.findall(qn('w:tblBorders')):
        tbl_pr.remove(stale)

    borders = docx.oxml.OxmlElement('w:tblBorders')
    # Edges are emitted in the order the WordprocessingML schema lists them.
    for tag in ('top', 'left', 'bottom', 'right', 'insideH', 'insideV'):
        edge = docx.oxml.OxmlElement(f'w:{tag}')
        for key, value in edge_attrs.items():
            edge.set(qn(f'w:{key}'), str(value))
        borders.append(edge)

    # Attach once, after all edges exist (previously re-appended per edge).
    tbl_pr.append(borders)
    return table_object
def 设置标题样式为黑色宋体(heading_object:object):
    """Left-align a heading and render every run in black SimSun (宋体).

    Args:
        heading_object: Paragraph-like object with ``alignment`` and ``runs``;
            in this file it is the value returned by ``Document.add_heading``.

    Returns:
        The same ``heading_object``. Using the return value is optional.
    """
    heading_object.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.LEFT  # left-aligned
    east_asia = docx.oxml.ns.qn('w:eastAsia')
    for run in heading_object.runs:
        run.font.name = u'宋体'
        # font.name alone does not fill the East Asian font slot, so Chinese
        # text would keep the style's font; set w:eastAsia explicitly.
        # Assigning font.name first guarantees rPr/rFonts exist.
        run._element.rPr.rFonts.set(east_asia, u'宋体')
        run.font.color.rgb = docx.shared.RGBColor(0,0,0)  # black
    return heading_object
def word(保存路径:str=r"C:\Users\gyj\Downloads\temp.docx"):
    """Write the module-level ``翻译结果`` dict to a .docx file.

    Layout: the NAME as a level-2 heading; a "描述" section holding the
    SYNOPSIS and DESCRIPTION text, one paragraph per line; a "参数" section
    with a two-column table (parameter name, description); and a "示例"
    section with one level-4 heading plus paragraphs per example.

    Args:
        保存路径: Where the document is saved. Defaults to the path that was
            previously hard-coded.
    """
    my_word_doc = docx.Document()   # start from a blank document

    def 添加标题(text, level):
        # Add a heading and apply the shared black-SimSun heading style.
        设置标题样式为黑色宋体(my_word_doc.add_heading(text, level=level))

    添加标题(翻译结果["NAME"], 2)
    添加标题("描述", 3)
    for section in ("SYNOPSIS", "DESCRIPTION"):
        for line in 翻译结果[section].split("\n"):
            my_word_doc.add_paragraph(line)

    添加标题("参数", 3)
    parameters = 翻译结果["PARAMETERS"]
    table = my_word_doc.add_table(rows=len(parameters), cols=2)
    设置表格网格线为黑色实线(table)
    left = docx.enum.text.WD_PARAGRAPH_ALIGNMENT.LEFT
    for row_index, (key, value) in enumerate(parameters.items()):
        for column, text in enumerate((key, value)):
            cell = table.cell(row_index, column)
            # Newlines are dropped, as the earlier line-by-line "+=" did,
            # but the cell is now written once instead of once per line.
            cell.text = text.replace("\n", "")
            for paragraph in cell.paragraphs:
                paragraph.alignment = left

    添加标题("示例", 3)
    for key, value in 翻译结果["Example"].items():
        # Titles normally end with a newline (kept by the parser, appended by
        # the translator). Strip only that, rather than cutting whatever the
        # last character happens to be.
        添加标题(key.rstrip("\n"), 4)
        for line in value.split("\n"):
            my_word_doc.add_paragraph(line)
    my_word_doc.save(保存路径)

实际上,我是用它把 PowerShell cmdlet 的帮助文本(txt 文件)转换成 Word 文档的,其中还带有翻译功能。这里简单记录一下。

  1 import docx #pip install python-docx
  2 import re
  3 import json
  4 import requests
  5 import time
  6 import hashlib
  7 from urllib.parse import urlencode
  8 import random
  9 import copy
 10 #字典形式
 11 帮助文件解析结果={"NAME":"","SYNOPSIS":"","DESCRIPTION":"","PARAMETERS":{},"Example":{}};
 12 翻译结果={"NAME":"","SYNOPSIS":"","DESCRIPTION":"","PARAMETERS":{},"Example":{}};
 13 '''
 14 
 15 '''
 16 def 解析PowerShell命令的帮助文本(文件路径:str):
 17     with open(文件路径,mode='r',encoding='utf-8') as fd_help:
 18         line = fd_help.readline();
 19         while line:
 20             if(line.strip() == "NAME"):
 21                 line = fd_help.readline()
 22                 line = line.strip()
 23                 帮助文件解析结果['NAME'] = line
 24             elif(line.strip() == "SYNOPSIS"):
 25                 line = fd_help.readline()
 26                 while line[0:4]=="    ":
 27                     帮助文件解析结果['SYNOPSIS'] += line.lstrip()
 28                     line = fd_help.readline()
 29             elif(line.strip() == "DESCRIPTION"):
 30                 line = fd_help.readline()
 31                 while line[0:4]=="    ":
 32                     帮助文件解析结果['DESCRIPTION'] += line.lstrip()
 33                     line = fd_help.readline()
 34             elif(line.strip() == "PARAMETERS"):
 35                 line = fd_help.readline()
 36                 while line[0:4]=="    ":
 37                     参数名 = line.lstrip()
 38                     帮助文件解析结果['PARAMETERS'][参数名] = ""
 39                     line = fd_help.readline()
 40                     while line[0:8]=="        ":
 41                         帮助文件解析结果['PARAMETERS'][参数名] += line.lstrip()
 42                         line = fd_help.readline()
 43                     if(len(re.findall(r'^\s*-*\s*Example',line))):
 44                         break
 45             elif(len(re.findall(r'^\s*-*\s*Example',line))):
 46                 temp=re.sub(r"^\s*-*\s*([\w,\.: ]+) *-*",r"\1",line)
 47                 temp.rstrip()
 48                 帮助文件解析结果["Example"][temp]="";
 49                 line = fd_help.readline()
 50                 while line:
 51                     if(len(re.findall(r'^\s*-*\s*Example',line))):
 52                         temp=re.sub(r"^\s*-*\s*([\w,\.: ]+) *-*",r"\1",line)
 53                         temp.rstrip()
 54                         帮助文件解析结果["Example"][temp]="";
 55                         line = fd_help.readline()
 56                     while not (len(re.findall(r'^\s*-*\s*Example',line))) and line:
 57                         帮助文件解析结果["Example"][temp]+=line.lstrip()
 58                         line = fd_help.readline()
 59             else:
 60                 line = fd_help.readline()
 61     #检查key=""的情况,并删除这样的key
 62     if("" in 帮助文件解析结果):
 63         del 帮助文件解析结果[""]
 64     if("" in 帮助文件解析结果["PARAMETERS"]):
 65         del 帮助文件解析结果["PARAMETERS"][""]
 66     if("" in 帮助文件解析结果["Example"]):
 67         del 帮助文件解析结果["Example"][""]
 68 
 69 # 本文件是通过请求有道翻译,去获取翻译结果
 70 '''
 71 i: 你好
 72 from: AUTO
 73 to: AUTO
 74 smartresult: dict
 75 client: fanyideskweb
 76 salt: 16643765479061    //毫秒级别的时间戳后面加上个0-9之间的随机数,js代码:r + parseInt(10 * Math.random(), 10);这里的r表示时间戳字符串
 77 sign: 1d69ce8f7c6258243e573e31e29e0012    //签名,下面找到了
 78 lts: 1664376547906        //毫秒级别的时间戳
 79 bv: 42c8b36dd7d61c619e7b1dc11e44d870    //同设备相同,使用md5加密的(方法是:md5(User-Agent)==>md5("5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53")
 80 doctype: json
 81 version: 2.1
 82 keyfrom: fanyi.web
 83 action: FY_BY_REALTlME
 84 
 85 /*
 86 解密出来了:sign的计算如下
 87 define("newweb/common/service", ["./utils", "./md5", "./jquery-1.7"], function(e, t) {
 88     var n = e("./jquery-1.7");
 89     e("./utils");
 90     e("./md5");
 91     var r = function(e) {
 92         var t = n.md5(navigator.appVersion)
 93           , r = "" + (new Date).getTime()
 94           , i = r + parseInt(10 * Math.random(), 10);
 95         return {
 96             ts: r,
 97             bv: t,
 98             salt: i,
 99             sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5")    //在这里,e是要翻译的内容,i是毫秒级别的时间戳后面加上个0-9之间的随机数;后面这串字符串估计是服务器那边随机生成的,应该会变化。每次抓取的时候,可以查看下js代码
100         }
101     };
102     
103     总体来说,data数据由函数“generateSaltSign”计算出来
104 */
105 '''
106 
def youdao_translate(The_translated_string:str) -> str:
    """Translate text through the Youdao web endpoint and return it as a string.

    The request mimics the browser client: ``salt`` is the millisecond
    timestamp followed by one random digit, ``sign`` is the MD5 of
    ``"fanyideskweb" + text + salt + <secret>``, and ``bv`` is the MD5 of the
    User-Agent string.

    Args:
        The_translated_string: Text to translate; it may span several lines.

    Returns:
        Every translated segment followed by ``"\\n"``, concatenated in
        response order. An empty input returns ``""`` without any request.
        When the service reports a non-zero error code the string
        ``"errorCode = <code>"`` is returned instead.
    """
    if(The_translated_string == ""):
        # Always hand back a string: callers split the result on "\n",
        # which the former {"": ""} dict could not support.
        return ""
    url = r'https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

    User_Agent = "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.37"
    # Content-Length is intentionally not set here; it is left to the HTTP library.
    header = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie":"OUTFOX_SEARCH_USER_ID=1135160796@10.108.162.134; OUTFOX_SEARCH_USER_ID_NCOO=775555146.507473; JSESSIONID=aaaQ2GYK5N-ozb24rKNcy; SESSION_FROM_COOKIE=unknown; DICT_UGC=be3af0da19b5c5e6aa4e17bd8d90b28a|; JSESSIONID=abcPzon0RcZqc7GltuAgy; ___rl__test__cookies=1665366515354",
        "Host": "fanyi.youdao.com",
        "Origin": "https://fanyi.youdao.com",
        "Referer": "https://fanyi.youdao.com/",
        "sec-ch-ua": """\"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"\"""",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": User_Agent,
        "X-Requested-With": "XMLHttpRequest"
    }

    timestamp = str(round(time.time()*1000))       # millisecond timestamp
    salt = timestamp + str(random.randint(0, 9))   # timestamp plus one random digit
    # The trailing secret was copied from the site's JavaScript; it is
    # presumably chosen by the server and may change - re-check the JS when
    # requests start failing.
    sign_str = "fanyideskweb" + The_translated_string + salt + "Ygy_4c=r#e#4EX^NUGUc5"
    sign = hashlib.md5(str.encode(sign_str)).hexdigest()
    bv = hashlib.md5(str.encode(User_Agent)).hexdigest()
    cookies = {
        "OUTFOX_SEARCH_USER_ID": "1135160796@10.108.162.134",
        "OUTFOX_SEARCH_USER_ID_NCOO": "775555146.507473",
        # The literal used to list JSESSIONID twice; only this later value
        # ever took effect.
        "JSESSIONID": "abcPzon0RcZqc7GltuAgy",
        "SESSION_FROM_COOKIE": "unknown",
        "DICT_UGC": "be3af0da19b5c5e6aa4e17bd8d90b28a|",
        "___rl__test__cookies": "1665366515354"
    }
    data = {
        "i": The_translated_string,
        "from": "AUTO",
        "to": "AUTO",
        # For a fixed direction (e.g. Chinese to English) use
        # "from": "zh-CHS", "to": "en" instead.
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "lts": timestamp,
        "bv": bv,
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_CLICKBUTTION"
    }
    # A timeout keeps an unresponsive server from blocking the script forever.
    result = requests.post(url, data=urlencode(data), cookies=cookies,
                           headers=header, timeout=30)
    json_result = json.loads(result.text)
    if json_result["errorCode"]:
        return "errorCode = "+str(json_result["errorCode"])
    # translateResult holds one list per source paragraph; each list holds the
    # segments the server split that paragraph into.
    return "".join(
        segment['tgt'] + "\n"
        for paragraph in json_result["translateResult"]
        for segment in paragraph
    )
188 
189 #示例
190 '''
191 str_ = "你好\n世界\n我来了\n哈哈"
192 print(youdao_translate(str_))
193 -----out-----
194 {'你好': 'hello', '世界': 'The world', '我来了': "I'm coming", '哈哈': 'Ha ha'}
195 '''
196 
def 执行翻译():
    """Return a translated deep copy of ``帮助文件解析结果``.

    SYNOPSIS and DESCRIPTION are translated as whole strings. Every non-empty
    PARAMETERS and Example value is translated, and finally the Example
    titles themselves are translated. NAME and the parameter names are left
    as they are. The parsed original is not modified.

    Returns:
        The translated dict, with the same layout as the parsed result.
    """
    # Deep copy: this local name shadows the module-level 翻译结果.
    翻译结果 = copy.deepcopy(帮助文件解析结果)

    for section in ("SYNOPSIS", "DESCRIPTION"):
        翻译结果[section] = youdao_translate(翻译结果[section])

    for section in ("PARAMETERS", "Example"):
        entries = 翻译结果[section]
        for name in list(entries):
            if entries[name] != "":
                entries[name] = youdao_translate(entries[name])

    # Re-key the examples by their translated titles, skipping empty titles.
    翻译结果["Example"] = {
        youdao_translate(title): body
        for title, body in 翻译结果["Example"].items()
        if title != ""
    }
    return 翻译结果
def 设置表格网格线为黑色实线(table_object:object):
    """Give a table 0.5 pt solid black borders on every outer and inner edge.

    One ``w:tblBorders`` element is built and attached to the table's
    properties element (``table_object._tbl.tblPr``).

    Args:
        table_object: Object exposing ``_tbl.tblPr``; in this file it is the
            value returned by ``Document.add_table``.

    Returns:
        The same ``table_object``. Using the return value is optional.
    """
    qn = docx.oxml.ns.qn
    # Every edge shares the same attributes; sz=4 is the 0.5 pt width
    # (the unit is eighths of a point).
    # NOTE(review): the schema's hex colour type has no leading '#'; the value
    # is kept exactly as before - confirm target word processors accept it.
    edge_attrs = {"sz": 4, "val": "single", "color": "#000000"}

    tbl_pr = table_object._tbl.tblPr
    # Remove borders left by an earlier call so the table never ends up with
    # two w:tblBorders elements.
    for stale in tbl_pr.findall(qn('w:tblBorders')):
        tbl_pr.remove(stale)

    borders = docx.oxml.OxmlElement('w:tblBorders')
    # Edges are emitted in the order the WordprocessingML schema lists them.
    for tag in ('top', 'left', 'bottom', 'right', 'insideH', 'insideV'):
        edge = docx.oxml.OxmlElement(f'w:{tag}')
        for key, value in edge_attrs.items():
            edge.set(qn(f'w:{key}'), str(value))
        borders.append(edge)

    # Attach once, after all edges exist (previously re-appended per edge).
    tbl_pr.append(borders)
    return table_object
def 设置标题样式为黑色宋体(heading_object:object):
    """Left-align a heading and render every run in black SimSun (宋体).

    Args:
        heading_object: Paragraph-like object with ``alignment`` and ``runs``;
            in this file it is the value returned by ``Document.add_heading``.

    Returns:
        The same ``heading_object``. Using the return value is optional.
    """
    heading_object.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.LEFT  # left-aligned
    east_asia = docx.oxml.ns.qn('w:eastAsia')
    for run in heading_object.runs:
        run.font.name = u'宋体'
        # font.name alone does not fill the East Asian font slot, so Chinese
        # text would keep the style's font; set w:eastAsia explicitly.
        # Assigning font.name first guarantees rPr/rFonts exist.
        run._element.rPr.rFonts.set(east_asia, u'宋体')
        run.font.color.rgb = docx.shared.RGBColor(0,0,0)  # black
    return heading_object
def word(保存路径:str=r"C:\Users\xxx\Downloads\temp.docx"):
    """Write the module-level ``翻译结果`` dict to a .docx file.

    Layout: the NAME as a level-2 heading; a "描述" section holding the
    SYNOPSIS and DESCRIPTION text, one paragraph per line; a "参数" section
    with a two-column table (parameter name, description); and a "示例"
    section with one level-4 heading plus paragraphs per example.

    Args:
        保存路径: Where the document is saved. Defaults to the path that was
            previously hard-coded.
    """
    my_word_doc = docx.Document()   # start from a blank document

    def 添加标题(text, level):
        # Add a heading and apply the shared black-SimSun heading style.
        设置标题样式为黑色宋体(my_word_doc.add_heading(text, level=level))

    添加标题(翻译结果["NAME"], 2)
    添加标题("描述", 3)
    for section in ("SYNOPSIS", "DESCRIPTION"):
        for line in 翻译结果[section].split("\n"):
            my_word_doc.add_paragraph(line)

    添加标题("参数", 3)
    parameters = 翻译结果["PARAMETERS"]
    table = my_word_doc.add_table(rows=len(parameters), cols=2)
    设置表格网格线为黑色实线(table)
    left = docx.enum.text.WD_PARAGRAPH_ALIGNMENT.LEFT
    for row_index, (key, value) in enumerate(parameters.items()):
        for column, text in enumerate((key, value)):
            cell = table.cell(row_index, column)
            # Newlines are dropped, as the earlier line-by-line "+=" did,
            # but the cell is now written once instead of once per line.
            cell.text = text.replace("\n", "")
            for paragraph in cell.paragraphs:
                paragraph.alignment = left

    添加标题("示例", 3)
    for key, value in 翻译结果["Example"].items():
        # Titles normally end with a newline (kept by the parser, appended by
        # the translator). Strip only that, rather than cutting whatever the
        # last character happens to be.
        添加标题(key.rstrip("\n"), 4)
        for line in value.split("\n"):
            my_word_doc.add_paragraph(line)
    my_word_doc.save(保存路径)
306 
# Script entry point: parse one help file and write it out as a Word document.
# Guarded so that importing this module does not touch the file system.
if __name__ == "__main__":
    解析PowerShell命令的帮助文本(r"C:\Users\xxx\Downloads\Get-CimClass.txt")
    # Parsing the results of Youdao's new translation API proved too hard, so
    # translation is switched off for now. Fallback plan: get the formatting
    # right and run the finished document through a document-translation
    # service instead.
    #翻译结果 = 执行翻译()
    翻译结果 = 帮助文件解析结果
    word()
View Code