使用Python openpyxl找出两个文件的差异之处并标记

发布时间 2023-12-12 09:57:48作者: ament
# It imports the PatternFill class from the openpyxl.styles module.
from openpyxl.styles import PatternFill

# It imports the colors class from the openpyxl.styles module.
from openpyxl.styles import colors

# It imports the Font class from the openpyxl.styles module.
from openpyxl.styles import Font

# It imports the openpyxl module and renames it as pxl.
import openpyxl as pxl

# It loads the data1.xlsx file and assigns it to the workbook_1 variable.
workbook_1 = pxl.load_workbook(r'D:\data\1.xlsx')

# It loads the data2.xlsx file and assigns it to the workbook_2 variable.
workbook_2 = pxl.load_workbook(r'D:\data\2.xlsx')

# Assigning the Sheet1 object to the workbook_1_sheet_1 variable.
workbook_1_sheet_1 = workbook_1['Sheet1']

# It assigns the Sheet1 object to the workbook_2_sheet_1 variable.
workbook_2_sheet_1 = workbook_2['Sheet1']

# A ternary operator. It is equivalent to:
max_row = workbook_1_sheet_1.max_row if workbook_1_sheet_1.max_row > workbook_2_sheet_1.max_row else workbook_2_sheet_1.max_row

# A ternary operator. It is equivalent to:
max_column = workbook_1_sheet_1.max_column if workbook_1_sheet_1.max_column > workbook_2_sheet_1.max_column else workbook_2_sheet_1.max_column

# 创建一个新表,用来存储新的数据,在sheet1 中,不在sheet2 中的
new_sheet = workbook_1.create_sheet('差异新增数据')

# 新表第一行输入
new_sheet.append(['姓名', '身份证'])

# 遍历2到n行的身份证数据,不包含第一行表头
for i in range(2, (max_row + 1)):
    # 取出第i行 身份证数据 ,
    tmp1 = workbook_1_sheet_1.cell(i, 1)
    tmp_data = tmp1.value
    # 取出第i行身份数据,姓名
    tmp2 = workbook_1_sheet_1.cell(i,2)
    tmp_name = tmp2.value
    find_flag = True
    for j in range(2, (max_row + 1)):
        cell_2 = workbook_2_sheet_1.cell(j, 1)
        # 如果,找到相似的数据,标记
        if tmp_data == cell_2.value:
            tmp1.fill = PatternFill("solid", fgColor='FFFF00')
            tmp1.font = Font(color=colors.BLACK, bold=True)
            cell_2.fill = PatternFill("solid", fgColor='FFFF00')
            cell_2.font = Font(color=colors.BLACK, bold=True)
            find_flag = False
            break
    if find_flag:
        # 如果没找到,添加新数据
        new_sheet.append([tmp_name, tmp_data])

# It saves the workbook_1 object to the 3.xlsx file.
workbook_1.save(r'D:\data\3.xlsx')

# It saves the workbook_2 object to the 4.xlsx file.
workbook_2.save(r'D:\data\4.xlsx')



print("标记完成")