python批量处理Excel数据

发布时间 2023-04-03 20:37:00作者: 妖孽成佛
# 1、 批量提取一个工作簿中所有工作表的特定数据
'''
import xlwings as xw
import pandas as pd
app = xw.App(visible = False, add_book = False)
workbook = app.books.open('采购.xlsx') # 打开工作簿
worksheet = workbook.sheets # 列出工作簿中的所有工作表
data = [] # 创建一个空列表用于存放数据
for i in worksheet:
values = i.range('A1').expand().options(pd.DataFrame).value # 读取当前工作表的所有数据
filtered = values[values['采购物品'] == '复印纸'] # 提取采购物为复印纸的行数据
if not filtered.empty: # 判断提取行数据是否为空
data.append(filtered) # 提取数据追加到列表中
new_workbook = xw.books.add() # 新建工作簿
new_worksheet = new_workbook.sheets.add('复印纸') # 新建工作簿中新增名为复印纸的工作表
new_worksheet.range('A1').value = pd.concat(data, ignore_index = False) # 将提取的数据写入复印纸工作表中
new_workbook.save('复印纸.xlsx')
workbook.close()
app.quit()
'''
import os

# ### concat(objs,axis=0,join='outer',join_axes=None,ignore_index=False,keys=None,levels=None,names=None,verify_integrity=False,copy=True)
# objs:要拼接的数据对象
# axis:拼接时所依据的轴,如果为0,则沿着行拼接,为1,则沿着列拼接
# join:拼接的方式,默认outer
# join_index:index对象列表
# ignore_index:默认为FALSE,如为Ture,忽略原有索引,生成新的数字序列作为新索引
# keys:序列,默认空。使用传递的键作为最外层构建层次索引,如果为多索引,应使用元组
# levels:序列列表,默认值空,用于构建唯一值
# names:列表,默认值为空,结果层次索引中的级别名称
# verify_integrity:默认FALSE,用于检查新拼接的轴是否包含重复值
# copy:默认TRUE,如果为FALSE,则不执行非必要的数据复制

# 2、批量提取一个工作簿中所有工作表的列数据
'''
import xlwings as xw
import pandas as pd
app = xw.App(visible = False, add_book = False)
workbook = app.books.open('采购表.xlsx')
worksheet = workbook.sheets
column = ['采购日期', '采购金额'] # 指定要提取的列的标题
data = []
for i in worksheet:
values = i.range('A1').expand().options(pd.DataFrame, index = False).value
filtered = values[column] # 根据前面指定的列标题提取数据
data.append(filtered)
new_workbook = xw.books.add()
new_worksheet = new_workbook.sheets.add('提取数据')
new_worksheet.range('A1').value = pd.concat(data, ignore_index = False).set_index(column[0])
new_workbook.save('提取表.xlsx')
workbook.close()
app.quit()
'''

# 3、 在多个工作簿的指定工作表中批量追加行数据
'''
import os
import xlwings as xw
newContent = [['双肩包','64','110'],['腰包','23','58']] # 要追加的行数据
app = xw.apps.add()
file_path = '分部信息'
file_list = os.listdir(file_path)
for i in file_list:
if os.path.splitext(i)[1] == '.xlsx':
workbook = app.books.open(file_path + '\\' + i)
worksheet = workbook.sheets['产品分类表'] # 指定要追加行数据的工作表
values = worksheet.range('A1').expand() # 读取原有数据
number = values.shape[0] # 获取原有数据的行数
worksheet.range(number + 1, 1).value = newContent # 将前面指定的行数追加到原有数据的下方
workbook.save()
workbook.close()
app.quit()
'''
# 4、 对多个工作簿中指定工作表的数据进行分列
'''
import os
import xlwings as xw
import pandas as pd
file_path = '产品记录表'
file_list = os.listdir(file_path)
app = xw.App(visible = False, add_book = False)
for i in file_list:
if i.startswith('~$'): # 判断是否有文件以~$开头
continue # 如果有,则跳过
file_paths = os.path.join(file_path, i) # 将文件夹路径和名称拼接成工作表的完整路径
workbook = app.books.open(file_paths) # 打开工作簿
worksheet = workbook.sheets['规格表'] # 指定要处理的工作表
values = worksheet.range('A1').options(pd.DataFrame, header = 1, index = False, expand = 'table') # 读取指定工作表数据
new_values = values['规格'].str.split('*', expand = True) # 根据*拆分规格列
values['长(mm)'] = new_values[0]
values['宽(mm)'] = new_values[1]
values['高(mm)'] = new_values[2]
values.drop(columns=['规格'], inplace = True) # 删除规格列
worksheet['A1'].options(index = False).value = values # 用分列后的数据替换工作表中的原有数据
worksheet.autofit() # 根据数据内容自动调整工作表的行高列宽
workbook.save()
workbook.close()
app.quit()
'''
# ### Series.str.split(pat=None,n=-1,expand=False)
# pat:指定分隔符,默认为空格;
# n:指定拆分的次数,1为在第1个分隔符拆分,2为1、2个分隔符拆分
# expand:TRUE则为DataFrame,FALSE,则为Series
# ### DataFrame.drop(labels=None,axis=0,index=None,columns=None,inplace=False)
# labels:要删除的行、列的名称
# axis:默认为0,表示删除列。如为1,则删除行
# index:指定要删除的行
# columns:指定要删除的列
# inplace:默认False,表示删除不改变原DataFrame,返回执行删除操作后的新DataFrame。如为True,直接在原DataFrame上删除,无法恢复。

# 5、批量合并多个工作簿中指定工作表中的列数据
'''
import os
import xlwings as xw
import pandas as pd
file_path = '产品记录表'
file_list = os.listdir(file_path)
app = xw.App(visible = False, add_book = False)
for i in file_list:
if i.startswith('~$'):
continue
file_paths = os.path.join(file_path, i)
workbook = app.books.open(file_paths)
worksheet = workbook.sheets['规格表']
values = worksheet.range('A1').options(pd.DataFrame, header = 1, index = False, expand = 'table').value
# 合并列数据
values['规格'] = values['长(mm)'].astype('str') + values['宽(mm)'].astype('str') + '*' + values['高(mm)'].astype('str')
values.drop(columns=['长(mm)'], inplace= True) # 删除标题为长(mm)的列
values.drop(columns=['宽(mm)'], inplace= True)
values.drop(columns=['高(mm)'], inplace= True)
worksheet.clear() # 清楚工作表规格表中原有数据
worksheet['A1'].options(index = False).value = values # 将处理好的数据写入工作表
worksheet.autofit()
workbook.save()
workbook.close()
app.quit()
'''

# 6、多个工作簿中指定工作表的列数据拆分成多行
'''
import os
import xlwings as xw
import pandas as pd
file_path = '产品记录表'
file_list = os.listdir(file_path)
app = xw.App(visible = False, add_book = False)
for i in file_list:
if i.startswith('~$'):
continue
file_paths = os.path.join(file_path,i)
workbook = app.books.open(file_paths)
worksheet = workbook.sheets['规格表']
values = worksheet.range('A1').options(pd.DataFrame, header = 1, index = False, expand = 'table').value
new_values = values['规格'].str.split('*', expand = True)
values['长(mm)'] = new_values[0]
values['宽(mm)'] = new_values[1]
values['高(mm)'] = new_values[2]
values.drop(columns=['规格'], inplace= True)
values = values.T # 转换数据的行列
values.columns = values.iloc[0]
values.index.name = values.iloc[0].index.name
values.drop(values.iloc[0].index.name, inplace= True)
worksheet.clear()
worksheet['A1'].value = values
worksheet.autofit()
workbook.save()
workbook.close()
app.quit()
'''

# 7、批量提取一个工作簿中所有工作表的唯一值
### 将这6个工作表中的书名提取出来,但是不能有重复的书名
'''
import xlwings as xw
app = xw.App(visible= True, add_book = False)
workbook = app.books.open('上半年销售统计表.xlsx')
data = [] #创建一个空列存放书名数据
for i, worksheet in enumerate(workbook.sheets): # 遍历工作簿中的工作表
values = worksheet['A2'].expand('down').value # 提取当前工作表中的书名数据
data = data + values
data = list(set(data)) # 对列表中的书名数据进行去重操作
data.insert(0, '书名') # 在去重后的书名数据前添加列标题“书名”
new_workbook = xw.books.add() # 新建工作簿
new_worksheet = new_workbook.sheets.add('书名') # 新工作簿中新增名为“书名”的工作表
new_worksheet['A1'].options(transpose = True).value = data # 将处理好的书名数据写入新工作表
new_worksheet.autofit()
new_workbook.save('书名.xlsx')
workbook.close()
app.quit()
'''
### insert(index,obj)
# index:要插入元素的位置
# obj:要插入的元素

# 8、 批量提取一个工作簿中所有工作表的唯一值并汇总
'''
import xlwings as xw
app = xw.App(visible = True, add_book = False)
wb = app.books.open('销售统计表.xlsx')
data = list() # 创建空列表用于存放书名和销售明细
for i, sht in enumerate(wb.sheets):
values = sht['A2'].expand('table').value
data = data + values
sales = dict() # 创建空字典存放书名和销量汇总
for i in range(len(data)): #遍历书名和销量明细
name = data[i][0] # 获取书名
sale = data[i][1] # 获取销量
if name not in sales:
sales[name] = sale # 如果不存在,字典中添加此书名的销量记录
else:
sales[name] += sale # 如果存在,计算书名累加销量
dictlist = list()
for key, value in sales.items():
temp = [key, value] # 列出书名和累积销量
dictlist.append(temp)
dictlist.insert(0, ['书名','销量']) # 获取的数据前添加列标题书名和销量
new_workbook = xw.books.app()
new_worksheet = new_workbook.sheets.add('销量统计')
new_worksheet['A1'].value = dictlist
new_worksheet.autofit()
new_workbook.save('销售统计.xlsx')
wb.close
app.quit()
'''

# 9、调整多个工作簿的行高列宽
'''
import os
import xlwings as xw
file_path = 'e:\\table\销售表'
file_list = os.listdir(file_path)
app = xw.App(visible = False, add_book = False)
for i in file_list:
if i.startswith('~$'):
continue
file_paths = os.path.join(file_path, i)
for j in workbook.sheets:
value = j.range('A1').expand('table') # 在工作表选择要调整行高列宽的区域
value.column_width = 12 # 列宽调整为12个字符宽度
value.row_height = 20 # 将行高设置为20
workbook.save()
workbook.close()
app.quit()
'''

# 10、一个工作簿中所有表的行高列宽
'''
import xlwings as xw
app = xw.App(visible = False, add_book = False)
workbook = app.books.open('e:\\table\*.xlsx')
for i in workbook.sheets:
value = i.range('A1').expand('table')
value.column_width = 12
value.row_height = 20
workbook.save()
app.quit()
'''

# 11、批量更改多个工作簿的数据格式
'''
import os
import xlwings as xw
file_path = '采购'
file_list = os.listdir(file_path)
app = xw.App(visible = False, add_book = False)
for i in file_list:
if i.startswith('~$'):
continue
file_paths = os.path.join(file_path, i)
workbook = app.books.open(file_paths)
for j in workbook.sheets:
row_num = j['A1'].current_region.last_cell.row # 获取工作表数据区域最后一行的行号
j['A2:A{}'.format(row_num)].number_format = 'm/d' # A列的采购如期全部更改为月/日格式
j['D2:D{}'.format(row_num)].number_format = '¥#,##0.00' # D列的采购金额更改为货币符号和两位小数格式
workbook.save()
workbook.close()
app.quit()
'''

# 12、批量更改多个工作簿的外观格式
'''
import os
import xlwings as xw
file_path = '销售表'
file_list = os.listdir(file_path)
app = xw.App(visible = False, add_book = False)
for i in file_list:
if i.startswith('~$'):
continue
file_paths = os.path.join(file_path, i)
workbook = app.books.open(file_paths)
for j in workbook.sheets:
j['A1:H1'].api.Font.Name = '宋体' # 标题行为宋体
j['A1:H1'].api.Font.Size = 10
j['A1:H1'].api.Font.Bold = True # 字体加粗
j['A1:H1'].api.Font.Color = xw.utils.rgb_to_int((255,255,255)) # 字体颜色为白色
j['A1:H1'].color = xw.utils.rgb_to_int((0,0,0)) # 单元格填充颜色为黑色
j['A1:H1'].api.HorizontalAlignment = xw.constants.VAlign.xlHAlignCenter # 工作表标题行的水平对齐方式居中
j['A1:H1'].api.VerticalAlignment = xw.contants.VAlign.xlVAlignCenter # 工作表标题行垂直对齐方式居中
j['A2'].expand('table').api.Font.Name = '宋体'
j['A2'].expand('table').api.Font.Size = 10
j['A2'].expand('table').api.HorizontalAlignment = xw.constants.HAlign.xlHAlignLeft # 设置正文水平对齐方式靠左
j['A2'].expand('table').api.VerticalAlignment = xw.constants.VAlign.xlVAlignCenter # 设置正文垂直对齐方式居中
for cell in j['A1'].expand('table'):
for b in range(7,12):
cell.api.Borders(b).LineStyle =1 # 边框线型为1
cell.api.Borders(b).Weight = 2 # 单元格边框粗细2
workbook.save()
workbook.close()
app.quit()
'''

# 13、批量替换多个工作簿的行数据
'''
import os
import xlwings as xw
file_path = '分部信息'
file_list = os.listdir(file_path)
app = xw.App(visible = False, add_book = False)
for i in file_list:
if i.startswith('~$'):
continue
file_paths = os.path.join(file_path, i)
workbook = app.books.open(file_paths)
for j in workbook.sheets:
value = j['A2'].expand('table').value
for index, val in enumerate(value): # 按行遍历工作表数据
if val == ['背包', 16, 65]:
value[index] = ['双肩包', 36, 46]
j['A2'].expand('table').value = value
workbook.save()
workbook.close()
app.quit()
'''

# 14、批量升序工作簿中所有工作表
'''
import xlwings as xw
import pandas as pd
app = xw.App(visible = False, add_book = False)
workbook = app.books.open('销售表.xlsx')
worksheet = workbook.sheets
for i in worksheet:
values = i.range('A1').expand('table').options(pd.DataFrame).value # 读取当前工作表数据,并转换为DataFrame格式
result = values.sort_values(by = '销售利润') # 对销售利润进行升序排列
i.range('A1').value = result # 将排序结果写入当前工作表,替换原有数据
workbook.save()
workbook.close()
app.quit()
'''
### sort_values(by='##',axis=0,ascending=True,inplace=False,na_position='last')
# by:要排序的列名或索引值
# axis:省略或者0或者index,则按照by指定的列中的数据排序;如为1或columns,则按照by指定的索引的数据排序
# ascending:排序方式。省略或者TRUE,为升序;如果为false,则做降序排列
# inplace:省略或者false,不替换;如果TRUE,则用排序后的数据替换原来数据
# na_pasition:空值的显示位置,如果为first,将空值放在列的首位;如果为last,则放在列的末尾
'''
result = values.sort_values(by = '销售利润', ascending=False) # 对销售利润列进行降序排序
'''

# 15、批量排序多个工作簿中的数据
'''
import xlwings as xw
import pandas as pd
import os
app = xw.App(visible = False, add_book = False)
file_path = '产品销售表'
file_list = os.listdir(file_path)
for i in file_list:
if os.path.splitext(i)[1] == 'xlsx':
workbook = app.books.open(file_path + '\\' + i)
worksheet = workbook.sheets
for j in worksheet:
values = j.range('A1').expand('table').options(pd.DataFrame).value
result = values.sort_values(by = '销售利润')
j.range('A1').value = result
workbook.save()
workbook.close()
app.quit()
'''

# 16、筛选一个工作簿中所有工作表数据
'''
import xlwings as xw
import pandas as pd
app = xw.App(visible = False, add_book = False)
workbook = app.books.open('采购表.xlsx')
worksheet = workbook.sheets
table = pd.DataFrame() # 创建一个空DataFrame
for i,j in enumerate(worksheet):
# 读取当前工作表的数据
values = j.range('A1').options(pd.DataFrame, header = 1, index= False, expand= 'table').value
# 调整列的顺序
data = values.reindex(columns = ['采购物品', '采购日期', '采购数量', '采购金额'])
# 将调整列顺序后的数据合并到前面创建的DataFrame中
table = table.append(data, ignore_index = True)
table = table.groupby('采购物品') # 根据采购物品列筛选数据
new_workbook = xw.books.add() # 新建一个工作簿
for idx, group in table:
new_worksheet = new_workbook.sheets.add(idx) # 在新工作簿中新增工作表,以物品名称作为工作表名
new_worksheet['A1'].options(index = False).value = group # 新工作表中写入当前物品的所有明细
last_cell = new_worksheet['A1'].expand('table').last_cell # 获取当前工作表数据区域右下角的单元格
last_row = last_cell.row
last_column = last_cell.column
last_column_letter = chr(64 + last_column) # 将数据区域最后一列的数字转换为改列的列标字母
sum_cell_name = '{}{}'.format(last_column_letter, last_row+1) # 获取数据区域右下角单元格下方的单元格位置
sum_last_row_name = '{}{}'.format(last_column_letter, last_row) # 获取数据区域右下角单元格位置
formula = ' =SUM({}2:{})'.format(last_column_letter, sum_last_row_name) # 根据前面获取单元格位置构造Excel公式,对求购金额求和
new_worksheet[sum_cell_name].formula = formula # 将求和公式写入数据区域右下角单元格下方单元格中
new_worksheet.autofit() # 自动调整工作表的行高和列宽
new_worksheet.save('采购分类表.xlsx')
workbook.close()
app.quit()
'''
# reindex(index = **, columns = **, fill_value=0)
# index:要改变位置的行,**为列表
# columns:要改变位置的列,**为列表
# fill_value:可选参数,前面两个不存在时,可用该参数定义如何填充缺失值

# chr(64+列号) 可以将列号转换为列表

# 17、在一个工作簿中筛选单一类别数据
'''
import xlwings as xw
import pandas as pd
app = xw.App(visible = False, add_book = False)
workbook = app.books.open('采购表.xlsx')
worksheet = workbook.sheets
table = pd.DataFrame()
for i,j in enumerate(worksheet):
values = j.range('A1').options(pd.DataFrame, header=1, index = False, expand = 'table').value
data = values.reindex(columns = ['采购物品','采购日期','采购数量','采购金额'])
table = table.append(data, ignore_index = True) # 将多个工作表数据合并到一个DataFrame中
product = table[table['采购物品'] == '保险箱'] # 筛选采购物品是保险箱的数据
new_workbook = xw.books.add()
new_worksheet = new_workbook.sheets.add('保险箱')
new_worksheet['A1'].options(index = False).value = product # 将筛选出数据写入工作表(index=False为删除索引列)
new_worksheet.autofit()
new_workbook.save('保险箱.xlsx')
new_workbook.close()
app.quit()
'''

# 18、对多个工作簿的工作表进行分类汇总
'''
import os
import xlwings as xw
import pandas as pd
app = xw.App(visible = False, add_book = False)
file_path = '销售表'
file_list = os.listdir(file_path)
for i in file_list:
if os.path.splitext(i)[1] == 'xlsx':
workbook = app.books.open(file_path + '\\' + i)
worksheet = workbook.sheets
for j in worksheet:
values = j.range('A1').expand('table').options(pd.DataFrame).value # 读取当前工作表的数据
values['销售利润'] = values['销售利润'].astype('float') # 转换销售利润数据类型
result = values.groupby('销售区域').sum() # 根据销售区域列对数据进行分类汇总,求和
j.range('J1').value = result['销售利润'] # 将各个销售区域销售利润汇总结果写入当前工作表
workbook.save()
workbook.close()
app.quit()
'''
# groupby()函数后接的sum()函数用于进行求和汇总,还可以使用其他函数完成其他类型的汇总运算。常用的有:用mean()函数求平均值,
# 用count()函数统计个数,用max()函数求最大值,用min()函数求最小值。

# 19、批量分类汇总多个工作簿中的指定工作表
'''
import os
import xlwings as xw
import pandas as pd
app = xw.App(visible = False, add_book = False)
file_path = '销售表'
file_list = os.listdir(file_path)
for i in file_list:
if os.path.splitext(i)[1] == 'xlsx':
workbook = app.books.open(file_path + '\\' + i)
worksheet = workbook.sheets['销售记录表'] # 指定要分类汇总的工作表
values = worksheet.range('A1').expand('table').options(pd.DataFrame).value
values['销售利润'] = values['销售利润'].astype('float')
result = values.groupby('销售区域').sum()
worksheet.range('J1').value = result['销售利润']
workbook.save()
workbook.close()
app.quit()
'''

# 20/ 将多个工作簿数据分类汇总到一个工作簿
'''
import os
import xlwings as xw
import pandas as pd
app = xw.App(visible = False, add_book = False)
file_path = '销售表'
file_list = os.listdir(file_path)
collection = []
for i in file_list:
if os.path.splitext(i)[1] == 'xlsx':
workbook = app.books.open(file_path + '\\' + i)
worksheet = workbook.sheets['销售记录表']
values = worksheet.range('A1').expand('table').options(pd.DataFrame).value
filtered = values[['销售区域', '销售利润']] # 只保留两列数据
collection.append(filtered)
workbook.close()
new_values = pd.concat(collection, ignore_index = False).set_index('销售区域')
new_values['销售利润'] = new_values['销售利润'].astype('float')
result = new_values.groupby('销售区域').sum()
new_workbook = app.books.add()
sheet = new_workbook.sheets(0)
sheet.range('A1').value = result
new_workbook.save('汇总.xslx')
app.quit()
'''

# 21、对一个工作簿中所有表分别求和
'''
import xlwings as xw
import pandas as pd
app = xw.App(visible = False, add_book = False)
workbook = app.books.open('采购.xlsx')
worksheet = workbook.sheets
for i in worksheet:
values = i.range('A1').expand('table')
data = values.options(pd.DataFrame).value
sums = data['采购金额'].sum() # 创建的DataFrame中对采购金额进行求和
column = values.valuep[0].index('采购金额') + 1 # 获取采购金额列的序号
row = values.shape[0] # 获取数据区域最后一行行号
i.range(row+1, column).value = sums # 将求和结果写入采购金额列最后一个单元格下方的单元格
workbook.save()
workbook.close()
app.quit()
'''
# index(obj,start,end)
# obj:要查找的元素
# start:查找的起始位置
# end:可选,查找的结束位置

# 22、将所有求和结果写入固定单元格
'''
import xlwings as xw
import pandas as pd
app = xw.App(visible =False, add_book = False)
workbook = app.books.open('采购表.xlsx')
worksheet = workbook.sheets
for i in worksheet:
values = i.range('A1').expand('table').options(pd.DataFrame)
sums = values['采购金额'].sum()
i.range('F1').value = sums # 将当前工作表中的数据求和结果写入当前工作表的单元件中
workbook.save()
workbook.close()
app.quit()
'''

# 23、批量统计工作表的最大值最小值
'''
file_path = '产品销售统计表'
file_list = os.listdir(file_path)
for j in file_list:
if os.path.splitext(j)[1] == '.xslx':
workbook = app.books.open(file_path + '\\' + j)
worksheet = workbook.sheets
for i in worksheet:
values = i.range('A1').expand('table').options(pd.DataFrame).value
max = values['销售利润'].max()
min = values['销售利润'].min()
i.range('I1').value = '最大销售利润'
i.range('J1').value = max
i.range('I2').value = '最小销售利润'
i.range('J2').value = min
workbook.save()
workbook.close()
app.quit()
'''

# 24、批量统计工作簿中所有表的最大最小值
'''
workbook = xw.Book('产品销售统计表.xlsx')
worksheet = workbook.sheets
for i in worksheet:
values = i.range('A1').expand('table').options(pd.DataFrame).value
max = values['销售利润'].max()
min = values['销售利润'].min()
i.range('I1').value = '最大利润'
i.range('J1').value = max
i.range('I2').value = '最小利润'
i.range('J2').value = min
workbook.save()
workbook.close()
app.quit()
'''