餐厅订单数据分析

发布时间 2023-04-13 09:41:10作者: xxxxpig

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

data1=pd.read_excel('./资料1/meal_order_detail.xlsx',sheet_name='meal_order_detail1')
data2=pd.read_excel('./资料1/meal_order_detail.xlsx',sheet_name='meal_order_detail2')
data3=pd.read_excel('./资料1/meal_order_detail.xlsx',sheet_name='meal_order_detail3')
data=pd.concat([data1,data2,data3],axis=0)#按照行进行数据拼接
data.dropna(axis=1,inplace=True)
data.to_excel('./data.xlsx', index=False, encoding='utf-8')

#求卖出菜品平均价格平均数
round(data['amounts'].mean(),2)#方法1
print(round(np.mean(data['amounts']),2))#方法2

#统计最受欢迎的10道菜
dishes_count=data['dishes_name'].value_counts()[:10]
#print(dishes_count)
plt.rcParams['font.sans-serif']='SimHei'
plt.figure(figsize=(18,8))
dishes_count.plot(kind='bar',fontsize=12)
dishes_count.plot(kind='line',color=['r'])
plt.title('菜品销售前十名')
for x,y in enumerate(dishes_count):
#print(x,y)
plt.text(x,y+2,y,ha='center',fontsize=15)

#订单点菜种类最多
#data.groupby(by='order_id')
data_group=data['order_id'].value_counts()[:10]
plt.figure(figsize=(18,8))
data_group.plot(kind='bar',fontsize=12)
plt.title('订单种类TOP10')
plt.xlabel('订单ID')
plt.ylabel('点菜种类')

#订单ID点菜数量前十名
data['total_amounts']=data['counts']*data['amounts']
dataGroup=data[['order_id','counts','amounts','total_amounts']].groupby(by='order_id')
Group_sum=dataGroup.sum()
sort_counts=Group_sum.sort_values(by='counts',ascending=False)
plt.figure(figsize=(18,8))
sort_counts['counts'][:10].plot(kind='bar')
plt.xlabel('订单ID')
plt.ylabel('订单数量')
plt.title('订单ID点菜数量TOP10')


#订单ID消费前十名
sort_total_amounts=Group_sum.sort_values(by='total_amounts',ascending=False)
plt.figure(figsize=(18,8))
sort_total_amounts['total_amounts'][:10].plot(kind='bar',fontsize=12)
plt.xlabel('订单ID')
plt.ylabel('订单金额')
plt.title('订单ID消费TOP10')

#哪个订单ID平均消费最贵
Group_sum['average']=Group_sum['total_amounts']/Group_sum['counts']
sort_average=Group_sum.sort_values(by='average',ascending=False)
plt.figure(figsize=(18,8))
sort_average['average'][:10].plot(kind='bar',fontsize=12)
plt.xlabel('订单ID')
plt.ylabel('平均消费金额')
plt.title('订单ID平均消费TOP10')

#下单量与时间的关系
data['hourcount'] = 1
data['time'] = pd.to_datetime(data['place_order_time'])
data['hour']=data['time'].map(lambda x:x.hour)
gp_by_hour=data.groupby(by='hour').count()['hourcount']
gp_by_hour.plot(kind='bar')
plt.figure(figsize=(18,8))
plt.xlabel('时间')
plt.ylabel('下单量')
plt.title('下单量与时间的关系')

#一个月中哪一天下单量最多
data['daycount']=1
data['day']=data['time'].map(lambda x:x.day)
gp_by_day=data.groupby(by='day').count()['daycount']
gp_by_day.plot(kind='bar')
plt.figure(figsize=(18,8))
plt.xlabel('日期')
plt.ylabel('下单量')
plt.title('下单量与日期的关系')