2.爬取的数据内容:股票名称,股票代码, 最新价, 涨跌幅, 涨跌额,成交量,成交额, 振幅, 换手率, 市盈率, 量比, 最高, 最低, 今开,昨收, 市净率, 60日涨跌幅, 年初至今涨跌幅, 总市值, 流通市值, 五分钟涨跌。
3.爬虫设计方案概述:本次案例使用requests库对东方财富网的股票行情数据进行爬取,使用xlrd、xlwt和xlutils对excel文件进行处理,之后使用pandas、pyecharts、jieba对数据进行可视化。
import json
import requests, time
from tkinter import *
import xlrd, xlwt, os
from xlutils.copy import copy
class GuPiao():
    """Scrape stock quotes from the Eastmoney list API into an .xls workbook.

    Attributes:
        num: Running count of quotes parsed so far; it is also written as the
            first column of every stored row.
    """

    # Output workbook and the sheet inside it that receives the rows.
    WORKBOOK_PATH = '股票数据.xls'
    SHEET_NAME = '股票数据'
    # Column titles of the sheet; every stored row follows exactly this order.
    HEADER = (
        '序号', '股票名称', '股票代码', '最新价', '涨跌幅', '涨跌额', '成交量(手)', '成交额',
        '振幅', '换手率', '市盈率', '量比', '最高', '最低', '今开', '昨收', '市净率',
        '60日涨跌幅', '年初至今涨跌幅', '总市值', '流通市值', '五分钟涨跌')
    # JSONP callback name sent as ``cb``; the reply body is wrapped in a call
    # to this name.
    CALLBACK = 'jQuery1124001114974346054809_1700894280689'

    def __init__(self):
        self.num = 0

    # ---- crawling ----
    def spider(self):
        """Fetch list pages 1-20 and hand each page's rows to ``parase_data``.

        Stops early when a reply carries no ``data`` payload.

        Raises:
            requests.RequestException: on network failure, timeout or an HTTP
                error status.
            ValueError: if the reply body is not a JSONP-wrapped JSON document.
        """
        start_url = 'https://83.push2.eastmoney.com/api/qt/clist/get'
        # The headers never change between pages, so build them once.
        headers = {
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Referer': 'https://quote.eastmoney.com/center/gridlist.html',
            'Sec-Fetch-Dest': 'script',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Site': 'same-site',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
        }
        for page in range(1, 21):  # raise the upper bound to collect more pages
            fomdata = {
                'cb': self.CALLBACK,
                'pn': page,
                'pz': '20',
                'po': '1',
                'np': '1',
                'ut': 'bd1d9ddb04089700cf9c27f6f7426281',
                'fltt': '2',
                'invt': '2',
                'wbp2u': '|0|0|0|web',
                'fid': 'f3',
                'fs': 'm:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048',
                'fields': 'f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152',
                '_': '1700894280699'
            }
            # A timeout keeps a stalled connection from hanging the crawl.
            reply = requests.get(url=start_url, headers=headers,
                                 params=fomdata, timeout=10)
            reply.raise_for_status()
            # Unwrap the JSONP envelope ``callback(<json>);`` by taking what
            # lies between the first "(" and the last ")". Plain string
            # slicing needs no ``re`` import and cannot cut the payload short
            # when the JSON itself contains ");".
            _, _, tail = reply.text.partition('(')
            payload = json.loads(tail.rpartition(')')[0])
            page_data = payload.get('data')
            if not page_data:
                # NOTE(review): presumably the API answers ``"data": null``
                # past the last page - confirm against a live reply.
                break
            datas = page_data['diff']
            time.sleep(2)  # pause before parsing and requesting the next page
            self.parase_data(datas)

    # ---- parsing ----
    @staticmethod
    def _build_record(num, item):
        """Map one raw API row (``f*`` keys) to a dict keyed by column title.

        Args:
            num: Sequence number stored in the '序号' column.
            item: One element of the reply's ``data.diff`` list.

        Returns:
            A dict holding one entry per title in ``HEADER``; the percentage
            columns are strings with a trailing '%'.
        """
        def percent(key):
            return str(item[key]) + '%'

        return {
            '序号': num,
            '股票名称': item['f14'],
            '股票代码': item['f12'],
            '最新价': item['f2'],
            '涨跌幅': percent('f3'),
            '涨跌额': item['f4'],
            '成交量(手)': item['f5'],
            '成交额': item['f6'],
            '振幅': percent('f7'),
            '换手率': percent('f8'),
            '市盈率': item['f9'],
            '量比': item['f10'],
            '最高': item['f15'],
            '最低': item['f16'],
            '今开': item['f17'],
            '昨收': item['f18'],
            # Price-to-book comes from f23 (previously the previous-close
            # value was stored under this key by mistake).
            '市净率': item['f23'],
            '60日涨跌幅': percent('f24'),
            '年初至今涨跌幅': percent('f25'),
            '总市值': item['f20'],
            '流通市值': item['f21'],
            '五分钟涨跌': percent('f11'),
        }

    def parase_data(self, datas):
        """Number, print and store every raw row in ``datas``.

        Args:
            datas: Iterable of raw API rows (dicts with ``f*`` keys).
        """
        for item in datas:
            self.num += 1
            record = self._build_record(self.num, item)
            print(record)
            # chucun_excel expects {sheet name: row values in column order}.
            row = [record[title] for title in self.HEADER]
            self.chucun_excel({self.SHEET_NAME: row})

    # ---- storage ----
    def _create_workbook(self):
        """Create the workbook with one sheet holding a styled header row."""
        wb = xlwt.Workbook(encoding='utf-8')
        sheet = wb.add_sheet(self.SHEET_NAME, cell_overwrite_ok=True)
        # Thin border on all four sides, colour index 0x40.
        borders = xlwt.Borders()
        borders.left = xlwt.Borders.THIN
        borders.right = xlwt.Borders.THIN
        borders.top = xlwt.Borders.THIN
        borders.bottom = xlwt.Borders.THIN
        borders.left_colour = 0x40
        borders.right_colour = 0x40
        borders.top_colour = 0x40
        borders.bottom_colour = 0x40
        # Centre the header cells horizontally and vertically.
        align = xlwt.Alignment()
        align.horz = 0x02
        align.vert = 0x01
        style = xlwt.XFStyle()
        style.borders = borders
        style.alignment = align
        for column, title in enumerate(self.HEADER):
            sheet.col(column).width = 2560 * 3
            sheet.write(0, column, title, style)
        wb.save(self.WORKBOOK_PATH)

    def chucun_excel(self, data):
        """Append one row per matching sheet to the workbook on disk.

        The workbook is created with its header row on first use.

        Args:
            data: Mapping of sheet name to the list of cell values to append
                after the last used row of that sheet.
        """
        if not os.path.exists(self.WORKBOOK_PATH):
            self._create_workbook()
        # formatting_info=True lets xlutils carry the header style and the
        # column widths over to the writable copy.
        wb = xlrd.open_workbook(self.WORKBOOK_PATH, formatting_info=True)
        # xlrd workbooks are read-only; xlutils turns one into an xlwt copy.
        new_workbook = copy(wb)
        appended = False
        for index, sheet_name in enumerate(wb.sheet_names()):
            if sheet_name not in data:
                continue
            rows_old = wb.sheet_by_index(index).nrows  # first free row
            target = new_workbook.get_sheet(index)
            for column, value in enumerate(data[sheet_name]):
                target.write(rows_old, column, value)
            appended = True
        if appended:
            new_workbook.save(self.WORKBOOK_PATH)
            xai_data = f'已经存储第{self.num}条股票数据至《股票数据》'
            print(xai_data)
if __name__ == '__main__':
    # Start the crawl only when this file is executed as a script.
    GuPiao().spider()
五、数据可视化
1.将股票60日涨跌情况进行汇总并绘制成折线图,
通过此图可以看出赛腾股份的60日涨幅最大。
数据视图
代码实现
import pandas as pd
from pyecharts.charts import Line
from pyecharts import options as opts
def shujufenxi():
    """Build the 60-day change line chart from the scraped workbook.

    Reads ``股票数据.xls`` and renders the chart to an embeddable HTML
    fragment held in ``html_content_1``.
    """
    # =============== 60-day change ===============
    file_path = f'股票数据.xls'  # replace with your own file path
    df = pd.read_excel(file_path)
    stock_names = df['股票名称'].tolist()
    # Cells hold strings such as "12.3%". A cell that is not a number
    # followed by "%" becomes None, which is drawn as a gap instead of
    # aborting the whole chart.
    latest_prices = []
    for raw in df['60日涨跌幅'].tolist():
        try:
            latest_prices.append(float(str(raw).strip('%')))
        except ValueError:
            latest_prices.append(None)
    line_chart1 = Line(init_opts=opts.InitOpts(theme='infographic', bg_color='#e7e5d0'))
    line_chart1.add_xaxis(stock_names)
    line_chart1.add_yaxis("60日涨跌幅", latest_prices, label_opts=opts.LabelOpts(position="top"))
    line_chart1.set_global_opts(
        title_opts=opts.TitleOpts(title="股票60日涨跌幅折线图"),
        toolbox_opts=opts.ToolboxOpts(),
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
        yaxis_opts=opts.AxisOpts(name="60日涨跌幅"),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),
        # Zoom with both the slider under the chart and the mouse wheel.
        datazoom_opts=[
            opts.DataZoomOpts(type_="slider", is_show=True),
            opts.DataZoomOpts(type_="inside", is_show=True),
        ],
    )
    # Mark the maximum, minimum and average of the series. The keyword must
    # be ``markpoint_opts``; any other name is stored as an unknown series
    # key and no marker is drawn.
    line_chart1.set_series_opts(
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
                opts.MarkPointItem(type_="average", name="平均值")
            ]
        )
    )
    html_content_1 = line_chart1.render_embed()
2.将股票五分钟涨跌情况进行汇总并绘制成折线图,
通过此图能够及时地反映股票市场的动态,帮助投资者了解市场走势和个股表现。
# ================== 5-minute change ==================
# Load the scraped workbook.
file_path = f'股票数据.xls'  # replace with your own file path
df = pd.read_excel(file_path)
stock_names = df['股票名称'].tolist()
# Cells hold strings such as "0.05%". A cell that is not a number followed
# by "%" becomes None, which is drawn as a gap instead of aborting the chart.
latest_prices = []
for raw in df['五分钟涨跌'].tolist():
    try:
        latest_prices.append(float(str(raw).strip('%')))
    except ValueError:
        latest_prices.append(None)
line_chart2 = Line(init_opts=opts.InitOpts(theme='dark'))
line_chart2.add_xaxis(stock_names)
line_chart2.add_yaxis("五分钟涨跌", latest_prices, label_opts=opts.LabelOpts(position="top"))
line_chart2.set_global_opts(
    title_opts=opts.TitleOpts(title="股票五分钟涨跌折线图"),
    toolbox_opts=opts.ToolboxOpts(),
    tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
    yaxis_opts=opts.AxisOpts(name="五分钟涨跌"),
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),
    # Zoom with both the slider under the chart and the mouse wheel.
    datazoom_opts=[
        opts.DataZoomOpts(type_="slider", is_show=True),
        opts.DataZoomOpts(type_="inside", is_show=True),
    ],
)
# Mark the maximum, minimum and average of the series. The keyword must be
# ``markpoint_opts``; any other name is stored as an unknown series key and
# no marker is drawn.
line_chart2.set_series_opts(
    markpoint_opts=opts.MarkPointOpts(
        data=[
            opts.MarkPointItem(type_="max", name="最大值"),
            opts.MarkPointItem(type_="min", name="最小值"),
            opts.MarkPointItem(type_="average", name="平均值")
        ]
    )
)
完整源代码如下:
import json
import requests, time
from tkinter import *
import xlrd, xlwt, os
from xlutils.copy import copy
class GuPiao():
    """Scrape stock quotes from the Eastmoney list API into an .xls workbook.

    Attributes:
        num: Running count of quotes parsed so far; it is also written as the
            first column of every stored row.
    """

    # Output workbook and the sheet inside it that receives the rows.
    WORKBOOK_PATH = '股票数据.xls'
    SHEET_NAME = '股票数据'
    # Column titles of the sheet; every stored row follows exactly this order.
    HEADER = (
        '序号', '股票名称', '股票代码', '最新价', '涨跌幅', '涨跌额', '成交量(手)', '成交额',
        '振幅', '换手率', '市盈率', '量比', '最高', '最低', '今开', '昨收', '市净率',
        '60日涨跌幅', '年初至今涨跌幅', '总市值', '流通市值', '五分钟涨跌')
    # JSONP callback name sent as ``cb``; the reply body is wrapped in a call
    # to this name.
    CALLBACK = 'jQuery1124001114974346054809_1700894280689'

    def __init__(self):
        self.num = 0

    # ---- crawling ----
    def spider(self):
        """Fetch list pages 1-20 and hand each page's rows to ``parase_data``.

        Stops early when a reply carries no ``data`` payload.

        Raises:
            requests.RequestException: on network failure, timeout or an HTTP
                error status.
            ValueError: if the reply body is not a JSONP-wrapped JSON document.
        """
        start_url = 'https://83.push2.eastmoney.com/api/qt/clist/get'
        # The headers never change between pages, so build them once.
        headers = {
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Referer': 'https://quote.eastmoney.com/center/gridlist.html',
            'Sec-Fetch-Dest': 'script',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Site': 'same-site',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
        }
        for page in range(1, 21):  # raise the upper bound to collect more pages
            fomdata = {
                'cb': self.CALLBACK,
                'pn': page,
                'pz': '20',
                'po': '1',
                'np': '1',
                'ut': 'bd1d9ddb04089700cf9c27f6f7426281',
                'fltt': '2',
                'invt': '2',
                'wbp2u': '|0|0|0|web',
                'fid': 'f3',
                'fs': 'm:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048',
                'fields': 'f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152',
                '_': '1700894280699'
            }
            # A timeout keeps a stalled connection from hanging the crawl.
            reply = requests.get(url=start_url, headers=headers,
                                 params=fomdata, timeout=10)
            reply.raise_for_status()
            # Unwrap the JSONP envelope ``callback(<json>);`` by taking what
            # lies between the first "(" and the last ")". Plain string
            # slicing needs no ``re`` import and cannot cut the payload short
            # when the JSON itself contains ");".
            _, _, tail = reply.text.partition('(')
            payload = json.loads(tail.rpartition(')')[0])
            page_data = payload.get('data')
            if not page_data:
                # NOTE(review): presumably the API answers ``"data": null``
                # past the last page - confirm against a live reply.
                break
            datas = page_data['diff']
            time.sleep(2)  # pause before parsing and requesting the next page
            self.parase_data(datas)

    # ---- parsing ----
    @staticmethod
    def _build_record(num, item):
        """Map one raw API row (``f*`` keys) to a dict keyed by column title.

        Args:
            num: Sequence number stored in the '序号' column.
            item: One element of the reply's ``data.diff`` list.

        Returns:
            A dict holding one entry per title in ``HEADER``; the percentage
            columns are strings with a trailing '%'.
        """
        def percent(key):
            return str(item[key]) + '%'

        return {
            '序号': num,
            '股票名称': item['f14'],
            '股票代码': item['f12'],
            '最新价': item['f2'],
            '涨跌幅': percent('f3'),
            '涨跌额': item['f4'],
            '成交量(手)': item['f5'],
            '成交额': item['f6'],
            '振幅': percent('f7'),
            '换手率': percent('f8'),
            '市盈率': item['f9'],
            '量比': item['f10'],
            '最高': item['f15'],
            '最低': item['f16'],
            '今开': item['f17'],
            '昨收': item['f18'],
            # Price-to-book comes from f23 (previously the previous-close
            # value was stored under this key by mistake).
            '市净率': item['f23'],
            '60日涨跌幅': percent('f24'),
            '年初至今涨跌幅': percent('f25'),
            '总市值': item['f20'],
            '流通市值': item['f21'],
            '五分钟涨跌': percent('f11'),
        }

    def parase_data(self, datas):
        """Number, print and store every raw row in ``datas``.

        Args:
            datas: Iterable of raw API rows (dicts with ``f*`` keys).
        """
        for item in datas:
            self.num += 1
            record = self._build_record(self.num, item)
            print(record)
            # chucun_excel expects {sheet name: row values in column order}.
            row = [record[title] for title in self.HEADER]
            self.chucun_excel({self.SHEET_NAME: row})

    # ---- storage ----
    def _create_workbook(self):
        """Create the workbook with one sheet holding a styled header row."""
        wb = xlwt.Workbook(encoding='utf-8')
        sheet = wb.add_sheet(self.SHEET_NAME, cell_overwrite_ok=True)
        # Thin border on all four sides, colour index 0x40.
        borders = xlwt.Borders()
        borders.left = xlwt.Borders.THIN
        borders.right = xlwt.Borders.THIN
        borders.top = xlwt.Borders.THIN
        borders.bottom = xlwt.Borders.THIN
        borders.left_colour = 0x40
        borders.right_colour = 0x40
        borders.top_colour = 0x40
        borders.bottom_colour = 0x40
        # Centre the header cells horizontally and vertically.
        align = xlwt.Alignment()
        align.horz = 0x02
        align.vert = 0x01
        style = xlwt.XFStyle()
        style.borders = borders
        style.alignment = align
        for column, title in enumerate(self.HEADER):
            sheet.col(column).width = 2560 * 3
            sheet.write(0, column, title, style)
        wb.save(self.WORKBOOK_PATH)

    def chucun_excel(self, data):
        """Append one row per matching sheet to the workbook on disk.

        The workbook is created with its header row on first use.

        Args:
            data: Mapping of sheet name to the list of cell values to append
                after the last used row of that sheet.
        """
        if not os.path.exists(self.WORKBOOK_PATH):
            self._create_workbook()
        # formatting_info=True lets xlutils carry the header style and the
        # column widths over to the writable copy.
        wb = xlrd.open_workbook(self.WORKBOOK_PATH, formatting_info=True)
        # xlrd workbooks are read-only; xlutils turns one into an xlwt copy.
        new_workbook = copy(wb)
        appended = False
        for index, sheet_name in enumerate(wb.sheet_names()):
            if sheet_name not in data:
                continue
            rows_old = wb.sheet_by_index(index).nrows  # first free row
            target = new_workbook.get_sheet(index)
            for column, value in enumerate(data[sheet_name]):
                target.write(rows_old, column, value)
            appended = True
        if appended:
            new_workbook.save(self.WORKBOOK_PATH)
            xai_data = f'已经存储第{self.num}条股票数据至《股票数据》'
            print(xai_data)
if __name__ == '__main__':
    # Start the crawl only when this file is executed as a script.
    GuPiao().spider()
import pandas as pd
from pyecharts.charts import Line
from pyecharts import options as opts
def _percent_to_float(value):
    """Convert a cell such as ``"12.3%"`` to a float, or None if unparsable.

    Non-string cells are accepted; NaN (an empty cell read by pandas) and
    text that is not a number followed by "%" both yield None, which the
    chart draws as a gap.
    """
    try:
        number = float(str(value).strip().strip('%'))
    except ValueError:
        return None
    # NaN is the only float that differs from itself.
    return None if number != number else number


def _build_line_chart(df, column, title, init_opts):
    """Return a line chart of percentage column ``column`` per stock name.

    Args:
        df: DataFrame with a '股票名称' column and the ``column`` to plot.
        column: Name of the percentage column; also used as the series and
            y-axis name.
        title: Chart title.
        init_opts: ``opts.InitOpts`` controlling theme and background.
    """
    chart = Line(init_opts=init_opts)
    chart.add_xaxis(df['股票名称'].tolist())
    chart.add_yaxis(
        column,
        [_percent_to_float(cell) for cell in df[column].tolist()],
        label_opts=opts.LabelOpts(position="top"),
    )
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title=title),
        toolbox_opts=opts.ToolboxOpts(),
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
        yaxis_opts=opts.AxisOpts(name=column),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=0)),
        # Zoom with both the slider under the chart and the mouse wheel.
        datazoom_opts=[
            opts.DataZoomOpts(type_="slider", is_show=True),
            opts.DataZoomOpts(type_="inside", is_show=True),
        ],
    )
    # Mark the maximum, minimum and average of the series. The keyword must
    # be ``markpoint_opts``; any other name is stored as an unknown series
    # key and no marker is drawn.
    chart.set_series_opts(
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
                opts.MarkPointItem(type_="average", name="平均值")
            ]
        )
    )
    return chart


def shujufenxi():
    """Render the 60-day and 5-minute change charts into one HTML page.

    Reads ``股票数据.xls`` from the working directory and writes
    ``60日涨跌幅和5分钟涨跌可视化.html`` next to it.
    """
    file_path = f'股票数据.xls'  # replace with your own file path
    # Both charts use the same sheet, so the workbook is read only once.
    df = pd.read_excel(file_path)

    # =============== 60-day change ===============
    line_chart1 = _build_line_chart(
        df, '60日涨跌幅', "股票60日涨跌幅折线图",
        opts.InitOpts(theme='infographic', bg_color='#e7e5d0'))
    html_content_1 = line_chart1.render_embed()

    # =============== 5-minute change ===============
    line_chart2 = _build_line_chart(
        df, '五分钟涨跌', "股票五分钟涨跌折线图",
        opts.InitOpts(theme='dark'))
    html_content_2 = line_chart2.render_embed()

    # Wrap both embedded charts in a single page.
    complete_html = f"""
<html>
<head>
<title>股票数据可视化</title>
</head>
<body style="background: linear-gradient(to right, #e5b8c3, #c17886);">
<h1 style="text-align: center;margin-top: 20px;">股 票 数 据 可 视 化</h1>
<div class="one" style="display: flex; justify-content: center; flex-wrap: wrap; height: 100%;">
<div style="margin: 10px; padding: 10px;">
<h3 style="text-align: center;margin-top: 20px;">60日涨跌幅</h3>
{html_content_1}
</div>
<div style="margin: 10px; padding: 10px;">
<h1 style="text-align: center;margin-top: 20px;">五分钟涨跌</h1>
{html_content_2}
</div>
</div>
</body>
</html>
"""
    # Save the combined page.
    with open(f'60日涨跌幅和5分钟涨跌可视化.html', "w", encoding="utf-8") as file:
        file.write(complete_html)
    xai_data = f'已生成股票《60日涨跌幅和5分钟涨跌》可视化.html'
    print(xai_data)
# Build both charts and write the combined HTML page.
# NOTE(review): this appears to be a module-level call outside the
# ``__main__`` guard, so it would also run on import - confirm the intent.
shujufenxi()