"""
爬取某宝商品数据,附加某宝秒杀脚本
爬取某宝商品数据
源码
"""
import requests # 第三方模块
import re
import json
import csv
# Open the output CSV in append mode so rows from repeated runs accumulate.
# newline='' leaves line-ending handling to the csv module, as its
# documentation recommends for files passed to csv.writer.
f = open('taobao.csv', mode='a', newline='', encoding='utf-8')
csv_writer = csv.writer(f)
# Append mode positions the stream at end-of-file, so a position of 0 means
# the file is empty. Write the header only then; otherwise every run would
# insert another header row in the middle of the data.
if f.tell() == 0:
    csv_writer.writerow(['raw_title', 'view_price', 'item_loc', 'view_sales', 'comment_count', 'nick', 'detail_url'])
# Why disguise the request?
# The server does not want to hand its data to a script, so the request
# carries the same headers a real browser session would send.
# The headers are built as a dict.
# What is a dict? A dict is a Python data container written with {};
# lists use [] and tuples use ().
# {"":"", "":"", "":"", "":"", "":""}
# NOTE(review): the cookie below appears to be a logged-in session captured
# from a browser (it carries session/token fields). It is presumably
# short-lived and account-specific -- confirm, and consider loading it from
# the environment instead of keeping it in source.
headers = {
'cookie': 'cna=s/5FG78j/FUCAa8APiecOvNg; tracknick=tb668512329; thw=cn; enc=5QzxAFeTLCIaj4DdlClUUmCfmppq0mVmYnRM4MnjLLB4RjqMpvuUixwqmjkBvCn0Jgo9mK5a7GX5bTUVvYOjcKlG6Dcyihb49SfHSHh4p5w%3D; t=213a75d5f9b973a401f09b4b2ec812d7; _cc_=URm48syIZQ%3D%3D; sgcookie=E100uQe2yhvlDzLeFPm4%2BfB6tf%2BFsK%2FMda5f7206IxmCCrAvLuVZh8UBxD%2FJNv7XB8FEpm04JpToQ7vBpAnzq53Nd%2Be35XveHYnbr7vbksiQXTo%3D; uc3=nk2=F5RDKmf768KMcHQ%3D&vt3=F8dCv4of0HO1FFYJIBE%3D&lg2=U%2BGCWk%2F75gdr5Q%3D%3D&id2=UUpgRsItw%2BrsB7dvyw%3D%3D; lgc=tb668512329; uc4=id4=0%40U2gqyZJ81Yv14cp6ZGKPzfQ18kyJG1rt&nk4=0%40FY4I6earzOZXUhcMjuCe8tiaY1Stpw%3D%3D; mt=ci=-1_0; _tb_token_=43f7e76e367f; _m_h5_tk=d9ed16b25f4b5df7ba6b499f4a885508_1667573369978; _m_h5_tk_enc=2b5734c43a982a947a8c43805ca6f756; cookie2=119afda486ca9dece1dd8cddb6af1ebd; xlly_s=1; uc1=cookie14=UoeyCURCeMBd0w%3D%3D; JSESSIONID=80D01100395EA2871F8B9EA1E137609F; l=eBrY7YtILf1CV5oyBO5ahurza77O2QOb8sPzaNbMiInca6BRtKdgnNCUVupDSdtjgtCXWetzmSrNYdEvJp4daxDDBexrCyCoExvO.; tfstk=cz9NBFt5MAHZ8nKxy9X2UmYzO95OagKMmJS5sqjsuG5gAlCGzsqgkMb1XMSbYrfG.; isg=BN7eZuVIMWxljWQk6kJ9bdpuL3Qgn6IZKkIfcIhkCyGEq3-F8C9tKT0Jo7enk5ox',
'referer': 'https://s.taobao.com/search?q=iPhone14&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
}
# https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s=44
# https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset=-2&ntoffset=-2&p4ppushleft=2%2C48&s=88
# https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset=-5&ntoffset=-5&p4ppushleft=2%2C48&s=132
# https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset=-8&ntoffset=-8&p4ppushleft=2%2C48&s=176
# Crawl search-result pages 1..99 and append one CSV row per listed item.
#
# Loop invariants are hoisted: the pattern that captures the JSON assigned to
# g_page_config in the page source, and the auction fields to export (in CSV
# column order). The greedy (.*) runs to the last ';' on the matching line.
page_config_pattern = re.compile(r'g_page_config = (.*);')
fields = ('raw_title', 'view_price', 'item_loc', 'view_sales', 'comment_count', 'nick', 'detail_url')

try:
    for page in range(1, 100):
        print(f"----正在爬取第{page}页----")
        # Pagination parameters: `s` advances by 44 per page while
        # bcoffset/ntoffset start at 1 and drop by 3 per page.
        s = page*44
        coffset = 1-3*(page-1)
        url = f'https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset={coffset}&ntoffset={coffset}&p4ppushleft=2%2C48&s={s}'

        # 1. Send the request. requests applies no timeout by default, so set
        #    one explicitly to keep a stalled connection from hanging the run.
        response = requests.get(url=url, headers=headers, timeout=10)

        # 2. Get the page source.
        html_data = response.text

        # 3. Parse the data. When the page does not embed g_page_config (for
        #    example a login or verification page was served instead of
        #    results), stop with a clear message rather than an IndexError.
        match = page_config_pattern.search(html_data)
        if match is None:
            print(f"第{page}页未找到 g_page_config,可能需要重新登录或更新cookie,停止爬取")
            break
        json_dict = json.loads(match.group(1))

        # The item list lives several levels deep; a missing level means this
        # page carries no items, so there is nothing further to crawl.
        try:
            auctions = json_dict["mods"]['itemlist']['data']['auctions']
        except (KeyError, TypeError):
            print(f"第{page}页没有商品数据,停止爬取")
            break

        for auction in auctions:
            # Tolerate entries that lack a field by leaving that column empty
            # instead of aborting the whole crawl on a KeyError.
            row = [auction.get(field, '') for field in fields]
            print(*row)
            # 4. Save the data.
            csv_writer.writerow(row)
finally:
    # Flush buffered rows and release the CSV handle even if a request or
    # parse step raises part-way through.
    f.close()
# 某宝秒杀脚本
import datetime
import time
from selenium import webdriver
# Flash-sale helper: open Taobao, let the user log in by QR code, select every
# item in the cart, then at the target time click checkout and submit the order.
#
# Imported here so this script section is self-contained. Both names are
# available in old and new Selenium releases, unlike the find_element_by_*
# helpers, which later releases removed.
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By

# Moment at which checkout should fire. It is compared as a string against
# strftime output using the same zero-padded layout, so lexicographic order
# matches chronological order.
times = "2022-03-03 21:07:00.00000000"
# NOTE(review): passing the driver path positionally is the older Selenium
# calling convention; newer releases presumably need a Service object -- confirm
# against the installed version.
driver = webdriver.Chrome(r'C:\Users\Tony\PycharmProjects\Module_Tony_Demo\Moudle_游戏源码锦集\chromedriver.exe')
driver.get("https://www.taobao.com")
time.sleep(3)
driver.find_element(By.LINK_TEXT, "亲,请登录").click()
print("请尽快扫码登录")
# Give the user time to scan the QR code before moving on to the cart.
time.sleep(20)
driver.get("https://cart.taobao.com/cart.htm")
time.sleep(3)

# Tick the cart's select-all checkbox, retrying until it can be clicked.
while True:
    try:
        # find_element raises when the element is absent, so a single lookup
        # followed by click() is sufficient.
        driver.find_element(By.ID, "J_SelectAll1").click()
        break
    except WebDriverException:
        # Catch only Selenium errors so Ctrl-C still interrupts the loop.
        print("找不到购买按钮")
        # Short pause so a missing checkbox does not flood the console.
        time.sleep(0.1)

# Poll the clock until the target time, then run checkout and order submission.
while True:
    # Current time, formatted to be comparable with `times`.
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    print(now)
    if now > times:
        # Checkout: retry immediately until the button can be clicked.
        while True:
            try:
                checkout_button = driver.find_element(By.LINK_TEXT, "结 算")
                print("here")
                checkout_button.click()
                print("程序已将商品锁定,结算成功")
                break
            except WebDriverException:
                # Button not clickable yet; retry without delay because
                # timing matters here.
                continue
        # Submit order: keep clicking until the button can no longer be found,
        # which the script treats as the order having gone through.
        while True:
            try:
                driver.find_element(By.LINK_TEXT, '提交订单').click()
                print("抢购成功,请尽快付款")
            except WebDriverException:
                print("恭喜,本程序已帮你抢到商品啦,您来支付吧")
                break
        # The order flow has finished; without this the outer loop would
        # re-enter the checkout retry loop and spin forever.
        break
    time.sleep(0.01)
# 效果展示
# END
# 原文公众号:Python顾木子吖