Python Web Scraping in Practice -- Scraping Taobao Product Data, Plus a Taobao Flash-Sale Script

Published: 2023-09-18 18:00:59  Author: Ivan丶c

Scraping Taobao product data, plus a Taobao flash-sale script.

Scraping Taobao Product Data

Source code

"""
import requests     # third-party module (pip install requests)
import re
import json
import csv

f = open('taobao.csv', mode='a', newline='', encoding='utf-8')
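# newline='' above keeps the csv module from writing blank rows on Windows;
# encoding='utf-8-sig' instead of 'utf-8' may display better if the file is opened in Excel.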
csv_writer = csv.writer(f)
csv_writer.writerow(['raw_title', 'view_price', 'item_loc', 'view_sales', 'comment_count', 'nick', 'detail_url'])
# Why disguise the request? The server will not hand its data to an obvious bot,
# so the headers below make the request look like it comes from a real, logged-in browser.
# headers is built as a dict -- one of Python's containers, alongside lists [] and tuples () --
# of the form {"header-name": "value", ...}
headers = {
    'cookie': 'cna=s/5FG78j/FUCAa8APiecOvNg; tracknick=tb668512329; thw=cn; enc=5QzxAFeTLCIaj4DdlClUUmCfmppq0mVmYnRM4MnjLLB4RjqMpvuUixwqmjkBvCn0Jgo9mK5a7GX5bTUVvYOjcKlG6Dcyihb49SfHSHh4p5w%3D; t=213a75d5f9b973a401f09b4b2ec812d7; _cc_=URm48syIZQ%3D%3D; sgcookie=E100uQe2yhvlDzLeFPm4%2BfB6tf%2BFsK%2FMda5f7206IxmCCrAvLuVZh8UBxD%2FJNv7XB8FEpm04JpToQ7vBpAnzq53Nd%2Be35XveHYnbr7vbksiQXTo%3D; uc3=nk2=F5RDKmf768KMcHQ%3D&vt3=F8dCv4of0HO1FFYJIBE%3D&lg2=U%2BGCWk%2F75gdr5Q%3D%3D&id2=UUpgRsItw%2BrsB7dvyw%3D%3D; lgc=tb668512329; uc4=id4=0%40U2gqyZJ81Yv14cp6ZGKPzfQ18kyJG1rt&nk4=0%40FY4I6earzOZXUhcMjuCe8tiaY1Stpw%3D%3D; mt=ci=-1_0; _tb_token_=43f7e76e367f; _m_h5_tk=d9ed16b25f4b5df7ba6b499f4a885508_1667573369978; _m_h5_tk_enc=2b5734c43a982a947a8c43805ca6f756; cookie2=119afda486ca9dece1dd8cddb6af1ebd; xlly_s=1; uc1=cookie14=UoeyCURCeMBd0w%3D%3D; JSESSIONID=80D01100395EA2871F8B9EA1E137609F; l=eBrY7YtILf1CV5oyBO5ahurza77O2QOb8sPzaNbMiInca6BRtKdgnNCUVupDSdtjgtCXWetzmSrNYdEvJp4daxDDBexrCyCoExvO.; tfstk=cz9NBFt5MAHZ8nKxy9X2UmYzO95OagKMmJS5sqjsuG5gAlCGzsqgkMb1XMSbYrfG.; isg=BN7eZuVIMWxljWQk6kJ9bdpuL3Qgn6IZKkIfcIhkCyGEq3-F8C9tKT0Jo7enk5ox',
    'referer': 'https://s.taobao.com/search?q=iPhone14&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
}
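# The cookie, referer and user-agent values above are copied from a logged-in browser
# session (DevTools -> Network -> request headers); the cookie expires, so it has to be
# refreshed before each run.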
# https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s=44
# https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset=-2&ntoffset=-2&p4ppushleft=2%2C48&s=88
# https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset=-5&ntoffset=-5&p4ppushleft=2%2C48&s=132
# https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset=-8&ntoffset=-8&p4ppushleft=2%2C48&s=176
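# Pattern in the sample URLs above: each page moves the result offset s forward by 44 items
# and shifts bcoffset/ntoffset down by 3, which is what the loop below reproduces.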
for page in range(1, 100):
    print(f"----正在爬取第{page}页----")
    s = page*44
    coffset = 1-3*(page-1)
    url = f'https://s.taobao.com/search?q=%E7%A1%AC%E7%9B%98&imgfile=&js=1&style=grid&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20221104&ie=utf8&bcoffset={coffset}&ntoffset={coffset}&p4ppushleft=2%2C48&s={s}'
    # 1. Send the request
    response = requests.get(url=url, headers=headers)
    # 2. Get the response body
    html_data = response.text
    # 3. Parse the data
    # Structured data: JSON -> load it and read values by dict key
    # Unstructured data: raw page source -> xpath / css selectors / regular expressions
    # Pattern: g_page_config = (.*);
    # (.*) greedily matches anything up to the last ';'
    # re.findall pulls every match of the pattern out of html_data and returns them as a list
    # (add re.S if the embedded object ever spans multiple lines)
    g_page_config = re.findall('g_page_config = (.*);', html_data)[0]
    # Pull all of the product entries out of g_page_config
    json_dict = json.loads(g_page_config)       # now a Python dict
    # A dict is easy to read values from by key, e.g. {"A": "a", "B": "b"}['A'] -> 'a';
    # xpath extracts tag attributes or text from HTML, but it cannot index into a dict.
    auctions = json_dict["mods"]['itemlist']['data']['auctions']    # a list of items
    for auction in auctions:
        raw_title = auction['raw_title']
        view_price = auction['view_price']
        item_loc = auction['item_loc']
        view_sales = auction['view_sales']
        comment_count = auction['comment_count']
        nick = auction['nick']
        detail_url = auction['detail_url']
        print(raw_title, view_price, item_loc, view_sales, comment_count, nick, detail_url)
        # 4. Save the row to the CSV
        csv_writer.writerow([raw_title, view_price, item_loc, view_sales, comment_count, nick, detail_url])
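
To see the parsing step on its own, here is a minimal, self-contained sketch of the same technique: a regular expression pulls the "g_page_config = ...;" assignment out of the page source and json.loads turns it into a dict. The HTML snippet below is a made-up stand-in for the real search page, whose embedded object is far larger.

import re
import json

# Made-up stand-in for the search-result page source
html_data = '''<script>
g_page_config = {"mods": {"itemlist": {"data": {"auctions": [
    {"raw_title": "demo item", "view_price": "9.90", "item_loc": "Shanghai",
     "view_sales": "100+ sold", "comment_count": "20", "nick": "demo shop",
     "detail_url": "//item.taobao.com/item.htm?id=1"}
]}}}};
</script>'''

# Same pattern as the crawler; re.S lets '.' cross the line breaks in this toy snippet
# (the real page keeps the whole object on one line, so the crawler omits it)
g_page_config = re.findall('g_page_config = (.*);', html_data, re.S)[0]
json_dict = json.loads(g_page_config)

for auction in json_dict['mods']['itemlist']['data']['auctions']:
    print(auction['raw_title'], auction['view_price'], auction['item_loc'])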

Taobao Flash-Sale Script

import datetime
import time
from selenium import webdriver    # written against the Selenium 3 API (find_element_by_*)


now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')

times = "2022-03-03 21:07:00.00000000"
driver = webdriver.Chrome(r'C:\Users\Tony\PycharmProjects\Module_Tony_Demo\Moudle_游戏源码锦集\chromedriver.exe')
driver.get("https://www.taobao.com")
time.sleep(3)
driver.find_element_by_link_text("亲,请登录").click()    # link text must match the Chinese "please log in" label on the page

print("Please scan the QR code to log in (about 20 seconds)")
time.sleep(20)
driver.get("https://cart.taobao.com/cart.htm")           # open the shopping cart
time.sleep(3)

# Tick the "select all" checkbox for everything in the cart
while True:
    try:
        if driver.find_element_by_id("J_SelectAll1"):
            driver.find_element_by_id("J_SelectAll1").click()
            break
    except:
        print(f"找不到购买按钮")


while True:
    # Poll the current time in the same string format as `times`
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    print(now)
    # Once the target time has passed, check out and submit the order
    if now > times:
        # Keep trying to click "结 算" (checkout) until the button is available
        while True:
            try:
                if driver.find_element_by_link_text("结 算"):
                    driver.find_element_by_link_text("结 算").click()
                    print("Items locked in, checkout succeeded")
                    break
            except:
                pass
        # Keep clicking "提交订单" (submit order); once the button disappears, the order is in
        while True:
            try:
                if driver.find_element_by_link_text('提交订单'):
                    driver.find_element_by_link_text('提交订单').click()
                    print("Order submitted, please pay as soon as possible")
            except:
                print("Done -- the script has grabbed the item, complete the payment yourself")
                break
        break
    time.sleep(0.01)

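One robustness note on the script above: it compares the current time to the target as strings, which works only because both use the same zero-padded '%Y-%m-%d %H:%M:%S.%f' layout. Comparing datetime objects removes that dependency. Below is a minimal sketch of the waiting loop, with a placeholder target time.

import datetime
import time

# Placeholder target; replace with the real flash-sale start time
target = datetime.datetime.strptime("2022-03-03 21:07:00.000000", "%Y-%m-%d %H:%M:%S.%f")

# Poll until the target moment, then hand control to the checkout clicks
while datetime.datetime.now() < target:
    time.sleep(0.01)
print("Sale started -- fire the checkout clicks now")
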
Results

(Screenshots from the original post: ldtyber3.png, ldtybjgd.png)

END

Original post from the WeChat public account: Python顾木子吖