korean sexy pet网站爬取

发布时间 2024-01-01 10:01:08 作者: 萧海~


import sys
from urllib.parse import urljoin

import requests
from lxml import etree
# Cookies sent with every request in this script (handed to requests.get
# through its `cookies=` argument).
# NOTE(review): the values look like a snapshot captured from a single browser
# session (analytics ids, ECSESSID, epoch timestamps) -- presumably they
# expire; confirm whether the site actually requires them.
cookies = {
    'fb_external_id': '13ff5c994e2864ebef4d0a2dba4d0ba6985ad724fe2b9608a0de1237e082102d',
    '_fwb': '57RgnHB1Jl9KS0nY5fIGRX.1704026411225',
    'ec_ipad_device': 'F',
    'iSearchKey': '0',
    'CFAE_CID': 'CFAE_CID.sexypet_1.5QHSFSM.1704026417344',
    'CFAE_CUK1Y': 'CFAE_CUK1Y.sexypet_1.5QHSFSM.1704026417344',
    'CFAE_CUK45': 'CFAE_CUK45.sexypet_1.5QHSFSM.1704026417344',
    'CVID_Y': 'CVID_Y.42574c4d4850446602.1704026417344',
    '_ga': 'GA1.1.1020315262.1704026420',
    'CUK45': 'cuk45_sexypet_a1d61d29001606b63855f6202e11a441',
    'CUK2Y': 'cuk2y_sexypet_a1d61d29001606b63855f6202e11a441',
    # The value of 'CID' is itself used as the name of another cookie below.
    'CID': 'CID6fb2ec5a77875091c1625cdd273f788f',
    'ECSESSID': '42ab0ae41561643f523cb70c6715b91b',
    'atl_epcheck': '1',
    'atl_option': '1%2C1%2CH',
    'basketcount_1': '0',
    'isviewtype': 'pc',
    # URL-encoded value; it embeds a percent-encoded product path.
    'CID6fb2ec5a77875091c1625cdd273f788f': '025ef7de5576ef52aa2f76b18102cb52%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%2Fproduct%2F%25EC%25A0%2584%25EC%258B%25A0-%25EC%258A%25A4%25ED%258A%25B8%25EB%259E%25A9-%25EC%2598%25AC%25EC%259D%25B8%25EC%259B%2590%2F2286%2Fcategory%2F47%2Fdisplay%2F2%2F%3A%3A1704073352%3A%3A%3A%3Appdp%3A%3A1704073352%3A%3A%3A%3A%3A%3A%3A%3A',
    'CVID': 'CVID.42574c4d4850446602.1704073353111',
    'recent_plist': '2286%7C2287%7C2022',
    'fb_event_id': 'event_id.sexypet.1.P21JHCPLLGPFE9VYAD395IPS35OYP2W4H',
    'wcs_bt': 's_22145f2006:1704073455',
    'CFAE_LC': 'CFAE_LC.sexypet_1.1MGMESI.1704073457108',
    'vt': '1704073456',
    '_ga_5JX34RJSG0': 'GS1.1.1704073352.2.1.1704073497.0.0.0',
}

# HTTP request headers sent with every request in this script (handed to
# requests.get through its `headers=` argument). The user-agent and sec-ch-ua
# values identify the client as Chrome 120 on Windows.
headers = {
    # NOTE(review): looks like the HTTP/2 `:authority` pseudo-header carried
    # over from a browser capture; here it is sent as an ordinary header named
    # 'authority' -- confirm it is needed.
    'authority': 'sexypet.co.kr',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    # No raw 'cookie' header is set here: the same values live in the
    # `cookies` dict and are passed through `cookies=`, so requests builds the
    # Cookie header itself (the cookie order may differ from the browser's).
    'pragma': 'no-cache',
    # Referer points at ?page=2 of the best-100 listing.
    'referer': 'https://sexypet.co.kr/category/best-100/24/?page=2',
    'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}

# Query-string arguments handed to requests.get through `params=`;
# the single entry selects page 1.
params = {'page': '1'}

# Seconds to wait for the server before giving up; without a timeout
# requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Listing page whose product thumbnails are followed.
LISTING_URL = 'https://sexypet.co.kr/category/best-100/24/'

THUMBNAIL_LINK_XPATH = '//div[@class="thumbnail"]/a/@href'
THUMBNAIL_IMAGE_XPATH = '//div[@class="thumbnail"]/a//img/@src'


def fetch_tree(url):
    """Download ``url`` and return the page parsed as an lxml HTML tree.

    The request reuses the module-level ``params``, ``cookies`` and
    ``headers``.
    NOTE(review): this means the listing's ``page`` query parameter is also
    sent to product pages, as the script has always done -- presumably
    unnecessary; confirm before removing.

    Returns:
        The root element produced by ``etree.HTML``, which may be ``None``
        when the response body is empty.

    Raises:
        requests.RequestException: on connection problems, timeouts, or an
            HTTP error status (raised by ``raise_for_status``).
    """
    response = requests.get(url, params=params, cookies=cookies,
                            headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    return etree.HTML(response.text)


# Step 1: collect the link behind every product thumbnail on the listing page.
html = fetch_tree(LISTING_URL)
res = html.xpath(THUMBNAIL_LINK_XPATH) if html is not None else []
# urljoin resolves root-relative hrefs against the site and leaves absolute
# ones untouched.
next_url_lst = [urljoin(LISTING_URL, href) for href in res]

# Step 2: visit each product page and print the URL of its first thumbnail
# image. A failing page is reported on stderr and skipped so that one bad
# product does not abort the whole run.
for item in next_url_lst:
    try:
        detail = fetch_tree(item)
    except requests.RequestException as exc:
        print(f'skipped {item}: {exc}', file=sys.stderr)
        continue

    images = detail.xpath(THUMBNAIL_IMAGE_XPATH) if detail is not None else []
    if not images:
        print(f'no thumbnail image found on {item}', file=sys.stderr)
        continue

    # Protocol-relative sources ("//host/path") get the page's https scheme,
    # matching the previous "https:" + src output; other forms are resolved
    # correctly as well.
    print(urljoin(item, images[0]))

在这里插入图片描述