Scrapy framework: cookies, auto-login to Chouti, and upvoting

Published 2023-12-08 15:17:42 · Author: 木屐呀

Auto-login to Chouti and upvoting

The spider below grabs the site's initial cookies with CookieJar, logs in by POSTing the phone number and password to /login, then upvotes every post on the front page and follows the pager links.

# -*- coding: utf-8 -*-
import hashlib

import scrapy
from scrapy.selector import Selector          # selector for XPath lookups
from scrapy.http import Request               # request object for follow-up URLs
from scrapy.http.cookies import CookieJar     # cookie container


class ChoutiSpider(scrapy.Spider):
    name = 'chouti'
    allowed_domains = ['chouti.com']          # off-site URLs are filtered out during the recursive crawl
    start_urls = ['https://dig.chouti.com/']  # the first URL requested when the spider starts

    cookie_dict = None
    has_request_url_set = set()

    def parse(self, response):
        # pull the cookies set by the landing page out of the response
        cookie_obj = CookieJar()
        cookie_obj.extract_cookies(response, response.request)
        # save the cookies (note: _cookies is a private attribute of CookieJar)
        self.cookie_dict = cookie_obj._cookies
        # log in: username/password + cookies + headers
        yield Request(
            url='https://dig.chouti.com/login',
            method="POST",
            body="phone=8617744503421&password=Huang123&oneMonth=1",   # use your own account here
            headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
            cookies=self.cookie_dict,
            callback=self.check_login,
        )

    # after a successful login, go back to the front page
    def check_login(self, response):
        yield Request(
            url='https://dig.chouti.com/',
            method='GET',
            cookies=self.cookie_dict,
            callback=self.good,
        )

    # cast the upvotes; the vote endpoint looks like
    # https://dig.chouti.com/link/vote?linksId=24868551
    def good(self, response):
        id_list = Selector(response=response).xpath('//div[@share-linkid]/@share-linkid').extract()
        for link_id in id_list:
            print(link_id)
            url = 'https://dig.chouti.com/link/vote?linksId=%s' % link_id
            yield Request(
                url=url,
                method="POST",
                cookies=self.cookie_dict,
                callback=self.show,
            )

        # collect the pager links and upvote page by page
        page_urls = Selector(response=response).xpath('//div[@id="dig_lcpage"]//a/@href').extract()
        for page_url in page_urls:
            url = "https://dig.chouti.com%s" % page_url
            md5_url = self.md5(url)
            if md5_url not in self.has_request_url_set:   # skip pages that were already requested
                print(page_url)
                self.has_request_url_set.add(md5_url)
                yield Request(url=url, method="GET", callback=self.good)   # listing pages are plain GETs

    # check whether the upvote succeeded
    def show(self, response):
        print(response.text)

    @staticmethod
    def md5(url):
        m = hashlib.md5()
        m.update(bytes(url, encoding='utf-8'))
        return m.hexdigest()
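For reference, here is a minimal, self-contained sketch (not from the original post) of what CookieJar.extract_cookies does with a response's Set-Cookie headers; the cookie name and value below are made up for illustration:

from scrapy.http import Request, HtmlResponse
from scrapy.http.cookies import CookieJar

req = Request(url='https://dig.chouti.com/')
resp = HtmlResponse(
    url='https://dig.chouti.com/',
    headers={'Set-Cookie': 'gpsd=abc123; Path=/; HttpOnly'},   # made-up cookie for the sketch
    body=b'',
    request=req,
)

jar = CookieJar()
jar.extract_cookies(resp, req)   # parse the Set-Cookie headers into the jar
print(jar._cookies)              # the same private dict the spider stores in cookie_dict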
# Note: set DEPTH_LIMIT in settings.py to cap how many levels deep the "recursive" page requests go, e.g.
DEPTH_LIMIT = 4
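If the limit should only apply to this one spider, the same setting can also be declared on the spider itself via the standard custom_settings class attribute instead of settings.py; a minimal sketch:

import scrapy

class ChoutiSpider(scrapy.Spider):
    name = 'chouti'
    # per-spider override, equivalent to DEPTH_LIMIT in settings.py
    custom_settings = {
        'DEPTH_LIMIT': 4,
    }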
Handling cookies with Scrapy's built-in CookiesMiddleware (the 'cookiejar' meta key):

import scrapy
from scrapy.http import Request


class ChoutiSpider(scrapy.Spider):
    name = "chouti"
    allowed_domains = ["chouti.com"]
    start_urls = (
        'http://www.chouti.com/',
    )

    def start_requests(self):
        url = 'http://dig.chouti.com/'
        # 'cookiejar' in meta tells CookiesMiddleware to track cookies for this session
        yield Request(url=url, callback=self.login, meta={'cookiejar': True})

    def login(self, response):
        print(response.headers.getlist('Set-Cookie'))
        req = Request(
            url='http://dig.chouti.com/login',
            method='POST',
            headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'},
            body='phone=8613121758648&password=woshiniba&oneMonth=1',
            callback=self.check_login,
            meta={'cookiejar': True},   # reuse the same cookie session for the login POST
        )
        yield req

    def check_login(self, response):
        print(response.text)
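The 'cookiejar' value in meta is only a session key for the built-in CookiesMiddleware; it is not propagated automatically, so every follow-up request has to pass it along explicitly. A minimal sketch of how check_login above could keep browsing with the logged-in session (the front-page URL and the index_page callback name are assumptions for illustration):

    def check_login(self, response):
        print(response.text)
        # pass the same cookiejar key along, otherwise the session is not reused
        yield Request(
            url='http://dig.chouti.com/',
            meta={'cookiejar': response.meta['cookiejar']},
            callback=self.index_page,
        )

    def index_page(self, response):
        # logged-in front page; parse it with Selector as in the first spider
        print(response.status)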