scrapy ja3 tls

发布时间: 2023-09-14 10:36:57 作者: AngDH

 

 

 

# -*- coding:utf-8 -*-
import logging
import random

from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
from scrapy.core.downloader.handlers.http11 import HTTP11DownloadHandler, ScrapyAgent

# Colon-separated cipher-list entries; shuffle_ciphers() splits this string on
# ":" and randomizes the order of the entries.
ORIGIN_CIPHERS = ":".join((
    'TLS13-AES-256-GCM-SHA384',
    'TLS13-CHACHA20-POLY1305-SHA256',
    'TLS13-AES-128-GCM-SHA256',
    'ECDH+AESGCM',
    'ECDH+CHACHA20',
    'DH+AESGCM',
    'DH+CHACHA20',
    'ECDH+AES256',
    'DH+AES256',
    'ECDH+AES128',
    'DH+AES',
    'RSA+AESGCM',
    'RSA+AES',
))


def shuffle_ciphers(ciphers=None):
    """Return a cipher string whose entries are in random order.

    Args:
        ciphers: Optional colon-separated cipher string to shuffle. When
            omitted (``None``), the module-level ``ORIGIN_CIPHERS`` is used.

    Returns:
        The shuffled entries joined with ``:``, followed by the fixed
        ``:!aNULL:!MD5:!DSS`` exclusion suffix.
    """
    source = ORIGIN_CIPHERS if ciphers is None else ciphers
    entries = source.split(":")
    random.shuffle(entries)
    shuffled = ":".join(entries)

    # Log at debug level rather than print(): the handler calls this for
    # every request, so unconditional stdout output floods the crawl log.
    logging.getLogger(__name__).debug("shuffled TLS ciphers: %s", shuffled)

    # The exclusions are appended after shuffling so they always come last.
    return shuffled + ":!aNULL:!MD5:!DSS"


class MyHTTPDHandler(HTTP11DownloadHandler):
    """HTTP/1.1 download handler that uses a newly shuffled cipher list per request.

    NOTE(review): presumably intended to vary the client's TLS (JA3)
    fingerprint between requests -- confirm against the project's goals.
    """

    def download_request(self, request, spider):
        """Download ``request`` using a context factory with shuffled ciphers.

        A new ``ScrapyClientContextFactory`` is created on every call from the
        result of ``shuffle_ciphers()``; the pool, size limits, data-loss flag
        and crawler are taken from this handler's own attributes.

        Args:
            request: The request handed on to ``ScrapyAgent.download_request``.
            spider: Object whose optional ``download_maxsize`` and
                ``download_warnsize`` attributes override the handler defaults.

        Returns:
            Whatever ``ScrapyAgent.download_request`` returns (a deferred for
            the HTTP download).
        """
        cipher_string = shuffle_ciphers()
        context_factory = ScrapyClientContextFactory(tls_ciphers=cipher_string)

        # Attributes set on the spider take precedence over handler defaults.
        max_size = getattr(spider, 'download_maxsize', self._default_maxsize)
        warn_size = getattr(spider, 'download_warnsize', self._default_warnsize)

        scrapy_agent = ScrapyAgent(
            contextFactory=context_factory,
            pool=self._pool,
            maxsize=max_size,
            warnsize=warn_size,
            fail_on_dataloss=self._fail_on_dataloss,
            crawler=self._crawler,
        )
        return scrapy_agent.download_request(request)


class MyHTTPDownloadHandler(MyHTTPDHandler):
    """Subclass of ``MyHTTPDHandler`` that adds no behaviour of its own."""

    def download_request(self, request, spider):
        """Delegate unchanged to ``MyHTTPDHandler.download_request``."""
        deferred = super().download_request(request, spider)
        return deferred

 

 

 

 

 

 

         'DOWNLOAD_HANDLERS': {
             'http': 'middlewares.sc_middlewares.MyHTTPDownloadHandler',
             'https': 'middlewares.sc_middlewares.MyHTTPDownloadHandler',
         },