使用代理中转服务,解决 selenium 使用短时效代理时需要频繁重启的问题

发布时间: 2023-09-21 14:55:04 作者: JadePeng

背景

selenium 的代理只能在启动浏览器时配置,一旦代理失效或不可用,切换代理就必须重启浏览器,非常浪费时间。

解决思路

可以搭建一个代理中转服务,让 selenium 始终连接这个固定的中转服务,再由中转服务选择可用的代理。

代码实现

代码实现就简单了,中转服务将收到的网络请求,直接转发给可用的代理就行,如果代理连接失败,则切换到新的代理。

以下是python实现

import _thread
import socket
import time
from threading import Thread
from urllib.parse import urlsplit

from redis.client import Redis

from adsl_proxy_pool import ADSLProxyPool

# Redis client handed to the ADSL proxy pool below.
redis_client = Redis(
    host="127.0.0.12",
    port=6383,
    password="password",
    db=5,
)
adsl_proxy_pool = ADSLProxyPool(redis_client=redis_client)
# Last proxy string taken from the pool; None makes get_proxy() ask the pool.
cache_proxy = None


def communicate(sock1, sock2):
    """
    Relay bytes one way, from ``sock1`` to ``sock2``, until the stream ends.

    Data is read in chunks of up to 1024 bytes and written out with
    ``sendall``.  When ``sock1`` reports end-of-stream, the write side of
    ``sock2`` is shut down so the peer behind it sees the end as well instead
    of waiting for its own timeout.

    Socket-level failures (timeouts, resets, already-closed sockets) end the
    relay quietly; any other exception is a programming error and propagates.

    :param sock1: connected socket to read from
    :param sock2: connected socket to write to
    :return: None
    """
    try:
        while True:
            data = sock1.recv(1024)
            if not data:
                break
            sock2.sendall(data)
        # Clean EOF from the source: nothing more will ever be forwarded in
        # this direction, so tell the destination's peer.
        sock2.shutdown(socket.SHUT_WR)
    except OSError:
        # Best-effort relay: a broken or timed-out connection just stops it.
        return


def handle(client):
    """
    Relay one accepted client connection through an upstream proxy.

    Keeps asking ``get_proxy`` for an address until a TCP connection to it
    succeeds, then copies bytes in both directions: client -> proxy on a
    helper thread and proxy -> client on the calling thread.  Both sockets
    are closed once the proxy -> client direction stops.

    :param client: connected socket of the incoming client
    :return: None
    """
    global cache_proxy

    timeout = 60
    client.settimeout(timeout)

    proxy_server = None
    try:
        while proxy_server is None:
            try:
                # create_connection builds a fresh socket for every attempt
                # (a socket whose connect() failed cannot be reliably reused)
                # and applies the timeout to the connect itself, so a dead
                # proxy cannot block this thread indefinitely.
                proxy_server = socket.create_connection(get_proxy(),
                                                        timeout=timeout)
            except Exception:
                # Lookup, parsing or connect failed: forget the cached proxy
                # so the next attempt asks the pool for another one.
                cache_proxy = None

        # Forward client -> proxy_server in the background.
        _thread.start_new_thread(communicate, (client, proxy_server))

        # Forward proxy_server -> client on this thread.
        communicate(proxy_server, client)
    finally:
        for sock in (proxy_server, client):
            if sock is None:
                continue
            try:
                # shutdown() before close() so a helper thread still blocked
                # in recv() on this socket is released promptly.
                sock.shutdown(socket.SHUT_RDWR)
            except OSError:
                pass
            sock.close()


def get_proxy():
    """
    Return the ``(host, port)`` of the upstream proxy to connect to.

    The cached proxy is reused while it is set; otherwise the pool is polled
    until it yields one, which then becomes the new cached value.  Accepted
    forms include ``host:port`` and ``scheme://[user:pass@]host:port[/]``.

    :return: tuple ``(host, port)`` with ``port`` as an int
    :raises ValueError: if the proxy string has no host or no valid port
    """
    global cache_proxy

    proxy = cache_proxy
    # Poll iteratively: the recursive retry overflowed the stack (and hit the
    # pool roughly a thousand times in a row) whenever the pool was empty.
    while not proxy:
        proxy = adsl_proxy_pool.get_proxy()
        if not proxy:
            time.sleep(0.1)
    if isinstance(proxy, bytes):
        # Tolerate a pool that hands back raw bytes instead of str.
        proxy = proxy.decode()
    cache_proxy = proxy

    # urlsplit only treats text after "//" as the network location.
    parts = urlsplit(proxy if "://" in proxy else "//" + proxy)
    if not parts.hostname or parts.port is None:
        raise ValueError(f"invalid proxy address: {proxy!r}")
    return parts.hostname, parts.port


def serve(ip, port):
    """
    Run the relay's accept loop on ``(ip, port)``.

    Binds a listening TCP socket with ``SO_REUSEADDR`` set, then loops
    forever, starting a new thread running ``handle`` for each accepted
    connection.  Does not return under normal operation.

    :param ip: local address to bind to
    :param port: local TCP port to listen on
    :return: never returns normally
    """
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind((ip, port))
    listener.listen(10)
    print('\n local proxy server started...\n')
    while True:
        # The peer address is not needed; only the connected socket is used.
        connection, _peer = listener.accept()
        _thread.start_new_thread(handle, (connection,))



def start_local_proxy_server(server="127.0.0.1", port=8081):
    """
    Start ``serve(server, port)`` on a background daemon thread.

    Returns right after the thread is started; being a daemon, the thread
    does not keep the process alive on its own.

    :param server: local address for the relay to bind to
    :param port: local TCP port for the relay to listen on
    :return: None
    """
    worker = Thread(
        target=serve,
        name='APScheduler',
        args=(server, port),
        daemon=True,
    )
    worker.start()


if __name__ == '__main__':
    # Run the relay in the foreground when executed as a script.
    IP, PORT = "127.0.0.1", 8080
    serve(IP, PORT)


selenium 直接配置本地代理


# Start the local relay on a background thread first.
start_local_proxy_server("127.0.0.1", 8081)

# Give the relay's fixed address as the proxy, so the upstream proxy can
# change without this setting changing.
# NOTE(review): Agent and get_chrome_opt_list are defined elsewhere; presumably
# Agent wraps the Chrome driver -- confirm against its definition.
agent = Agent(chrome_args=get_chrome_opt_list(),
              proxy="http://127.0.0.1:8081",
              verbose=True,
              chrome_driver_path='/usr/local/bin/chromedriver')
              
-----------

        options = uc.ChromeOptions()
        if self.proxy:
            self.logger.info("use proxy:{}".format(self.proxy))
            options.add_argument(f'--proxy-server={self.proxy}')

总结

一开始考虑,使用nginx来中转代理服务,但想到nginx本质就是一个中转,从方便性和可控性考虑,直接用代码实现更好。