Python 获取文本中的网址并判断是否可以正常访问

发布时间: 2023-04-15 00:18:44 作者: 大话人生
# encoding=utf8
#-*-coding:utf-8 -*-


import re
import requests

# Read the whole input file into memory, decoding it as UTF-8 text.
with open('input.txt', 'r', encoding="utf-8") as f:
    content = f.read()

# Extract every http:// or https:// URL found in the text.
# NOTE: inside the character class, `$-_` is a range (ASCII 0x24-0x5F), so it
# also matches characters such as `/`, `:`, `?` and `=`. The pattern is kept
# unchanged on purpose, because path and query matching rely on that range.
urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', content)

# Seconds to wait for a server before giving up. Without an explicit timeout
# requests waits indefinitely, so one dead host would hang the whole script.
REQUEST_TIMEOUT = 10

# Probe each URL and report whether it answered with HTTP 200.
for url in urls:
    try:
        # Keep only the network call inside the try body; a timeout surfaces
        # as requests.exceptions.Timeout, a subclass of RequestException.
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as e:
        print(f"Could not connect to {url}: {e}")
    else:
        if response.status_code == 200:
            print(f"{url} is reachable")
        else:
            print(f"{url} returned status code {response.status_code}")