参照:
使用API提交URL到百度和Bing - duanguyuan - 博客园 (cnblogs.com)
背景
为了方便爬虫爬取我们站点里的文章,我们可以将站点地图(sitemap.xml)提交到搜索网站。提交之后,爬虫在光临我们的网站时,会根据sitemap.xml的指引,抓取所有的URL。传统的sitemap.txt或者robots.txt都要等搜索引擎的爬虫自行来采集,属于被动收录。但是,爬虫光临我们站点的周期太长(至少要几天),如果想在发布文章之后尽快被搜索引擎收录,我们可以主动提交URL到搜索网站。Google只能在网页上操作,百度和Bing都提供了API。下面介绍如何使用API提交URL。
百度API推送
需要先在「站点管理_站长工具_百度搜索资源平台 (baidu.com)」注册并登录,然后将平台提供的验证文件放到站点根目录下
封装定时任务执行接口,每天提交
/// <summary>
/// Pushes every currently visible article (not deleted, shown, publish time already
/// reached) to Baidu in fixed-size batches and totals the accepted URL counts.
/// </summary>
/// <remarks>
/// Despite the method name, the filter is not restricted to articles published today.
/// </remarks>
/// <returns>
/// Item1: sum of <c>success</c> over all PC submissions;
/// Item2: sum of <c>success</c> over all mobile submissions.
/// </returns>
public async Task<(int,int)> PostTodayArticlesToBaidu()
{
    // Batches of 3000 were rejected by the Baidu endpoint (possibly payload size),
    // so each request carries at most 2000 ids.
    const int batchSize = 2000;

    Expression<Func<Articles, bool>> expression = t => !t.IsDelete && t.IsShow && t.PublishTime < DateTime.Now;
    int total = await _baseContext.Articles.Where(expression).CountAsync();

    int pcCount = 0;
    int mobileCount = 0;

    // A single loop covers both the full pages and the trailing partial page.
    for (int offset = 0; offset < total; offset += batchSize)
    {
        // Skip/Take without an explicit ordering has no guaranteed row order, so pages
        // could overlap or miss rows; ordering by PostId makes paging deterministic.
        var ids = await _baseContext.Articles
            .Where(expression)
            .OrderBy(a => a.PostId)
            .Skip(offset)
            .Take(batchSize)
            .Select(s => s.PostId)
            .ToListAsync();

        if (ids.Count == 0)
        {
            // Rows disappeared between the count and this page; nothing left to push.
            break;
        }

        var (pcResult, mobileResult) = await publishToBaidu(ids);

        // Deserialization yields null for an empty response body; count that as zero.
        pcCount += pcResult?.success ?? 0;
        mobileCount += mobileResult?.success ?? 0;
    }

    return (pcCount, mobileCount);
}
其中,百度API返回结果的封装如下:
/// <summary>
/// Result payload returned by the Baidu link-submission API.
/// Member names are lower-case so they mirror the JSON keys of the response.
/// </summary>
public class BaiduPublishResult
{
    /// <summary>Number of URLs that were pushed successfully.</summary>
    public int success { get; set; }

    /// <summary>Quota remaining for the current day.</summary>
    public int remain { get; set; }

    /// <summary>
    /// Submitted URLs whose site differs from the site named in the request.
    /// </summary>
    public string[] not_same_site { get; set; }

    /// <summary>Submitted URLs that are not valid.</summary>
    public string[] not_valid { get; set; }
}
通用方法的封装publishToBaidu
/// <summary>
/// Pushes the PC and mobile URLs of the given articles to the Baidu link-submission API.
/// </summary>
/// <param name="ids">
/// Article ids; each id is formatted into <c>_coreApiUrlOption.PcLink</c> and
/// <c>_coreApiUrlOption.MobileLink</c> to build the URLs that are submitted.
/// </param>
/// <returns>
/// Item1: deserialized response of the PC submission; Item2: deserialized response of the
/// mobile submission. Either item is null when the response body deserializes to null.
/// For an empty <paramref name="ids"/> list no request is sent and two zero-valued results
/// are returned.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
/// <exception cref="EsbException">Either request returned a status other than 200 OK.</exception>
public async Task<(BaiduPublishResult, BaiduPublishResult)> publishToBaidu(List<long> ids)
{
    if (ids == null)
    {
        throw new ArgumentNullException(nameof(ids));
    }

    if (ids.Count == 0)
    {
        // Nothing to submit; avoid posting an empty body.
        return (new BaiduPublishResult(), new BaiduPublishResult());
    }

    // The host of every submitted URL must match the "site" query parameter,
    // otherwise Baidu reports an error.
    // NOTE(review): the token is hard-coded here; consider moving it to configuration.
    string pcRequest = "http://data.zz.baidu.com/urls?site=https://www.xxx.cn&token=Z8Z9GaZVYjhH6Ggo";
    // The system also has a mobile (H5) site, so its URLs are pushed as well.
    string mobileRequest = "http://data.zz.baidu.com/urls?site=https://m.xxx.cn&token=Z8Z9GaZVYjhH6Ggo";

    // Request body: one URL per line.
    var pcBody = new StringBuilder();
    var mobileBody = new StringBuilder();
    foreach (long id in ids)
    {
        pcBody.AppendLine(string.Format(_coreApiUrlOption.PcLink, id));
        mobileBody.AppendLine(string.Format(_coreApiUrlOption.MobileLink, id));
    }

    using (HttpClient client = _httpClientFactory.CreateClient())
    {
        client.DefaultRequestHeaders.TryAddWithoutValidation("Host", "data.zz.baidu.com");
        client.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", "curl/7.12.1");
        // Content-Type is a content header and is not accepted by the request-header
        // collection; StringContent already sends "text/plain; charset=utf-8".

        BaiduPublishResult pcResult = await PostUrlsToBaiduAsync(client, pcRequest, pcBody.ToString());
        BaiduPublishResult mobileResult = await PostUrlsToBaiduAsync(client, mobileRequest, mobileBody.ToString());
        return (pcResult, mobileResult);
    }
}

// Posts one newline-separated URL list to a Baidu endpoint and deserializes the JSON reply.
private static async Task<BaiduPublishResult> PostUrlsToBaiduAsync(HttpClient client, string requestUrl, string urls)
{
    using (var content = new StringContent(urls, Encoding.UTF8))
    using (HttpResponseMessage response = await client.PostAsync(requestUrl, content))
    {
        if (response.StatusCode != System.Net.HttpStatusCode.OK)
        {
            // Report the URL that actually failed (PC or mobile).
            throw new EsbException($"请求服务{requestUrl}失败,Http状态码为{response.StatusCode}");
        }

        string responseBody = await response.Content.ReadAsStringAsync();
        return JsonConvert.DeserializeObject<BaiduPublishResult>(responseBody);
    }
}
Bing的API提交
也是在对应平台Bing Webmaster Tools去登录注册,生成一下api密钥
/// <summary>
/// Submits the PC and mobile URLs of the given articles to the Bing Webmaster
/// SubmitUrlbatch API, one request per URL set.
/// </summary>
/// <param name="ids">
/// Article ids; each id is formatted into <c>_coreApiUrlOption.PcLink</c> and
/// <c>_coreApiUrlOption.MobileLink</c> to build the URLs that are submitted.
/// </param>
/// <returns>
/// Item1: true when the PC submission was answered with <c>{"d":null}</c>;
/// Item2: the same for the mobile submission.
/// </returns>
/// <exception cref="EsbException">Either request returned a status other than 200 OK.</exception>
public async Task<(bool, bool)> publishToBing(List<long> ids)
{
    // The apikey is generated in Bing Webmaster Tools; update it here after regenerating it.
    // NOTE(review): the key is hard-coded; consider moving it to configuration.
    string request = "https://www.bing.com/webmaster/api.svc/json/SubmitUrlbatch?apikey=aaf075142ea147868a4c8ba66490c6ec";
    // NOTE(review): the same siteUrl is used for both URL sets, as in the original request
    // objects; confirm the mobile links belong to this site.
    string siteUrl = "https://www.eshebao.cn";

    var pcUrls = ids.Select(s => string.Format(_coreApiUrlOption.PcLink, s)).ToList();
    var mobileUrls = ids.Select(s => string.Format(_coreApiUrlOption.MobileLink, s)).ToList();

    using (HttpClient client = _httpClientFactory.CreateClient())
    {
        // NOTE(review): this Host value differs from the host in the request URL; kept as-is.
        client.DefaultRequestHeaders.TryAddWithoutValidation("Host", "ssl.bing.com");
        // Content-Type is a content header and is not accepted by the request-header
        // collection; StringContent already sends "application/json; charset=utf-8".

        bool pcResult = await SubmitUrlBatchToBingAsync(client, request, siteUrl, pcUrls);
        // The mobile request must carry the mobile URLs, not the PC list a second time.
        bool mobileResult = await SubmitUrlBatchToBingAsync(client, request, siteUrl, mobileUrls);
        return (pcResult, mobileResult);
    }
}

// Posts one URL batch to Bing and reports whether the reply is the success payload.
private static async Task<bool> SubmitUrlBatchToBingAsync(HttpClient client, string requestUrl, string siteUrl, List<string> urls)
{
    var payload = new
    {
        siteUrl = siteUrl,
        urlList = urls
    };

    using (HttpContent content = new StringContent(JsonConvert.SerializeObject(payload), Encoding.UTF8, "application/json"))
    using (HttpResponseMessage response = await client.PostAsync(requestUrl, content))
    {
        if (response.StatusCode != System.Net.HttpStatusCode.OK)
        {
            throw new EsbException($"请求服务{requestUrl}失败,Http状态码为{response.StatusCode}");
        }

        string responseBody = await response.Content.ReadAsStringAsync();
        // A successful submission is answered with exactly {"d":null};
        // surrounding whitespace is tolerated.
        return string.Equals(responseBody.Trim(), "{\"d\":null}", StringComparison.Ordinal);
    }
}
小小记录下。