C#百度与bing的url推送api封装

发布时间 2023-06-09 14:36:14作者: 海岸线summer

参照:

使用API提交URL到百度和Bing - duanguyuan - 博客园 (cnblogs.com)

背景

为了方便爬虫爬取我们站点里的文章,我们可以将站点地图(sitemap.xml)提交到搜索网站。提交之后,爬虫在光临我们的网站时,会根据sitemap.xml的指引,抓取所有的URL。传统的sitemap.txt或者robots.txt都是由搜索引擎通过爬虫来采集的,属于被动采集。但是,爬虫光临我们站点的周期太长(至少要几天吧),如果想在发布文章之后尽快被搜索引擎收录,我们可以主动提交URL到搜索网站。Google只能在网页上操作,百度和Bing都提供了API。下面介绍如何使用API提交URL。

百度API推送

需要先在"站点管理_站长工具_百度搜索资源平台 (baidu.com)"注册并登录,然后将平台提供的信任文件(站点验证文件)放在你网站的根目录下。

封装定时任务执行接口,每天提交

/// <summary>
/// Pushes the PC and mobile URLs of articles to Baidu in batches and totals the accepted counts.
/// </summary>
/// <remarks>
/// Despite the method name, the filter below selects every article that is not deleted,
/// is shown and has a publish time earlier than now; it is not restricted to today's articles.
/// </remarks>
/// <returns>
/// A tuple (PC success count, mobile success count), summed over all batches from the
/// <c>success</c> field of each Baidu response.
/// </returns>
public async Task<(int,int)> PostTodayArticlesToBaidu()
        {
            // Submit at most 2000 URLs per request; submitting 3000 at once made the Baidu API
            // return an error (possibly because the payload was too large).
            const int size = 2000;

            int pcCount = 0;
            int mobileCount = 0;

            // All available articles: not deleted, visible, and already published.
            Expression<Func<Articles, bool>> expression = t => !t.IsDelete && t.IsShow && t.PublishTime < DateTime.Now;
            int count = await _baseContext.Articles.Where(expression).CountAsync();

            // A single loop covers both the full pages and the final partial page.
            for (int skipped = 0; skipped < count; skipped += size)
            {
                // Skip/Take only yields stable, non-overlapping pages when the query is ordered,
                // so order by PostId before paging.
                var ids = await _baseContext.Articles
                    .Where(expression)
                    .OrderBy(a => a.PostId)
                    .Skip(skipped)
                    .Take(size)
                    .Select(a => a.PostId)
                    .ToListAsync();

                // Rows may have disappeared since the count was taken; never push an empty batch.
                if (ids.Count == 0)
                {
                    break;
                }

                (BaiduPublishResult, BaiduPublishResult) result = await publishToBaidu(ids);

                // A response body that deserializes to null contributes nothing to the totals.
                pcCount += result.Item1?.success ?? 0;
                mobileCount += result.Item2?.success ?? 0;
            }

            return (pcCount, mobileCount);
        }

其中,百度API返回结果的封装如下:

 /// <summary>
    /// Result returned by the Baidu URL-push (indexing submission) API.
    /// Property names are lowercase/snake_case, the same form the response JSON is deserialized from.
    /// </summary>
    public class BaiduPublishResult
    {
        /// <summary>Number of URLs that were pushed successfully.</summary>
        public int success { get; set; }
        /// <summary>Remaining push quota for the current day.</summary>
        public int remain { get; set; }
        /// <summary>
        /// Submitted URLs must belong to the site named in the request;
        /// URLs that belong to a different site are returned in this array.
        /// </summary>
        public string[] not_same_site { get; set; }
        /// <summary>Submitted URLs that are not valid are returned in this array.</summary>
        public string[] not_valid { get; set; }


    }

通用推送方法publishToBaidu的封装如下:


 /// <summary>
        /// Pushes the PC and mobile URLs of the given articles to the Baidu URL-submission API.
        /// </summary>
        /// <param name="ids">
        /// Article ids; each id is formatted into <c>_coreApiUrlOption.PcLink</c> and
        /// <c>_coreApiUrlOption.MobileLink</c> to build the URLs that are submitted.
        /// </param>
        /// <returns>The deserialized Baidu responses as (PC result, mobile result).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
        /// <exception cref="EsbException">A request returned an HTTP status other than 200.</exception>
        public async Task<(BaiduPublishResult, BaiduPublishResult)> publishToBaidu(List<long> ids)
        {
            if (ids == null)
            {
                throw new ArgumentNullException(nameof(ids));
            }

            // NOTE(review): the token below is a credential embedded in source; consider moving it to configuration.
            // PC request: the host of every submitted URL must match the site given here
            // (https://www.xxx.cn), otherwise Baidu reports an error.
            string pcRequest = "http://data.zz.baidu.com/urls?site=https://www.xxx.cn&token=Z8Z9GaZVYjhH6Ggo";
            // Mobile (H5) request: the system also has a mobile site, so its URLs are pushed as well.
            string mobileRequest = "http://data.zz.baidu.com/urls?site=https://m.xxx.cn&token=Z8Z9GaZVYjhH6Ggo";

            // The request body is a plain-text list with one URL per line.
            StringBuilder pcBody = new StringBuilder();
            StringBuilder mobileBody = new StringBuilder();
            foreach (var id in ids)
            {
                pcBody.AppendLine(string.Format(_coreApiUrlOption.PcLink, id));
                mobileBody.AppendLine(string.Format(_coreApiUrlOption.MobileLink, id));
            }

            using (HttpClient client = _httpClientFactory.CreateClient())
            {
                // Content-Type is a content header, so it is set on the request content inside the
                // helper; it cannot be added through DefaultRequestHeaders.
                client.DefaultRequestHeaders.TryAddWithoutValidation("Host", "data.zz.baidu.com");
                client.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", "curl/7.12.1");

                // The PC batch goes first; if it fails, the exception propagates and the mobile batch is not sent.
                BaiduPublishResult pcResult = await PostUrlListToBaiduAsync(client, pcRequest, pcBody.ToString());
                BaiduPublishResult mobileResult = await PostUrlListToBaiduAsync(client, mobileRequest, mobileBody.ToString());
                return (pcResult, mobileResult);
            }
        }

        // Posts one newline-separated URL list to the given Baidu endpoint and deserializes the JSON reply.
        private static async Task<BaiduPublishResult> PostUrlListToBaiduAsync(HttpClient client, string requestUrl, string urlList)
        {
            using (StringContent content = new StringContent(urlList, Encoding.UTF8, "text/plain"))
            using (HttpResponseMessage response = await client.PostAsync(requestUrl, content))
            {
                if (response.StatusCode != System.Net.HttpStatusCode.OK)
                {
                    // Report the URL that actually failed (PC or mobile).
                    throw new EsbException($"请求服务{requestUrl}失败,Http状态码为{response.StatusCode}");
                }

                string responseBody = await response.Content.ReadAsStringAsync();
                return JsonConvert.DeserializeObject<BaiduPublishResult>(responseBody);
            }
        }

Bing的API提交

同样需要先在对应平台Bing Webmaster Tools注册并登录,然后生成API密钥。

/// <summary>
/// Submits the PC and mobile URLs of the given articles to the Bing SubmitUrlbatch API.
/// </summary>
        /// <param name="ids">
        /// Article ids; each id is formatted into <c>_coreApiUrlOption.PcLink</c> and
        /// <c>_coreApiUrlOption.MobileLink</c> to build the URLs that are submitted.
        /// </param>
        /// <returns>
        /// A tuple (PC succeeded, mobile succeeded). A batch counts as succeeded only when the
        /// response body is exactly <c>{"d":null}</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
        /// <exception cref="EsbException">A request returned an HTTP status other than 200.</exception>
        public async Task<(bool, bool)> publishToBing(List<long> ids)
        {
            if (ids == null)
            {
                throw new ArgumentNullException(nameof(ids));
            }

            // The apikey is generated in Bing Webmaster Tools; after regenerating it there, update it here.
            // NOTE(review): the key is a credential embedded in source; consider moving it to configuration.
            string request = "https://www.bing.com/webmaster/api.svc/json/SubmitUrlbatch?apikey=aaf075142ea147868a4c8ba66490c6ec";

            // NOTE(review): both batches are submitted under this one siteUrl, as before; confirm that
            // the mobile links are accepted for this site in Bing Webmaster Tools.
            const string siteUrl = "https://www.eshebao.cn";

            var pcUrls = ids.Select(id => string.Format(_coreApiUrlOption.PcLink, id)).ToList();
            var mobileUrls = ids.Select(id => string.Format(_coreApiUrlOption.MobileLink, id)).ToList();

            using (HttpClient client = _httpClientFactory.CreateClient())
            {
                // Content-Type is a content header, so it is set on the request content inside the
                // helper; it cannot be added through DefaultRequestHeaders.
                client.DefaultRequestHeaders.TryAddWithoutValidation("Host", "ssl.bing.com");

                // The PC batch goes first; if it fails, the exception propagates and the mobile batch is not sent.
                bool pcResult = await SubmitUrlBatchToBingAsync(client, request, siteUrl, pcUrls);
                // The second batch carries the mobile URLs, not the PC URLs a second time.
                bool mobileResult = await SubmitUrlBatchToBingAsync(client, request, siteUrl, mobileUrls);
                return (pcResult, mobileResult);
            }
        }

        // Posts one URL batch to Bing as JSON and reports whether the reply is the success body {"d":null}.
        private static async Task<bool> SubmitUrlBatchToBingAsync(HttpClient client, string requestUrl, string siteUrl, List<string> urlList)
        {
            // Anonymous-type member names become the JSON keys "siteUrl" and "urlList".
            var payload = new { siteUrl, urlList };

            using (StringContent content = new StringContent(JsonConvert.SerializeObject(payload), Encoding.UTF8, "application/json"))
            using (HttpResponseMessage response = await client.PostAsync(requestUrl, content))
            {
                if (response.StatusCode != System.Net.HttpStatusCode.OK)
                {
                    throw new EsbException($"请求服务{requestUrl}失败,Http状态码为{response.StatusCode}");
                }

                string responseBody = await response.Content.ReadAsStringAsync();
                // A successful submission is answered with exactly {"d":null}.
                return string.Equals(responseBody, "{\"d\":null}", StringComparison.Ordinal);
            }
        }

小小记录下。