使用urllib爬取京东苹果手机的评论信息

发布时间: 2023-06-20 20:41:34 作者: 李迎辉
import csv
import urllib.request
import json
import time
import xlwt

# ======> Crawl JD product comments and append them to a CSV file <======

# JSONP endpoint; ``{}`` is filled with the zero-based page index.
COMMENT_URL = (
    'https://club.jd.com/comment/productPageComments.action'
    '?callback=fetchJSON_comment98&productId=100038004389&score=0'
    '&sortType=5&page={}&pageSize=10&isShadowSku=0&fold=1'
)

# NOTE(review): the item id in Referer differs from productId in COMMENT_URL;
# confirm that this is intentional.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    'Referer': 'https://item.jd.com/100026796992.html',
}

CSV_PATH = 'comments_jd2.csv'
PAGE_DELAY_SECONDS = 4

# Membership label for each known code; any other code maps to the default.
# NOTE(review): the label is derived from the 'plusAvailable' field; confirm
# this code table against the API.
MEMBER_LEVEL_NAMES = {
    '201': 'PLUS会员',
    '203': '金牌会员',
    '103': '普通用户',
    '0': '无价值用户',
}
DEFAULT_MEMBER_LEVEL = '银牌会员'

# Value of 'userClient' that is reported as a mobile client.
MOBILE_CLIENT_CODE = '4'


def strip_jsonp(text):
    """Return the JSON payload of a JSONP response such as ``cb({...});``.

    Text that already starts with ``{`` or ``[`` is returned as-is (only
    surrounding whitespace is removed), so parentheses inside JSON string
    values are never mistaken for the callback wrapper.
    """
    stripped = text.strip()
    if stripped[:1] in ('{', '['):
        return stripped
    start = stripped.find('(')
    end = stripped.rfind(')')
    if start == -1 or end <= start:
        return stripped
    return stripped[start + 1:end]


def member_level_name(code):
    """Map a membership code to its display label.

    The code is compared as text, so it matches whether the decoded JSON
    carries it as an int or as a str.
    """
    return MEMBER_LEVEL_NAMES.get(str(code), DEFAULT_MEMBER_LEVEL)


def client_platform(user_client):
    """Return the platform label for a 'userClient' code (int or str)."""
    if str(user_client) == MOBILE_CLIENT_CODE:
        return '移动端'
    return 'PC端'


def comment_to_row(comment):
    """Flatten one comment dict into the 22-column CSV row.

    Raises:
        KeyError: if the comment lacks any of the expected fields.
    """
    plus_available = comment['plusAvailable']
    user_client = comment['userClient']
    return (
        comment['id'],
        comment['guid'],
        comment['content'],
        comment['creationTime'],
        comment['isTop'],
        comment['referenceTime'],
        comment['firstCategory'],
        comment['secondCategory'],
        comment['thirdCategory'],
        comment['replyCount'],
        comment['score'],
        comment['nickname'],
        user_client,
        comment['productColor'],
        comment['productSize'],
        member_level_name(plus_available),
        plus_available,
        comment['productSales'],
        '京东客户端',
        client_platform(user_client),
        comment['days'],
        comment['afterDays'],
    )


def fetch_comments(page):
    """Download one page of comments and return the decoded 'comments' list.

    Args:
        page: zero-based page index substituted into COMMENT_URL.
    """
    request = urllib.request.Request(
        url=COMMENT_URL.format(page), headers=REQUEST_HEADERS
    )
    # Close the HTTP response deterministically once the body has been read.
    with urllib.request.urlopen(request) as response:
        body = response.read().decode('gbk')
    return json.loads(strip_jsonp(body))['comments']


def main():
    """Prompt for the last page to crawl and append every comment to the CSV.

    Progress messages number pages from 1, while the request uses a
    zero-based index, so entering N fetches request pages 0 .. N-1.
    """
    end_page = int(input('请输入爬取的结束页码:'))
    for page in range(end_page):
        print('第%s页开始爬取------' % (page + 1))
        rows = [comment_to_row(comment) for comment in fetch_comments(page)]
        # newline='' lets the csv module control line endings itself; without
        # it, rows are separated by blank lines on Windows.
        with open(CSV_PATH, 'a', encoding='utf8', newline='') as csv_file:
            csv.writer(csv_file).writerows(rows)
        print('第%s页完成----------' % (page + 1))
        # Pause between requests; there is nothing to wait for after the last.
        if page < end_page - 1:
            time.sleep(PAGE_DELAY_SECONDS)


if __name__ == '__main__':
    main()