"""Scrape JD.com product comments page by page and append them to a CSV file.

The script prompts for an end page, requests every page from 0 up to and
including that page from the JD comment endpoint, flattens each comment
into one CSV row and appends the rows to ``comments_jd2.csv``.
"""
import csv
import urllib.request
import json
import time
import xlwt

# Comment endpoint; ``{}`` is replaced with the zero-based page index.
COMMENTS_URL = 'https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100038004389&score=0&sortType=5&page={}&pageSize=10&isShadowSku=0&fold=1'

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    'Referer': 'https://item.jd.com/100026796992.html'
}

CSV_PATH = 'comments_jd2.csv'
JSON_PATH = '苹果.json'

# Pause between pages, in seconds.
PAGE_DELAY_SECONDS = 4

# Label written to the "member level" column for each known code.
# Any other code falls back to DEFAULT_LEVEL_NAME.
LEVEL_NAMES = {
    "201": "PLUS会员",
    "203": "金牌会员",
    "103": "普通用户",
    "0": "无价值用户",
}
DEFAULT_LEVEL_NAME = "银牌会员"


def _unwrap_jsonp(text):
    """Return the JSON payload of a ``callback(...);`` JSONP response.

    ``str.strip(chars)`` removes a *set of characters*, not a prefix or
    suffix, so it only unwraps the payload by coincidence. Here the text
    between the first ``(`` and the last ``)`` is returned instead. Text
    that already looks like bare JSON, or has no parentheses, is returned
    unchanged so that ``json.loads`` reports the problem.
    """
    text = text.strip()
    if text.startswith(('{', '[')):
        return text
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end <= start:
        return text
    return text[start + 1:end]


def _fetch_comments(page):
    """Download one page of comments and return the ``comments`` list."""
    request = urllib.request.Request(
        url=COMMENTS_URL.format(page), headers=REQUEST_HEADERS)
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(request) as response:
        body = response.read().decode('gbk')
    return json.loads(_unwrap_jsonp(body))['comments']


def _comment_to_row(comment):
    """Flatten one comment dict into the tuple written as a CSV row.

    Raises:
        KeyError: if the comment lacks one of the expected fields.
    """
    # Member level is derived from ``plusAvailable``. The code is passed
    # through ``str()`` so the lookup matches whether the API delivers it
    # as a string or as a number (NOTE(review): actual JSON type not
    # visible here -- confirm against a live response).
    level_name = LEVEL_NAMES.get(
        str(comment['plusAvailable']), DEFAULT_LEVEL_NAME)

    # Client code "4" is labelled mobile; everything else is labelled PC.
    if str(comment['userClient']) == "4":
        device = "移动端"
    else:
        device = "PC端"

    return (
        comment['id'],
        comment['guid'],
        comment['content'],          # comment text
        comment['creationTime'],     # comment time
        comment['isTop'],
        comment['referenceTime'],
        comment['firstCategory'],
        comment['secondCategory'],
        comment['thirdCategory'],
        comment['replyCount'],
        comment['score'],
        comment['nickname'],         # commenter
        comment['userClient'],
        comment['productColor'],
        comment['productSize'],
        level_name,                  # member level
        comment['plusAvailable'],
        comment['productSales'],
        "京东客户端",                 # fixed client label
        device,                      # mobile / PC
        comment['days'],
        comment['afterDays'],
    )


def main():
    """Prompt for the end page, then scrape every page into the CSV file."""
    end_page = int(input('请输入爬取的结束页码:'))

    # Nothing is ever written to this file; it is opened once in append
    # mode only so that it is created if missing.
    with open(JSON_PATH, 'a', encoding='gbk'):
        pass

    # Pages are requested as 0..end_page inclusive and reported 1-based.
    for page in range(0, end_page + 1):
        print('第%s页开始爬取------' % (page + 1))
        comments = _fetch_comments(page)

        # One open per page; ``newline=''`` is what the csv module
        # requires to avoid blank lines between rows on Windows.
        with open(CSV_PATH, 'a', encoding='utf8', newline='') as csv_file:
            writer = csv.writer(csv_file)
            for comment in comments:
                writer.writerow(_comment_to_row(comment))

        print('第%s页完成----------' % (page + 1))
        time.sleep(PAGE_DELAY_SECONDS)


if __name__ == '__main__':
    main()