python爬虫——爬取网易云音乐评论内容

发布时间:2023-06-08 21:58:10 作者:作业逆流成河

一、选题背景  

    随着互联网的快速发展,网络上的评论文本资源急速增长。面对海量的网络资源信息,如何运用 Python 爬虫技术爬取数据并进行数据分析,从而挖掘评论文本中蕴含的丰富价值,是本课题要解决的问题。

二、大数据分析设计方案

1.爬虫网站

https://music.163.com/?from=wsdh#/song?id=2052441038

 

2.运用技术:

  • 初始化
  • JS逆向部分
  • 数据分析

三、数据分析步骤

1.初始化

import os
import xlrd
import xlwt
import execjs
import requests
import time
import jieba
import random
from xlutils.copy import copy
from collections import Counter
from wordcloud import WordCloud
from matplotlib import image

class WyplSpider(object):
    def __init__(self):
        '''
            1、初始化部分
        '''
        self.number = 1
        self.cursor = -1
        self.song_id = r'2013972674'
        self.start_url = r'https://music.163.com/weapi/comment/resource/comments/get?csrf_token='

 

2.JS逆向

 def Rervese_JS(self, index):
        '''
            2. JS reverse-engineering step.

            Builds the plain request fields for page `index` and passes them
            to the `Encrypt` function of ./wyypl.js. Returns whatever that
            function returns (the caller reads its 'encText' and
            'encSecKey' entries).
        '''
        thread_id = "R_SO_4_{}".format(self.song_id)
        js_params = {
            "rid": thread_id,
            "threadId": thread_id,
            "pageNo": "{}".format(index),
            "pageSize": "20",
            "cursor": "{}".format(self.cursor),
            "offset": "0",
            "orderType": "1",
            "csrf_token": ""
        }
        # Reading and compiling the script is loop-invariant, so do it once
        # per spider instead of once per page. getattr keeps this safe for
        # instances that were created without going through __init__.
        js_context = getattr(self, '_js_context', None)
        if js_context is None:
            with open(r'./wyypl.js', 'r', encoding='utf-8') as f:
                js_context = execjs.compile(f.read())
            self._js_context = js_context
        return js_context.call('Encrypt', js_params)

3.构造请求参数

 def confrim_form_data(self):
        '''
            3、构造请求参数
        '''
        for index in range(1, 11):
            Encrypt_Data = self.Rervese_JS(index)
            form_data = {
                'params': Encrypt_Data['encText'],
                'encSecKey': Encrypt_Data['encSecKey'],
            }
            self.requests_start_url(form_data)
            time.sleep(random.randint(1, 2))

 

4.请求起始地址,获取响应

    def requests_start_url(self, form_data):
        '''
            4. POST the encrypted form to the comment API and pass the
               decoded JSON on to parse_response_first.

            form_data: dict with the 'params' and 'encSecKey' fields built
            by confrim_form_data.
            Raises requests.HTTPError for a 4xx/5xx answer and
            requests.Timeout when the server does not answer in time.
        '''
        headers = {
            # No hard-coded content-length: requests derives it from the body.
            'content-type': 'application/x-www-form-urlencoded',
            'origin': 'https://music.163.com',
            # Keep the referer in step with the song that is being crawled.
            'referer': 'https://music.163.com/song?id={}'.format(self.song_id),
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
            # NOTE(review): this looks like a captured browser session
            # cookie; consider loading it from configuration instead of
            # keeping it in source. The literal is split into adjacent
            # strings so that no quoted string spans a line break.
            'cookie': (
                '_ntes_nnid=eadb0ed7dfc8af2c6ed4396d450e9938,1648110937354; '
                '_ntes_nuid=eadb0ed7dfc8af2c6ed4396d450e9938; '
                'WEVNSM=1.0.0; '
                'WNMCID=mddiyi.1648110937474.01.0; '
                'NMTID=00Od0BeiWIkyLEwVUHwssxUTr-r2LIAAAF_uxElTA; '
                '_ga=GA1.2.1535768445.1649667578; '
                'WM_TID=FtvgfNvoSrNEVQBEBVbBDQYCVb%2FjT7t6; '
                '__snaker__id=5TABdjtOTDN1U2tJ; '
                'gdxidpyhxdE=VYfr%2BnSPJ6ODm1EDZXGxJW%2BECo%2FRJxYc8v0M2x32VyLNuMSEbZ9jE9Qoysw554Mc9tp4BgS7tUUzjnPg7keV50Oq81vYoukO8aj4LiR6u1emmXKieAAy4Evx%2FWABrZzUtIVU5doj3owiOul19jSchdcirxqNefJ5jA5DLUXMX7XIVfas%3A1658144530294; '
                '_9755xjdesxxd_=32; '
                'YD00000558929251%3AWM_NI=2v1eq0TfwrPYTg5PfAfeFZ%2FubGfNIo59ROgLJ0DRIwDSt1wB4oNLjAJ95V%2FgdLCQBZw4oH7LNVAmkT2vbvEWE7d5ulnufuxBBGSnhO3m1Eu9vfjSsa6FlnUiH3WXA9x8Vlg%3D; '
                'YD00000558929251%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eeaec85c82bce1b9ae548bb08ea3c44f839b9b87c45996869ca6e53dfb918694db2af0fea7c3b92af2aaff86f43c98bab993d35398b9fbb1e54ba6bb8abbae7fa299aaafcc65f7e79eb0e6488af08cb3cc7d888dbcd2f64af8b182ade94b8ae7fbb7ae6da8948db2c152b5eca1a9b6488e8a8f8eea60f2aaae97c85d87e9acd6f868aeabaa96fb4a82999d8feb45f1b3ffa6f833b6baa6b4f26bf69aa48fd8638ced99b9c433bce89a8de637e2a3; '
                'YD00000558929251%3AWM_TID=w8IReXz%2BLiFARRQAUUPVCQITUf%2FsedpY; '
                'ntes_kaola_ad=1; '
                '__bid_n=183ad76cde163b76a44207; '
                'FPTOKEN=abAuNIT7IiWc7nm72caPyHMim3GbB3ndEqWZqxe2dsl0h7BsF8NRrDLz11pjH1wnBVo4LWYbU5moL1KsvG8uhXfZ1ybws29YIuDgNWYIk5VZ/DA+XH/4QO9hElD6r3XpGvChd0XqePFXBa7q8jv/cuIuEr56OAwwx4TOLSRup6QrukAfEuTA3j9KKaBlApH3d8US3TvIZciEDriWREOb9/LXvesScPBCvfgfASg52AyfECYfGmOC8Oh+/4H+CxytddLmTFiP1JEV4TdSwrh9qLIu0u3dIKiX1R+cZ4O8kTQn7kweNy99WXhbXwASYqBCvF2AFooxqzWdjot/KTyoNPs4IUucx52JV0jW68S/QEXl9wL4vbOe+OWRUXlmRTbG+J9jeq4dyoffREBTfjIt2Q==|ybGVAEE0NHfQzVDO++o+ZedWI9CsVKGJ/FRuD61FzxU=|10|5bb1d021120aaa6b613f4355f2980e3d; '
                'WM_NI=ADAXoWw%2BG0IKVi2cSMLggc3gDF8Fbvs1%2FO%2Fi1lmXoGBmyAmHyDX4wKOk6cWj1Nqa%2B%2Fl4ImO3YEhlXcODWc8Xh2hTx5GCkrbXPKGtZwHVqDbJBw4iVVCIX0GW0ZM7qkuLZms%3D; '
                'WM_NIKE=9ca17ae2e6ffcda170e2e6eed8f564959dbed7ce3a8cb48aa2c44f829b8a86d55c9aabfa96b76282bab8b7bc2af0fea7c3b92a839c8fb9ee5db5f589d1cc4196b7fe84d24790b9a984b26d889af8a7e17eb1efe1b0ae63f3b4a3b9ee4d82b3b7a2f044bb9c98d5f6418db5fa84d662f392bf82aa25b0ec878ece399b9ba88ddb698d9c9984bb54aceea8b5fb3f859da593e15494958995d97d8c8abfa5e14282a9fc8ad55e819798d1c65df48683d8f461b794968ef237e2a3; '
                'JSESSIONID-WYYY=5x%2BXCiX8V8e%5Cgs7G7Zo%5C%2BX8rbaWljCJ7X4bdO4NBPIdHlx3ygKCFbKDtGh%5ClOozZPayeX0r8X86ROWGQC9eG11E9UI9K%2BZ%2B8TPbXhg4kfqhnm2DR3H1oUJ%2FQ3ru126pV%5CB49jyEu2sDrdwn6ZnnSWxMSfYeBnmSkrf2R4tD0phopQ97B%3A1678264351561; '
                '_iuqxldmzr_=32'
            ),
        }
        # A timeout stops a stalled connection from hanging the crawl.
        response = requests.post(self.start_url, data=form_data, headers=headers, timeout=10)
        # Fail on HTTP errors instead of trying to parse an error page as JSON.
        response.raise_for_status()
        response_first = response.json()
        self.parse_response_first(response_first)

5.解析获取评论字段信息

    def parse_response_first(self, response_first):
        '''
            5、解析获取评论字段信息
        '''
        # ===============================A、获取评论信息大列表===========================
        comment_infos = response_first['data']['comments']
        self.cursor = response_first['data']['cursor']
        print(self.cursor)
        for comment_info in comment_infos:
            # ===============================B、解析获取评论信息=========================
            # 1、用户ID userId
            user_id = comment_info['user']['userId']
            # 2、用户名称
            user_name = comment_info['user']['nickname']
            # 3、评论内容
            content = comment_info['content']
            # 4、评论时间
            pub_time = comment_info['time']
            pub_time = time.strftime("%Y-%m-%d", time.localtime(pub_time / 1000))
            # 5、翻页cursor
            self.cursor = response_first['data']['cursor']
            # print(user_id, user_name, content, pub_time, self.cursor)
            # ===============================C、构造保存大字典=========================
            data = {
                '数据': [self.number, user_id, user_name, content, pub_time]
            }
            with open('评论文本.txt', 'a+', encoding='utf-8') as f:
                f.write(content)
            self.save_data(data, user_name)
            self.number += 1

 

 

 

6.保存Excel数据

    def save_data(self, data, user_name):
        '''
            6. Save one record to the Excel workbook.

            This first part creates ./网易云音乐评论内容.xls with a styled
            header row when the file does not exist yet.
        '''
        if not os.path.exists(r'./网易云音乐评论内容.xls'):
            # 1. Create the workbook and its single sheet.
            wb = xlwt.Workbook(encoding='utf-8')
            sheet = wb.add_sheet('数据', cell_overwrite_ok=True)
            # 2. Thin borders on all four sides, colour index 0x40.
            borders = xlwt.Borders()
            borders.left = xlwt.Borders.THIN
            borders.right = xlwt.Borders.THIN
            borders.top = xlwt.Borders.THIN
            borders.bottom = xlwt.Borders.THIN
            borders.left_colour = 0x40
            borders.right_colour = 0x40
            borders.top_colour = 0x40
            borders.bottom_colour = 0x40
            style = xlwt.XFStyle()
            style.borders = borders
            # 3. Centre the header cells horizontally and vertically.
            align = xlwt.Alignment()
            align.horz = 0x02
            align.vert = 0x01
            style.alignment = align
            # 4. Write the header row.
            header = ('序号', '用户ID', '用户名称', '评论内容', '评论时间')
            for i, title in enumerate(header):
                sheet.col(i).width = 2560 * 3
                sheet.write(0, i, title, style)
            # Save once, after the whole header row has been written,
            # instead of after every header cell.
            wb.save(r'./网易云音乐评论内容.xls')

 

 

7.检查

 # 判断工作表是否存在
        if os.path.exists(r'./网易云音乐评论内容.xls'):
            # 打开工作薄
            wb = xlrd.open_workbook(r'./网易云音乐评论内容.xls')
            # 获取工作薄中所有表的个数
            sheets = wb.sheet_names()
            for i in range(len(sheets)):
                for name in data.keys():
                    worksheet = wb.sheet_by_name(sheets[i])
                    # 获取工作薄中所有表中的表名与数据名对比
                    if worksheet.name == name:
                        # 获取表中已存在的行数
                        rows_old = worksheet.nrows
                        # 将xlrd对象拷贝转化为xlwt对象
                        new_workbook = copy(wb)
                        # 获取转化后的工作薄中的第i张表
                        new_worksheet = new_workbook.get_sheet(i)
                        for num in range(0, len(data[name])):
                            new_worksheet.write(rows_old, num, data[name][num])
                        new_workbook.save(r'./网易云音乐评论内容.xls')
        print(r'***正在保存: 第{}条网易云音乐评论数据: {}'.format(self.number, user_name))

 

 

8.读取统计

  def show_image(self):
        '''
            Build a word cloud from the collected comment text.

            Reads 评论文本.txt, segments it with jieba, drops whitespace
            tokens and the stop words listed in stop_words.txt, and renders
            the 100 most frequent words into 词云图.png using star.jpg as
            the mask.
        '''
        with open('评论文本.txt', 'r', encoding='utf-8') as f:
            data = f.read()

        # A set gives constant-time membership tests; entries are
        # lower-cased so the comparison below is case-insensitive.
        with open('stop_words.txt', 'r', encoding='utf-8') as f:
            stop_words = {line.strip().lower() for line in f}

        # word.strip() discards every whitespace-only token (spaces,
        # newlines, tabs), not only ' ' and '\n'.
        result_words = [
            word for word in jieba.cut(data)
            if word.strip() and word.lower() not in stop_words
        ]

        # Word frequencies, limited to the 100 most common.
        top_words = Counter(result_words).most_common(100)

        # Background picture used as the mask of the cloud.
        mask_pic = image.imread('star.jpg')
        wd = WordCloud(
            font_path="msyh.ttc",
            background_color="white",
            scale=4,
            mask=mask_pic,
            max_words=100,
            contour_width=1,
            contour_color='steelblue',
        )
        # Only the filtered frequencies are rendered. A preceding
        # generate(data) pass on the raw text would be replaced by this
        # call, so it is not performed.
        wd.generate_from_frequencies(dict(top_words))
        wd.to_file('词云图.png')
        print('===================词云图创建完成================')

    def main(self):
        '''
            Control flow: crawl and save the comment pages first, then
            build the word cloud from the saved text.
        '''
        self.confrim_form_data()
        self.show_image()


if __name__ == '__main__':
    # Script entry point: create the spider and run the whole pipeline.
    WyplSpider().main()

 

 

9.附完整程序源代码(以及输出结果)

import os
import xlrd
import xlwt
import execjs
import requests
import time
import jieba
import random
from xlutils.copy import copy
from collections import Counter
from wordcloud import WordCloud
from matplotlib import image

class WyplSpider(object):
    def __init__(self):
        '''
            1、初始化部分
        '''
        self.number = 1
        self.cursor = -1
        self.song_id = r'2013972674'
        self.start_url = r'https://music.163.com/weapi/comment/resource/comments/get?csrf_token='

    def Rervese_JS(self, index):
        '''
            2. JS reverse-engineering step.

            Builds the plain request fields for page `index` and passes them
            to the `Encrypt` function of ./wyypl.js. Returns whatever that
            function returns (the caller reads its 'encText' and
            'encSecKey' entries).
        '''
        thread_id = "R_SO_4_{}".format(self.song_id)
        js_params = {
            "rid": thread_id,
            "threadId": thread_id,
            "pageNo": "{}".format(index),
            "pageSize": "20",
            "cursor": "{}".format(self.cursor),
            "offset": "0",
            "orderType": "1",
            "csrf_token": ""
        }
        # Reading and compiling the script is loop-invariant, so do it once
        # per spider instead of once per page. getattr keeps this safe for
        # instances that were created without going through __init__.
        js_context = getattr(self, '_js_context', None)
        if js_context is None:
            with open(r'./wyypl.js', 'r', encoding='utf-8') as f:
                js_context = execjs.compile(f.read())
            self._js_context = js_context
        return js_context.call('Encrypt', js_params)

    def confrim_form_data(self):
        '''
            3、构造请求参数
        '''
        for index in range(1, 11):
            Encrypt_Data = self.Rervese_JS(index)
            form_data = {
                'params': Encrypt_Data['encText'],
                'encSecKey': Encrypt_Data['encSecKey'],
            }
            self.requests_start_url(form_data)
            time.sleep(random.randint(1, 2))

    def requests_start_url(self, form_data):
        '''
            4. POST the encrypted form to the comment API and pass the
               decoded JSON on to parse_response_first.

            form_data: dict with the 'params' and 'encSecKey' fields built
            by confrim_form_data.
            Raises requests.HTTPError for a 4xx/5xx answer and
            requests.Timeout when the server does not answer in time.
        '''
        headers = {
            # No hard-coded content-length: requests derives it from the body.
            'content-type': 'application/x-www-form-urlencoded',
            'origin': 'https://music.163.com',
            # Keep the referer in step with the song that is being crawled.
            'referer': 'https://music.163.com/song?id={}'.format(self.song_id),
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
            # NOTE(review): this looks like a captured browser session
            # cookie; consider loading it from configuration instead of
            # keeping it in source. The literal is split into adjacent
            # strings so that no quoted string spans a line break.
            'cookie': (
                '_ntes_nnid=eadb0ed7dfc8af2c6ed4396d450e9938,1648110937354; '
                '_ntes_nuid=eadb0ed7dfc8af2c6ed4396d450e9938; '
                'WEVNSM=1.0.0; '
                'WNMCID=mddiyi.1648110937474.01.0; '
                'NMTID=00Od0BeiWIkyLEwVUHwssxUTr-r2LIAAAF_uxElTA; '
                '_ga=GA1.2.1535768445.1649667578; '
                'WM_TID=FtvgfNvoSrNEVQBEBVbBDQYCVb%2FjT7t6; '
                '__snaker__id=5TABdjtOTDN1U2tJ; '
                'gdxidpyhxdE=VYfr%2BnSPJ6ODm1EDZXGxJW%2BECo%2FRJxYc8v0M2x32VyLNuMSEbZ9jE9Qoysw554Mc9tp4BgS7tUUzjnPg7keV50Oq81vYoukO8aj4LiR6u1emmXKieAAy4Evx%2FWABrZzUtIVU5doj3owiOul19jSchdcirxqNefJ5jA5DLUXMX7XIVfas%3A1658144530294; '
                '_9755xjdesxxd_=32; '
                'YD00000558929251%3AWM_NI=2v1eq0TfwrPYTg5PfAfeFZ%2FubGfNIo59ROgLJ0DRIwDSt1wB4oNLjAJ95V%2FgdLCQBZw4oH7LNVAmkT2vbvEWE7d5ulnufuxBBGSnhO3m1Eu9vfjSsa6FlnUiH3WXA9x8Vlg%3D; '
                'YD00000558929251%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eeaec85c82bce1b9ae548bb08ea3c44f839b9b87c45996869ca6e53dfb918694db2af0fea7c3b92af2aaff86f43c98bab993d35398b9fbb1e54ba6bb8abbae7fa299aaafcc65f7e79eb0e6488af08cb3cc7d888dbcd2f64af8b182ade94b8ae7fbb7ae6da8948db2c152b5eca1a9b6488e8a8f8eea60f2aaae97c85d87e9acd6f868aeabaa96fb4a82999d8feb45f1b3ffa6f833b6baa6b4f26bf69aa48fd8638ced99b9c433bce89a8de637e2a3; '
                'YD00000558929251%3AWM_TID=w8IReXz%2BLiFARRQAUUPVCQITUf%2FsedpY; '
                'ntes_kaola_ad=1; '
                '__bid_n=183ad76cde163b76a44207; '
                'FPTOKEN=abAuNIT7IiWc7nm72caPyHMim3GbB3ndEqWZqxe2dsl0h7BsF8NRrDLz11pjH1wnBVo4LWYbU5moL1KsvG8uhXfZ1ybws29YIuDgNWYIk5VZ/DA+XH/4QO9hElD6r3XpGvChd0XqePFXBa7q8jv/cuIuEr56OAwwx4TOLSRup6QrukAfEuTA3j9KKaBlApH3d8US3TvIZciEDriWREOb9/LXvesScPBCvfgfASg52AyfECYfGmOC8Oh+/4H+CxytddLmTFiP1JEV4TdSwrh9qLIu0u3dIKiX1R+cZ4O8kTQn7kweNy99WXhbXwASYqBCvF2AFooxqzWdjot/KTyoNPs4IUucx52JV0jW68S/QEXl9wL4vbOe+OWRUXlmRTbG+J9jeq4dyoffREBTfjIt2Q==|ybGVAEE0NHfQzVDO++o+ZedWI9CsVKGJ/FRuD61FzxU=|10|5bb1d021120aaa6b613f4355f2980e3d; '
                'WM_NI=ADAXoWw%2BG0IKVi2cSMLggc3gDF8Fbvs1%2FO%2Fi1lmXoGBmyAmHyDX4wKOk6cWj1Nqa%2B%2Fl4ImO3YEhlXcODWc8Xh2hTx5GCkrbXPKGtZwHVqDbJBw4iVVCIX0GW0ZM7qkuLZms%3D; '
                'WM_NIKE=9ca17ae2e6ffcda170e2e6eed8f564959dbed7ce3a8cb48aa2c44f829b8a86d55c9aabfa96b76282bab8b7bc2af0fea7c3b92a839c8fb9ee5db5f589d1cc4196b7fe84d24790b9a984b26d889af8a7e17eb1efe1b0ae63f3b4a3b9ee4d82b3b7a2f044bb9c98d5f6418db5fa84d662f392bf82aa25b0ec878ece399b9ba88ddb698d9c9984bb54aceea8b5fb3f859da593e15494958995d97d8c8abfa5e14282a9fc8ad55e819798d1c65df48683d8f461b794968ef237e2a3; '
                'JSESSIONID-WYYY=5x%2BXCiX8V8e%5Cgs7G7Zo%5C%2BX8rbaWljCJ7X4bdO4NBPIdHlx3ygKCFbKDtGh%5ClOozZPayeX0r8X86ROWGQC9eG11E9UI9K%2BZ%2B8TPbXhg4kfqhnm2DR3H1oUJ%2FQ3ru126pV%5CB49jyEu2sDrdwn6ZnnSWxMSfYeBnmSkrf2R4tD0phopQ97B%3A1678264351561; '
                '_iuqxldmzr_=32'
            ),
        }
        # A timeout stops a stalled connection from hanging the crawl.
        response = requests.post(self.start_url, data=form_data, headers=headers, timeout=10)
        # Fail on HTTP errors instead of trying to parse an error page as JSON.
        response.raise_for_status()
        response_first = response.json()
        self.parse_response_first(response_first)

    def parse_response_first(self, response_first):
        '''
            5、解析获取评论字段信息
        '''
        # ===============================A、获取评论信息大列表===========================
        comment_infos = response_first['data']['comments']
        self.cursor = response_first['data']['cursor']
        print(self.cursor)
        for comment_info in comment_infos:
            # ===============================B、解析获取评论信息=========================
            # 1、用户ID userId
            user_id = comment_info['user']['userId']
            # 2、用户名称
            user_name = comment_info['user']['nickname']
            # 3、评论内容
            content = comment_info['content']
            # 4、评论时间
            pub_time = comment_info['time']
            pub_time = time.strftime("%Y-%m-%d", time.localtime(pub_time / 1000))
            # 5、翻页cursor
            self.cursor = response_first['data']['cursor']
            # print(user_id, user_name, content, pub_time, self.cursor)
            # ===============================C、构造保存大字典=========================
            data = {
                '数据': [self.number, user_id, user_name, content, pub_time]
            }
            with open('评论文本.txt', 'a+', encoding='utf-8') as f:
                f.write(content)
            self.save_data(data, user_name)
            self.number += 1

    def save_data(self, data, user_name):
        '''
            6. Append one record to the Excel workbook.

            data: maps a sheet name to the list of cell values of the new
            row. Names that match no sheet in the workbook are ignored.
            user_name: only used in the progress message.

            ./网易云音乐评论内容.xls is created with a styled header row on
            first use.
        '''
        xls_path = r'./网易云音乐评论内容.xls'
        if not os.path.exists(xls_path):
            # 1. Create the workbook and its single sheet.
            wb = xlwt.Workbook(encoding='utf-8')
            sheet = wb.add_sheet('数据', cell_overwrite_ok=True)
            # 2. Thin borders on all four sides, colour index 0x40.
            borders = xlwt.Borders()
            borders.left = xlwt.Borders.THIN
            borders.right = xlwt.Borders.THIN
            borders.top = xlwt.Borders.THIN
            borders.bottom = xlwt.Borders.THIN
            borders.left_colour = 0x40
            borders.right_colour = 0x40
            borders.top_colour = 0x40
            borders.bottom_colour = 0x40
            style = xlwt.XFStyle()
            style.borders = borders
            # 3. Centre the header cells horizontally and vertically.
            align = xlwt.Alignment()
            align.horz = 0x02
            align.vert = 0x01
            style.alignment = align
            # 4. Write the header row, then save once (not once per cell).
            header = ('序号', '用户ID', '用户名称', '评论内容', '评论时间')
            for i, title in enumerate(header):
                sheet.col(i).width = 2560 * 3
                sheet.write(0, i, title, style)
            wb.save(xls_path)

        # formatting_info=True makes xlrd keep cell styles and column
        # widths, so the header formatting survives the xlutils copy.
        book = xlrd.open_workbook(xls_path, formatting_info=True)
        sheet_names = book.sheet_names()
        # Copy once; copying inside the loop would discard rows written for
        # an earlier sheet name.
        new_workbook = copy(book)
        changed = False
        for name, row_values in data.items():
            if name not in sheet_names:
                continue
            sheet_index = sheet_names.index(name)
            # The first free row equals the number of existing rows.
            rows_old = book.sheet_by_index(sheet_index).nrows
            new_worksheet = new_workbook.get_sheet(sheet_index)
            for column, value in enumerate(row_values):
                new_worksheet.write(rows_old, column, value)
            changed = True
        if changed:
            new_workbook.save(xls_path)
        print(r'***正在保存: 第{}条网易云音乐评论数据: {}'.format(self.number, user_name))

    def show_image(self):
        '''
            Build a word cloud from the collected comment text.

            Reads 评论文本.txt, segments it with jieba, drops whitespace
            tokens and the stop words listed in stop_words.txt, and renders
            the 100 most frequent words into 词云图.png using star.jpg as
            the mask.
        '''
        with open('评论文本.txt', 'r', encoding='utf-8') as f:
            data = f.read()

        # A set gives constant-time membership tests; entries are
        # lower-cased so the comparison below is case-insensitive.
        with open('stop_words.txt', 'r', encoding='utf-8') as f:
            stop_words = {line.strip().lower() for line in f}

        # word.strip() discards every whitespace-only token (spaces,
        # newlines, tabs), not only ' ' and '\n'.
        result_words = [
            word for word in jieba.cut(data)
            if word.strip() and word.lower() not in stop_words
        ]

        # Word frequencies, limited to the 100 most common.
        top_words = Counter(result_words).most_common(100)

        # Background picture used as the mask of the cloud.
        mask_pic = image.imread('star.jpg')
        wd = WordCloud(
            font_path="msyh.ttc",
            background_color="white",
            scale=4,
            mask=mask_pic,
            max_words=100,
            contour_width=1,
            contour_color='steelblue',
        )
        # Only the filtered frequencies are rendered. A preceding
        # generate(data) pass on the raw text would be replaced by this
        # call, so it is not performed.
        wd.generate_from_frequencies(dict(top_words))
        wd.to_file('词云图.png')
        print('===================词云图创建完成================')

    def main(self):
        '''
            Control flow: crawl and save the comment pages first, then
            build the word cloud from the saved text.
        '''
        self.confrim_form_data()
        self.show_image()


if __name__ == '__main__':
    # Script entry point: create the spider and run the whole pipeline.
    WyplSpider().main()

 JS文件:

var window = global;
var CryptoJS = require('crypto-js')

// RSA key container: parses the hex-encoded exponents and modulus and
// prepares a BarrettMu reducer for the modulus.
function RSAKeyPair(encryptionExponent, decryptionExponent, modulus) {
    this.e = biFromHex(encryptionExponent);
    this.d = biFromHex(decryptionExponent);
    this.m = biFromHex(modulus);
    // Number of input characters consumed per encrypted block.
    this.chunkSize = 2 * biHighIndex(this.m);
    this.radix = 16;
    this.barrett = new BarrettMu(this.m);
}
// Formats a number as text, prefixing "0" when the value is below 10.
function twoDigit(n) {
    var prefix = "";
    if (n < 10) {
        prefix = "0";
    }
    return prefix + String(n);
}
// Encrypts string `s` with `key` (an RSAKeyPair). The character codes are
// zero-padded to a multiple of key.chunkSize, packed two per digit into a
// BigInt per chunk, raised to key.e through the Barrett reducer, and the
// encoded blocks are joined with single spaces.
function encryptedString(key, s) {
    var codes = [];
    var length = s.length;
    var i = 0;
    while (i < length) {
        codes[i] = s.charCodeAt(i);
        i++;
    }
    // Pad with zeros up to a whole number of chunks.
    while (codes.length % key.chunkSize != 0) {
        codes[i++] = 0;
    }
    var total = codes.length;
    var result = "";
    var block, digitIndex, k, crypt, text;
    for (i = 0; i < total; i += key.chunkSize) {
        block = new BigInt();
        digitIndex = 0;
        for (k = i; k < i + key.chunkSize; ++digitIndex) {
            // Low byte first, then the next code shifted into the high byte.
            block.digits[digitIndex] = codes[k++];
            block.digits[digitIndex] += codes[k++] << 8;
        }
        crypt = key.barrett.powMod(block, key.e);
        if (key.radix == 16) {
            text = biToHex(crypt);
        } else {
            text = biToString(crypt, key.radix);
        }
        result += text + " ";
    }
    // Drop the trailing separator.
    return result.substring(0, result.length - 1);
}
// Decrypts a space-separated list of encoded blocks with key.d and rebuilds
// the text, two character codes per digit. One trailing NUL character, if
// present, is removed.
function decryptedString(key, s) {
    var blocks = s.split(" ");
    var result = "";
    var i, j, block, plain;
    for (i = 0; i < blocks.length; ++i) {
        if (key.radix == 16) {
            block = biFromHex(blocks[i]);
        } else {
            block = biFromString(blocks[i], key.radix);
        }
        plain = key.barrett.powMod(block, key.d);
        for (j = 0; j <= biHighIndex(plain); ++j) {
            result += String.fromCharCode(plain.digits[j] & 255, plain.digits[j] >> 8);
        }
    }
    if (result.charCodeAt(result.length - 1) == 0) {
        result = result.substring(0, result.length - 1);
    }
    return result;
}
// Sets the digit capacity used for new BigInt values and rebuilds the
// shared zero template plus the bigZero / bigOne constants.
function setMaxDigits(value) {
    maxDigits = value;
    ZERO_ARRAY = new Array(maxDigits);
    var i = 0;
    while (i < ZERO_ARRAY.length) {
        ZERO_ARRAY[i] = 0;
        i++;
    }
    bigZero = new BigInt();
    bigOne = new BigInt();
    bigOne.digits[0] = 1;
}
// Big-integer record: `digits` is a copy of ZERO_ARRAY, or null when the
// constructor is called with the boolean true; `isNeg` starts as false.
function BigInt(flag) {
    if (typeof flag == "boolean" && flag == true) {
        this.digits = null;
    } else {
        this.digits = ZERO_ARRAY.slice(0);
    }
    this.isNeg = false;
}
// Parses a decimal string (optional leading "-") into a BigInt, consuming
// the digits in groups of dpl10 characters.
function biFromDecimal(s) {
    var isNeg = s.charAt(0) == "-";
    var i = isNeg ? 1 : 0;
    var result, digitCount, firstGroup;
    // Skip leading zeros.
    while (i < s.length && s.charAt(i) == "0") {
        ++i;
    }
    if (i == s.length) {
        result = new BigInt();
    } else {
        digitCount = s.length - i;
        // The first group takes the remainder so later groups are full.
        firstGroup = digitCount % dpl10;
        if (firstGroup == 0) {
            firstGroup = dpl10;
        }
        result = biFromNumber(Number(s.substr(i, firstGroup)));
        i += firstGroup;
        while (i < s.length) {
            result = biAdd(biMultiply(result, lr10), biFromNumber(Number(s.substr(i, dpl10))));
            i += dpl10;
        }
        result.isNeg = isNeg;
    }
    return result;
}
// Returns a new BigInt with its own copy of the digit array and the same sign.
function biCopy(source) {
    var duplicate = new BigInt(true);
    duplicate.digits = source.digits.slice(0);
    duplicate.isNeg = source.isNeg;
    return duplicate;
}
// Converts a JavaScript number into a BigInt, least significant digit first.
// The sign is recorded in isNeg and the magnitude is split into
// base-biRadix digits.
function biFromNumber(value) {
    var result = new BigInt();
    result.isNeg = value < 0;
    value = Math.abs(value);
    var index = 0;
    while (value > 0) {
        result.digits[index++] = value & maxDigitVal;
        // Divide instead of using `>>=`: the shift operators first truncate
        // their operand to 32 bits, which corrupts values of 2^31 and above
        // (such as the 1e15 passed in when lr10 is initialised).
        value = Math.floor(value / biRadix);
    }
    return result;
}
// Returns the characters of `s` in reverse order.
function reverseStr(s) {
    return s.split("").reverse().join("");
}
// Renders a BigInt as text in the given radix by repeated division,
// collecting the remainders and reversing them at the end.
function biToString(x, radix) {
    var base = new BigInt();
    base.digits[0] = radix;
    var qr = biDivideModulo(x, base);
    var result = hexatrigesimalToChar[qr[1].digits[0]];
    while (biCompare(qr[0], bigZero) == 1) {
        qr = biDivideModulo(qr[0], base);
        // The stray assignment to an undeclared `digit` variable was
        // removed: it was never read and leaked a global.
        result += hexatrigesimalToChar[qr[1].digits[0]];
    }
    return (x.isNeg ? "-" : "") + reverseStr(result);
}
// Renders a BigInt as a decimal string by repeated division by ten.
function biToDecimal(x) {
    var ten = new BigInt();
    ten.digits[0] = 10;
    var qr = biDivideModulo(x, ten);
    var reversed = String(qr[1].digits[0]);
    while (biCompare(qr[0], bigZero) == 1) {
        qr = biDivideModulo(qr[0], ten);
        reversed += String(qr[1].digits[0]);
    }
    var sign = x.isNeg ? "-" : "";
    return sign + reverseStr(reversed);
}
// Converts one digit into exactly four hex characters, most significant
// nibble first.
function digitToHex(n) {
    var mask = 15;
    var result = "";
    // Declared locally; the loop counter used to leak as a global `i`.
    var i;
    for (i = 0; i < 4; ++i) {
        result += hexToChar[n & mask];
        n >>>= 4;
    }
    return reverseStr(result);
}
// Renders a BigInt as hex text, four characters per digit, starting from
// the highest non-zero digit.
function biToHex(x) {
    var result = "";
    // biHighIndex is called once; the earlier duplicate call discarded its
    // result.
    for (var i = biHighIndex(x); i > -1; --i) {
        result += digitToHex(x.digits[i]);
    }
    return result;
}
// Maps a character code to its digit value: '0'-'9' give 0-9, 'A'-'Z' and
// 'a'-'z' give 10-35, anything else gives 0.
function charToHex(code) {
    var ZERO = 48;
    var NINE = ZERO + 9;
    var LOWER_A = 97;
    var LOWER_Z = LOWER_A + 25;
    var UPPER_A = 65;
    var UPPER_Z = 90;
    if (code >= ZERO && code <= NINE) {
        return code - ZERO;
    }
    if (code >= UPPER_A && code <= UPPER_Z) {
        return 10 + code - UPPER_A;
    }
    if (code >= LOWER_A && code <= LOWER_Z) {
        return 10 + code - LOWER_A;
    }
    return 0;
}
// Parses at most the first four hex characters of `s` into one number.
function hexToDigit(s) {
    var result = 0;
    var count = Math.min(s.length, 4);
    for (var i = 0; i < count; ++i) {
        result = (result << 4) | charToHex(s.charCodeAt(i));
    }
    return result;
}
// Parses a hex string into a BigInt, taking four characters per digit
// starting from the end of the string.
function biFromHex(s) {
    var result = new BigInt();
    var remaining = s.length;
    var digitIndex = 0;
    while (remaining > 0) {
        result.digits[digitIndex] = hexToDigit(s.substr(Math.max(remaining - 4, 0), Math.min(remaining, 4)));
        remaining -= 4;
        ++digitIndex;
    }
    return result;
}
// Parses text in the given radix (optional leading "-") into a BigInt,
// accumulating digit * place value from the last character backwards.
function biFromString(s, radix) {
    var isNeg = s.charAt(0) == "-";
    var firstIndex = isNeg ? 1 : 0;
    var total = new BigInt();
    var place = new BigInt();
    place.digits[0] = 1;
    for (var i = s.length - 1; i >= firstIndex; i--) {
        var code = s.charCodeAt(i);
        var value = charToHex(code);
        var term = biMultiplyDigit(place, value);
        total = biAdd(total, term);
        place = biMultiplyDigit(place, radix);
    }
    total.isNeg = isNeg;
    return total;
}
// Debug helper: the sign followed by the raw digits separated by spaces.
function biDump(x) {
    var sign = "";
    if (x.isNeg) {
        sign = "-";
    }
    return sign + x.digits.join(" ");
}
// Adds two BigInts. Operands with different signs are handled by
// subtraction with the sign of `y` flipped for the duration of the call.
function biAdd(x, y) {
    var result;
    if (x.isNeg != y.isNeg) {
        y.isNeg = !y.isNeg;
        result = biSubtract(x, y);
        y.isNeg = !y.isNeg;
        return result;
    }
    result = new BigInt();
    var carry = 0;
    var sum;
    for (var i = 0; i < x.digits.length; ++i) {
        sum = x.digits[i] + y.digits[i] + carry;
        result.digits[i] = sum & 65535;
        carry = Number(sum >= biRadix);
    }
    result.isNeg = x.isNeg;
    return result;
}
// Subtracts y from x. Operands with different signs are handled by
// addition with the sign of `y` flipped for the duration of the call. When
// the digit-wise subtraction ends with a borrow, the result is negated
// digit by digit and the sign is inverted.
function biSubtract(x, y) {
    var result;
    if (x.isNeg != y.isNeg) {
        y.isNeg = !y.isNeg;
        result = biAdd(x, y);
        y.isNeg = !y.isNeg;
        return result;
    }
    result = new BigInt();
    var borrow = 0;
    var diff, i;
    for (i = 0; i < x.digits.length; ++i) {
        diff = x.digits[i] - y.digits[i] + borrow;
        result.digits[i] = diff & 65535;
        if (result.digits[i] < 0) {
            result.digits[i] += biRadix;
        }
        borrow = 0 - Number(diff < 0);
    }
    if (borrow == -1) {
        borrow = 0;
        for (i = 0; i < x.digits.length; ++i) {
            diff = 0 - result.digits[i] + borrow;
            result.digits[i] = diff & 65535;
            if (result.digits[i] < 0) {
                result.digits[i] += biRadix;
            }
            borrow = 0 - Number(diff < 0);
        }
        result.isNeg = !x.isNeg;
    } else {
        result.isNeg = x.isNeg;
    }
    return result;
}
// Index of the highest non-zero digit; 0 when every digit is zero.
function biHighIndex(x) {
    var index = x.digits.length - 1;
    while (index > 0 && x.digits[index] == 0) {
        --index;
    }
    return index;
}
// Counts bits up to and including the highest set bit of the highest
// non-zero digit, by shifting that digit left until bit 15 is set.
function biNumBits(x) {
    var high = biHighIndex(x);
    var digit = x.digits[high];
    var maxBits = (high + 1) * bitsPerDigit;
    var bits = maxBits;
    while (bits > maxBits - bitsPerDigit && (digit & 32768) == 0) {
        digit <<= 1;
        --bits;
    }
    return bits;
}
// Schoolbook multiplication of two BigInts. The result is negative when
// exactly one operand is negative.
function biMultiply(x, y) {
    var result = new BigInt();
    var xHigh = biHighIndex(x);
    var yHigh = biHighIndex(y);
    // `j` is declared here; it used to be assigned without a declaration
    // and leaked as a global.
    var carry, product, i, j, k;
    for (i = 0; i <= yHigh; ++i) {
        carry = 0;
        k = i;
        for (j = 0; j <= xHigh; ++j, ++k) {
            product = result.digits[k] + x.digits[j] * y.digits[i] + carry;
            result.digits[k] = product & maxDigitVal;
            carry = product >>> biRadixBits;
        }
        result.digits[i + xHigh + 1] = carry;
    }
    result.isNeg = x.isNeg != y.isNeg;
    return result;
}
// Multiplies a BigInt by a single digit value.
function biMultiplyDigit(x, y) {
    // `result` is a local now; it used to be assigned without a
    // declaration and leaked as a global.
    var result = new BigInt();
    var high = biHighIndex(x);
    var carry = 0;
    var product;
    for (var j = 0; j <= high; ++j) {
        product = result.digits[j] + x.digits[j] * y + carry;
        result.digits[j] = product & maxDigitVal;
        carry = product >>> biRadixBits;
    }
    result.digits[1 + high] = carry;
    return result;
}
// Copies up to `count` elements from src[srcStart...] into
// dest[destStart...], stopping at the end of `src`.
function arrayCopy(src, srcStart, dest, destStart, count) {
    var end = Math.min(srcStart + count, src.length);
    var from = srcStart;
    var to = destStart;
    while (from < end) {
        dest[to] = src[from];
        ++from;
        ++to;
    }
}
// Shifts a BigInt left by n bits: whole digits are moved with arrayCopy,
// then the remaining bit offset is applied from the top digit downwards.
function biShiftLeft(x, n) {
    var digitCount = Math.floor(n / bitsPerDigit);
    var result = new BigInt();
    arrayCopy(x.digits, 0, result.digits, digitCount, result.digits.length - digitCount);
    var bits = n % bitsPerDigit;
    var rightBits = bitsPerDigit - bits;
    var i, lower;
    for (i = result.digits.length - 1, lower = i - 1; i > 0; --i, --lower) {
        result.digits[i] = ((result.digits[i] << bits) & maxDigitVal) |
            ((result.digits[lower] & highBitMasks[bits]) >>> rightBits);
    }
    // `i` is left at the lowest index by the loop above.
    result.digits[0] = (result.digits[i] << bits) & maxDigitVal;
    result.isNeg = x.isNeg;
    return result;
}
// Shifts a BigInt right by n bits: whole digits are dropped with arrayCopy,
// then the remaining bit offset is applied from the lowest digit upwards.
function biShiftRight(x, n) {
    var digitCount = Math.floor(n / bitsPerDigit);
    var result = new BigInt();
    arrayCopy(x.digits, digitCount, result.digits, 0, x.digits.length - digitCount);
    var bits = n % bitsPerDigit;
    var leftBits = bitsPerDigit - bits;
    var i, upper;
    for (i = 0, upper = i + 1; i < result.digits.length - 1; ++i, ++upper) {
        result.digits[i] = (result.digits[i] >>> bits) |
            ((result.digits[upper] & lowBitMasks[bits]) << leftBits);
    }
    result.digits[result.digits.length - 1] >>>= bits;
    result.isNeg = x.isNeg;
    return result;
}
// Returns x with its digits moved up by n positions (low digits are zero).
function biMultiplyByRadixPower(x, n) {
    var result = new BigInt();
    arrayCopy(x.digits, 0, result.digits, n, result.digits.length - n);
    return result;
}
// Returns x with its lowest n digits dropped.
function biDivideByRadixPower(x, n) {
    var result = new BigInt();
    arrayCopy(x.digits, n, result.digits, 0, result.digits.length - n);
    return result;
}
// Returns a BigInt holding only the lowest n digits of x.
function biModuloByRadixPower(x, n) {
    var result = new BigInt();
    arrayCopy(x.digits, 0, result.digits, 0, n);
    return result;
}
// Three-way comparison: -1 when x < y, 0 when equal, 1 when x > y.
// Digits are compared from the highest index of x downwards.
function biCompare(x, y) {
    if (x.isNeg != y.isNeg) {
        return 1 - 2 * Number(x.isNeg);
    }
    for (var i = x.digits.length - 1; i >= 0; --i) {
        if (x.digits[i] != y.digits[i]) {
            if (x.isNeg) {
                return 1 - 2 * Number(x.digits[i] > y.digits[i]);
            }
            return 1 - 2 * Number(x.digits[i] < y.digits[i]);
        }
    }
    return 0;
}
// Divides a by b and returns a two-element Array: element 0 is what
// biDivide returns, element 1 is what biModulo returns.
// The local `b` is rebound to shifted copies during normalisation. The
// isNeg flags of the arguments are toggled and restored when b has more
// bits than a and a is negative.
function biDivideModulo(a, b) {
var f, g, h, i, j, k, l, m, n, o, p, q, r, s, c = biNumBits(a), d = biNumBits(b), e = b.isNeg;
// Shortcut when b has more bits than a.
if (d > c)
return a.isNeg ? (f = biCopy(bigOne),
f.isNeg = !b.isNeg,
a.isNeg = !1,
b.isNeg = !1,
g = biSubtract(b, a),
a.isNeg = !0,
b.isNeg = e) : (f = new BigInt,
g = biCopy(a)),
new Array(f,g);
// Normalise: shift b left until its top digit is at least biHalfRadix,
// counting the shifts in i.
for (f = new BigInt,
g = a,
h = Math.ceil(d / bitsPerDigit) - 1,
i = 0; b.digits[h] < biHalfRadix; )
b = biShiftLeft(b, 1),
++i,
++d,
h = Math.ceil(d / bitsPerDigit) - 1;
// Shift the running remainder g by the same amount, then subtract the
// aligned divisor k while g is not smaller than it.
for (g = biShiftLeft(g, i),
c += i,
j = Math.ceil(c / bitsPerDigit) - 1,
k = biMultiplyByRadixPower(b, j - h); -1 != biCompare(g, k); )
++f.digits[j - h],
g = biSubtract(g, k);
// Main loop: estimate one quotient digit per iteration from the leading
// digits of g and b, then lower the estimate while r exceeds s.
for (l = j; l > h; --l) {
// NOTE(review): the re-estimate inside this loop uses `p * biRadix | q`
// where the first estimate uses `p * biRadix + q`; `|` truncates its
// operands to 32 bits - confirm this is intended.
for (m = l >= g.digits.length ? 0 : g.digits[l],
n = l - 1 >= g.digits.length ? 0 : g.digits[l - 1],
o = l - 2 >= g.digits.length ? 0 : g.digits[l - 2],
p = h >= b.digits.length ? 0 : b.digits[h],
q = h - 1 >= b.digits.length ? 0 : b.digits[h - 1],
f.digits[l - h - 1] = m == p ? maxDigitVal : Math.floor((m * biRadix + n) / p),
r = f.digits[l - h - 1] * (p * biRadix + q),
s = m * biRadixSquared + (n * biRadix + o); r > s; )
--f.digits[l - h - 1],
r = f.digits[l - h - 1] * (p * biRadix | q),
s = m * biRadix * biRadix + (n * biRadix + o);
k = biMultiplyByRadixPower(b, l - h - 1),
g = biSubtract(g, biMultiplyDigit(k, f.digits[l - h - 1])),
g.isNeg && (g = biAdd(g, k),
--f.digits[l - h - 1])
}
// Undo the normalisation shift on g, set the sign of f, adjust both parts
// when a is negative, and clear the sign of a zero remainder.
return g = biShiftRight(g, i),
f.isNeg = a.isNeg != e,
a.isNeg && (f = e ? biAdd(f, bigOne) : biSubtract(f, bigOne),
b = biShiftRight(b, i),
g = biSubtract(b, g)),
0 == g.digits[0] && 0 == biHighIndex(g) && (g.isNeg = !1),
new Array(f,g)
}
// Returns element 0 of biDivideModulo(x, y).
function biDivide(x, y) {
    var quotientAndRemainder = biDivideModulo(x, y);
    return quotientAndRemainder[0];
}
// Returns element 1 of biDivideModulo(x, y).
function biModulo(x, y) {
    var quotientAndRemainder = biDivideModulo(x, y);
    return quotientAndRemainder[1];
}
// Returns (x * y) mod m.
function biMultiplyMod(x, y, m) {
    var product = biMultiply(x, y);
    return biModulo(product, m);
}
// Raises BigInt x to the power of the plain number y by square-and-multiply.
function biPow(x, y) {
    var result = bigOne;
    var base = x;
    while (true) {
        if ((y & 1) != 0) {
            result = biMultiply(result, base);
        }
        y >>= 1;
        if (y == 0) {
            break;
        }
        base = biMultiply(base, base);
    }
    return result;
}
// Computes x^y mod m for BigInt operands by square-and-multiply, reducing
// with biMultiplyMod after every multiplication.
function biPowMod(x, y, m) {
    var result = bigOne;
    var base = x;
    var exponent = y;
    while (true) {
        if ((exponent.digits[0] & 1) != 0) {
            result = biMultiplyMod(result, base, m);
        }
        exponent = biShiftRight(exponent, 1);
        if (exponent.digits[0] == 0 && biHighIndex(exponent) == 0) {
            break;
        }
        base = biMultiplyMod(base, base, m);
    }
    return result;
}
// Reducer for a fixed modulus: stores a copy of the modulus, its digit
// count k, the precomputed quotient mu of radix^(2k) by the modulus, and
// radix^(k+1), and attaches the modulo / multiplyMod / powMod methods.
function BarrettMu(m) {
    this.modulus = biCopy(m);
    this.k = biHighIndex(this.modulus) + 1;
    var radixPower = new BigInt();
    radixPower.digits[2 * this.k] = 1;
    this.mu = biDivide(radixPower, this.modulus);
    this.bkplus1 = new BigInt();
    this.bkplus1.digits[this.k + 1] = 1;
    this.modulo = BarrettMu_modulo;
    this.multiplyMod = BarrettMu_multiplyMod;
    this.powMod = BarrettMu_powMod;
}
// Reduces x modulo this.modulus using the precomputed mu, then subtracts
// the modulus until the result is smaller than it.
function BarrettMu_modulo(x) {
    var q1 = biDivideByRadixPower(x, this.k - 1);
    var q2 = biMultiply(q1, this.mu);
    var q3 = biDivideByRadixPower(q2, this.k + 1);
    var r1 = biModuloByRadixPower(x, this.k + 1);
    var r2Full = biMultiply(q3, this.modulus);
    var r2 = biModuloByRadixPower(r2Full, this.k + 1);
    var r = biSubtract(r1, r2);
    if (r.isNeg) {
        r = biAdd(r, this.bkplus1);
    }
    var tooLarge = biCompare(r, this.modulus) >= 0;
    while (tooLarge) {
        r = biSubtract(r, this.modulus);
        tooLarge = biCompare(r, this.modulus) >= 0;
    }
    return r;
}
// Returns (x * y) reduced with this reducer's modulo method.
function BarrettMu_multiplyMod(x, y) {
    return this.modulo(biMultiply(x, y));
}
// Computes x^y modulo this reducer's modulus by square-and-multiply.
function BarrettMu_powMod(x, y) {
    var result = new BigInt();
    result.digits[0] = 1;
    var base = x;
    var exponent = y;
    while (true) {
        if ((exponent.digits[0] & 1) != 0) {
            result = this.multiplyMod(result, base);
        }
        exponent = biShiftRight(exponent, 1);
        if (exponent.digits[0] == 0 && biHighIndex(exponent) == 0) {
            break;
        }
        base = this.multiplyMod(base, base);
    }
    return result;
}

// Shared state of the big-integer routines. Digits are 16 bits wide
// (biRadixBits), so biRadix is 65536 and maxDigitVal is 65535.
var maxDigits, ZERO_ARRAY, bigZero, bigOne, dpl10, lr10, hexatrigesimalToChar, hexToChar, highBitMasks, lowBitMasks, biRadixBase = 2, biRadixBits = 16, bitsPerDigit = biRadixBits, biRadix = 65536, biHalfRadix = biRadix >>> 1, biRadixSquared = biRadix * biRadix, maxDigitVal = biRadix - 1, maxInteger = 9999999999999998;
// Initialise the default capacity (20 digits) and the lookup tables:
// dpl10 is the decimal group size used by biFromDecimal and lr10 the
// matching multiplier built from 1e15; hexatrigesimalToChar and hexToChar
// map digit values to characters; highBitMasks and lowBitMasks are indexed
// by bit count in the shift routines.
setMaxDigits(20),
dpl10 = 15,
lr10 = biFromNumber(1e15),
hexatrigesimalToChar = new Array("0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"),
hexToChar = new Array("0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f"),
highBitMasks = new Array(0,32768,49152,57344,61440,63488,64512,65024,65280,65408,65472,65504,65520,65528,65532,65534,65535),
lowBitMasks = new Array(0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535);

// Installs the two encryption entry points on `window`:
//   window.asrsea   - double AES-CBC encryption plus RSA-encrypted session key
//   window.ecnonasr - RSA encryption of two concatenated strings
!function() {
    // Random string of `length` characters drawn from [a-zA-Z0-9].
    function randomString(length) {
        var alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        var out = "";
        var position;
        for (var i = 0; i < length; i += 1) {
            position = Math.random() * alphabet.length;
            position = Math.floor(position);
            out += alphabet.charAt(position);
        }
        return out;
    }

    // AES-CBC encryption of `text` under `secret` with a fixed IV; returns
    // the cipher object's string form.
    function aesEncrypt(text, secret) {
        var key = CryptoJS.enc.Utf8.parse(secret);
        var iv = CryptoJS.enc.Utf8.parse("0102030405060708");
        var plaintext = CryptoJS.enc.Utf8.parse(text);
        var cipher = CryptoJS.AES.encrypt(plaintext, key, {
            iv: iv,
            mode: CryptoJS.mode.CBC
        });
        return cipher.toString();
    }

    // RSA encryption of `text` with a hex exponent and hex modulus.
    function rsaEncrypt(text, exponentHex, modulusHex) {
        setMaxDigits(131);
        var keyPair = new RSAKeyPair(exponentHex, "", modulusHex);
        return encryptedString(keyPair, text);
    }

    // Encrypts the payload with the preset key, then again with a fresh
    // 16-character session key; the session key itself is RSA-encrypted.
    function asrsea(payload, exponentHex, modulusHex, presetKey) {
        var result = {};
        var sessionKey = randomString(16);
        result.encText = aesEncrypt(payload, presetKey);
        result.encText = aesEncrypt(result.encText, sessionKey);
        result.encSecKey = rsaEncrypt(sessionKey, exponentHex, modulusHex);
        return result;
    }

    // RSA-encrypts the concatenation of the first and last argument.
    function ecnonasr(text, exponentHex, modulusHex, suffix) {
        var result = {};
        result.encText = rsaEncrypt(text + suffix, exponentHex, modulusHex);
        return result;
    }

    window.asrsea = asrsea;
    window.ecnonasr = ecnonasr;
}();

// Entry point called from Python: serialises the request fields to JSON and
// encrypts them with window.asrsea using the fixed exponent, modulus and
// preset key below. Returns the object produced by window.asrsea.
function Encrypt(i0x) {
    var modulusHex = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7';
    var exponentHex = '010001';
    var presetKey = '0CoJUm6Qyw8W8jud';
    var payload = JSON.stringify(i0x);
    return window.asrsea(payload, exponentHex, modulusHex, presetKey);
}

//console.log(Encrypt())

四、总结

首先,学习爬虫需要具备一定的编程基础和计算机网络等相关知识。需要掌握HTML、CSS、JavaScript等前端技术,能够使用Python、Java等编程语言进行开发。此外,还需要了解HTTP协议、TCP/IP协议等相关知识,对Web应用的工作原理和运行机制有深刻的理解。

其次,学习爬虫需要具备耐心和细心的态度。由于互联网上的信息形式多样,爬取数据要面临很多问题和挑战,例如网站反爬虫机制、页面布局结构复杂等。需要仔细分析和处理各种情况,并寻找最优解决方案才能获得所需数据。

最后,学习爬虫需要掌握良好的法律意识。在进行网站爬取时,需要遵守相关法律法规和道德规范,尊重数据所有人的权益和利益,并避免对网站服务器造成不必要的负担和影响。

总之,学习爬虫是一项既有挑战性又有趣味性的技能,不仅可以提高数据采集和处理能力,还有助于深入了解互联网世界的构成和运作方式。但需要注意合法合规、避免滥用等问题,才能真正发挥好爬虫技术的作用。