使用 jieba 对《红楼梦》进行分词,统计出现次数最高的 20 个词

发布时间 2023-12-18 19:45:45 作者: 暮木叁水
点击查看代码
import jieba
import wordcloud


def takeSecond(elem):
    """Return the item at index 1 of ``elem``.

    Used as a sort key so that ``(word, count)`` pairs are ordered by count.
    """
    second = elem[1]
    return second

def createWordCloud(text):
    """Build a word cloud from ``text`` and return it.

    The cloud is rendered on a 1000x500 white canvas using the
    ``STZHONGS.TTF`` font (a font with CJK glyphs is needed for Chinese
    text; the font file must be resolvable from the working directory or
    the system font path).

    Args:
        text: The text to visualise. ``WordCloud.generate`` splits it on
            whitespace, so Chinese text should be pre-segmented and joined
            with spaces before being passed in.

    Returns:
        The generated ``wordcloud.WordCloud`` instance, so the caller can
        save it (``.to_file(path)``) or display it (``.to_image()``).
        Previously the object was discarded and the function returned
        ``None``, which made the generated cloud unreachable.
    """
    w = wordcloud.WordCloud(
        font_path="STZHONGS.TTF",
        width=1000,
        height=500,
        background_color="white",
    )
    w.generate(text)
    return w

if __name__ == '__main__':
    # Load the full text of the novel. The context manager guarantees the
    # handle is closed even if reading fails.
    path = r"红楼梦.txt"
    with open(path, "r", encoding="utf-8") as file:
        text = file.read()
    print(text)

    # Segment the text with jieba and count every token longer than one
    # character (single characters are skipped).
    words = jieba.lcut(text)
    counts = {}
    for word in words:
        if len(word) == 1:
            continue
        counts[word] = counts.get(word, 0) + 1

    # Load the Chinese stop-word list (one word per line) and drop those
    # words from the tally. The file is closed as soon as it has been read.
    with open(r"cn_stopwords.txt", "r", encoding='utf-8') as stopFile:
        fourStopwords = stopFile.read()
    StopWord = fourStopwords.split("\n")
    for delWord in StopWord:
        # pop() with a default ignores stop words that never occurred,
        # without hiding unrelated errors the way a bare ``except`` would.
        counts.pop(delWord, None)

    # Rank the remaining words by frequency, most frequent first, and print
    # the full ranking. NOTE(review): slice with ``items[:20]`` if only the
    # 20 most frequent words are wanted.
    items = list(counts.items())
    items.sort(key=takeSecond, reverse=True)
    print(items)