【C++】统计文本词频程序

发布时间 2023-04-11 23:40:17 作者: 幻想Elapse
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
 9 bool cmp(std::pair<std::string, size_t>& a,
10          std::pair<std::string, size_t>& b){
11     return a.second > b.second;
12 }//词频从大到小排序
13 
14 int main(){
15     std::ifstream ifs{};
16     ifs.open("./input.txt", std::ifstream::in);//从input.txt读入文本
17     std::string text( (std::istreambuf_iterator<char>(ifs) ),
18                        (std::istreambuf_iterator<char>()    ) );
19     //std::istreambuf_iterator<char>()表示文件结尾
20     ifs.close();
21     const std::string separators{" ():;.[],\n"};//分隔符,可自定义
22     std::map<std::string, size_t> mp;
23     std::string word{};
24     size_t start{text.find_first_not_of(separators)}, end{};
25     while(start != std::string::npos){
26         end = text.find_first_of(separators, start+1);
27         word = text.substr(start, end-start);
28         std::transform(word.begin(), word.end(), word.begin(),
29             [](unsigned char c){ return std::tolower(c); });//将单词统一转换为小写字母
30         ++mp[word];
31         start =text.find_first_not_of(separators, end+1);
32     }
33     size_t cnt{};
34     const size_t perline{6};//每行输出6个单词和词频
35     std::vector<std::pair<std::string, size_t> > v;
36     copy(mp.begin(),
37             mp.end(),
38             std::back_inserter<std::vector<std::pair<std::string, size_t> > >(v));
39     sort(v.begin(), v.end(), cmp);
40     std::stringstream ss{};
41     for(const auto& item : v){
42         ss << std::right << std::setw(15) << item.first << " : " << std::left << std::setw(5) << item.second;
43         if(++cnt % perline == 0){
44             ss << std::endl;
45         } 
46     }
47     ss << std::endl;
48     std::ofstream ofs{};
49     ofs.open("./res.txt", std::ofstream::out | std::ofstream::trunc );//结果输出到res.txt中
50     ofs << ss.str();
51     ofs.close();
52     return 0;
53 }