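// Problem description: the program runs correctly when test.txt contains only
// a dozen or so lines of text, but it fails once the file grows to a few hundred lines.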
#include <iostream>
#include <unordered_map>
#include <vector>
#include <cctype>
#include <fstream>
#include <string>
#include <iterator>
#include <sstream>
#include <cstring>
#include <algorithm>
#include <iomanip>
using namespace std;
/**
 * @brief Tallies adjacent-letter pairs (bigrams) found in "test.txt".
 *
 * The file is read line by line and each line is split on whitespace. For
 * every token, non-alphabetic characters are dropped and the remaining
 * letters are upper-cased; each pair of neighbouring letters in the result is
 * then counted.
 *
 * @param m   Map from a two-letter upper-case string to its occurrence count;
 *            existing entries are incremented, missing ones are created.
 * @param sum Incremented once for every bigram counted.
 *
 * If "test.txt" cannot be opened, a message is written to std::cerr and both
 * arguments are left untouched.
 */
void count(std::unordered_map<std::string, int> &m, int &sum) {
    std::ifstream fin("test.txt");
    if (!fin) {
        // Without this guard a failed open never sets eofbit and the read
        // loop below would have nothing to terminate it.
        std::cerr << "count: cannot open test.txt" << '\n';
        return;
    }

    std::string line;
    // Testing the stream state returned by getline (rather than eof() after
    // the read) keeps a final line that has no trailing newline.
    while (std::getline(fin, line)) {
        std::istringstream ss(line);
        std::string text;
        // Same reasoning: extracting the last token of a line sets eofbit,
        // but the extraction itself succeeded and the token must be counted.
        while (ss >> text) {
            std::string word;
            // Go through unsigned char: passing a negative char to
            // isalpha/toupper is undefined behaviour.
            for (unsigned char c : text) {
                if (std::isalpha(c)) {
                    word.push_back(static_cast<char>(std::toupper(c)));
                }
            }
            // "i + 1 < size()" instead of "i < size() - 1": size() is
            // unsigned, so for a token without letters "size() - 1" wraps
            // around to a huge value and the loop reads out of bounds.
            for (std::string::size_type i = 0; i + 1 < word.size(); ++i) {
                ++m[word.substr(i, 2)];  // operator[] creates missing keys with count 0
                ++sum;
            }
        }
    }
}
int main(){
unordered_map<string, int> alphabet_freq;
int sum = 1;
count(alphabet_freq, sum); //统计频率
vector<pair<string, double>> v; // 将关联容器数据存储于顺序容器排序
for(auto it = alphabet_freq.cbegin(); it != alphabet_freq.cend(); ++it){
v.push_back(make_pair(it->first, double(it->second) / sum));
}
//按second降序
sort(v.begin(), v.end(), [](const pair<string, double> &lhs, const pair<string, double> &rhs){
return lhs.second > rhs.second; });
for(auto it = v.cbegin(); it != v.cend(); ++it){
if(it->second < 0.01) continue;
cout << it->first << " : " << it->second <<endl;
}
return 0;
}