IF911 2021-05-21 23:39 采纳率: 66.7%
浏览 110
已采纳

C++按词频降序排列输出

在已完成单词次数统计的C++程序的基础上,按词频降序排列输出

已有程序:

#include<algorithm>
#include<cctype>
#include<fstream>
#include<iostream>
#include<map>
#include<set>
#include<sstream>
#include<string>
#include<vector>

using namespace std;

 

// Reads "文本.txt", lower-cases ASCII letters, treats punctuation as word
// separators, counts how often each word occurs in the whole file and prints
// the counts as "word=count" lines in the map's (alphabetical) key order.
//
// Returns 0 on success and 1 when the input file cannot be opened.
int main()
{
    std::ifstream myfile("文本.txt", std::ios::in);
    if (!myfile.is_open())
    {
        std::cout << "Some errors" << std::endl;
        return 1; // nothing to count without the input file
    }

    // Declared outside the line loop so the counts cover the whole file
    // instead of being reset and reprinted for every line.
    std::map<std::string, int> m1;

    std::string str;
    while (std::getline(myfile, str))
    {
        for (char& ch : str)
        {
            if (ch >= 'A' && ch <= 'Z')
            {
                // Convert upper-case ASCII letters to lower case.
                ch = static_cast<char>(ch - 'A' + 'a');
            }
            else if (std::ispunct(static_cast<unsigned char>(ch)))
            {
                // Turn punctuation into a space. The cast is required:
                // passing a negative char to ispunct is undefined behaviour.
                ch = ' ';
            }
        }

        // operator>> skips runs of whitespace, so the consecutive spaces left
        // behind by replaced punctuation never produce an empty "word".
        std::stringstream ss(str);
        std::string s;
        while (ss >> s)
        {
            ++m1[s];
        }
    }

    std::cout << "单词次数统计:" << std::endl;
    for (std::map<std::string, int>::const_iterator word = m1.begin(); word != m1.end(); ++word)
    {
        std::cout << word->first << "=" << word->second << '\n';
    }

    // myfile is closed by its destructor when main returns.
    return 0;
}

 

 

 

 

文本附件:

Our best estimates show there are over 7 billion people on the planet and 300 billion stars in the Milky Way galaxy. By comparison, the adult human body contains 37 trillion cells. To determine the function and relationship among these cells is a monumental undertaking. Many areas of human health would be impacted if we better understand cellular activity. A problem with this much data is a great match for the Kaggle community.Just as the Human Genome Project mapped the entirety of human DNA, the Human BioMolecular Atlas Program is a major endeavor. Sponsored by the National Institutes of Health , HuBMAP is working to catalyze the development of a framework for mapping the human body at a level of glomeruli functional tissue units for the first time in history. Hoping to become one of the world's largest collaborative biological projects, HuBMAP aims to be an open map of the human body at the cellular level.This competition, "Hacking the Kidney," starts by mapping the human kidney at single cell resolution.

  • 写回答

1条回答 默认 最新

  • benbenli 2021-05-22 02:30
    关注

     

    把 map 里的键值对存到 vector 里,按值降序排序后输出。调用 sort 算法时,需要定义一个比较类(函数对象)。

    把标点符号替换成空格后会出现连续空格,因此按空格切分单词时,要过滤掉空串。

    我用的网上编译器,不好读文件,所以把你的测试字符串放在常量里。

    代码调试通过了。如下:

    #include<iostream>
    #include<string>
    #include<fstream>
    #include<vector>
    #include<algorithm>
    #include<sstream>
    #include<map>
    #include<set>
    
    using namespace std;
    
    // Comparison functor for (word, count) pairs: returns true when x has a
    // strictly larger count than y, so sorting with it yields descending
    // counts. Pairs with equal counts compare as equivalent.
    struct ValueComparator {
        // const-qualified so the functor can also be invoked through a const
        // object or const reference.
        bool operator()(const std::pair<std::string, int>& x,
                        const std::pair<std::string, int>& y) const {
            return x.second > y.second;
        }
    } comparator;
    
    // Counts how often each word occurs in an embedded sample text and prints
    // the words as "word=count" lines ordered by descending count.
    //
    // The text is hard-coded here; the earlier file-based input (an ifstream
    // on "文本.txt" read line by line) was disabled in favour of this literal.
    int main() {
        std::string str = "Our best estimates show there are over 7 billion people on the planet and 300 billion stars in the Milky Way galaxy. By comparison, the adult human body contains 37 trillion cells. To determine the function and relationship among these cells is a monumental undertaking. Many areas of human health would be impacted if we better understand cellular activity. A problem with this much data is a great match for the Kaggle community.Just as the Human Genome Project mapped the entirety of human DNA, the Human BioMolecular Atlas Program is a major endeavor. Sponsored by the National Institutes of Health , HuBMAP is working to catalyze the development of a framework for mapping the human body at a level of glomeruli functional tissue units for the first time in history. Hoping to become one of the world's largest collaborative biological projects, HuBMAP aims to be an open map of the human body at the cellular level.This competition, \"Hacking the Kidney,\" starts by mapping the human kidney at single cell resolution.";

        for (char& ch : str) {
            if (ch >= 'A' && ch <= 'Z') {
                // Convert upper-case ASCII letters to lower case.
                ch = static_cast<char>(ch - 'A' + 'a');
            } else if (std::ispunct(static_cast<unsigned char>(ch))) {
                // Turn punctuation into a space. The cast is required:
                // passing a negative char to ispunct is undefined behaviour.
                ch = ' ';
            }
        }

        // Split on single spaces and count each word. Replaced punctuation
        // leaves runs of spaces behind, which yield empty tokens; skip those.
        std::map<std::string, int> m1;
        std::stringstream ss(str);
        std::string s;
        while (std::getline(ss, s, ' ')) {
            if (!s.empty()) {
                ++m1[s];
            }
        }

        // Copy the map's key/value pairs into a vector (in key order) so they
        // can be re-ordered by count; a map itself is always ordered by key.
        std::vector<std::pair<std::string, int> > pairs(m1.begin(), m1.end());
        std::sort(pairs.begin(), pairs.end(), comparator);

        std::cout << "单词次数统计:" << std::endl;
        for (const auto& entry : pairs) {
            std::cout << entry.first << "=" << entry.second << '\n';
        }

        return 0;
    }
    
    
    // Output
    单词次数统计:
    the=17
    human=8
    of=7
    a=6
    is=4
    to=4
    for=3
    by=3
    body=3
    at=3
    cellular=2
    cells=2
    health=2
    billion=2
    this=2
    hubmap=2
    level=2
    mapping=2
    in=2
    and=2
    kidney=2
    be=2
    much=1
    planet=1
    people=1
    s=1
    over=1
    our=1
    open=1
    one=1
    on=1
    major=1
    national=1
    many=1
    map=1
    mapped=1
    monumental=1
    milky=1
    match=1
    these=1
    would=1
    world=1
    working=1
    with=1
    we=1
    way=1
    units=1
    undertaking=1
    understand=1
    trillion=1
    tissue=1
    time=1
    problem=1
    there=1
    starts=1
    stars=1
    sponsored=1
    single=1
    show=1
    impacted=1
    resolution=1
    relationship=1
    projects=1
    project=1
    program=1
    become=1
    contains=1
    competition=1
    comparison=1
    community=1
    collaborative=1
    cell=1
    catalyze=1
    biomolecular=1
    biological=1
    better=1
    best=1
    data=1
    atlas=1
    as=1
    areas=1
    are=1
    an=1
    among=1
    aims=1
    adult=1
    activity=1
    7=1
    37=1
    galaxy=1
    kaggle=1
    just=1
    institutes=1
    300=1
    if=1
    hoping=1
    history=1
    hacking=1
    great=1
    glomeruli=1
    genome=1
    largest=1
    functional=1
    function=1
    framework=1
    first=1
    estimates=1
    entirety=1
    endeavor=1
    dna=1
    development=1
    determine=1
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥50 易语言把MYSQL数据库中的数据添加至组合框
  • ¥20 求数据集和代码#有偿答复
  • ¥15 关于下拉菜单选项关联的问题
  • ¥20 java-OJ-健康体检
  • ¥15 rs485的上拉下拉,不会对a-b<-200mv有影响吗,就是接受时,对判断逻辑0有影响吗
  • ¥15 使用phpstudy在云服务器上搭建个人网站
  • ¥15 应该如何判断含间隙的曲柄摇杆机构,轴与轴承是否发生了碰撞?
  • ¥15 vue3+express部署到nginx
  • ¥20 搭建pt1000三线制高精度测温电路
  • ¥15 使用Jdk8自带的算法,和Jdk11自带的加密结果会一样吗,不一样的话有什么解决方案,Jdk不能升级的情况