IF911 2021-05-21 23:39 采纳率: 66.7%
浏览 111
已采纳

C++按词频降序排列输出

在已完成单词次数统计的C++程序的基础上,按词频降序排列输出

已有程序:

#include<iostream>

#include<string>

#include<fstream>

#include<vector>

#include<algorithm>

#include<sstream>

#include<map>

#include<set>

using namespace std;

 

// Counts how often each word occurs in "文本.txt" and prints one
// "word=count" line per distinct word, in the map's key order.
//
// Each line of the file is normalised before counting: ASCII upper-case
// letters are lowered, punctuation is replaced by a space, and the line is
// then split on single spaces.
//
// Returns 0 on success, 1 when the input file cannot be opened.
int main()
{
    ifstream myfile("文本.txt", ios::in);
    if (!myfile.is_open())
    {
        cout << "Some errors" << endl;
        // Nothing can be read from a stream that failed to open, so stop
        // here instead of falling through to the counting code.
        return 1;
    }

    // Declared outside the read loop so the counts accumulate over the whole
    // file instead of being reset and printed again for every line.
    map<string, int> m1;

    string str;
    while (getline(myfile, str))
    {
        // Lower-case ASCII capitals ('a' - 'A' == 32).
        for (string::size_type a = 0; a < str.length(); a++)
        {
            if (str[a] >= 'A' && str[a] <= 'Z')
            {
                str[a] = str[a] + 32;
            }
        }

        // Turn punctuation into spaces. The cast matters: passing a negative
        // char value (any non-ASCII byte) to ispunct is undefined behaviour.
        for (string::size_type b = 0; b < str.length(); b++)
        {
            if (ispunct(static_cast<unsigned char>(str[b])))
                str[b] = ' ';
        }

        // Split on spaces and count. Replacing punctuation leaves runs of
        // consecutive spaces, which yield empty tokens; skip those so the
        // empty string is not reported as a word.
        stringstream ss(str);
        string s;
        while (getline(ss, s, ' '))
        {
            if (!s.empty())
                m1[s] += 1;
        }
    }

    cout << "单词次数统计:" << endl;
    for (map<string, int>::iterator word = m1.begin(); word != m1.end(); ++word)
    {
        cout << word->first << "=" << word->second << endl;
    }

    myfile.close();
    return 0;
}

 

 

 

 

文本附件:

Our best estimates show there are over 7 billion people on the planet and 300 billion stars in the Milky Way galaxy. By comparison, the adult human body contains 37 trillion cells. To determine the function and relationship among these cells is a monumental undertaking. Many areas of human health would be impacted if we better understand cellular activity. A problem with this much data is a great match for the Kaggle community.Just as the Human Genome Project mapped the entirety of human DNA, the Human BioMolecular Atlas Program is a major endeavor. Sponsored by the National Institutes of Health , HuBMAP is working to catalyze the development of a framework for mapping the human body at a level of glomeruli functional tissue units for the first time in history. Hoping to become one of the world's largest collaborative biological projects, HuBMAP aims to be an open map of the human body at the cellular level.This competition, "Hacking the Kidney," starts by mapping the human kidney at single cell resolution.

  • 写回答

1条回答 默认 最新

  • benbenli 2021-05-22 02:30
    关注

     

    把 map 里的键值对存到 vector 里,按值(词频)降序排序后输出。调用 sort 算法时,需要提供一个比较器(这里用的是一个定义了 operator() 的比较类,也可以用函数或 lambda)。

    从文件里读出单词时,要过滤掉空串:把标点符号替换成空格后会出现连续空格,按空格切分时就会得到空串。

    我用的是在线编译器,不方便读取文件,所以把你的测试文本直接放在了字符串常量里。

    代码调试通过了。如下:

    #include<iostream>
    #include<string>
    #include<fstream>
    #include<vector>
    #include<algorithm>
    #include<sstream>
    #include<map>
    #include<set>
    
    using namespace std;
    
    // Ordering predicate for (word, count) pairs that ranks larger counts
    // first. Only the count is compared, so pairs with equal counts are
    // treated as equivalent and their relative order is left to the caller's
    // sorting algorithm.
    struct ValueComparator {
        // Returns true when x's count is strictly greater than y's.
        // Declared const so the predicate can also be invoked through a const
        // object or const reference.
        bool operator()(const std::pair<std::string, int>& x,
                        const std::pair<std::string, int>& y) const {
            return x.second > y.second;
        }
    } comparator;
    
    // Word-frequency demo: normalises the embedded sample text, counts each
    // distinct word, and prints "word=count" lines ordered by descending count.
    int main() {
        // The sample text is embedded as a literal; the original file-reading
        // code (ifstream on "文本.txt" plus a getline loop) is disabled here.
        string str = "Our best estimates show there are over 7 billion people on the planet and 300 billion stars in the Milky Way galaxy. By comparison, the adult human body contains 37 trillion cells. To determine the function and relationship among these cells is a monumental undertaking. Many areas of human health would be impacted if we better understand cellular activity. A problem with this much data is a great match for the Kaggle community.Just as the Human Genome Project mapped the entirety of human DNA, the Human BioMolecular Atlas Program is a major endeavor. Sponsored by the National Institutes of Health , HuBMAP is working to catalyze the development of a framework for mapping the human body at a level of glomeruli functional tissue units for the first time in history. Hoping to become one of the world's largest collaborative biological projects, HuBMAP aims to be an open map of the human body at the cellular level.This competition, \"Hacking the Kidney,\" starts by mapping the human kidney at single cell resolution.";

        // Single normalisation pass: ASCII capitals become lower case and
        // punctuation becomes a space. A capital letter is never punctuation,
        // so handling both cases in one loop matches two separate passes.
        for (char& ch : str) {
            if (ch >= 'A' && ch <= 'Z') {
                ch = ch + 32;
            } else if (ispunct(ch)) {
                ch = ' ';
            }
        }

        // Split on single spaces and tally every non-empty token.
        map<string, int> frequency;
        stringstream tokens(str);
        for (string token; getline(tokens, token, ' ');) {
            if (!token.empty()) {
                ++frequency[token];
            }
        }

        // Copy the (word, count) entries out of the map in key order, then
        // rank them with the file-level comparator (largest count first).
        vector<pair<string, int> > ranked(frequency.begin(), frequency.end());
        sort(ranked.begin(), ranked.end(), comparator);

        cout << "单词次数统计:" << endl;
        for (const auto& entry : ranked) {
            cout << entry.first << "=" << entry.second << endl;
        }

        return 0;
    }
    
    
    // Output
    单词次数统计:
    the=17
    human=8
    of=7
    a=6
    is=4
    to=4
    for=3
    by=3
    body=3
    at=3
    cellular=2
    cells=2
    health=2
    billion=2
    this=2
    hubmap=2
    level=2
    mapping=2
    in=2
    and=2
    kidney=2
    be=2
    much=1
    planet=1
    people=1
    s=1
    over=1
    our=1
    open=1
    one=1
    on=1
    major=1
    national=1
    many=1
    map=1
    mapped=1
    monumental=1
    milky=1
    match=1
    these=1
    would=1
    world=1
    working=1
    with=1
    we=1
    way=1
    units=1
    undertaking=1
    understand=1
    trillion=1
    tissue=1
    time=1
    problem=1
    there=1
    starts=1
    stars=1
    sponsored=1
    single=1
    show=1
    impacted=1
    resolution=1
    relationship=1
    projects=1
    project=1
    program=1
    become=1
    contains=1
    competition=1
    comparison=1
    community=1
    collaborative=1
    cell=1
    catalyze=1
    biomolecular=1
    biological=1
    better=1
    best=1
    data=1
    atlas=1
    as=1
    areas=1
    are=1
    an=1
    among=1
    aims=1
    adult=1
    activity=1
    7=1
    37=1
    galaxy=1
    kaggle=1
    just=1
    institutes=1
    300=1
    if=1
    hoping=1
    history=1
    hacking=1
    great=1
    glomeruli=1
    genome=1
    largest=1
    functional=1
    function=1
    framework=1
    first=1
    estimates=1
    entirety=1
    endeavor=1
    dna=1
    development=1
    determine=1
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 如何用Labview在myRIO上做LCD显示?(语言-开发语言)
  • ¥15 Vue3地图和异步函数使用
  • ¥15 C++ yoloV5改写遇到的问题
  • ¥20 win11修改中文用户名路径
  • ¥15 win2012磁盘空间不足,c盘正常,d盘无法写入
  • ¥15 用土力学知识进行土坡稳定性分析与挡土墙设计
  • ¥70 PlayWright在Java上连接CDP关联本地Chrome启动失败,貌似是Windows端口转发问题
  • ¥15 帮我写一个c++工程
  • ¥30 Eclipse官网打不开,官网首页进不去,显示无法访问此页面,求解决方法
  • ¥15 关于smbclient 库的使用