Fairy-HE 2017-10-26 00:48 采纳率: 20%
浏览 963
已结题

C#给这个倒排索引添加一个输出方法

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;

namespace InvertedIndex
{
/// <summary>
/// A single vocabulary entry of the inverted index: the word itself plus
/// a counter of how many documents it has been seen in.
/// </summary>
internal class Term
{
    /// <summary>The token text exactly as it was split out of a document.</summary>
    public string word;

    /// <summary>
    /// Document frequency: set to 1 when the term is first created and
    /// incremented each time a new document id is added to its posting list.
    /// </summary>
    public int doc_freq;
}
/// <summary>
/// A simple in-memory inverted index built from every file in a directory.
/// Each distinct word maps to the list of document ids (posting list) of the
/// files that contain it.
/// </summary>
class invertedIndex
{
    // Term -> posting list (ids of the documents that contain the term).
    Dictionary<Term, List<int>> index = new Dictionary<Term, List<int>>();

    // Document id -> path of the file that was indexed under that id.
    Dictionary<int, string> mapping = new Dictionary<int, string>();

    // Word -> its Term object. Gives a hash lookup per word instead of
    // scanning every key of `index` for each token.
    Dictionary<string, Term> terms = new Dictionary<string, Term>();

    /// <summary>
    /// Builds the index from all files directly inside <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Directory whose files are read and indexed.</param>
    public invertedIndex(string path)
    {
        init(path);
    }

    /// <summary>
    /// Reads every file in <paramref name="path"/>, assigns it the next
    /// document id, splits its content into words and records each word's
    /// posting list and document frequency.
    /// </summary>
    /// <param name="path">Directory whose files are read and indexed.</param>
    void init(string path)
    {
        string[] files = Directory.GetFiles(path);
        foreach (string file in files)
        {
            // Ids are 1-based and strictly increasing in processing order.
            int doc_id = mapping.Count + 1;
            mapping.Add(doc_id, file);

            // Dispose the reader so the file handle is released immediately.
            string content;
            using (StreamReader sr = new StreamReader(file))
            {
                content = sr.ReadToEnd();
            }

            string[] words = content.Split(new char[] { ' ', ',', ';', '.', '!' }, StringSplitOptions.RemoveEmptyEntries);
            foreach (string word in words)
            {
                Term term;
                if (!terms.TryGetValue(word, out term))
                {
                    // First occurrence of this word anywhere in the corpus.
                    term = new Term();
                    term.word = word;
                    term.doc_freq = 1;

                    List<int> posting = new List<int>();
                    posting.Add(doc_id);

                    terms.Add(word, term);
                    index.Add(term, posting);
                }
                else
                {
                    List<int> posting = index[term];

                    // Posting lists are never empty and doc ids only grow, so
                    // the current document can only be the last entry.
                    if (posting[posting.Count - 1] != doc_id)
                    {
                        posting.Add(doc_id);

                        // Term uses reference equality as a dictionary key, so
                        // updating the counter in place keeps the key valid;
                        // no Remove/Add round trip is needed.
                        term.doc_freq++;
                    }
                }
            }
        }
    }

    /// <summary>
    /// Writes the index to the console: first the document id to file path
    /// table, then one line per term with its document frequency and the ids
    /// of the documents that contain it. Terms appear in dictionary
    /// enumeration order, which is not guaranteed to be sorted.
    /// </summary>
    public void output()
    {
        foreach (KeyValuePair<int, string> doc in mapping)
        {
            Console.WriteLine("doc {0}: {1}", doc.Key, doc.Value);
        }

        foreach (KeyValuePair<Term, List<int>> entry in index)
        {
            Console.WriteLine("{0} (doc_freq={1}): {2}",
                entry.Key.word,
                entry.Key.doc_freq,
                string.Join(", ", entry.Value));
        }
    }
}

}

  • 写回答

1条回答

  • threenewbee 2017-10-26 01:03
    关注

    // Print one line per term: word, document frequency, then the ids of the
    // documents containing it. Concatenating the Term or the List<int>
    // directly would only print their type names, so format the members
    // explicitly.
    foreach (var entry in index)
    {
        Console.WriteLine(entry.Key.word + "," + entry.Key.doc_freq + "," + string.Join(" ", entry.Value));
    }

    评论

报告相同问题?

悬赏问题

  • ¥15 如何在scanpy上做差异基因和通路富集?
  • ¥20 关于#硬件工程#的问题,请各位专家解答!
  • ¥15 关于#matlab#的问题:期望的系统闭环传递函数为 $G(s)=\dfrac{\omega_n^2}{s^2+2\zeta\omega_n s+\omega_n^2}$,阻尼系数 $\zeta=0.707$,使系统具有较小的超调量
  • ¥15 FLUENT如何实现在堆积颗粒的上表面加载高斯热源
  • ¥30 截图中的mathematics程序转换成matlab
  • ¥15 动力学代码报错,维度不匹配
  • ¥15 Power query添加列问题
  • ¥50 Kubernetes&Fission&Eleasticsearch
  • ¥15 報錯:Person is not mapped,如何解決?
  • ¥15 c++头文件不能识别CDialog