using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
namespace InvertedIndex
{
/// <summary>
/// A vocabulary entry of the inverted index: one distinct word together with
/// a counter of the documents in which that word has been seen.
/// </summary>
internal class Term
{
    /// <summary>
    /// Document counter for <see cref="word"/>. The index builder in this file
    /// sets it to 1 when the term is created and increments it each time the
    /// word is first encountered in another document.
    /// </summary>
    public int doc_freq;

    /// <summary>The word text exactly as it was tokenized (no case folding).</summary>
    public string word;
}
/// <summary>
/// In-memory inverted index over the files of one directory. Every distinct
/// word found in the files is mapped to the list of ids of the documents
/// (files) that contain it.
/// </summary>
class invertedIndex
{
    // Characters that delimit words. Line breaks and tabs are included so that
    // words on adjacent lines are not fused into a single token.
    private static readonly char[] Separators =
        { ' ', '\t', '\r', '\n', ',', ';', '.', '!' };

    // Term -> posting list: ids of the documents containing the term, in
    // ascending order and without duplicates.
    private readonly Dictionary<Term, List<int>> index = new Dictionary<Term, List<int>>();

    // Word -> its Term, so a token is resolved with one hash lookup instead of
    // a linear scan over index.Keys. String keys compare ordinally, i.e. the
    // same case-sensitive match as the == comparison it replaces.
    private readonly Dictionary<string, Term> termsByWord = new Dictionary<string, Term>();

    // Document id -> file path. Ids start at 1 and follow the order in which
    // Directory.GetFiles returned the files.
    private readonly Dictionary<int, string> mapping = new Dictionary<int, string>();

    /// <summary>
    /// Builds the index from every file directly inside <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Directory whose files are read and indexed.</param>
    public invertedIndex(string path)
    {
        init(path);
    }

    /// <summary>
    /// Reads each file in <paramref name="path"/>, assigns it the next document
    /// id, splits its content into words and records the id in the posting
    /// list of every word it contains.
    /// </summary>
    /// <param name="path">Directory whose files are read and indexed.</param>
    void init(string path)
    {
        string[] files = Directory.GetFiles(path);
        foreach (string file in files)
        {
            int doc_id = mapping.Count + 1;
            mapping.Add(doc_id, file);

            // Dispose the reader as soon as the content is in memory so the
            // file handle is not held until garbage collection.
            string content;
            using (StreamReader sr = new StreamReader(file))
            {
                content = sr.ReadToEnd();
            }

            string[] words = content.Split(Separators, StringSplitOptions.RemoveEmptyEntries);
            foreach (string word in words)
            {
                Term term;
                if (!termsByWord.TryGetValue(word, out term))
                {
                    // First occurrence of this word anywhere in the corpus.
                    term = new Term();
                    term.word = word;
                    term.doc_freq = 1;
                    termsByWord.Add(word, term);
                    index.Add(term, new List<int> { doc_id });
                    continue;
                }

                // Documents are processed one at a time with increasing ids, so
                // the current document is already recorded exactly when it is
                // the last entry of the (never empty) posting list.
                List<int> posting = index[term];
                if (posting[posting.Count - 1] != doc_id)
                {
                    posting.Add(doc_id);
                    // Term is hashed by reference, so it can be updated in
                    // place without removing and re-adding the dictionary key.
                    term.doc_freq++;
                }
            }
        }
    }

    /// <summary>
    /// Placeholder for emitting the index; currently does nothing.
    /// </summary>
    public void output()
    {
    }
}
}