代码如下
import java.io.*; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import java.io.Reader; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class Hello { private static final String PATH_OF_FILE = "D:/authors/"; // 待索引文件的目录 private static final String PATH_OF_INDEX = "D:/a-index/"; // 存放索引文件的目录2 /** * 测试时,要在D:/img/文件夹中准备几个包含内容的文件(比如txt格式的) * 然后先执行createIndex()方法,再执行searchFile()方法,最后观看控制台输出即可 */ public static void main(String[] args) { Hello h = new Hello(); String aa,bb; for (int j=44711;j<52601;j++){ String filePath1 = "D:/text/"+j+"-1.txt"; String filePath2 = "D:/text/"+j+"-2.txt"; // System.out.println(filePath1); // System.out.println(filePath2); aa=read(filePath1); bb=read(filePath2); // System.out.println(aa); // System.out.println(bb); h.testSearch(aa,j); System.out.println("#################"); h.testSearch(bb,j); System.out.println("#################"); // "res/"; // h.createIndex(); //先跑这一条,然后把String aa,bb一下到这一条的先注释掉,然后再把这一条注释掉,恢复上面的开始跑。 } private void createIndex();{ Directory directory = null;//指定索引被保存的位置 IndexWriter writer = null;//通过IndexWriter写索引 Document doc = null;//我们索引的有可能是一段文本or数据库中的一张表 try { // 这里是在硬盘上"D:/index/"文件夹中创建索引 directory = FSDirectory.open(new File(PATH_OF_INDEX)); // 这里通过IndexWriterConfig()构造方法的Version.LUCENE_41参数值指明索引所匹配的版本号,并使用了Lucene的标准分词器 writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_41, new StandardAnalyzer(Version.LUCENE_41))); for (File file : new File(PATH_OF_FILE).listFiles()) { doc = new Document(); doc.add(new Field("content", new FileReader(file))); doc.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("filePath", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.addDocument(doc); } } catch (Exception e) { System.out.println("创建索引的过程中遇到异常,堆栈轨迹如下"); e.printStackTrace(); } finally { if (null != writer) { try { writer.close(); // IndexWriter在用完之后一定要关闭 } catch (IOException ce) { System.out.println("关闭IndexWriter时遇到异常,堆栈轨迹如下"); ce.printStackTrace(); } } } } public static String read(String filePath) { // 读取txt内容为字符串 StringBuffer txtContent = new StringBuffer(); // 每次读取的byte数 byte[] b = new byte[8 * 1024]; InputStream in = null; try { // 文件输入流 in = new FileInputStream(filePath); while (in.read(b) != -1) { // 字符串拼接 txtContent.append(new String(b)); } // 关闭流 in.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (in != null) { try { in.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } return txtContent.toString(); } private String getContentFromFile(File myFile) { StringBuffer sb = new StringBuffer(); if (!myFile.exists()) { return ""; } try { BufferedReader in = new BufferedReader(new FileReader(myFile)); String str; while ((str = in.readLine()) != null) { sb.append(str); } in.close(); } catch (IOException e) { e.getStackTrace(); } return sb.toString(); } /** * 搜索文件 * * @see 1、创建Directory * @see 2、创建IndexReader * @see 3、根据IndexReader创建IndexSearcher * @see 4、创建搜索的Query * @see 5、根据searcher搜索并返回TopDocs * @see 6、根据TopDocs获取ScoreDoc对象 * @see 7、根据searcher和ScoreDoc对象获取具体的Document对象 * @see 8、根据Document对象获取需要的值 * @see 9、关闭IndexReader */ @SuppressWarnings("deprecation") // private void searchFile() { // IndexReader reader = null; // try { // reader = IndexReader.open(FSDirectory.open(new File(PATH_OF_INDEX))); // IndexSearcher searcher = new IndexSearcher(reader); // // 创建基于Parser搜索的Query,创建时需指定其"搜索的版本,默认搜索的域,分词器"....这里的域指的是创建索引时Field的名字 // QueryParser parser = new QueryParser(Version.LUCENE_41, "content", new StandardAnalyzer(Version.LUCENE_41)); // Query query = parser.parse(" let her go no she had to confront it was a gift of something that would not irk her for the rest of eternity and yet a curse of more pain the choice of selfishness or selflessness when both loosed their boundaries and came together and that hurt more than anything that hurt the scorch of disappointment denial anxiety fear there was too much of it and yet she did not want him dead it had taken her time so much time to realize that but she did not wish death upon anyone having seen too much of it as the clock ticked away she had never wanted anyone to truly die gilbert beilschmidt though she hated him above all was no exception but in times such as now when war tore the world apart when history decided to twist itself about when the nations crumbled how could death not be a blessing at this point she could almost want them all to be dead roderich gilbert veneziano ludwig and yet she would watch with relief on the day that each of them would pick themselves up and charge into the distance with life she hated war she blinked rapidly staring always watching though she ached she let the next batter of coughing come to pass shock it overcame her then charging upward body screaming in protest she could only slap him with strength that came by habit before wrapping both arms around him wondering how and why in hell get off me woman he protested attempting to flail she saw now that the tin had been stripped away or at least most of it as she felt him flesh and cloth and hair from the unhealthy pallor of his skin from the dried hard uniform to the pinked and browned hair silver still shining boldly beneath the blood he was unusually warm hot even when she felt him clearly the cold had been warded off though his hands still felt like ice remembering the documents found in ludwig s office she realized that it was fever but his head feeling about the neck she knew it was there desperately feeling just to know how cruel death was she felt it finding no seam with a thin cry of joy and pain she kissed it squeezing him till he was strangled you re alive she breathed even as garbled speech flew from his lips she was going into spasms this was too unreal and yet it was he was so so solid his eyes were ruby no petty pink glaze upon the kingdom of prussia s gaze no death frosting them over clinging to him something told her that she would never let go a stupid little piece of wishful thought no doubt never let go how ridiculous and yet she did not release him as he calmed between her limbs resigning with guilty pleasure rigid with denial she could not even loosen about him there was nothing but the jail scene playing and replaying itself in her mind letting their hands drop parting and then the sickening sound of the guillotine s strike water red pink silver white black the swastika rose over all as if to mock where in isten s name have you been she muttered against his shoulder before lifting her head away hastily still though she did not let go she wanted to savor it as the vampire feasts on blood the scent of metal clung to him get off was the obstinate reply she lifted her hand and slapped him again not knowing what else to do ow where ve you been she said again you were supposed to be dead well i m so freaking not i saw your head she said voice reaching a higher octave it was a fake impossible she declared eyes widening before she finished it came to her suddenly the way it had fallen the sudden hush of death the blindfold of stars the blood she had seen it touched it waded in the water it buried itself in she had suffered with him at death you were dead she whispered it gave way to coughing and he only watched as she did held onto her and she held onto him i heard the thud if you were dead he grunted when she looked up his eyes were a bitter red the head was fake they held me down scowling he looked away admittance was ignominy they held me down up there there were lots of them hungary scheiße there was a lot of back up that we didn t notice i couldn t beat off all of them and my gun ran out his voice faded still his scowl deepened and he finished to his own satisfaction their leader was fun to kill though he laughed cruelly forcefully he found no love for death though he had seen enough of it to mind less elizaveta decided against shaking her head it could only be true she had known him for years since they were incomplete nations just children running about with swords of both wood and steel just as he had sifted out her identity from that of elisabeth wertheim s it was with ease that she found that this man in front of her was real as nations in human form they were allowed the same detached senses as those immortal stupid she scolded herself knowing not to trust so easily but it was what it was she noticed the shards of tin still upon his flesh then of course he could not have been thrown out so easily time was needed so much time some things did not change but could they trapped him shackled his person still not releasing he was too real too solid to let go she let the "); // 指定==>搜索域为content(即上一行代码指定的"content")中包含"java"的文档 // TopDocs tds = searcher.search(query, 10); // 第二个参数指定搜索后显示的条数,若查到5条则显示为5条,查到15条则只显示10条 // ScoreDoc[] sds = tds.scoreDocs; // TopDocs中存放的并不是我们的文档,而是文档的ScoreDoc对象 // for (ScoreDoc sd : sds) { // ScoreDoc对象相当于每个文档的ID号,我们就可以通过ScoreDoc来遍历文档 // Document doc = searcher.doc(sd.doc); // sd.doc得到的是文档的序号 // System.out.println(doc.get("fileName") + "[" + doc.get("filePath") + "]"); // 输出该文档所存储的信息 // } // } // catch (Exception e) { // System.out.println("搜索文件的过程中遇到异常,堆栈轨迹如下"); // e.printStackTrace(); // } // finally { // if (null != reader) { // try { // reader.close(); // } // catch (IOException e) { // System.out.println("关闭IndexReader时遇到异常,堆栈轨迹如下"); // e.printStackTrace(); // } // } // } // } public void testSearch(String aa,int i) { IndexReader reader = null; try { // 索引目录对象 // Directory directory = FSDirectory.open(new File(PATH_OF_INDEX)); reader = IndexReader.open(FSDirectory.open(new File(PATH_OF_INDEX))); // 索引读取工具 // 索引搜索工具 IndexSearcher searcher = new IndexSearcher(reader); BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); // 创建查询解析器,两个参数:默认要查询的字段的名称,分词器 QueryParser parser = new QueryParser(Version.LUCENE_41, "content", new StandardAnalyzer(Version.LUCENE_41)); //QueryParser parser = new QueryParser("title", new IKAnalyzer()); // 创建查询对象 String a=aa; Query query = parser.parse(a); // 搜索数据,两个参数:查询条件对象要查询的最大结果条数 // 返回的结果是 按照匹配度排名得分前N名的文档信息(包含查询到的总条数信息、所有符合条件的文档的编号信息)。 TopDocs topDocs = searcher.search(query, 10); // 获取总条数 System.out.println("本次搜索共找到" + topDocs.totalHits + "条数据"); // 获取得分文档对象(ScoreDoc)数组.SocreDoc中包含:文档的编号、文档的得分 ScoreDoc[] scoreDocs = topDocs.scoreDocs; BufferedWriter out = new BufferedWriter(new FileWriter("D:\\score\\score"+"-"+i+".txt",true)); for (ScoreDoc scoreDoc : scoreDocs) { // 取出文档编号 int docID = scoreDoc.doc; //Document doc = searcher.doc(sd.doc); // sd.doc得到的是文档的序号 // 根据编号去找文档 Document doc = reader.document(docID); System.out.println(doc.get("fileName")+"得分: " + scoreDoc.score); // 输出该文档所存储的信息 try { out.write(doc.get("fileName")+"得分: " + scoreDoc.score+"\n"); } catch (IOException e) { } //System.out.println("id: " + doc.get("id")); // System.out.println("title: " + doc.get("title")); // 取出文档得分 // System.out.println(); } out.write("\n"); out.close(); System.out.println("文件创建成功!"); } catch (Exception e) { System.out.println("搜索文件的过程中遇到异常,堆栈轨迹如下"); e.printStackTrace(); } finally { if (null != reader) { try { reader.close(); } catch (IOException e) { System.out.println("关闭IndexReader时遇到异常,堆栈轨迹如下"); e.printStackTrace(); } } } } }