Lucene 3.3 如题:比如说,创建一个字符串“test_java”的索引,但是用“_test”来搜索,是搜不出
这条索引记录的。请问,如何在创建索引的时候,把一些特殊符号(@#$%^&*)转化成我想要的字符串,如空格等?听说是可以调
用分词器里面的方法过滤掉的。但是具体要怎么做?我看了很久的 API,还是一头雾水。希望大家帮帮我。
测试代码:
package test;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
/**
 * Small Lucene 3.3 experiment: index one document, look it up with a
 * {@link TermQuery}, and print the tokens an analyzer produces.
 *
 * <p>NOTE(review): {@link StandardAnalyzer} at {@code LUCENE_33} presumably follows
 * the UAX#29 word-break rules, under which {@code '_'} joins adjacent letters. If so,
 * {@code "java_Test"} is indexed as the single term {@code "java_test"} and a
 * {@code TermQuery} for {@code "_test"} cannot match. Run {@link #testAnalyzer()} to
 * confirm the emitted terms.
 *
 * <p>NOTE(review): this class shares its simple name with the imported
 * {@code org.junit.Test} annotation, which javac rejects for a top-level type.
 * The class name is left unchanged here; rename the class or drop the import to compile.
 */
public class Test {
    /** Lucene compatibility version shared by every analyzer and writer in this class. */
    private static final Version MATCH_VERSION = Version.LUCENE_33;

    /** Filesystem location of the index written by testCreate and read by testSearch. */
    private static final String INDEX_PATH = "D:\\index";

    /**
     * Adds one document with a stored, analyzed {@code name} field to the index
     * and optimizes it.
     *
     * @throws IOException if the index directory cannot be opened or written
     */
    public void testCreate() throws IOException {
        Analyzer analyzer = new StandardAnalyzer(MATCH_VERSION);
        Directory dir = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexWriterConfig iwc = new IndexWriterConfig(MATCH_VERSION, analyzer);
            // iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // discard any existing index first
            IndexWriter indexWriter = new IndexWriter(dir, iwc);
            try {
                Document document = new Document();
                document.add(new Field("name", "java_Test", Store.YES, Index.ANALYZED));
                indexWriter.addDocument(document, analyzer);
                indexWriter.optimize();
            } finally {
                // Always close the writer so a failure does not leave it (and its lock) open.
                indexWriter.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Runs a {@link TermQuery} for the term {@code "_test"} on the {@code name} field
     * and prints the hit count followed by the stored {@code name} of every hit.
     *
     * @throws Exception if the index cannot be opened or searched
     */
    public void testSearch() throws Exception {
        Directory dir = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexSearcher indexSearcher = new IndexSearcher(dir);
            try {
                Query query = new TermQuery(new Term("name", "_test"));
                TopDocs topDocs = indexSearcher.search(query, Integer.MAX_VALUE);
                System.out.println(topDocs.totalHits);
                // Iterate the hits actually returned rather than indexing by totalHits.
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    System.out.println(document.get("name"));
                }
                System.out.println(" ------------- ");
            } finally {
                indexSearcher.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Prints the analyzer class in use and each term it emits for {@code "test_java"}.
     *
     * @throws Exception if tokenization fails
     */
    @Test
    public void testAnalyzer() throws Exception {
        String text = "test_java";
        Analyzer analyzer = new StandardAnalyzer(MATCH_VERSION);
        System.out.println("当前使用的分词器:" + analyzer.getClass());
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
        try {
            // addAttribute returns the shared attribute instance; fetch it once, not per token.
            TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
            // Consumer workflow: reset() -> incrementToken()* -> end() -> close().
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(termAttribute.term());
            }
            tokenStream.end();
        } finally {
            tokenStream.close();
        }
    }
}