wc8820
wc8820
2011-11-10 11:08

关于 Lucene3.3 特殊符号 (@#$%^&*) 无法索引问题

已采纳

Lucene 3.3,如题:比如说,创建一个字符串“test_java”的索引,但是用“_test”来搜索,是搜不出这条索引记录的。请问,如何在创建索引的时候,把一些特殊符号(@#$%^&*)转化成我想要的字符串,如空格等?听说是可以调用分词器里面的方法过滤掉的,但是具体要怎么做?我看了很久的 API,还是一头雾水。希望大家帮帮我。

测试代码:

package test;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * Small Lucene 3.3 experiment around indexing and searching text that
 * contains an underscore.
 *
 * <ul>
 *   <li>{@link #testCreate()} indexes one document whose {@code name} field
 *       is {@code "java_Test"}.</li>
 *   <li>{@link #testSearch()} looks that index up with a {@link TermQuery}
 *       for the term {@code "_test"} and prints the hits.</li>
 *   <li>{@link #testAnalyzer()} prints the tokens {@link StandardAnalyzer}
 *       emits for {@code "test_java"}.</li>
 * </ul>
 *
 * NOTE(review): this class is called {@code Test} while the file also has a
 * single-type import of {@code org.junit.Test}; that name clash is expected
 * to be rejected by the compiler. Renaming the class (or file) is left to
 * the owner because it changes the public name.
 */
public class Test {

    /** Lucene compatibility version used for every analyzer and writer here. */
    private static final Version MATCH_VERSION = Version.LUCENE_33;

    /** Filesystem location of the index shared by the create and search steps. */
    private static final String INDEX_PATH = "D:\\index";

    /**
     * Adds a single document ({@code name = "java_Test"}, stored and analyzed)
     * to the index at {@link #INDEX_PATH}, then optimizes the index.
     *
     * @throws IOException if the directory cannot be opened or written
     */
    public void testCreate() throws IOException {
        Analyzer analyzer = new StandardAnalyzer(MATCH_VERSION);
        Directory dir = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexWriterConfig iwc = new IndexWriterConfig(MATCH_VERSION, analyzer);
            // iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE) would discard
            // any previously written index when the writer is created.
            IndexWriter indexWriter = new IndexWriter(dir, iwc);
            try {
                Document document = new Document();
                document.add(new Field("name", "java_Test", Store.YES, Index.ANALYZED));
                indexWriter.addDocument(document, analyzer);
                indexWriter.optimize();
            } finally {
                // Always release the writer (and its write lock), even on failure.
                indexWriter.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Runs a {@link TermQuery} for {@code name:_test} against the index at
     * {@link #INDEX_PATH}, printing the total hit count followed by the stored
     * {@code name} value of each returned document.
     *
     * NOTE(review): a TermQuery is not passed through an analyzer, so it only
     * matches if the index holds the exact term {@code "_test"}; compare with
     * the tokens printed by {@link #testAnalyzer()} to see what was indexed.
     *
     * @throws Exception if the index cannot be opened or searched
     */
    public void testSearch() throws Exception {
        Directory dir = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexSearcher indexSearcher = new IndexSearcher(dir);
            try {
                Query query = new TermQuery(new Term("name", "_test"));
                TopDocs topDocs = indexSearcher.search(query, Integer.MAX_VALUE);
                System.out.println(topDocs.totalHits);
                // Walk the hits actually returned rather than indexing the
                // array by totalHits, so the loop can never run past its end.
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    System.out.println(document.get("name"));
                }
                System.out.println(" ------------- ");
            } finally {
                indexSearcher.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Prints the analyzer class in use and then every token that
     * {@link StandardAnalyzer} produces for the text {@code "test_java"}.
     *
     * @throws Exception if tokenization fails
     */
    @Test
    public void testAnalyzer() throws Exception {
        String text = "test_java";
        Analyzer analyzer = new StandardAnalyzer(MATCH_VERSION);

        System.out.println("当前使用的分词器:" + analyzer.getClass());

        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
        try {
            // addAttribute returns the shared attribute instance; fetch it once
            // instead of looking it up again for every token.
            TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
            // The TokenStream consumer workflow calls reset() before the
            // first incrementToken().
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(termAttribute.term());
            }
            tokenStream.end();
        } finally {
            tokenStream.close();
        }
    }

}

  • 点赞
  • 写回答
  • 关注问题
  • 收藏
  • 复制链接分享
  • 邀请回答

1条回答

  • Stefen_720 Stefen_720 10年前

    这问题,我也遇到了。楼主如果解决好了,请帮忙联系我。

    点赞 评论 复制链接分享

相关推荐