wc8820 2011-11-10 11:08
浏览 433
已采纳

关于 Lucene3.3 特殊符号 (@#$%^&*) 无法索引问题

Lucene3.3 如题:比如说,创建一个字符串“java_Test”的索引,但是用“_test”来搜索,是搜不出

这条索引记录的。请问,如何在创建索引的时候,把一些特殊符号 (@#$%^&*) 转化为我想要的字符串,如空格等?听说是可以调

用分词器里面的方法过滤掉的。但是具体要怎么做?我看了很久的API,还是一头雾水。希望大家帮帮我。

测试代码:

package test;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

public class Test {

private static final Version MACTH_VERSION = Version.LUCENE_33; // version
                                                                // number

public void testCreate() throws IOException {

    Analyzer LUCENE_ANALUZER = new StandardAnalyzer(MACTH_VERSION);

    File indexDir = new File("D:\\index");
    Directory dir = FSDirectory.open(indexDir);
    IndexWriterConfig iwc = new IndexWriterConfig(MACTH_VERSION,
            LUCENE_ANALUZER);
    // iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); 创建索引时把历史索引删除
    IndexWriter indexWriter = new IndexWriter(dir, iwc);

    Document document = new Document();
    Field field = new Field("name", "java_Test", Store.YES, Index.ANALYZED);
    document.add(field);
    indexWriter.addDocument(document, LUCENE_ANALUZER);
    indexWriter.optimize();
    indexWriter.close(); // close IndexWriter
}

public void testSearch() throws Exception {
    File indexDir = new File("D:\\index");
    Directory dir = FSDirectory.open(indexDir);
    IndexSearcher indexSearcher = new IndexSearcher(dir);

    Term term = new Term("name", "_test");
    Query query = new TermQuery(term);

    TopDocs topDocs = indexSearcher.search(query, Integer.MAX_VALUE);
    int totalResults = topDocs.totalHits;
    System.out.println(totalResults);
    for (int i = 0; i < totalResults; i++) {
        ScoreDoc scoreDoc = topDocs.scoreDocs[i];
        int docSn = scoreDoc.doc;
        Document document = indexSearcher.doc(docSn);
        System.out.println(document.get("name"));

    }
    System.out.println(" ------------- ");
    indexSearcher.close();

}

@Test
public void testAnalyzer() throws Exception {


    String text = "test_java";
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33);

    System.out.println("当前使用的分词器:" + analyzer.getClass());

    TokenStream tokenStream = analyzer.tokenStream("content",new StringReader(text));
    tokenStream.addAttribute(TermAttribute.class);

    while (tokenStream.incrementToken()) {
        TermAttribute termAttribute = tokenStream.getAttribute(TermAttribute.class);
        System.out.println(termAttribute.term());
    }
    tokenStream.end();
    tokenStream.close();
}

}

  • 写回答

1条回答 默认 最新

  • Stefen_720 2011-11-12 09:19
    关注

    这问题,我也遇到了。楼主如果解决好了 ,请帮忙联系我。

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 PADS Logic 原理图
  • ¥15 PADS Logic 图标
  • ¥15 电脑和power bi环境都是英文如何将日期层次结构转换成英文
  • ¥20 气象站点数据求取中~
  • ¥15 如何获取APP内弹出的网址链接
  • ¥15 wifi 图标不见了 不知道怎么办 上不了网 变成小地球了
  • ¥50 STM32单片机传感器读取错误
  • ¥15 (关键词-阻抗匹配,HFSS,RFID标签天线)
  • ¥15 机器人轨迹规划相关问题
  • ¥15 word样式右侧翻页键消失