coldboold 2010-03-10 09:25
浏览 225
已采纳

使用lucene和paoding建立索引报错

这是其中的创建索引的代码:大家帮忙看下吧
package com.foresee.paodinganalyzer;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;

/**
 * Command-line tool that builds a Lucene index over every {@code .txt} file
 * found (recursively) under {@link #DATA_DIR}, tokenizing with
 * {@link PaodingAnalyzer}, and writes the index to {@link #INDEX_DIR}.
 *
 * <p>NOTE: the Paoding jar on the classpath must be built against the same
 * Lucene API generation as the Lucene jar in use. A mismatch shows up at
 * {@code IndexWriter.addDocument} as
 * {@code AbstractMethodError: TokenStream.incrementToken()Z}.
 */
public class CreateIndexer {

    /** Directory the index is written to. */
    private static final String INDEX_DIR = "e:\\mypaoding\\index\\";

    /** Directory holding the source text files. */
    private static final String DATA_DIR = "e:\\mypaoding\\small\\";

    /**
     * Indexes {@link #DATA_DIR} into {@link #INDEX_DIR} and prints how many
     * documents were indexed and how long it took.
     *
     * @param args unused
     * @throws IOException if the data directory is invalid or indexing fails
     */
    public static void main(String[] args) throws IOException {
        long start = System.currentTimeMillis();
        int numIndexed = index(new File(INDEX_DIR), new File(DATA_DIR));
        long end = System.currentTimeMillis();
        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " millseconds");
    }

    /**
     * Creates a fresh index in {@code indexDir} from the text files under
     * {@code dataDir}.
     *
     * @param indexDir directory that receives the index (existing index is overwritten)
     * @param dataDir  root directory of the files to index
     * @return number of documents in the index after indexing
     * @throws IOException if {@code dataDir} is missing or not a directory,
     *                     or if reading/writing fails
     */
    private static int index(File indexDir, File dataDir) throws IOException {
        if (!dataDir.exists() || !dataDir.isDirectory()) {
            throw new IOException(dataDir
                    + " does not exist or is not a directory");
        }
        // 'true' => create a new index, replacing any existing one.
        IndexWriter iw = new IndexWriter(FSDirectory.open(indexDir),
                new PaodingAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        try {
            indexDirectory(iw, dataDir);
            int numIndexed = iw.numDocs();
            iw.optimize();
            return numIndexed;
        } finally {
            // Always close the writer, even when indexing fails; otherwise the
            // writer (and the index write lock it holds) is left open.
            iw.close();
        }
    }

    /**
     * Recursively walks {@code dataDir} and indexes every file whose name
     * ends with {@code .txt}.
     *
     * @param iw      writer that receives the documents
     * @param dataDir directory to scan
     * @throws IOException if indexing a file fails
     */
    private static void indexDirectory(IndexWriter iw, File dataDir)
            throws IOException {
        File[] files = dataDir.listFiles();
        if (files == null) {
            // listFiles() returns null when the directory cannot be read;
            // skip it (as unreadable files are skipped) instead of crashing
            // with a NullPointerException.
            System.err.println("Skipping unreadable directory " + dataDir);
            return;
        }
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (f.isDirectory()) {
                indexDirectory(iw, f); // recurse into sub-directories
            } else if (f.getName().endsWith(".txt")) {
                indexFile(iw, f);
            }
        }
    }

    /**
     * Adds one file to the index as a document with a tokenized
     * {@code contents} field (read from the file) and a stored, analyzed
     * {@code filename} field holding the canonical path. Hidden, missing or
     * unreadable files are silently skipped.
     *
     * @param iw writer that receives the document
     * @param f  file to index
     * @throws IOException if the file cannot be opened or indexing fails
     */
    private static void indexFile(IndexWriter iw, File f) throws IOException {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        System.out.println("Indexing " + f.getCanonicalPath());
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the source files.
        FileReader reader = new FileReader(f);
        try {
            Document doc = new Document();
            doc.add(new Field("contents", reader));
            doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES,
                    Field.Index.ANALYZED));
            iw.addDocument(doc);
        } finally {
            // The field content is consumed during addDocument; closing here
            // guarantees the file handle is released even if indexing throws.
            reader.close();
        }
    }

}
下面是报错的信息:
2010-3-10 9:16:00 net.paoding.analysis.knife.PaodingMaker getProperties
信息: config paoding analysis from: D:\workspace\PaodingAnalyzer\file:\E:\jar\pading\paoding-analysis.jar!\paoding-analysis.properties;D:\workspace\PaodingAnalyzer\file:\E:\jar\pading\paoding-analysis.jar!\paoding-analysis-default.properties;D:\workspace\PaodingAnalyzer\file:\E:\jar\pading\paoding-analysis.jar!\paoding-analyzer.properties;D:\workspace\PaodingAnalyzer\bin\paoding-dic-home.properties;E:\data\paoding\dic\paoding-dic-names.properties;D:\workspace\PaodingAnalyzer\file:\E:\jar\pading\paoding-analysis.jar!\paoding-knives.properties;D:\workspace\PaodingAnalyzer\file:\E:\jar\pading\paoding-analysis.jar!\paoding-knives-user.properties
2010-3-10 9:16:00 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
信息: add knike: net.paoding.analysis.knife.CJKKnife
2010-3-10 9:16:00 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
信息: add knike: net.paoding.analysis.knife.LetterKnife
2010-3-10 9:16:00 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
信息: add knike: net.paoding.analysis.knife.NumberKnife
Indexing E:\mypaoding\small\girl0.txt
Exception in thread "main" java.lang.AbstractMethodError: org.apache.lucene.analysis.TokenStream.incrementToken()Z
at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:137)
at org.apache.lucene.index.DocFieldProcessorPerThread.processDocument(DocFieldProcessorPerThread.java:246)
at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:773)
at org.apache.lucene.index.DocumentsWriter.addDocument(DocumentsWriter.java:751)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1928)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1902)
at com.foresee.paodinganalyzer.CreateIndexer.indexFile(CreateIndexer.java:81)
at com.foresee.paodinganalyzer.CreateIndexer.indexDirectory(CreateIndexer.java:66)
at com.foresee.paodinganalyzer.CreateIndexer.index(CreateIndexer.java:43)
at com.foresee.paodinganalyzer.CreateIndexer.main(CreateIndexer.java:29)

  • 写回答

2条回答 默认 最新

  • iteye_13500 2010-03-14 22:34
    关注

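    庖丁提供了例子的,你看看例子里面用的是 Lucene 哪个版本的,你就找哪个版本吧。Lucene 各版本都有不小改动,兼容性方面做得不好。你这里的 AbstractMethodError 就是因为 Lucene 3.0 要求 TokenStream 实现 incrementToken(),而你用的庖丁 jar 是按旧版 Lucene 的 API 编译的,没有实现这个方法。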

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

悬赏问题

  • ¥50 有数据,怎么建立模型求影响全要素生产率的因素
  • ¥50 有数据,怎么用matlab求全要素生产率
  • ¥15 TI的insta-spin例程
  • ¥15 完成下列问题完成下列问题
  • ¥15 C#算法问题, 不知道怎么处理这个数据的转换
  • ¥15 YoloV5 第三方库的版本对照问题
  • ¥15 请完成下列相关问题!
  • ¥15 drone 推送镜像时候 purge: true 推送完毕后没有删除对应的镜像,手动拷贝到服务器执行结果正确在样才能让指令自动执行成功删除对应镜像,如何解决?
  • ¥15 求daily translation(DT)偏差订正方法的代码
  • ¥15 js调用html页面需要隐藏某个按钮