Lucene索引和检索中文文件的问题

lucene初学者,基础不好,碰到以下问题,恳请各位大侠指导,小女子感激不尽!

用的是IKAnalyzer2012_u6.jar和lucene-core-3.6.2.jar

是对本地的文件进行的索引和检索,做的web版的

1、能检索英文的,就是检索不了中文的

2、英文的检索出来的doc.get("contents")的值是空的,但是文件名和路径能读出来
(附件里有代码)
[color=orange]IndexUtil1.java
[/color]
[code="java"]package com.lium.bean;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class IndexUtil1 {
private Directory directory;
private String indexPath = "F:/Lucene/test/indexDir1.2";
private String dataPath = "F:/Lucene/test/dataDir1.2";
/**
 * Opens the file-system directory at {@code indexPath} that will hold the index.
 *
 * @throws IllegalStateException if the directory cannot be opened; the
 *         underlying {@link IOException} is attached as the cause
 */
public IndexUtil1() {
    try {
        directory = FSDirectory.open(new File(indexPath));
    } catch (IOException e) {
        // Fail fast: continuing with a null directory would only surface later
        // as an unrelated NullPointerException when the IndexWriter is created.
        throw new IllegalStateException("Cannot open index directory: " + indexPath, e);
    }
}

/**
 * Reads the whole text of a file, decoding it with the JVM default charset
 * (the same decoding the previous {@code FileReader}-based version used).
 *
 * @param f the file to read
 * @return the file text with every line terminated by {@code '\n'}, or
 *         {@code null} if the file could not be read
 */
public String readFile(File f) {
    return readFile(f, Charset.defaultCharset().name());
}

/**
 * Reads the whole text of a file, decoding it with the given charset.
 *
 * @param f           the file to read
 * @param charsetName name of the charset the file is encoded in, e.g. {@code "UTF-8"}
 * @return the file text with every line terminated by {@code '\n'}, or
 *         {@code null} if the file is missing, unreadable, or the charset
 *         name is not supported
 */
public String readFile(File f, String charsetName) {
    StringBuilder text = new StringBuilder();
    FileInputStream in = null;
    try {
        in = new FileInputStream(f);
        BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetName));
        String line;
        while ((line = br.readLine()) != null) {
            // readLine() strips the line terminator; put one back so the last
            // word of a line is not glued to the first word of the next line.
            text.append(line).append('\n');
        }
        return text.toString();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        // Also covers UnsupportedEncodingException for an unknown charset name.
        e.printStackTrace();
    } finally {
        // Close the underlying stream on every path so the file handle is released.
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return null;
}

/**
 * Rebuilds the index from the regular files directly inside {@code dataPath}.
 *
 * <p>All existing documents are deleted first, then one document is added per
 * file with three stored fields: {@code contents} (analyzed), {@code filename}
 * and {@code filepath} (both not analyzed). Sub-directories and files whose
 * text cannot be read are skipped. If {@code dataPath} cannot be listed the
 * existing index is left untouched. I/O failures are printed, not rethrown.
 */
public void Index() {
    // listFiles() returns null when the path is not a directory or cannot be
    // read; check before touching the index so it is not wiped for nothing.
    File[] files = new File(dataPath).listFiles();
    if (files == null) {
        System.err.println("Data directory is missing or unreadable: " + dataPath);
        return;
    }

    IndexWriter writer = null;
    try {
        Analyzer analyzer = new IKAnalyzer();
        writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
        writer.deleteAll();
        for (File f : files) {
            if (!f.isFile()) {
                // Directories cannot be opened as text.
                continue;
            }
            String text = readFile(f);
            if (text == null) {
                // readFile already reported the failure; a null field value
                // would make the Field constructor throw and abort the run.
                continue;
            }
            Document doc = new Document();
            doc.add(new Field("contents", text, Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("filepath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
    } catch (IOException e) {
        // CorruptIndexException and LockObtainFailedException are IOException
        // subclasses and were handled identically, so one handler suffices.
        e.printStackTrace();
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

}[/code]

[color=orange]SearcherUtil.java[/color]
[code="java"]package com.lium.bean;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SearcherUtil {
private Directory directory;
private IndexReader reader;
/**
 * Opens the on-disk index directory to search.
 *
 * @throws IllegalStateException if the directory cannot be opened; the
 *         underlying {@link IOException} is attached as the cause
 */
public SearcherUtil() {
    // Must be the very directory the indexer writes to. The previous value
    // "F:/Lucene/indexDir" pointed at a different location, so searches ran
    // against an index that did not contain the freshly indexed documents.
    String indexPath = "F:/Lucene/test/indexDir1.2";
    try {
        directory = FSDirectory.open(new File(indexPath));
    } catch (IOException e) {
        // Fail fast instead of leaving 'directory' null for later calls.
        throw new IllegalStateException("Cannot open index directory: " + indexPath, e);
    }
}

/**
 * Returns a searcher over the most recent view of the index.
 *
 * <p>The reader is opened on first use. On later calls it is refreshed with
 * {@code IndexReader.openIfChanged}; when a newer reader is returned the old
 * one is closed so its file handles are released.
 *
 * @return a new {@link IndexSearcher}, or {@code null} if the index could not
 *         be opened (the exception is printed)
 */
public IndexSearcher getsearcher() {
    try {
        if (reader == null) {
            reader = IndexReader.open(directory);
        } else {
            IndexReader refreshed = IndexReader.openIfChanged(reader);
            if (refreshed != null) {
                IndexReader stale = reader;
                reader = refreshed;
                try {
                    // openIfChanged does not close the reader passed to it.
                    stale.close();
                } catch (IOException e) {
                    // The fresh reader is usable regardless; just report it.
                    e.printStackTrace();
                }
            }
        }
        return new IndexSearcher(reader);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}

/**
 * Looks up documents whose {@code contents} field contains the given term and
 * returns at most ten hits.
 *
 * <p>NOTE(review): the keyword is used verbatim as a single {@link TermQuery}
 * term and is not run through an analyzer, so it presumably only matches when
 * it equals one token produced at index time; confirm whether an analyzed
 * query is wanted for multi-word or Chinese input.
 *
 * @param keys the term to search for
 * @return the hits as {@link Results}; an empty list when {@code keys} is
 *         {@code null} or empty, when no searcher is available, or when the
 *         search fails (never {@code null})
 */
public List<Results> seach(String keys) {
    if (keys == null || keys.length() == 0) {
        return new ArrayList<Results>();
    }
    IndexSearcher searcher = getsearcher();
    if (searcher == null) {
        // getsearcher() already reported why the index could not be opened.
        return new ArrayList<Results>();
    }
    try {
        Query query = new TermQuery(new Term("contents", keys));
        TopDocs tds = searcher.search(query, 10);
        List<Results> rsList = new ArrayList<Results>();
        for (ScoreDoc sd : tds.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            rsList.add(new Results(doc.get("filename"), doc.get("filepath"), doc.get("contents")));
        }
        return rsList;
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Return an empty list rather than null so callers can use size()/isEmpty().
    return new ArrayList<Results>();
}

}
[/code]
[color=orange]Results.java[/color]
[code="java"]package com.lium.bean;

public class Results {

String filename;
String filepath;
String content;

/**
 * Creates a result holding the three given values.
 *
 * @param filename value for the {@code filename} field
 * @param filepath value for the {@code filepath} field
 * @param content  value for the {@code content} field
 */
public Results(String filename, String filepath, String content) {
    // The superclass constructor is invoked implicitly.
    this.content = content;
    this.filepath = filepath;
    this.filename = filename;
}

/**
 * Returns the file name held by this result.
 *
 * @return the current {@code filename} value
 */
public String getFilename() {
    return this.filename;
}

/**
 * Replaces the file name held by this result.
 *
 * @param filename the new {@code filename} value
 */
public void setFilename(String filename) {
    this.filename = filename;
}

/**
 * Returns the file path held by this result.
 *
 * @return the current {@code filepath} value
 */
public String getFilepath() {
    return this.filepath;
}

/**
 * Replaces the file path held by this result.
 *
 * @param filepath the new {@code filepath} value
 */
public void setFilepath(String filepath) {
    this.filepath = filepath;
}

/**
 * Returns the content text held by this result.
 *
 * @return the current {@code content} value
 */
public String getContent() {
    return this.content;
}

/**
 * Replaces the content text held by this result.
 *
 * @param content the new {@code content} value
 */
public void setContent(String content) {
    this.content = content;
}

}[/code]

[color=orange]ISservlet.java[/color]
[code="java"]package com.lium.servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.lium.bean.IndexUtil1;
import com.lium.bean.Results;
import com.lium.bean.SearcherUtil;

public class ISservlet extends HttpServlet {
private static final long serialVersionUID = 1L;

/**
 * Serves GET requests exactly like POST requests by handing them to
 * {@link #doPost(HttpServletRequest, HttpServletResponse)}.
 *
 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    doPost(request, response);
}

/**
 * Rebuilds the index, searches it for the {@code keywords} request parameter
 * and forwards to {@code /SeacherResult.jsp} with the hits in the
 * {@code rsList} request attribute, or to {@code /fail.jsp} when there is
 * nothing to show.
 *
 * <p>A missing or blank {@code keywords} parameter goes straight to
 * {@code /fail.jsp} without indexing or searching.
 *
 * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
 */
@Override
protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    // Affects decoding of the request body only; how a GET query string is
    // decoded is decided by the servlet container's configuration.
    request.setCharacterEncoding("UTF-8");
    String keys = request.getParameter("keywords");
    if (keys == null || keys.trim().length() == 0) {
        request.getRequestDispatcher("/fail.jsp").forward(request, response);
        return;
    }

    // The whole index is rebuilt on every request before searching.
    IndexUtil1 iu = new IndexUtil1();
    iu.Index();
    SearcherUtil su = new SearcherUtil();
    List<Results> rsList = su.seach(keys);
    // seach may hand back null on failure; treat that like "no hits".
    if (rsList != null && !rsList.isEmpty()) {
        request.setAttribute("rsList", rsList);
        request.getRequestDispatcher("/SeacherResult.jsp").forward(request, response);
    } else {
        request.getRequestDispatcher("/fail.jsp").forward(request, response);
    }
}

}[/code]

[color=orange]Seacher.html[/color]
[code="java"]<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">




用户搜索界面

A Simple Searcher

搜索


[/code]

[color=orange]SeacherResult.jsp[/color]
[code="java"]<%@page import="java.util.List"%>
<%@ page language="java" contentType="text/html; charset=UTF-8"
pageEncoding="UTF-8"%>
<%@page import="com.lium.bean.*" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">



搜索结果


<%
// The servlet stores the hits in the "rsList" request attribute.
@SuppressWarnings("unchecked")
List<Results> list=(List<Results>)request.getAttribute("rsList");
// Render one entry per hit.
for(int i=0;i<list.size();i++) {
    Results rs = list.get(i);
%>
文件名:<%=rs.getFilename()%>

文件内容:<%=rs.getContent()%>

文件路径:<%=rs.getFilepath()%>

<%
}
%>


[/code]

3个回答

SearcherUtil 的索引目录错了
directory = FSDirectory.open(new File("F:/Lucene/test/indexDir1.2"));

liuman1990
liuman1990 嗯,好的,嘿嘿,就是这个目录写错了,一直在找其他的问题,这个明显的错误却没发现,现在可以了,搜得出来了……
6 年多之前 回复
jinnianshilongnian
jinnianshilongnian 建议你把 这种变量做成全局的 而不是 分散到很多位置 单一性
6 年多之前 回复
liuman1990
liuman1990 谢谢,改来改去改晕了……
6 年多之前 回复
jinnianshilongnian
jinnianshilongnian 中文的话 估计分词的问题 你可以试试 “收件箱” 测试无问题
6 年多之前 回复

readFile方法修改为:[code="java"]
FileInputStream fis = new FileInputStream(f);
// 按照指定的编码方式将字节流转化为字符流(编码名必须与文件的实际编码一致,例如 "UTF-8"、"GBK" 或 "UNICODE")
InputStreamReader isr = new InputStreamReader(fis, "UNICODE");
// 从字符流中获取文本并进行缓冲
BufferedReader br = new BufferedReader(isr);
[/code]试试

liuman1990
liuman1990 嗯嗯,非常感谢!
6 年多之前 回复

readfile方法添加编码方式试试:
[code="java"]
FileInputStream fis = new FileInputStream(f);
// 按照指定的编码方式将字节流转化为字符流(编码名必须与文件的实际编码一致,例如 "UTF-8"、"GBK" 或 "UNICODE")
InputStreamReader isr = new InputStreamReader(fis, "UNICODE");
// 从字符流中获取文本并进行缓冲
BufferedReader br = new BufferedReader(isr);
[/code]

liuman1990
liuman1990 好的,谢谢了!
6 年多之前 回复
Csdn user default icon
上传中...
上传图片
插入图片
抄袭、复制答案,以达到刷声望分或其他目的的行为,在CSDN问答是严格禁止的,一经发现立刻封号。是时候展现真正的技术了!
立即提问