这个项目一共有五个java类,一个search.jsp,一个web.xml。问题在于在eclipse上加入tomcat后,在浏览器输入地址就会报下面截图上的错误,我觉得可能是建立索引和找索引有问题,但我不太了解这是怎么回事还有该怎么解决:
问题截图:
代码全部:
package sample.dw.paper.lucene.servlet;
import java.io.IOException;
import java.util.List;
import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import sample.dw.paper.lucene.search.SearchManager;
public class SearchController extends HttpServlet{
/**
*
*/
private static final long serialVersionUID = 1L;
public void doGet(HttpServletRequest request,HttpServletResponse response)
throws IOException, ServletException{
doPost(request, response);
}
public void doPost(HttpServletRequest request,HttpServletResponse response)
throws IOException,ServletException{
// String searchWord = request.getParameter("searchWord");
String searchWord = toChinese(request.getParameter("searchWord"));
SearchManager searchManager = new SearchManager(searchWord);
List searchResult = null;
searchResult = searchManager.search();
RequestDispatcher dispatcher = request.getRequestDispatcher("search.jsp");
request.setAttribute("searchResult", searchResult);
dispatcher.forward(request,response);
}
public static String toChinese(String strvalue){
try {
if(strvalue==null){
return null;
}else{
strvalue = new String(strvalue.getBytes("ISO-8859-1"),"UTF-8");
return strvalue;
}
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
return null;
}
}
}
package sample.dw.paper.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import sample.dw.paper.lucene.index.IndexManager;
public class SearchManager {
private String searchWord;
private IndexManager indexManager;
private Analyzer analyzer;
private IndexSearcher indexSearcher;
public SearchManager(String serachWord){
this.searchWord = serachWord;
this.indexManager = new IndexManager();
this.analyzer = new StandardAnalyzer();
}
public List search(){
List searchResult = new ArrayList();
if (false == indexManager.ifIndexExist()) {
try {
if (false == indexManager.createIndex()) {
return searchResult;
}
} catch (IOException e) {
// TODO: handle exception
e.printStackTrace();
return searchResult;
}
}
indexSearcher = null;
try {
indexSearcher = new IndexSearcher(indexManager.getIndexDir());
} catch (IOException e) {
// TODO: handle exceptio
e.printStackTrace();
}
QueryParser queryParser = new QueryParser( "content",analyzer);
Query query = null;
try {
query = queryParser.parse(searchWord);
} catch (ParseException e) {
// TODO: handle exception
e.printStackTrace();
}
if( null !=query && null != indexSearcher){
try {
Hits hits=indexSearcher.search(query);
for(int i=0; i<hits.length();i++){
SearchResultBean resultBean = new SearchResultBean();
resultBean.setHtmlPath(hits.doc(i).get("path"));
resultBean.setHtmlTitle(hits.doc(i).get("title"));
searchResult.add(resultBean);
}
} catch (IOException e) {
// TODO: handle exception
e.printStackTrace();
}
}
return searchResult;
}
}
package sample.dw.paper.lucene.search;
public class SearchResultBean {
private String htmlPath;
private String htmlTitle;
public String getHtmlPath() {
int startPos = htmlPath.indexOf("mirror")+6;
String url=htmlPath;
url = url.substring(startPos);
url = url.replaceAll("\\\\","/");
htmlPath = "http:"+url;
return htmlPath;
}
public void setHtmlPath(String htmlPath) {
this.htmlPath = htmlPath;
}
public String getHtmlTitle() {
return htmlTitle;
}
public void setHtmlTitle(String htmlTitle) {
this.htmlTitle = htmlTitle;
}
}
package sample.dw.paper.lucene.index;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import sample.dw.paper.lucene.util.HTMLDocParser;
public class IndexManager {
public final String dataDir = "C:\\Users\\yang\\workspace\\graduation_project_heritrix\\jobs"+"\\csdn-20150423124021677\\mirror";
public final String indexDir ="C:\\indexDir";
public void ccreateIndex(File file,IndexWriter indexWriter)throws IOException{
if(file.isDirectory()){
File [] files =file.listFiles();
for(int i=0 ; i<files.length; i++){
ccreateIndex(files[i],indexWriter);
}
}else if(file.getAbsolutePath().endsWith(".html") || file.getAbsolutePath().endsWith(".htm")){
String htmlPath = file.getAbsolutePath();
addDocument(htmlPath,indexWriter);
}
}
public boolean createIndex() throws IOException{
if(true == ifIndexExist()){
return true;
}
File dir = new File(dataDir);
if(!dir.exists()){
return false;
}
File[] htmls = dir.listFiles();
Directory fsDirectory = FSDirectory.getDirectory(indexDir, true);
Analyzer analyzer = new StandardAnalyzer();
IndexWriter indexWriter = new IndexWriter(fsDirectory, analyzer, true);
for(int i = 0; i < htmls.length; i++){
String htmlPath = htmls[i].getAbsolutePath();
if(htmlPath.endsWith(".html") || htmlPath.endsWith(".htm")){
addDocument(htmlPath, indexWriter);
}
}
indexWriter.optimize();
indexWriter.close();
return true;
// Directory fsDirectory = FSDirectory.getDirectory(indexDir, true);
// Analyzer analyzer = new StandardAnalyzer();
// IndexWriter indexWriter = new IndexWriter(fsDirectory,analyzer,true);
// ccreateIndex(filee,indexWriter);
//
// indexWriter.close();
// return true;
}
public void addDocument(String htmlPath,IndexWriter indexWriter) {
/*HTMLDocParser htmlParser = new HTMLDocParser(htmlPath);
String path = htmlParser.getPath();
String title = htmlParser.getTitle();
String content = htmlParser.getContent();
//
Document document = new Document();
document.add(new Field(title,new StringReader(content)));
indexWriter.addDocument(document);*/
HTMLDocParser htmlParser = new HTMLDocParser(htmlPath);
String path = htmlParser.getPath();
String title = htmlParser.getTitle();
Reader content = htmlParser.getContent();
Document document = new Document();
document.add(new Field("path",path,Field.Store.YES,Field.Index.NO));
document.add(new Field("title",title,Field.Store.YES,Field.Index.TOKENIZED));
document.add(new Field("content",content));
try {
indexWriter.addDocument(document);
} catch (IOException e) {
e.printStackTrace();
}
}
public boolean ifIndexExist(){
File directory = new File(indexDir);
if(0 < directory.listFiles().length){
return true;
}else{
return false;
}
}
public String getDataDir(){
return this.dataDir;
}
public String getIndexDir(){
return this.indexDir;
}
}
package sample.dw.paper.lucene.util;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import org.apache.lucene.demo.html.HTMLParser;
public class HTMLDocParser {
private String htmlPath;
private HTMLParser htmlParser;
public HTMLDocParser(String htmlPath){
this.htmlPath = htmlPath;
initHtmlParser();
}
private void initHtmlParser(){
InputStream inputStream = null;
try {
inputStream = new FileInputStream(htmlPath);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
if(null != inputStream){
try {
htmlParser = new HTMLParser(new InputStreamReader(inputStream, "utf-8"));
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
}
}
public String getTitle(){
if(null != htmlParser){
try {
return htmlParser.getTitle();
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
return "";
}
public Reader getContent(){
if(null != htmlParser){
try {
return htmlParser.getReader();
} catch (IOException e) {
e.printStackTrace();
}
}
return null;
}
public String getPath(){
return this.htmlPath;
}
}