慢生活的人生 2015-06-05 06:17 采纳率: 0%
浏览 1227
已结题

用基于lucene的web项目实现的简单的搜索引擎,出现线程抛出的空指针异。

这个项目一共有五个java类,一个search.jsp,一个web.xml。问题在于在eclipse上加入tomcat后,在浏览器输入地址就会报下面截图上的错误,我觉得可能是建立索引和找索引有问题,但我不太了解这是怎么回事还有该怎么解决:
问题截图:
图片说明
图片说明

代码全部:

 package sample.dw.paper.lucene.servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import sample.dw.paper.lucene.search.SearchManager;

public class SearchController extends HttpServlet{

    /**
     * 
     */
    private static final long serialVersionUID = 1L;

    public void doGet(HttpServletRequest request,HttpServletResponse response)
            throws IOException, ServletException{
        doPost(request, response);
    }

    public void doPost(HttpServletRequest request,HttpServletResponse response)
            throws IOException,ServletException{

//      String searchWord = request.getParameter("searchWord");
        String searchWord = toChinese(request.getParameter("searchWord"));
        SearchManager searchManager = new SearchManager(searchWord);
        List searchResult = null;
        searchResult = searchManager.search();
        RequestDispatcher dispatcher = request.getRequestDispatcher("search.jsp");
        request.setAttribute("searchResult", searchResult);
        dispatcher.forward(request,response);
    }


    public static String toChinese(String strvalue){
        try {
            if(strvalue==null){
                return null;
            }else{
                strvalue = new String(strvalue.getBytes("ISO-8859-1"),"UTF-8");
                return strvalue;
            }
        } catch (Exception e) {
            // TODO: handle exception
            e.printStackTrace();
            return null;
        }
    }

}

 package sample.dw.paper.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

import sample.dw.paper.lucene.index.IndexManager;

public class SearchManager {
    private String searchWord;
    private IndexManager indexManager;
    private Analyzer analyzer;
    private IndexSearcher  indexSearcher;

    public SearchManager(String serachWord){
        this.searchWord = serachWord;
        this.indexManager = new IndexManager();
        this.analyzer = new StandardAnalyzer();


    }
    public List search(){
        List  searchResult = new ArrayList();

        if (false == indexManager.ifIndexExist()) {
            try {
                if (false == indexManager.createIndex()) {
                    return searchResult;
                }

            } catch (IOException e) {
                // TODO: handle exception
                e.printStackTrace();
                return searchResult;
            }

        }

         indexSearcher = null;

        try {
            indexSearcher = new IndexSearcher(indexManager.getIndexDir());
        } catch (IOException e) {
            // TODO: handle exceptio
            e.printStackTrace();
        }

        QueryParser queryParser = new QueryParser( "content",analyzer);
        Query query = null;
        try {
            query = queryParser.parse(searchWord);
        } catch (ParseException e) {
            // TODO: handle exception
            e.printStackTrace();
        }

        if( null !=query && null != indexSearcher){
            try {
                Hits hits=indexSearcher.search(query);    

                for(int i=0; i<hits.length();i++){
                    SearchResultBean resultBean = new SearchResultBean();
                    resultBean.setHtmlPath(hits.doc(i).get("path"));
                    resultBean.setHtmlTitle(hits.doc(i).get("title"));
                    searchResult.add(resultBean);
                }
            } catch (IOException e) {
                // TODO: handle exception
                e.printStackTrace();
            }
        }
        return searchResult;
    }
}

 package sample.dw.paper.lucene.search;

public class SearchResultBean {
    private String htmlPath;
    private String htmlTitle;

    public String getHtmlPath() {
        int startPos = htmlPath.indexOf("mirror")+6;
        String url=htmlPath;
        url = url.substring(startPos);
        url = url.replaceAll("\\\\","/");
        htmlPath = "http:"+url;
        return htmlPath;
    }
    public void setHtmlPath(String htmlPath) {
        this.htmlPath = htmlPath;
    }
    public String getHtmlTitle() {
        return htmlTitle;
    }
    public void setHtmlTitle(String htmlTitle) {
        this.htmlTitle = htmlTitle;
    }


}

 package sample.dw.paper.lucene.index;

import java.io.File;
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import sample.dw.paper.lucene.util.HTMLDocParser;

public class IndexManager {

    public final String dataDir = "C:\\Users\\yang\\workspace\\graduation_project_heritrix\\jobs"+"\\csdn-20150423124021677\\mirror";
    public final String indexDir ="C:\\indexDir";

    public void ccreateIndex(File file,IndexWriter indexWriter)throws IOException{
        if(file.isDirectory()){
            File [] files =file.listFiles();
            for(int i=0 ; i<files.length; i++){
                ccreateIndex(files[i],indexWriter);
            }
        }else if(file.getAbsolutePath().endsWith(".html") || file.getAbsolutePath().endsWith(".htm")){
            String htmlPath = file.getAbsolutePath();
            addDocument(htmlPath,indexWriter);

        }
    }



    public boolean createIndex() throws IOException{
        if(true == ifIndexExist()){
            return true;
        }

        File dir = new File(dataDir);
        if(!dir.exists()){
            return false;
        }

        File[] htmls = dir.listFiles();
        Directory fsDirectory = FSDirectory.getDirectory(indexDir, true);
        Analyzer  analyzer    = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(fsDirectory, analyzer, true);
        for(int i = 0; i < htmls.length; i++){
            String htmlPath = htmls[i].getAbsolutePath();

            if(htmlPath.endsWith(".html") || htmlPath.endsWith(".htm")){
          addDocument(htmlPath, indexWriter);
         }
        }
        indexWriter.optimize();
        indexWriter.close();
        return true;
//      Directory fsDirectory = FSDirectory.getDirectory(indexDir, true);
//      Analyzer analyzer = new StandardAnalyzer();
//      IndexWriter indexWriter  = new IndexWriter(fsDirectory,analyzer,true);
//      ccreateIndex(filee,indexWriter);
//      
//      indexWriter.close();
//      return true;

    }





    public void addDocument(String htmlPath,IndexWriter indexWriter) {
        /*HTMLDocParser htmlParser = new HTMLDocParser(htmlPath);
        String path = htmlParser.getPath();
        String title = htmlParser.getTitle();
        String content = htmlParser.getContent();
        //
        Document document = new Document();
        document.add(new Field(title,new StringReader(content)));
        indexWriter.addDocument(document);*/
        HTMLDocParser htmlParser = new HTMLDocParser(htmlPath);
        String path    = htmlParser.getPath();
        String title   = htmlParser.getTitle();
        Reader content = htmlParser.getContent();

        Document document = new Document();
        document.add(new Field("path",path,Field.Store.YES,Field.Index.NO));
        document.add(new Field("title",title,Field.Store.YES,Field.Index.TOKENIZED));
        document.add(new Field("content",content));
        try {
              indexWriter.addDocument(document);
            } catch (IOException e) {
              e.printStackTrace();
                                      }


    }

    public boolean ifIndexExist(){
        File directory = new File(indexDir);
        if(0 < directory.listFiles().length){
            return true;
        }else{
            return false;
        }
    }

    public String getDataDir(){
        return this.dataDir;
    }

    public String getIndexDir(){
        return this.indexDir;
    }


}

 package sample.dw.paper.lucene.util;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;

import org.apache.lucene.demo.html.HTMLParser;




public class HTMLDocParser {
     private String htmlPath;

        private HTMLParser htmlParser;

        public HTMLDocParser(String htmlPath){
            this.htmlPath = htmlPath;
            initHtmlParser();
        }

        private void initHtmlParser(){
            InputStream inputStream = null;
            try {
                inputStream = new FileInputStream(htmlPath);
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }
            if(null != inputStream){
             try {
                    htmlParser = new HTMLParser(new InputStreamReader(inputStream, "utf-8"));
                } catch (UnsupportedEncodingException e) {
                    e.printStackTrace();
                }
            }
        }

        public String getTitle(){
            if(null != htmlParser){
                try {
                    return htmlParser.getTitle();
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        return "";
        }

        public Reader getContent(){
        if(null != htmlParser){
                try {
                      return htmlParser.getReader();
                  } catch (IOException e) {
                      e.printStackTrace();
                  }
            }
            return null;
        }

        public String getPath(){
            return this.htmlPath;  
        }

}

  • 写回答

0条回答 默认 最新

    报告相同问题?

    悬赏问题

    • ¥15 gwas 分析-数据质控之过滤稀有突变中出现的问题
    • ¥15 没有注册类 (异常来自 HRESULT: 0x80040154 (REGDB_E_CLASSNOTREG))
    • ¥15 知识蒸馏实战博客问题
    • ¥15 用PLC设计纸袋糊底机送料系统
    • ¥15 simulink仿真中dtc控制永磁同步电机如何控制开关频率
    • ¥15 用C语言输入方程怎么
    • ¥15 网站显示不安全连接问题
    • ¥15 51单片机显示器问题
    • ¥20 关于#qt#的问题:Qt代码的移植问题
    • ¥50 求图像处理的matlab方案