Lee-ocean 2017-05-20 13:36 采纳率: 100%
浏览 1512
已采纳

lucene5.4 中文单字搜索命中为0

如题。求大神帮忙,这边的需求是将中文拆成单个字进行匹配。用过StandardAnalyzer/JcsegAnalyzer5X效果好差。比如说存有“单车”这个词,如果我以“单”作为条件的话。是无法命中的。除非加了模糊*。但是加了模糊就没办法高亮了。。。。求大神帮忙!搞了几天一直没处理这个问题
  • 写回答

2条回答 默认 最新

  • AngusC· 2017-05-20 16:11
    关注

    给你看我做百度网盘的时候用到lucene做的工具类,

     /**
      * Helper around a file-system Lucene index of {@code PanBean} entries.
      *
      * <p>The index location is read once from the {@code lucenePath} property.
      * Every operation opens the directory, does its work and closes everything
      * it opened, so no Lucene resource is kept between calls.
      *
      * @author user
      */
    public class PanIndex {

        private static final String LUCENE_PATH=PropertiesUtil.getValue("lucenePath");

        /** Markup placed before each highlighted term. */
        private static final String HIGHLIGHT_PREFIX = "<b><font color='red'>";

        /** Markup placed after each highlighted term. */
        private static final String HIGHLIGHT_SUFFIX = "</font></b>";

        /**
         * Opens the index directory configured by {@code lucenePath}.
         * The caller is responsible for closing the returned directory.
         *
         * @return a newly opened directory
         * @throws Exception if the directory cannot be opened
         */
        private Directory openDirectory() throws Exception {
            return FSDirectory.open(Paths.get(LUCENE_PATH));
        }

        /**
         * Builds the Lucene document stored for a bean: an untokenized
         * {@code id} and a tokenized {@code name}, both stored.
         *
         * <p>NOTE(review): the search methods also read {@code type} and
         * {@code size} from documents, but neither field is written here, so
         * documents created by this class carry no value for them -- confirm
         * whether they are indexed elsewhere.
         *
         * @param pan the bean to convert
         * @return the document to index
         */
        private Document toDocument(PanBean pan) {
            Document doc = new Document();
            doc.add(new StringField("id", pan.getUuid(), Field.Store.YES));
            doc.add(new TextField("name", pan.getName(), Field.Store.YES));
            return doc;
        }

        /**
         * Adds a bean to the index.
         *
         * @param pan the bean to index
         * @throws Exception if the index cannot be opened or written
         */
        public void addIndex(PanBean pan) throws Exception {
            // try-with-resources releases the writer (and its write lock) even
            // when addDocument fails.
            try (Directory directory = openDirectory();
                 Analyzer analyzer = new StandardAnalyzer();
                 IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
                writer.addDocument(toDocument(pan));
            }
        }

        /**
         * Replaces the indexed document whose {@code id} equals the bean's uuid.
         *
         * @param pan the bean carrying the new values
         * @throws Exception if the index cannot be opened or written
         */
        public void updateIndex(PanBean pan) throws Exception {
            try (Directory directory = openDirectory();
                 Analyzer analyzer = new StandardAnalyzer();
                 IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
                writer.updateDocument(new Term("id", pan.getUuid()), toDocument(pan));
            }
        }

        /**
         * Deletes the document(s) whose {@code id} equals the given value.
         *
         * @param panId id of the entry to remove
         * @throws Exception if the index cannot be opened or written
         */
        public void deleteIndex(String panId) throws Exception {
            try (Directory directory = openDirectory();
                 Analyzer analyzer = new StandardAnalyzer();
                 IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
                writer.deleteDocuments(new Term("id", panId));
                writer.forceMergeDeletes(); // merge away segments holding deleted documents
                writer.commit();
            }
        }

        /**
         * Runs a query and returns the hits with the matched parts of
         * {@code name} wrapped in highlight markup.
         *
         * <p>For each hit the bean receives uuid, type, size, the raw name in
         * {@code name2} and, when a name is stored, the highlighted fragment in
         * {@code name} (falling back to the raw name when the highlighter
         * produces nothing).
         *
         * @param query    the query to execute
         * @param n        maximum number of hits
         * @param analyzer analyzer used to re-tokenize stored names for highlighting
         * @return the matching beans, never {@code null}
         * @throws Exception if the index cannot be read or highlighting fails
         */
        private List<PanBean> searchWithHighlight(Query query, int n, Analyzer analyzer) throws Exception {
            try (Directory directory = openDirectory();
                 IndexReader reader = DirectoryReader.open(directory)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                TopDocs hits = searcher.search(query, n);

                QueryScorer scorer = new QueryScorer(query);
                Highlighter highlighter =
                        new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PREFIX, HIGHLIGHT_SUFFIX), scorer);
                highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

                List<PanBean> panList = new LinkedList<>();
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = searcher.doc(scoreDoc.doc);
                    PanBean pan = new PanBean();
                    pan.setUuid(doc.get("id"));
                    pan.setType(doc.get("type"));
                    pan.setSize(doc.get("size"));
                    String name = doc.get("name");
                    pan.setName2(name);
                    if (name != null) {
                        TokenStream tokenStream = analyzer.tokenStream("name", new StringReader(name));
                        String highlighted = highlighter.getBestFragment(tokenStream, name);
                        pan.setName(StringUtil.isEmpty(highlighted) ? name : highlighted);
                    }
                    panList.add(pan);
                }
                return panList;
            }
        }

        /**
         * Searches the {@code name} field using query-parser syntax and
         * highlights the matches.
         *
         * <p>Failures are propagated to the caller instead of being printed and
         * turned into a {@code null} result.
         *
         * @param q query string in Lucene query-parser syntax
         * @param n maximum number of hits
         * @return the matching beans, never {@code null}
         * @throws Exception if the query cannot be parsed or the index cannot be read
         */
        public List<PanBean> searchPan(String q, int n) throws Exception {
            try (Analyzer analyzer = new StandardAnalyzer()) {
                Query query = new QueryParser("name", analyzer).parse(q);
                return searchWithHighlight(query, n, analyzer);
            }
        }

        /**
         * Fuzzy search on the {@code name} field with highlighted matches.
         *
         * <p>Failures are propagated to the caller instead of being printed and
         * turned into a {@code null} result.
         *
         * @param q the term to match approximately (used as-is, not analyzed)
         * @param n maximum number of hits
         * @return the matching beans, never {@code null}
         * @throws Exception if the index cannot be read
         */
        public List<PanBean> searchMohuPan(String q, int n) throws Exception {
            try (Analyzer analyzer = new StandardAnalyzer()) {
                // '~' is query-parser syntax; a Term takes the raw text, so
                // appending it would make the tilde part of the term itself.
                Query query = new FuzzyQuery(new Term("name", q));
                return searchWithHighlight(query, n, analyzer);
            }
        }

        /**
         * Searches the {@code name} field using query-parser syntax and returns
         * the hits without any HTML markup.
         *
         * <p>Only uuid, name and type are populated on the returned beans.
         * Failures are propagated to the caller instead of being printed and
         * turned into a {@code null} result.
         *
         * @param q query string in Lucene query-parser syntax
         * @param n maximum number of hits
         * @return the matching beans, never {@code null}
         * @throws Exception if the query cannot be parsed or the index cannot be read
         */
        public List<PanBean> searchRelatedPan(String q, int n) throws Exception {
            try (Analyzer analyzer = new StandardAnalyzer();
                 Directory directory = openDirectory();
                 IndexReader reader = DirectoryReader.open(directory)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new QueryParser("name", analyzer).parse(q);
                TopDocs hits = searcher.search(query, n);

                List<PanBean> panList = new LinkedList<>();
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = searcher.doc(scoreDoc.doc);
                    PanBean pan = new PanBean();
                    pan.setUuid(doc.get("id"));
                    pan.setName(doc.get("name"));
                    pan.setType(doc.get("type"));
                    panList.add(pan);
                }
                return panList;
            }
        }

        /**
         * Exact-match lookup on the {@code type} field.
         *
         * <p>Failures are propagated to the caller instead of being printed and
         * turned into a {@code null} result.
         *
         * @param q the exact type value to look for
         * @param n maximum number of hits
         * @return the matching beans, never {@code null}
         * @throws Exception if the index cannot be read
         */
        public List<PanBean> searchPanByType(String q, int n) throws Exception {
            try (Directory directory = openDirectory();
                 IndexReader reader = DirectoryReader.open(directory)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                TopDocs hits = searcher.search(new TermQuery(new Term("type", q)), n);

                List<PanBean> panList = new LinkedList<>();
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = searcher.doc(scoreDoc.doc);
                    PanBean pan = new PanBean();
                    pan.setUuid(doc.get("id"));
                    pan.setType(doc.get("type"));
                    pan.setSize(doc.get("size"));
                    pan.setName(doc.get("name"));
                    panList.add(pan);
                }
                return panList;
            }
        }

        /**
         * Manual smoke test: prints the number of hits for one type lookup and
         * the elapsed time in milliseconds.
         *
         * @param args unused
         * @throws Exception if the lookup fails
         */
        public static void main(String[] args) throws Exception {
            long start = System.currentTimeMillis();
            System.out.println(new PanIndex().searchPanByType("压缩包",100).size());
            long end = System.currentTimeMillis();
            System.out.println(end-start);
        }
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

悬赏问题

  • ¥15 DS18B20内部ADC模数转换器
  • ¥15 做个有关计算的小程序
  • ¥15 MPI读取tif文件无法正常给各进程分配路径
  • ¥15 如何用MATLAB实现以下三个公式(有相互嵌套)
  • ¥30 关于#算法#的问题:运用EViews第九版本进行一系列计量经济学的时间数列数据回归分析预测问题 求各位帮我解答一下
  • ¥15 setInterval 页面闪烁,怎么解决
  • ¥15 如何让企业微信机器人实现消息汇总整合
  • ¥50 关于#ui#的问题:做yolov8的ui界面出现的问题
  • ¥15 如何用Python爬取各高校教师公开的教育和工作经历
  • ¥15 TLE9879QXA40 电机驱动