如题。求大神帮忙,这边的需求是将中文拆成单个字进行匹配。用过 StandardAnalyzer/JcsegAnalyzer5X,效果很差。比如说存有“单车”这个词,如果我以“单”作为条件的话,是无法命中的,除非加了模糊匹配(*)。但是加了模糊匹配就没办法高亮了。求大神帮忙!搞了几天一直没能解决这个问题。
lucene5.4 中文单字搜索命中为0
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
2条回答 默认 最新
- AngusC· 2017-05-20 16:11关注
给你看我做百度网盘的时候用 Lucene 做的工具类:
/** * 索引类 * @author user * */ public class PanIndex { private Directory dir=null; private static final String LUCENE_PATH=PropertiesUtil.getValue("lucenePath"); /** * 获取IndexWriter实例 * @return * @throws Exception */ private IndexWriter getWriter()throws Exception{ dir=FSDirectory.open(Paths.get(LUCENE_PATH)); Analyzer analyzer=new StandardAnalyzer(); IndexWriterConfig iwc=new IndexWriterConfig(analyzer); IndexWriter writer=new IndexWriter(dir, iwc); return writer; } /** * 添加Pan索引 * @param jar */ public void addIndex(PanBean pan)throws Exception{ IndexWriter writer=getWriter(); Document doc=new Document(); doc.add(new StringField("id",pan.getUuid(),Field.Store.YES)); doc.add(new TextField("name",pan.getName(),Field.Store.YES)); writer.addDocument(doc); writer.close(); } /** * 更新Pan索引 * @param blog * @throws Exception */ public void updateIndex(PanBean pan)throws Exception{ IndexWriter writer=getWriter(); Document doc=new Document(); doc.add(new StringField("id",pan.getUuid(),Field.Store.YES)); doc.add(new TextField("name",pan.getName(),Field.Store.YES)); writer.updateDocument(new Term("id", String.valueOf(pan.getUuid())), doc); writer.close(); } /** * 删除指定Pan的索引 * @param jarId * @throws Exception */ public void deleteIndex(String panId)throws Exception{ IndexWriter writer=getWriter(); writer.deleteDocuments(new Term("id",panId)); writer.forceMergeDeletes(); // 强制删除 writer.commit(); writer.close(); } /** * 查询信息 * @param q 查询关键字 * @param n 查询个数 * @return * @throws Exception */ public List<PanBean> searchPan(String q,int n)throws Exception{ dir=FSDirectory.open(Paths.get(LUCENE_PATH)); IndexReader reader = DirectoryReader.open(dir); IndexSearcher is=new IndexSearcher(reader); Analyzer analyzer = null; try { analyzer=new StandardAnalyzer(); //Analyzer analyzer = new PaodingAnalyzer(); QueryParser parser=new QueryParser("name",analyzer); Query query=parser.parse(q); TopDocs hits=is.search(query, n); // 查询n条 QueryScorer scorer=new QueryScorer(query); Fragmenter fragmenter 
= new SimpleSpanFragmenter(scorer); SimpleHTMLFormatter simpleHTMLFormatter=new SimpleHTMLFormatter("<b><font color='red'>","</font></b>"); Highlighter highlighter=new Highlighter(simpleHTMLFormatter, scorer); highlighter.setTextFragmenter(fragmenter); List<PanBean> panList=new LinkedList<PanBean>(); for(ScoreDoc scoreDoc:hits.scoreDocs){ Document doc=is.doc(scoreDoc.doc); PanBean pan=new PanBean(); pan.setUuid(doc.get(("id"))); pan.setType(doc.get("type")); pan.setSize(doc.get("size")); String name=doc.get("name"); pan.setName2(name); if(name!=null){ TokenStream tokenStream = analyzer.tokenStream("name", new StringReader(name)); String hName=highlighter.getBestFragment(tokenStream, name); if(StringUtil.isEmpty(hName)){ pan.setName(doc.get("name")); }else{ pan.setName(hName); } } panList.add(pan); } return panList; } catch (Exception e) { e.printStackTrace(); }finally { reader.close(); } return null; } /** * 模糊查询 * @param q 参数 * @param n 查询个数 * @return * @throws Exception */ public List<PanBean> searchMohuPan(String q,int n)throws Exception{ dir=FSDirectory.open(Paths.get(LUCENE_PATH)); IndexReader reader = DirectoryReader.open(dir); IndexSearcher is=new IndexSearcher(reader); Analyzer analyzer = null; try { analyzer=new StandardAnalyzer(); Query parser=new FuzzyQuery(new Term("name",q+"~")); //Query query=parser.parse(q); TopDocs hits=is.search(parser, n); // 查询n条 QueryScorer scorer=new QueryScorer(parser); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); SimpleHTMLFormatter simpleHTMLFormatter=new SimpleHTMLFormatter("<b><font color='red'>","</font></b>"); Highlighter highlighter=new Highlighter(simpleHTMLFormatter, scorer); highlighter.setTextFragmenter(fragmenter); List<PanBean> panList=new LinkedList<PanBean>(); for(ScoreDoc scoreDoc:hits.scoreDocs){ Document doc=is.doc(scoreDoc.doc); PanBean pan=new PanBean(); pan.setUuid(doc.get(("id"))); String name=doc.get("name"); pan.setType(doc.get("type")); pan.setSize(doc.get("size")); 
//pan.setClick(Integer.valueOf(doc.get("click"))); pan.setName2(name); if(name!=null){ TokenStream tokenStream = analyzer.tokenStream("name", new StringReader(name)); String hName=highlighter.getBestFragment(tokenStream, name); if(StringUtil.isEmpty(hName)){ pan.setName(doc.get("name")); }else{ pan.setName(hName); } } panList.add(pan); } return panList; } catch (Exception e) { e.printStackTrace(); }finally { analyzer.close(); reader.close(); } return null; } /** * 查询相关资源 不用html包装 * @param q 参数 * @param n 查询个数 * @return * @throws Exception */ public List<PanBean> searchRelatedPan(String q,int n)throws Exception{ dir=FSDirectory.open(Paths.get(LUCENE_PATH)); IndexReader reader = DirectoryReader.open(dir); IndexSearcher is=new IndexSearcher(reader); Analyzer analyzer = null; try { analyzer=new StandardAnalyzer(); QueryParser parser=new QueryParser("name",analyzer); Query query=parser.parse(q); TopDocs hits=is.search(query, n); // 查询n条 // QueryScorer scorer=new QueryScorer(query); //Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); List<PanBean> panList=new LinkedList<PanBean>(); for(ScoreDoc scoreDoc:hits.scoreDocs){ Document doc=is.doc(scoreDoc.doc); PanBean pan=new PanBean(); pan.setUuid(doc.get(("id"))); String name=doc.get("name"); //pan.setName2(name); pan.setName(name); pan.setType(doc.get("type")); panList.add(pan); } return panList; } catch (Exception e) { e.printStackTrace(); }finally { analyzer.close(); reader.close(); } return null; } /** * 精确查找 * @param q * @param n * @return * @throws Exception */ public List<PanBean> searchPanByType(String q,int n)throws Exception{ dir=FSDirectory.open(Paths.get(LUCENE_PATH)); IndexReader reader = DirectoryReader.open(dir); IndexSearcher is=new IndexSearcher(reader); try { Query query=new TermQuery(new Term("type",q)); TopDocs hits=is.search(query, n); // 查询n条 List<PanBean> panList=new LinkedList<PanBean>(); for(ScoreDoc scoreDoc:hits.scoreDocs){ Document doc=is.doc(scoreDoc.doc); PanBean pan=new PanBean(); 
pan.setUuid(doc.get(("id"))); pan.setType(doc.get("type")); pan.setSize(doc.get("size")); //pan.setClick(GetRandom.getRandomInt(235, 879)); String name=doc.get("name"); pan.setName(name); panList.add(pan); } return panList; } catch (Exception e) { e.printStackTrace(); }finally { reader.close(); } return null; } public static void main(String[] args) throws Exception { Long start =System.currentTimeMillis(); System.out.println(new PanIndex().searchPanByType("压缩包",100).size()); Long end = System.currentTimeMillis(); System.out.println(end-start); } }
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 无用评论 打赏 举报
悬赏问题
- ¥15 DS18B20内部ADC模数转换器
- ¥15 做个有关计算的小程序
- ¥15 MPI读取tif文件无法正常给各进程分配路径
- ¥15 如何用MATLAB实现以下三个公式(有相互嵌套)
- ¥30 关于#算法#的问题:运用EViews第九版本进行一系列计量经济学的时间数列数据回归分析预测问题 求各位帮我解答一下
- ¥15 setInterval 页面闪烁,怎么解决
- ¥15 如何让企业微信机器人实现消息汇总整合
- ¥50 关于#ui#的问题:做yolov8的ui界面出现的问题
- ¥15 如何用Python爬取各高校教师公开的教育和工作经历
- ¥15 TLE9879QXA40 电机驱动