Lucene 5.4: single-character Chinese search returns zero hits
As the title says. The requirement here is to split Chinese text into individual characters for matching. I have tried StandardAnalyzer and JcsegAnalyzer5X, and the results were poor. For example, the index stores the word "单车"; if I search with "单" as the condition, there are no hits unless I add a wildcard (*), but with the wildcard, highlighting no longer works. Any help would be appreciated; I have been stuck on this for days.
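(For reference: one way to get true single-character matching at both index and query time is a unigram analyzer built on NGramTokenizer. A minimal sketch, assuming Lucene 5.4; the class name is illustrative, not from the thread:)

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;

// Sketch: an Analyzer that emits every character as its own term, so
// indexing "单车" produces the terms "单" and "车" and a query for "单" hits.
public class SingleCharAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        // 1-gram tokenizer: one token per character. Note that by default
        // every code point (including whitespace) becomes a token; override
        // NGramTokenizer.isTokenChar(int) to skip whitespace if needed.
        return new TokenStreamComponents(new NGramTokenizer(1, 1));
    }
}

The same analyzer has to be used both when indexing and in the QueryParser; then single-character queries hit directly, and highlighting works without wildcards because the query terms match the indexed terms exactly.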

2 answers

Here is the Lucene utility class I wrote when I built a Baidu Pan (network drive) search site:

import java.io.StringReader;
import java.nio.file.Paths;
import java.util.LinkedList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Index utility class for the pan search site.
 * (PanBean, PropertiesUtil and StringUtil are project-local helpers.)
 * @author user
 */
public class PanIndex {

    private Directory dir=null;

    private static final String LUCENE_PATH=PropertiesUtil.getValue("lucenePath");

    /**
     * Obtains an IndexWriter instance.
     * @return a writer over the index directory
     * @throws Exception
     */
    private IndexWriter getWriter() throws Exception {
        dir = FSDirectory.open(Paths.get(LUCENE_PATH));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        return new IndexWriter(dir, iwc);
    }

    /**
     * Adds an index entry for a pan resource.
     * @param pan the bean to index
     */
    public void addIndex(PanBean pan) throws Exception {
        IndexWriter writer = getWriter();
        Document doc = new Document();
        doc.add(new StringField("id", pan.getUuid(), Field.Store.YES));
        doc.add(new TextField("name", pan.getName(), Field.Store.YES));
        // The search methods below read "type" and "size", so they must be
        // indexed here too (assumes PanBean exposes getType()/getSize();
        // the original listing never added these fields).
        doc.add(new StringField("type", pan.getType(), Field.Store.YES));
        doc.add(new StringField("size", pan.getSize(), Field.Store.YES));
        writer.addDocument(doc);
        writer.close();
    }

    /**
     * Updates the index entry for a pan resource.
     * @param pan the bean whose document should be replaced
     * @throws Exception
     */
    public void updateIndex(PanBean pan) throws Exception {
        IndexWriter writer = getWriter();
        Document doc = new Document();
        doc.add(new StringField("id", pan.getUuid(), Field.Store.YES));
        doc.add(new TextField("name", pan.getName(), Field.Store.YES));
        doc.add(new StringField("type", pan.getType(), Field.Store.YES));
        doc.add(new StringField("size", pan.getSize(), Field.Store.YES));
        // getUuid() is already a String, so String.valueOf() was redundant
        writer.updateDocument(new Term("id", pan.getUuid()), doc);
        writer.close();
    }

    /**
     * Deletes the index entry for the given pan id.
     * @param panId the id to delete
     * @throws Exception
     */
    public void deleteIndex(String panId) throws Exception {
        IndexWriter writer = getWriter();
        writer.deleteDocuments(new Term("id", panId));
        writer.forceMergeDeletes(); // physically purge the deleted docs
        writer.commit();
        writer.close();
    }

    /**
     * Searches with highlighting.
     * @param q the query string
     * @param n the maximum number of hits
     * @return the matching beans, with the "name" field HTML-highlighted
     * @throws Exception
     */
    public List<PanBean> searchPan(String q, int n) throws Exception {
        dir = FSDirectory.open(Paths.get(LUCENE_PATH));
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher is = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        try {
            QueryParser parser = new QueryParser("name", analyzer);
            Query query = parser.parse(q);
            TopDocs hits = is.search(query, n); // top n hits
            QueryScorer scorer = new QueryScorer(query);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
            highlighter.setTextFragmenter(fragmenter);
            List<PanBean> panList = new LinkedList<PanBean>();
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                PanBean pan = new PanBean();
                pan.setUuid(doc.get("id"));
                pan.setType(doc.get("type"));
                pan.setSize(doc.get("size"));
                String name = doc.get("name");
                pan.setName2(name);
                if (name != null) {
                    TokenStream tokenStream = analyzer.tokenStream("name", new StringReader(name));
                    String hName = highlighter.getBestFragment(tokenStream, name);
                    // fall back to the raw name when no fragment matched
                    pan.setName(StringUtil.isEmpty(hName) ? name : hName);
                }
                panList.add(pan);
            }
            return panList;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            analyzer.close();
            reader.close();
        }
        return null;
    }
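    // Sketch (assumes Lucene 5.4's classic QueryParser; not in the original):
    // QueryParser with StandardAnalyzer splits a CJK query such as "单车"
    // into the unigram terms "单" and "车", and with the parser's default OR
    // operator a document containing either character matches. To require
    // every character of the query to be present:
    //
    //   QueryParser parser = new QueryParser("name", analyzer);
    //   parser.setDefaultOperator(QueryParser.Operator.AND);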
    /**
     * Fuzzy search.
     * @param q the query term
     * @param n the maximum number of hits
     * @return the matching beans, with the "name" field HTML-highlighted
     * @throws Exception
     */
    public List<PanBean> searchMohuPan(String q, int n) throws Exception {
        dir = FSDirectory.open(Paths.get(LUCENE_PATH));
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher is = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        try {
            // FuzzyQuery takes the bare term; the "~" suffix is QueryParser
            // syntax and must not be appended here.
            Query query = new FuzzyQuery(new Term("name", q));
            TopDocs hits = is.search(query, n); // top n hits
            QueryScorer scorer = new QueryScorer(query);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
            highlighter.setTextFragmenter(fragmenter);
            List<PanBean> panList = new LinkedList<PanBean>();
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                PanBean pan = new PanBean();
                pan.setUuid(doc.get("id"));
                String name = doc.get("name");
                pan.setType(doc.get("type"));
                pan.setSize(doc.get("size"));
                pan.setName2(name);
                if (name != null) {
                    TokenStream tokenStream = analyzer.tokenStream("name", new StringReader(name));
                    String hName = highlighter.getBestFragment(tokenStream, name);
                    pan.setName(StringUtil.isEmpty(hName) ? name : hName);
                }
                panList.add(pan);
            }
            return panList;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            analyzer.close();
            reader.close();
        }
        return null;
    }
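    // Sketch (assumes the Lucene 5.4 API; not part of the original answer):
    // highlighting often yields nothing for wildcard/fuzzy queries because
    // the Highlighter sees the unrewritten multi-term query. Rewriting it
    // against the reader first expands it into concrete, highlightable terms:
    //
    //   Query raw = new WildcardQuery(new Term("name", "*" + q + "*"));
    //   Query rewritten = raw.rewrite(reader); // throws IOException
    //   QueryScorer scorer = new QueryScorer(rewritten);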
    /**
     * Searches related resources, without HTML highlighting.
     * @param q the query string
     * @param n the maximum number of hits
     * @return the matching beans
     * @throws Exception
     */
    public List<PanBean> searchRelatedPan(String q, int n) throws Exception {
        dir = FSDirectory.open(Paths.get(LUCENE_PATH));
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher is = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        try {
            QueryParser parser = new QueryParser("name", analyzer);
            Query query = parser.parse(q);
            TopDocs hits = is.search(query, n); // top n hits
            List<PanBean> panList = new LinkedList<PanBean>();
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                PanBean pan = new PanBean();
                pan.setUuid(doc.get("id"));
                pan.setName(doc.get("name"));
                pan.setType(doc.get("type"));
                panList.add(pan);
            }
            return panList;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            analyzer.close();
            reader.close();
        }
        return null;
    }
    /**
     * Exact lookup by type.
     * @param q the type value to match exactly
     * @param n the maximum number of hits
     * @return the matching beans
     * @throws Exception
     */
    public List<PanBean> searchPanByType(String q, int n) throws Exception {
        dir = FSDirectory.open(Paths.get(LUCENE_PATH));
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher is = new IndexSearcher(reader);
        try {
            // TermQuery matches the un-analyzed StringField "type" verbatim
            Query query = new TermQuery(new Term("type", q));
            TopDocs hits = is.search(query, n); // top n hits
            List<PanBean> panList = new LinkedList<PanBean>();
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                PanBean pan = new PanBean();
                pan.setUuid(doc.get("id"));
                pan.setType(doc.get("type"));
                pan.setSize(doc.get("size"));
                pan.setName(doc.get("name"));
                panList.add(pan);
            }
            return panList;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            reader.close();
        }
        return null;
    }
    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();
        System.out.println(new PanIndex().searchPanByType("压缩包", 100).size());
        long end = System.currentTimeMillis();
        System.out.println(end - start); // elapsed ms
    }
}
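One way to see why single-character queries do or do not hit is to dump what the analyzer actually emits for a stored value. StandardAnalyzer tokenizes Han text one character at a time, so "单车" yields the terms "单" and "车"; if single-character queries still return nothing, the index was most likely built with a word-level analyzer such as Jcseg. A minimal sketch, assuming Lucene 5.4:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenDump {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        // Prints one term per line: 单, then 车
        TokenStream ts = analyzer.tokenStream("name", "单车");
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString());
        }
        ts.end();
        ts.close();
        analyzer.close();
    }
}

If the same dump run with the indexing analyzer shows word-level terms like 单车, that field has to be re-indexed with a character-level analyzer before single-character queries can hit.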
u013895992
Lee-ocean: Strange, I am using StandardAnalyzer too. Why don't I get that behavior?
over 2 years ago

My website is http://www.panpk.com
