今天用 Lucene 完成了一个简单的 Web 应用，并从中提取了早期编写的一个测试类。首先简单介绍一下 Lucene 的几个常用包：
Lucene 包的组成结构：对于外部应用来说，索引模块（index）和检索模块（search）是主要的入口。
org.apache.lucene.search/ 搜索入口
org.apache.lucene.index/ 索引入口
org.apache.lucene.analysis/ 语言分析器
org.apache.lucene.queryparser/ 查询分析器
org.apache.lucene.document/ 存储结构
org.apache.lucene.store/ 底层IO/存储结构
org.apache.lucene.util/ 一些公用的数据结构
话不多说，直接上代码（这是早期封装的一个测试类，封装得还算比较完善，有兴趣的朋友可以在此基础上继续完善）：
package com.lucene.util;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.message.base.search.SearchBean;
/**
* lucene 4.10.1
*
* @creatTime 2014-10-28
* @author 胡慧超
*
*/
public class HhcIndexTools {
/** Class-wide log4j logger, obtained for {@code HhcIndexTools}. */
private static final Logger logger = Logger.getLogger(HhcIndexTools.class);

/**
 * Path string for the index location.
 * NOTE(review): presumably the on-disk directory where the Lucene index is
 * stored; its usage is outside this view - confirm against the callers.
 */
private static String indexPath = "E://lucene//index";
/**
 * Manual test driver for this class.
 *
 * <p>Calls {@code forceDeleteIndex()}, {@code query()} and
 * {@code highlighterSearch()} in that order. Any exception raised by those
 * calls is caught here and reported through the class logger, so the JVM
 * exits normally instead of propagating the failure.
 *
 * <p>The original author also left commented-out calls to
 * {@code createIndex()}, {@code searchIndex(...)}, {@code query()} and
 * {@code deleteIndex(null)} at the top of this method; swap them in as
 * needed when exercising other operations.
 *
 * <p>NOTE(review): judging by its name, {@code forceDeleteIndex()} presumably
 * removes index data - confirm before running this against a real index.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    try {
        forceDeleteIndex();
        query();
        highlighterSearch();
    } catch (Exception e) {
        // Report through the class logger (with the cause attached) rather
        // than printStackTrace(), so failures follow the log4j configuration.
        logger.error("HhcIndexTools test run failed", e);
    }
}
/**
* 创建索引
*/
public static void createIndex() {
// 最细粒切分算法--true的话是 智能切分
Analyzer analyzer = new IKAnalyzer(false);
Document doc = null;
IndexWriter indexWriter = null;
try {
indexWriter = getIndexWriter(analyzer);
// 添加索引
doc = new Document();
doc.add(new StringField("id", "1", Store.YES));
doc.add(new TextField("title", "标题:开始", Store.YES));
d