全文检索(二)-基于lucene4.10的增删改查(二)
oc.add(new TextField("content", "内容:我现在是个码农", Store.YES));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "2", Store.YES));
doc.add(new TextField("title", "标题:结束", Store.YES));
doc.add(new TextField("content", "内容:我现在是个lucene开发工程师的专家",
Store.YES));
indexWriter.addDocument(doc);
indexWriter.commit();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
logger.info("索引器发送异常");
} finally {
try {
destroyWriter(indexWriter);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
* 搜索文档
*
* @param keyword
*/
@SuppressWarnings("deprecation")
public static void searchIndex(String keyword) {
IndexReader indexReader = null;
IndexSearcher indexSearcher = null;
try {
// 1.创建Directory 在硬盘上的F:/luence/index下建立索引
Directory dir = FSDirectory.open(new File(indexPath));
// 2.创建IndexReader
indexReader = IndexReader.open(dir);
// 实例化搜索器
indexSearcher = new IndexSearcher(indexReader);
// 使用QueryParser查询分析器构造Query对象
QueryParser parse = new QueryParser(Version.LUCENE_4_10_1,
"content", new IKAnalyzer(false));
// 搜索包含keyword关键字的文档
Query query = parse.parse(keyword.trim());
// 使用lucene构造搜索引擎的时候,如果要针对多个域进行一次性查询
// 这种方法的好处就是可以加权给字段的控制
// 在这四个域中检索
String[] fields = { "phoneType", "name", "category", "price" };
Query querys = new MultiFieldQueryParser(Version.LATEST, fields,
new IKAnalyzer(false)).parse(keyword.trim());
TopDocs results = indexSearcher.search(query, 1000);
// 6.根据TopDocs获取ScoreDoc对象
ScoreDoc[] score = results.scoreDocs;
if (score.length > 0) {
logger.info("查询结果数:" + score.length);
System.out.println("查询结果数:" + score.length);
for (int i = 0; i < score.length; i++) {
// 7.根据Seacher和ScoreDoc对象获取具体的Document对象
Document doc = indexSearcher.doc(score[i].doc);
// 8.根据Document对象获取需要的值
System.out.println(doc.toString());
System.out.println(doc.get("title") + "["
+ doc.get("content") + "]");
}
} else {
}
} catch (Exception e) {
// TODO: handle exception
logger.info("查询结果为空!");
} finally {
if (indexReader != null) {
try {
indexReader.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
/**
* 对搜索返回的前n条结果进行分页显示
*
* @param keyWord
* 查询关键词
* @param pageSize
* 每页显示记录数
* @param currentPage
* 当前页
* @throws ParseException
*/
@SuppressWarnings("deprecation")
public void paginationQuery(String keyWord, int pageSize, int currentPage)
throws IOException, ParseException {
String[] fields = { "title", "content" };
QueryParser queryParser = new MultiFieldQueryParser(Version.LATEST,
fields, new IKAnalyzer());
Query query = queryParser.parse(keyWord.trim());
IndexReader indexReader = IndexReader.open(FSDirectory.open(new File(
indexPath)));
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
// TopDocs 搜索返回的结果
TopDocs topDocs = indexSearcher.search(query, 100);// 只返回前100条记录
TopDocs all = indexSearcher.search(new MatchAllDocsQuery(), 100);
// int totalCount = topDocs.totalHits; // 搜索结果总数量
ScoreDoc[] scoreDocs = topDocs.scoreDocs; // 搜索返回的结果集合
// 查询起始记录位置
int begin = pageSize * (currentPage - 1);
// 查询终止记录位置
int end = Math.min(begin + pageSize, scoreDocs.length);
// 进行分页查询
for (int i = begin; i < end; i++) {
int docID = scoreDocs[i].doc;
System.out.println("docID=" + docID);
Document doc = indexSearcher.doc(docID);
String title = doc.get("title");
System.out.println("title is : " + title);
}
indexReader.close();
}
@SuppressWarnings("deprecation")
public static void highlighterSearch() throws IOException, ParseException, InvalidTokenOffsetsException {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(
indexPath)));
IndexSearcher searcher = new IndexSearcher(reader);
// String []fields={"title","content"};
// QueryParser parser=new MultiFieldQueryParser(Version.LATEST, fields,
// new IKAnalyzer());
// Query query=parser.parse("");
Term term = new Term("content", "lucene");
TermQuery query = new TermQuery(term);
TopDocs topdocs = searcher.search(query, Integer.MAX_VALUE);
ScoreDoc[] scoreDoc = topdocs.scoreDocs;
System.out.println("查询结果总数:" + topdocs.totalHits);
System.out.println("最大的评分:" + topdocs.getMaxScore());
for(int i=0;i
", "");
Highlighter highlighter=new Highlighter(formatter, new QueryScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(content.length()));
if(!"".equals(content)){
TokenStream tokenstream=new IKAnalyzer().tokenStream(content, new StringReader(content));
String highLightText = highlighter.getBestFragment(tokenstream,content);
System.out.println("高亮显示第 " + (i + 1) + " 条检索结果如下所示:");
System.out.println(highLightText);
/*End:结束关键字高亮*/
System.out.println("文件内容:"+content);
System.out.println("匹配相关度:"+scoreDoc[i].score);
}
}
}
/**
* 获取indexWriter对象---获取索引器
*
* @param dir
* @param analyer
* @return
* @throws IOException
*/
private static IndexWriter getIndexWriter(Analyzer analyzer)
throws IOException {
File indexFile = new File(indexPath);
if (!indexFile.exists())
indexFile.mkdir();// 索引库不存在 则新建一个
Directory directory = FSDirectory.open(indexFile);
// Directory directory = new RAMDirectory(); //在内存中建立索引
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_1,
analyzer);
LogMergePolicy mergePolicy = new LogDocMergePolicy();
// 索引基本配置
// 设置segment添加文档(Document)时的合并频率
// 值较小,建立索引的速度就较慢
// 值较大,建立索引的速度就较快,>10适合批量建立索引
mergePolicy.setMergeFactor(30);
// 设置segment最大合并文档(Document)数
// 值较小有利于追加索引的速度
// 值较大,适合批量建立索引和更快的搜索
mergePolicy.setMaxMergeDocs(5000);
conf.setMaxBufferedDocs(10000);
conf.setMergePolicy(mergePolicy);
conf.setRAMBufferSizeMB(64);
conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
if (IndexWriter.isLocked(directory)) {// ?
IndexWriter.unlock(directory);
}
IndexWriter indexWriter = new IndexWriter(directory, conf);
return indexWriter;
}
/**
* 销毁writer
*
* @param writer
* @throws IOException
*/
private static void destroyWriter(IndexWriter indexWriter)
throws IOException {
if (indexWriter != null) {
indexWriter.close();
}
}
/**
* 批量删除
*
* @param list
* @throws IOException
*/
public static void deleteIndexs(List
list) throws IOException { if (list == null || list.size() > 0) { logger.debug("beans is null"); return; } for (SearchBean bean : list) { deleteIndex(bean); } } /** * 删除单个索引 --不会立刻删除,生成.del文件 * * @param bean * @throws IOException */ private static void deleteIndex(SearchBean bean) throws IOException { // if(bean==null){ // logger.debug("Get search bean is empty!"); // return; // } IndexWriter indexWriter = getIndexWriter(new IKAnalyzer()); // 参数是一个选项,可以是一个Query,也可以是一个term,term是一个精确查找的值 // 这里删除id=1的文档,还会留在”回收站“。xxx.del indexWriter.deleteDocuments(new Term("id", "1")); destroyWriter(indexWriter); } /** * 查询文档 */ @Suppre