?
?
package InvertedIndex;
import java.util.ArrayList;
/**
 * Document class: holds a document's identifier, its file path, and the
 * effective words it contains.
 *
 * @author lyq
 */
public class Document {
    // Unique identifier of the document. Remains at the int default (0) when
    // the two-argument constructor is used.
    int docId;
    // File path of the document.
    String filePath;
    // Effective words contained in the document.
    ArrayList<String> effectWords;

    /**
     * Creates a document without assigning an explicit identifier.
     *
     * @param effectWords effective words of the document; the list is stored
     *                    by reference, not copied
     * @param filePath    file path of the document
     */
    public Document(ArrayList<String> effectWords, String filePath) {
        this.effectWords = effectWords;
        this.filePath = filePath;
    }

    /**
     * Creates a document with an explicit identifier.
     *
     * @param effectWords effective words of the document; the list is stored
     *                    by reference, not copied
     * @param filePath    file path of the document
     * @param docId       unique identifier of the document
     */
    public Document(ArrayList<String> effectWords, String filePath, int docId) {
        this(effectWords, filePath);
        this.docId = docId;
    }
}
BSBI算法工具类 BSBITool.java:

package InvertedIndex;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
/**
* BSBI基于磁盘的外部排序算法
*
* @author lyq
*
*/
public class BSBITool {
// 文档唯一标识ID
public static int DOC_ID = 0;
// 读缓冲区的大小
private int readBufferSize;
// 写缓冲区的大小
private int writeBufferSize;
// 读入的文档的有效词文件地址
private ArrayList effectiveWordFiles;
// 倒排索引输出文件地址
private String outputFilePath;
// 读缓冲 1
private String[][] readBuffer1;
// 读缓冲2
private String[][] readBuffer2;
// 写缓冲区
private String[][] writeBuffer;
// 有效词与hashcode的映射
private Map code2word;
public BSBITool(ArrayList effectiveWor