|
pend("\n");
}
try {
File file = new File(filePath);
PrintStream ps = new PrintStream(new FileOutputStream(file));
ps.print(strBuilder.toString());// 往文件里写入字符串
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
/**
 * Moves the remaining records of a read buffer into the write buffer.
 * <p>
 * Starting at {@code currentReadPos}, each record is inserted into
 * {@code writeBuffer} via {@link #removeRBToWB(String[])} until either the
 * end of the read buffer or the first empty slot (a row whose first element
 * is {@code null}) is reached. Whenever the last slot of the write buffer
 * becomes occupied, the write buffer is written out to {@code outputPath}.
 *
 * @param remainBuffer
 *            read buffer holding the records that are still pending
 * @param currentReadPos
 *            index of the first record in {@code remainBuffer} to process
 * @param outputPath
 *            path of the file the write buffer is written out to
 */
private void writeRemainReadBuffer(String[][] remainBuffer,
        int currentReadPos, String outputPath) {
    // The bounds check has to come first: with a completely full read buffer
    // the position reaches readBufferSize, and indexing the array before
    // checking the bound would throw ArrayIndexOutOfBoundsException.
    while (currentReadPos < readBufferSize
            && remainBuffer[currentReadPos][0] != null) {
        removeRBToWB(remainBuffer[currentReadPos]);
        currentReadPos++;

        // Once the write buffer is full, write it out to the file.
        // NOTE(review): the write buffer is not reset here afterwards;
        // confirm that writeOutOperation takes care of emptying it.
        if (writeBuffer[writeBufferSize - 1][0] != null) {
            writeOutOperation(writeBuffer, outputPath);
        }
    }
}
/**
 * Inserts one record from the read buffer into the write buffer, keeping
 * the write buffer ordered ascending by the numeric key in element 0.
 * <p>
 * Occupied slots of {@code writeBuffer} are expected to be contiguous from
 * index 0 and already sorted; the first row whose key is {@code null} is
 * treated as the end of the occupied region. A record whose key equals an
 * existing key is placed after the existing entries with that key.
 *
 * @param record
 *            the record to insert; {@code record[0]} must be a decimal
 *            {@code long}
 * @throws NumberFormatException
 *             if a key cannot be parsed as a {@code long}
 * @throws IllegalStateException
 *             if the write buffer has no free slot left (previously an
 *             existing record was silently overwritten in that case)
 */
private void removeRBToWB(String[] record) {
    long code = Long.parseLong(record[0]);

    // Locate the first free slot. The whole buffer is scanned, including the
    // very last slot, so a buffer with exactly one free slot is handled.
    int endIndex = 0;
    while (endIndex < writeBufferSize && writeBuffer[endIndex][0] != null) {
        endIndex++;
    }
    if (endIndex == writeBufferSize) {
        throw new IllegalStateException(
                "write buffer is full; it must be written out before inserting");
    }

    // Insertion step: shift every larger key one slot towards the free slot,
    // then drop the record into the gap. Rows are moved by reference.
    int insertIndex = endIndex;
    while (insertIndex > 0
            && Long.parseLong(writeBuffer[insertIndex - 1][0]) > code) {
        writeBuffer[insertIndex] = writeBuffer[insertIndex - 1];
        insertIndex--;
    }
    writeBuffer[insertIndex] = record;
}
/**
 * Merges two sets of inverted-index records into one output file.
 * <p>
 * Records are taken from the heads of both lists in lock-step and copied
 * into {@code readBuffer1} and {@code readBuffer2}. Each time the read
 * buffers are full they are optionally sorted, merged through
 * {@code mergeWordBuffers}, and all buffers are re-initialised. When one
 * list runs out, the partially filled buffers are merged and the records
 * left in the other list are handed to {@code readRemainDataToRB}.
 * <p>
 * Both input lists are consumed: every processed record is removed from
 * its list.
 *
 * @param invertedData1
 *            records of the first inverted-index file; elements 0 and 1 of
 *            each record are copied into the read buffer
 * @param invertedData2
 *            records of the second inverted-index file, same layout
 * @param ifSort
 *            whether the read buffers are sorted before each merge
 * @param outputPath
 *            path of the merged inverted-index output file
 */
private void mergeInvertedData(ArrayList<String[]> invertedData1,
        ArrayList<String[]> invertedData2, boolean ifSort, String outputPath) {
    // Both read buffers are filled in lock-step, so one index serves both.
    int rIndex = 0;

    // Start from freshly initialised buffers.
    initBuffers();

    while (!invertedData1.isEmpty() && !invertedData2.isEmpty()) {
        String[] record1 = invertedData1.remove(0);
        String[] record2 = invertedData2.remove(0);

        readBuffer1[rIndex][0] = record1[0];
        readBuffer1[rIndex][1] = record1[1];
        readBuffer2[rIndex][0] = record2[0];
        readBuffer2[rIndex][1] = record2[1];
        rIndex++;

        if (rIndex == readBufferSize) {
            if (ifSort) {
                wordBufferSort(readBuffer1);
                wordBufferSort(readBuffer2);
            }
            mergeWordBuffers(outputPath);
            initBuffers();
            // The buffers are empty again, so filling restarts at slot 0.
            // Without this reset the next record would be written past the
            // end of the read buffers.
            rIndex = 0;
        }
    }

    // Merge whatever is left in the partially filled read buffers.
    if (ifSort) {
        wordBufferSort(readBuffer1);
        wordBufferSort(readBuffer2);
    }
    mergeWordBuffers(outputPath);

    // Only the read buffers are reset here; the write buffer is left as is.
    readBuffer1 = new String[readBufferSize][2];
    readBuffer2 = new String[readBufferSize][2];

    // At most one list still has records; pass them on.
    if (invertedData1.isEmpty() && !invertedData2.isEmpty()) {
        readRemainDataToRB(invertedData2, outputPath);
    } else if (!invertedData1.isEmpty() && invertedData2.isEmpty()) {
        readRemainDataToRB(invertedData1, outputPath);
    }
}
/**
 * Reads the remaining records into the read buffer and forwards them to
 * the write buffer.
 * <p>
 * Records are removed from the head of {@code remainData} and copied into
 * {@code readBuffer1}. Each time the read buffer is full it is sorted,
 * passed to {@code writeRemainReadBuffer}, and the buffers are
 * re-initialised. After the list is exhausted the partially filled read
 * buffer is processed the same way.
 *
 * @param remainData
 *            the records still to be processed; the list is emptied by
 *            this call
 * @param outputPath
 *            path of the output file
 */
private void readRemainDataToRB(ArrayList<String[]> remainData,
        String outputPath) {
    int rIndex = 0;

    while (!remainData.isEmpty()) {
        String[] record = remainData.remove(0);
        readBuffer1[rIndex][0] = record[0];
        readBuffer1[rIndex][1] = record[1];
        rIndex++;

        // Read buffer is full: move its contents on to the write buffer.
        if (rIndex == readBufferSize) {
            wordBufferSort(readBuffer1);
            writeRemainReadBuffer(readBuffer1, 0, outputPath);
            initBuffers();
            // Filling restarts at slot 0 of the fresh buffer. Without this
            // reset the next record would be written past the buffer's end.
            rIndex = 0;
        }
    }

    // Process the final, possibly partially filled, read buffer.
    wordBufferSort(readBuffer1);
    writeRemainReadBuffer(readBuffer1, 0, outputPath);
}
/**
* 缓冲区数据进行排序
*
* @param buffer
* 缓冲空间
*/
private void wordBufferSort(String[][] buffer) {
String[] temp;
int k = 0;
long num1 = 0;
long num2 = 0;
|