设为首页 加入收藏

TOP

倒排索引构建算法BSBI和SPIMI(四)
2015-07-24 12:16:29 来源: 作者: 【 】 浏览:766
Tags:索引 构建 算法 BSBI SPIMI
pend("\n"); } try { File file = new File(filePath); PrintStream ps = new PrintStream(new FileOutputStream(file)); ps.print(strBuilder.toString());// 往文件里写入字符串 } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * 将剩余的读缓冲区中的数据读入写缓冲区中 * * @param remainBuffer * 读缓冲区的剩余缓冲 * @param currentReadPos * 当前的读取位置 * @param outputPath * 写缓冲区的写出文件路径 */ private void writeRemainReadBuffer(String[][] remainBuffer, int currentReadPos, String outputPath) { while (remainBuffer[currentReadPos][0] != null && currentReadPos < readBufferSize) { removeRBToWB(remainBuffer[currentReadPos]); currentReadPos++; // 如果写满写缓冲区时,进行写出到文件操作 if (writeBuffer[writeBufferSize - 1][0] != null) { writeOutOperation(writeBuffer, outputPath); } } } /** * 将剩余读缓冲区中的数据通过插入排序的方式插入写缓冲区 * * @param record */ private void removeRBToWB(String[] record) { int insertIndex = 0; int endIndex = 0; long num1; long num2; long code = Long.parseLong(record[0]); // 如果写缓冲区目前为空,则直接加入 if (writeBuffer[0][0] == null) { writeBuffer[0] = record; return; } // 寻找待插入的位置 for (int i = 0; i < writeBufferSize - 1; i++) { if (writeBuffer[i][0] == null) { endIndex = i; break; } num1 = Long.parseLong(writeBuffer[i][0]); if (writeBuffer[i + 1][0] == null) { if (code > num1) { endIndex = i + 1; insertIndex = i + 1; } } else { num2 = Long.parseLong(writeBuffer[i + 1][0]); if (code > num1 && code < num2) { insertIndex = i + 1; } } } // 进行插入操作,相关数据进行位置迁移 for (int i = endIndex; i > insertIndex; i--) { writeBuffer[i] = writeBuffer[i - 1]; } writeBuffer[insertIndex] = record; } /** * 将磁盘中的2个倒排索引数据进行合并 * * @param invertedData1 * 倒排索引为文件数据1 * @param invertedData2 * 倒排索引文件数据2 * @param isSort * 是否需要对缓冲区中的数据进行排序 * @param outputPath * 倒排索引输出文件地址 */ private void mergeInvertedData(ArrayList invertedData1, ArrayList invertedData2, boolean ifSort, String outputPath) { int rIndex1 = 0; int rIndex2 = 0; // 重新初始化缓冲区 initBuffers(); while (invertedData1.size() > 0 && invertedData2.size() > 0) { readBuffer1[rIndex1][0] = invertedData1.get(0)[0]; readBuffer1[rIndex1][1] = invertedData1.get(0)[1]; readBuffer2[rIndex2][0] = invertedData2.get(0)[0]; readBuffer2[rIndex2][1] = invertedData2.get(0)[1]; invertedData1.remove(0); invertedData2.remove(0); rIndex1++; rIndex2++; if (rIndex1 == readBufferSize) { if (ifSort) { wordBufferSort(readBuffer1); wordBufferSort(readBuffer2); } mergeWordBuffers(outputPath); initBuffers(); } } if (ifSort) { wordBufferSort(readBuffer1); wordBufferSort(readBuffer2); } mergeWordBuffers(outputPath); readBuffer1 = new String[readBufferSize][2]; readBuffer2 = new String[readBufferSize][2]; if (invertedData1.size() == 0 && invertedData2.size() > 0) { readRemainDataToRB(invertedData2, outputPath); } else if (invertedData1.size() > 0 && invertedData2.size() == 0) { readRemainDataToRB(invertedData1, outputPath); } } /** * 剩余的有效词数据读入读缓冲区 * * @param remainData * 剩余数据 * @param outputPath * 输出文件路径 */ private void readRemainDataToRB(ArrayList remainData, String outputPath) { int rIndex = 0; while (remainData.size() > 0) { readBuffer1[rIndex][0] = remainData.get(0)[0]; readBuffer1[rIndex][1] = remainData.get(0)[1]; remainData.remove(0); rIndex++; // 读缓冲 区写满,进行写入到写缓冲区中 if (readBuffer1[readBufferSize - 1][0] != null) { wordBufferSort(readBuffer1); writeRemainReadBuffer(readBuffer1, 0, outputPath); initBuffers(); } } wordBufferSort(readBuffer1); writeRemainReadBuffer(readBuffer1, 0, outputPath); } /** * 缓冲区数据进行排序 * * @param buffer * 缓冲空间 */ private void wordBufferSort(String[][] buffer) { String[] temp; int k = 0; long num1 = 0; long num2 = 0;
首页 上一页 1 2 3 4 5 6 7 下一页 尾页 4/7/7
】【打印繁体】【投稿】【收藏】 【推荐】【举报】【评论】 【关闭】 【返回顶部
分享到: 
上一篇通过DAC杀死指定会话 下一篇Power Designer反向工程/正向工程..

评论

帐  号: 密码: (新用户注册)
验 证 码:
表  情:
内  容: