设为首页 加入收藏

TOP

外部排序(二)
2014-11-23 23:33:40 来源: 作者: 【 】 浏览:6
Tags:外部 排序
;
BUF_SIZE = PAGE_SIZE*BUF_PAGES;
num_runs = data_size / PAGE_SIZE; /* 初始时的归并段数量,每个归并段有4096 byte, 即1024个整数 */
buffer = (int *)malloc(BUF_SIZE);

run_length = 1;
run_t **runs = (run_t **)malloc(sizeof(run_t *)*(K+1));
for (i = 0; i < K; i++) {
runs[i] = (run_t *)malloc(sizeof(run_t));
runs[i]->buf = (int *)calloc(1, BUF_SIZE+4);
}
while (num_runs > 1) {
num_merges = num_runs / K;
int left_runs = num_runs % K;
if(left_runs > 0) num_merges++;
for (i = 0; i < num_merges; i++) {
num_runs_in_merge = K;
if ((i+1) == num_merges && left_runs > 0) {
num_runs_in_merge = left_runs;
}
int base = 0;
printf("Merge %d of %d,%d ways\n", i, num_merges, num_runs_in_merge);
for (j = 0; j < num_runs_in_merge; j++) {
if (run_length == 1) {
base = 1;
bytes = read(fd, runs[j]->buf, PAGE_SIZE);
runs[j]->length = bytes/sizeof(int);
quick_sort(runs[j]->buf, 0, runs[j]->length-1);
} else {
snprintf(filename, 20, "%s%d.dat", input_prefix, i*K+j);
int infd = open(filename, O_RDONLY);
bytes = read(infd, runs[j]->buf, BUF_SIZE);
runs[j]->length = bytes/sizeof(int);
close(infd);
}
runs[j]->idx = 0;
runs[j]->offset = bytes;
}
k_merge(runs, input_prefix, num_runs_in_merge, base, i);
}

strcpy(filename, output_prefix);
strcpy(output_prefix, input_prefix);
strcpy(input_prefix, filename);

run_length *= K;
num_runs = num_merges;
}

for (i = 0; i < K; i++) {
free(runs[i]->buf);
free(runs[i]);
}
free(runs);
free(buffer);
close(fd);

long end_usecs = get_time_usecs();
double secs = (double)(end_usecs - start_usecs) / (double)1000000;
printf("Sorting took %.02f seconds.\n", secs);
printf("sorting result saved in %s%d.dat.\n", input_prefix, 0);

return 0;
}

void k_merge(run_t** runs, char* input_prefix, int num_runs, int base, int n_merge)
{
int bp, bytes, output_fd;
int live_runs = num_runs;
run_t *mr;
char filename[20];

bp = 0;
create_loser_tree(runs, num_runs);

snprintf(filename, 100, "%s%d.dat", output_prefix, n_merge);
output_fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC,
S_IRWXU|S_IRWXG);
if (output_fd < 0) {
printf("create file %s fail\n", filename);
exit(0);
}

while (live_runs > 0) {
mr = runs[ls[0]];
buffer[bp++] = mr->buf[mr->idx++];
// 输出缓冲区已满
if (bp*4 == BUF_SIZE) {
bytes = write(output_fd, buffer, BUF_SIZE);
bp = 0;
}
// mr的输入缓冲区用完
if (mr->idx == mr->length) {
snprintf(filename, 20, "%s%d.dat", input_prefix, ls[0]+n_merge*K);
if (base) {
mr->buf[mr->idx] = MAX_INT;
live_runs--;
} else {
int fd = open(filename, O_RDONLY);
lseek(fd, mr->offset, SEEK_SET);
bytes = read(fd, mr->buf, BUF_SIZE);
close(fd);
if (by
首页 上一页 1 2 3 下一页 尾页 2/3/3
】【打印繁体】【投稿】【收藏】 【推荐】【举报】【评论】 【关闭】 【返回顶部
分享到: 
上一篇c语言算术运算符和算术表达式整理.. 下一篇 c语言中各种数据类型的长度

评论

帐  号: 密码: (新用户注册)
验 证 码:
表  情:
内  容: