; BUF_SIZE = PAGE_SIZE*BUF_PAGES; num_runs = data_size / PAGE_SIZE; /* 初始时的归并段数量,每个归并段有4096 byte, 即1024个整数 */ buffer = (int *)malloc(BUF_SIZE); run_length = 1; run_t **runs = (run_t **)malloc(sizeof(run_t *)*(K+1)); for (i = 0; i < K; i++) { runs[i] = (run_t *)malloc(sizeof(run_t)); runs[i]->buf = (int *)calloc(1, BUF_SIZE+4); } while (num_runs > 1) { num_merges = num_runs / K; int left_runs = num_runs % K; if(left_runs > 0) num_merges++; for (i = 0; i < num_merges; i++) { num_runs_in_merge = K; if ((i+1) == num_merges && left_runs > 0) { num_runs_in_merge = left_runs; } int base = 0; printf("Merge %d of %d,%d ways\n", i, num_merges, num_runs_in_merge); for (j = 0; j < num_runs_in_merge; j++) { if (run_length == 1) { base = 1; bytes = read(fd, runs[j]->buf, PAGE_SIZE); runs[j]->length = bytes/sizeof(int); quick_sort(runs[j]->buf, 0, runs[j]->length-1); } else { snprintf(filename, 20, "%s%d.dat", input_prefix, i*K+j); int infd = open(filename, O_RDONLY); bytes = read(infd, runs[j]->buf, BUF_SIZE); runs[j]->length = bytes/sizeof(int); close(infd); } runs[j]->idx = 0; runs[j]->offset = bytes; } k_merge(runs, input_prefix, num_runs_in_merge, base, i); } strcpy(filename, output_prefix); strcpy(output_prefix, input_prefix); strcpy(input_prefix, filename); run_length *= K; num_runs = num_merges; } for (i = 0; i < K; i++) { free(runs[i]->buf); free(runs[i]); } free(runs); free(buffer); close(fd); long end_usecs = get_time_usecs(); double secs = (double)(end_usecs - start_usecs) / (double)1000000; printf("Sorting took %.02f seconds.\n", secs); printf("sorting result saved in %s%d.dat.\n", input_prefix, 0); return 0; } void k_merge(run_t** runs, char* input_prefix, int num_runs, int base, int n_merge) { int bp, bytes, output_fd; int live_runs = num_runs; run_t *mr; char filename[20]; bp = 0; create_loser_tree(runs, num_runs); snprintf(filename, 100, "%s%d.dat", output_prefix, n_merge); output_fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, S_IRWXU|S_IRWXG); if (output_fd < 0) { printf("create file %s fail\n", filename); exit(0); } while (live_runs > 0) { mr = runs[ls[0]]; buffer[bp++] = mr->buf[mr->idx++]; // 输出缓冲区已满 if (bp*4 == BUF_SIZE) { bytes = write(output_fd, buffer, BUF_SIZE); bp = 0; } // mr的输入缓冲区用完 if (mr->idx == mr->length) { snprintf(filename, 20, "%s%d.dat", input_prefix, ls[0]+n_merge*K); if (base) { mr->buf[mr->idx] = MAX_INT; live_runs--; } else { int fd = open(filename, O_RDONLY); lseek(fd, mr->offset, SEEK_SET); bytes = read(fd, mr->buf, BUF_SIZE); close(fd); if (by |