|
1 |
| -# This is a sample Python script. |
| 1 | +""" |
| 2 | +比较 3 种中位数选择算法的性能 |
| 3 | + - 算法 1:排序后选择 |
| 4 | + - 算法 2: 确定型中位数线性时间选择 (BFPRT) |
| 5 | + - 算法 3: 中位数选择随机算法 |
2 | 6 |
|
3 |
| -# Press Shift+F10 to execute it or replace it with your code. |
4 |
| -# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings. |
| 7 | +实验内容: |
| 8 | + - 实现三种算法 |
| 9 | + - 数据集自己寻找或生成 |
| 10 | + - 运行时间比较,准确度比较 |
| 11 | + - 扩展性比较 |
| 12 | + - 以恰当、准确、规范的形式表述实验结果 |
| 13 | +""" |
5 | 14 |
|
| 15 | +import time |
| 16 | +import numpy as np |
| 17 | +import matplotlib.pyplot as plt |
6 | 18 |
|
7 |
| -def print_hi(name): |
8 |
| - # Use a breakpoint in the code line below to debug your script. |
9 |
| - print(f'Hi, {name}') # Press Ctrl+F8 to toggle the breakpoint. |
| 19 | +from sort_select import sort_select |
| 20 | +from bfprt_select import bfprt_select |
| 21 | +from lazy_select import lazy_select |
| 22 | +from gen_data import gen_data |
10 | 23 |
|
11 | 24 |
|
12 |
| -# Press the green button in the gutter to run the script. |
13 |
| -if __name__ == '__main__': |
14 |
| - print_hi('PyCharm') |
def run_select(arr: list, k_list: list, func) -> list:
    """Apply a selection function to ``arr`` once per requested rank.

    Args:
        arr: The data handed unchanged to every ``func`` call.
        k_list: The ranks to query; one call is made per entry, in order.
        func: A callable invoked as ``func(arr, k)``.

    Returns:
        The values returned by ``func``, in the same order as ``k_list``.
    """
    return [func(arr, k) for k in k_list]
15 | 31 |
|
16 |
| -# See PyCharm help at https://www.jetbrains.com/help/pycharm/ |
| 32 | + |
def test_all_on_data(arr: list, k_list: list):
    """Time the three selection algorithms on the same data and queries.

    Each algorithm is run through ``run_select`` over every entry of
    ``k_list``. The algorithms run in the order sort_select, bfprt_select,
    lazy_select, and their result lists are compared afterwards; a warning
    is printed if they differ.

    Args:
        arr: The data to select from.
        k_list: The ranks to query with each algorithm.

    Returns:
        A list of three elapsed durations in seconds, one per algorithm in
        the order above. Each duration covers all queries in ``k_list``.
    """
    run_time = []
    results = []
    for select in (sort_select, bfprt_select, lazy_select):
        # perf_counter is the clock intended for measuring short intervals:
        # it is monotonic and uses the highest resolution available, whereas
        # time.time() can be coarse and may jump when the system clock is
        # adjusted.
        start_time = time.perf_counter()
        results.append(run_select(arr, k_list, select))
        run_time.append(time.perf_counter() - start_time)

    sort_select_result, bfprt_select_result, lazy_select_result = results
    # The sort-based answer serves as the reference for the other two.
    if (sort_select_result != bfprt_select_result) or (sort_select_result != lazy_select_result):
        print("Results are not equal!")

    return run_time
| 52 | + |
| 53 | + |
def test(data_type: str, n_list: list, iter_num: int):
    """Benchmark the three selection algorithms across data sizes and plot.

    For every size in ``n_list`` the data and queries come from
    ``gen_data(data_type, n, iter_num)`` and are timed with
    ``test_all_on_data``. Each total is divided by ``iter_num`` before being
    plotted against the data size.

    Args:
        data_type: Forwarded to ``gen_data`` and shown in the plot title.
        n_list: The data sizes to benchmark; also used as the x axis.
        iter_num: Forwarded to ``gen_data`` and used as the divisor for the
            per-size timings (presumably the number of queries generated;
            confirm against ``gen_data``).
    """
    labels = ("sort_select", "bfprt_select", "lazy_select")
    # One timing series per algorithm, in the order test_all_on_data reports.
    series = [[] for _ in labels]

    for size in n_list:
        data, queries = gen_data(data_type, size, iter_num)
        totals = test_all_on_data(data, queries)
        for idx, bucket in enumerate(series):
            bucket.append(totals[idx] / iter_num)

    figure = plt.figure(dpi=400)
    axes = figure.add_subplot(111)
    for label, timings in zip(labels, series):
        axes.plot(n_list, timings, label=label)
    axes.set_xlabel("Data Size")
    axes.set_ylabel("Run Time")
    heading = "Run Time of Three Select Algorithms on " + data_type + " Data"
    axes.set_title(heading.title())
    axes.legend()
    plt.show()
| 72 | + |
| 73 | + |
def test_theta(n: int, iter_num: int):
    """Plot lazy_select's average run time as its theta argument varies.

    100 evenly spaced theta values from 0.5 to 1 (inclusive) are tried. For
    each one, ``gen_data("uniform", n, iter_num)`` is called again, every
    returned ``k`` is passed to ``lazy_select(arr, k, theta)``, and the
    elapsed time is divided by ``iter_num``.

    Args:
        n: Data size forwarded to ``gen_data``.
        iter_num: Forwarded to ``gen_data`` and used as the divisor for the
            timing (presumably the number of queries generated; confirm
            against ``gen_data``).
    """
    theta_list = np.linspace(0.5, 1, 100).tolist()
    run_times = []
    for theta in theta_list:
        # Data generation happens before the clock starts, so only the
        # selection calls are timed.
        arr, k_list = gen_data("uniform", n, iter_num)
        # perf_counter is monotonic and high resolution, unlike time.time(),
        # which can be too coarse for runs this short.
        start_time = time.perf_counter()
        for k in k_list:
            lazy_select(arr, k, theta)
        run_times.append((time.perf_counter() - start_time) / iter_num)

    fig = plt.figure(dpi=400)
    ax = fig.add_subplot(111)
    ax.plot(theta_list, run_times)
    ax.set_xlabel("Theta")
    ax.set_ylabel("Run Time")
    ax.set_title("Run Time of Lazy Select Algorithm on Different Theta")
    plt.show()
| 91 | + |
| 92 | + |
def main():
    """Run the size-scaling benchmarks, then the theta sweep."""
    repeats = 3  # passed as iter_num to both experiments

    # Performance and scalability of the three algorithms:
    # 20 evenly spaced data sizes from 10000 to 100000.
    sizes = np.linspace(10000, 100000, 20, dtype=int).tolist()
    for distribution in ("uniform", "normal", "zipf"):
        test(distribution, sizes, repeats)

    # Effect of the randomized algorithm's key parameter theta on run time.
    test_theta(10000, repeats)


if __name__ == "__main__":
    main()
0 commit comments