Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d6c0a5f

Browse files
tomershafir and lukel97 authored
[compare.py] Add --coefficient-variation option (#372)
* [compare.py] Add --coefficient-variation option This patch adds a `--coefficient-variation` option to compare.py to report the coefficient of variation (CV) statistic instead of the plain stddev. It is only active under the `--statistics` option. It should be useful when the benchmark workloads have multiple heterogeneous units or scales; in that case the relative spread, calculated as `stddev / mean`, can be more readable than an absolute stddev. Note: the default stddev can be better for other cases, where scores are homogeneous or near zero (where CV can be sensitive). The implementation follows the current statistics implementation (which is tightly coupled to the arithmetic mean), reporting CV as a percentage. * inf->nan * use a more pythonic `+=` instead of `.extend()` Co-authored-by: Luke Lau <[email protected]> --------- Co-authored-by: Luke Lau <[email protected]>
1 parent fcd8624 commit d6c0a5f

1 file changed

Lines changed: 51 additions & 15 deletions

File tree

utils/compare.py

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ def merge_values(values, merge_function):
130130
def get_values(values, lhs_name=None, rhs_name=None):
131131
exclude_cols = ["diff", "t-value", "p-value", "significant"]
132132
exclude_cols.extend([f'std_{lhs_name}', f'std_{rhs_name}'])
133+
exclude_cols.extend([f'cv_{lhs_name}', f'cv_{rhs_name}'])
133134
values = values[[c for c in values.columns if c not in exclude_cols]]
134135
has_two_runs = len(values.columns) == 2
135136
if has_two_runs:
@@ -162,7 +163,7 @@ def add_diff_column(metric, values, absolute_diff=False):
162163
return values
163164

164165

165-
def compute_statistics(lhs_d, rhs_d, metrics, alpha, lhs_name, rhs_name):
166+
def compute_statistics(lhs_d, rhs_d, metrics, alpha, coef_var, lhs_name, rhs_name):
166167
stats_dict = {}
167168

168169
for metric in metrics:
@@ -178,24 +179,47 @@ def compute_statistics(lhs_d, rhs_d, metrics, alpha, lhs_name, rhs_name):
178179

179180
# Compute t-test if we have enough samples
180181
if len(lhs_values) >= 2 and len(rhs_values) >= 2:
181-
stats_dict[metric][program] = {
182-
f'std_{lhs_name}': lhs_values.std(ddof=1),
183-
f'std_{rhs_name}': rhs_values.std(ddof=1),
184-
}
182+
lhs_std = lhs_values.std(ddof=1)
183+
rhs_std = rhs_values.std(ddof=1)
184+
if coef_var:
185+
lhs_mean = lhs_values.mean()
186+
rhs_mean = rhs_values.mean()
187+
stats_dict[metric][program] = {
188+
f'cv_{lhs_name}': lhs_std / lhs_mean if lhs_mean != 0 else float('nan'),
189+
f'cv_{rhs_name}': rhs_std / rhs_mean if rhs_mean != 0 else float('nan'),
190+
}
191+
else:
192+
stats_dict[metric][program] = {
193+
f'std_{lhs_name}': lhs_std,
194+
f'std_{rhs_name}': rhs_std,
195+
}
185196
t_stat, p_val = stats.ttest_ind(lhs_values, rhs_values)
186197
stats_dict[metric][program]['t-value'] = t_stat
187198
stats_dict[metric][program]['p-value'] = p_val
188199
stats_dict[metric][program]['significant'] = "Y" if p_val < alpha else "N"
189200
else:
190-
stats_dict[metric][program] = {
191-
f'std_{lhs_name}': float('nan'),
192-
f'std_{rhs_name}': float('nan'),
193-
't-value': float('nan'),
194-
'p-value': float('nan'),
195-
'significant': ""
196-
}
201+
if coef_var:
202+
stats_dict[metric][program] = {
203+
f'cv_{lhs_name}': float('nan'),
204+
f'cv_{rhs_name}': float('nan')
205+
}
206+
else:
207+
stats_dict[metric][program] = {
208+
f'std_{lhs_name}': float('nan'),
209+
f'std_{rhs_name}': float('nan')
210+
}
211+
stats_dict[metric][program]['t-value'] = float('nan')
212+
stats_dict[metric][program]['p-value'] = float('nan')
213+
stats_dict[metric][program]['significant'] = ""
214+
215+
stat_col_names = []
216+
if coef_var:
217+
stat_col_names += [f'cv_{lhs_name}', f'cv_{rhs_name}']
218+
else:
219+
stat_col_names += [f'std_{lhs_name}', f'std_{rhs_name}']
220+
stat_col_names += ['t-value', 'p-value', 'significant']
197221

198-
return stats_dict
222+
return stats_dict, stat_col_names
199223

200224

201225
def add_precomputed_statistics(data, stats_dict, stat_col_names):
@@ -369,6 +393,10 @@ def print_result(
369393
formatters[(m, f'std_{lhs_name}')] = lambda x: "%.3f" % x if not pd.isna(x) else ""
370394
if (m, f'std_{rhs_name}') in dataout.columns:
371395
formatters[(m, f'std_{rhs_name}')] = lambda x: "%.3f" % x if not pd.isna(x) else ""
396+
if (m, f'cv_{lhs_name}') in dataout.columns:
397+
formatters[(m, f'cv_{lhs_name}')] = lambda x: "%4.1f%%" % (x * 100) if not pd.isna(x) else ""
398+
if (m, f'cv_{rhs_name}') in dataout.columns:
399+
formatters[(m, f'cv_{rhs_name}')] = lambda x: "%4.1f%%" % (x * 100) if not pd.isna(x) else ""
372400
# Turn index into a column so we can format it...
373401
formatted_program = dataout.index.to_series()
374402
if shorten_names:
@@ -419,6 +447,7 @@ def float_format(x):
419447
print(out)
420448
exclude_from_summary = ["t-value", "p-value", "significant"]
421449
exclude_from_summary.extend([f'std_{lhs_name}', f'std_{rhs_name}'])
450+
exclude_from_summary.extend([f'cv_{lhs_name}', f'cv_{rhs_name}'])
422451
d_summary = d.drop(columns=exclude_from_summary, level=1, errors='ignore')
423452
print(d_summary.describe())
424453

@@ -528,6 +557,13 @@ def main():
528557
default=False,
529558
help="Show only significant results when used with --statistics",
530559
)
560+
parser.add_argument(
561+
"--coefficient-variation",
562+
action="store_true",
563+
dest="coefficient_variation",
564+
default=False,
565+
help="Compute relative coefficient of variation (%%) rather than absolute stddev",
566+
)
531567
config = parser.parse_args()
532568

533569
if config.show_diff is None:
@@ -567,13 +603,13 @@ def main():
567603
# Compute statistics on raw data before merging (if requested)
568604
if config.statistics:
569605
metrics_for_stats = config.metrics if len(config.metrics) > 0 else get_default_metric(lhs_d, rhs_d)
570-
stats_dict = compute_statistics(
606+
stats_dict, stat_col_names = compute_statistics(
571607
lhs_d, rhs_d, metrics_for_stats,
572608
alpha=config.alpha,
609+
coef_var=config.coefficient_variation,
573610
lhs_name=config.lhs_name,
574611
rhs_name=config.rhs_name
575612
)
576-
stat_col_names = [f'std_{config.lhs_name}', f'std_{config.rhs_name}', 't-value', 'p-value', 'significant']
577613

578614
# Merge data
579615
lhs_merged = merge_values(lhs_d, config.merge_function)

0 commit comments

Comments
 (0)