react/scripts/bench/analyze.py
2017-09-25 17:02:43 -07:00

110 lines
3.2 KiB
Python
Executable File

#!/usr/bin/env python
# Copyright (c) 2015-present, Facebook, Inc.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import math
import sys
import numpy as np
import numpy.random as npr
import scipy.stats
def _bootstrap_mean_sem(samples, num_resamples=10000):
    """Return the estimated standard error for a distribution's mean.

    The estimate is obtained by bootstrapping: ``num_resamples`` resamples,
    each the same length as ``samples``, are drawn with replacement; the mean
    of every resample is computed, and the sample standard deviation
    (``ddof=1``) of those means is returned.

    The resampling uses NumPy's global random state, so results vary between
    calls unless the caller seeds ``numpy.random``.

    Args:
        samples: Non-empty sequence of numeric measurements.
        num_resamples: Number of bootstrap resamples to draw; must be at
            least 2 so that a ``ddof=1`` standard deviation is defined.

    Returns:
        The bootstrap estimate of the standard error of the mean.

    Raises:
        ValueError: If ``samples`` is empty or ``num_resamples`` is below 2.
    """
    samples = np.asarray(samples)
    n = len(samples)
    if n == 0:
        raise ValueError("cannot bootstrap an empty sample")
    if num_resamples < 2:
        raise ValueError("num_resamples must be at least 2")
    # Each row of `indices` picks one resample of size n, with replacement.
    indices = npr.randint(0, n, (num_resamples, n))
    # The standard deviation does not depend on ordering, so the resample
    # means are not sorted.
    means = np.mean(samples[indices], axis=1)
    return np.std(means, ddof=1)
def _read_measurements(f):
    """Read measurements from a file.

    Each non-blank line must consist of a label, whitespace, and a value
    parseable by ``float``. Values are grouped by label in the order they
    appear. Blank (whitespace-only) lines are ignored.

    Returns {'a': [1.0, 2.0, 3.0], 'b': [5.0, 5.0, 5.0]} for a file containing
    the six lines: ['a 1', 'a 2', 'a 3', 'b 5', 'b 5', 'b 5'].

    Args:
        f: Iterable of text lines (e.g. an open file object).

    Returns:
        A dict mapping each label to the list of its float values.

    Raises:
        ValueError: If a non-blank line has no value after the label, or the
            value cannot be converted to ``float``.
    """
    measurements = {}
    for line in f:
        if not line.strip():
            # A blank line (such as a trailing newline at end of file)
            # carries no measurement and would break the unpacking below.
            continue
        # Split on the first run of whitespace only: label, then the rest.
        label, value = line.split(None, 1)
        measurements.setdefault(label, []).append(float(value))
    return measurements
def _compute_mean_and_sd_of_ratio_from_delta_method(
    mean_test,
    sem_test,
    mean_control,
    sem_control
):
    """Estimate the relative change from control to test and its spread.

    The returned mean is::

        (mean_test - mean_control) / mean_control
            - sem_control**2 * mean_test / mean_control**3

    and the returned standard deviation is the square root of::

        (sem_test / mean_control)**2
            + (sem_control * mean_test)**2 / mean_control**4

    NOTE(review): the subtracted term in the mean looks like a second-order
    bias correction for a ratio of means, and the variance has no covariance
    term, which presumably assumes independent test and control samples;
    confirm against the intended derivation.

    Args:
        mean_test: Mean of the test measurements.
        sem_test: Standard error of ``mean_test``.
        mean_control: Mean of the control measurements; must be non-zero
            because every term divides by a power of it.
        sem_control: Standard error of ``mean_control``.

    Returns:
        A ``(mean, sd)`` tuple for the relative change, expressed as a
        fraction (0.1 means +10%).
    """
    relative_change = (mean_test - mean_control) / mean_control
    correction = sem_control ** 2 * mean_test / mean_control ** 3
    mean = relative_change - correction

    var = (
        (sem_test / mean_control) ** 2 +
        (sem_control * mean_test) ** 2 / mean_control ** 4
    )
    return (mean, math.sqrt(var))
def _main():
    """Compare two benchmark measurement files and print the % change.

    Reads the control and test file paths from ``sys.argv``, loads the
    labelled measurements from each, and for every label prints the relative
    change from control to test with a 99% confidence interval. A label whose
    interval excludes zero is marked with ``***``. Labels missing from either
    file, or whose control mean is zero, are reported as skipped.

    The ``print`` calls use the single-argument parenthesised form so the
    output is the same under Python 2 and Python 3.

    Returns:
        1 if the wrong number of command-line arguments was given (after
        writing a usage line to stderr); otherwise None.
    """
    if len(sys.argv) != 3:
        sys.stderr.write("usage: analyze.py control.txt test.txt\n")
        return 1

    ci_size = 0.99
    # Two-sided standard-normal critical value for the chosen interval size.
    z_crit = scipy.stats.norm.ppf(0.5 * (1 + ci_size))

    control, test = sys.argv[1:]
    with open(control) as f:
        control_measurements = _read_measurements(f)
    with open(test) as f:
        test_measurements = _read_measurements(f)

    # Union of labels seen in either file; iterating a dict yields its keys.
    keys = set(control_measurements) | set(test_measurements)

    print("Comparing %s (control) vs %s (test)" % (control, test))
    print("Significant differences marked by ***")
    print("%% change from control to test, with %g%% CIs:" % (ci_size * 100,))
    print("")

    for key in sorted(keys):
        print("* %s" % (key,))
        control_nums = control_measurements.get(key, [])
        test_nums = test_measurements.get(key, [])
        if not control_nums or not test_nums:
            print(" skipping...")
            continue

        mean_control = np.mean(control_nums)
        mean_test = np.mean(test_nums)
        if mean_control == 0:
            # A relative change against a zero baseline is undefined and
            # would otherwise surface as NaN/inf in the report.
            print(" skipping (control mean is zero)...")
            continue

        sem_control = _bootstrap_mean_sem(control_nums)
        sem_test = _bootstrap_mean_sem(test_nums)

        rat_mean, rat_sem = _compute_mean_and_sd_of_ratio_from_delta_method(
            mean_test, sem_test, mean_control, sem_control
        )
        rat_low = rat_mean - z_crit * rat_sem
        rat_high = rat_mean + z_crit * rat_sem

        # Significant when the whole interval lies on one side of zero.
        sig = rat_high < 0 or rat_low > 0

        print(" %% change: %+6.2f%% [%+6.2f%%, %+6.2f%%]%s" % (
            100 * rat_mean,
            100 * rat_low,
            100 * rat_high,
            ' ***' if sig else ''
        ))
        print(" means: %g (control), %g (test)" % (mean_control, mean_test))
if __name__ == '__main__':
    # Use _main()'s return value as the process exit status
    # (None exits with status 0, 1 signals a usage error).
    sys.exit(_main())