Online Variance Calculation in Python
Since I couldn’t easily find this code anywhere, I figured I’d post it here for quick reference:
"""
2012.1.25 CKS
Incremental calculation of both the mean and variance.
http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance

The module provides two straightforward batch helpers (``mean`` and
``variance``) and the ``Stat`` accumulator, which produces the same results
one sample at a time without storing the samples.
"""
import unittest


## Dumb slow mean/variance formulas.

def mean(seq):
    """Return the arithmetic mean of ``seq`` as a float.

    ``seq`` must support ``len()`` and be non-empty; an empty sequence
    raises ``ZeroDivisionError``.
    """
    return sum(seq) / float(len(seq))


def variance(seq):
    """Return the population variance of ``seq`` (divides by ``len(seq)``).

    ``seq`` is traversed twice and must support ``len()``, so it has to be a
    re-iterable, non-empty sequence rather than a one-shot iterator.
    """
    m = mean(seq)
    return sum((v - m) ** 2 for v in seq) / float(len(seq))


## Incremental mean/variance formulas.

class Stat(object):
    """Running mean and population variance, updated with ``stat += value``.

    Attributes:
        mean_sum: sum of all values added so far.
        mean_count: number of values added so far.
        last_variance: running sum of squared deviations from the mean.
            Despite its name it is not a variance until it is divided by
            ``mean_count`` (see the ``variance`` property).
    """

    def __init__(self):
        self.mean_sum = 0
        self.mean_count = 0
        self.last_variance = 0

    def __iadd__(self, value):
        """Fold ``value`` into the running statistics and return ``self``."""
        # The mean *before* this sample; None while the accumulator is empty.
        last_mean = self.mean
        self.mean_sum += value
        self.mean_count += 1
        if last_mean is not None:
            # Incremental update: the sum of squared deviations grows by
            # (x - old_mean) * (x - new_mean).  The first sample contributes
            # nothing, so it is skipped.
            self.last_variance += (value - last_mean) * (value - self.mean)
        return self

    @property
    def mean(self):
        """Mean of the values added so far, or ``None`` if there are none."""
        if not self.mean_count:
            return None
        return self.mean_sum / float(self.mean_count)

    @property
    def variance(self):
        """Population variance of the values added so far.

        Returns ``None`` when no values have been added, mirroring ``mean``,
        instead of dividing by zero.
        """
        if not self.mean_count:
            return None
        return self.last_variance / float(self.mean_count)


class Test(unittest.TestCase):
    """Check the incremental results against the batch formulas."""

    def test(self):
        nums = list(range(1, 7))
        s = Stat()
        for n in nums:
            s += n
        # Single-argument print() behaves the same on Python 2 and 3.
        print('mean: %s' % s.mean)
        print('variance: %s' % s.variance)
        self.assertAlmostEqual(s.mean, mean(nums), 1)
        self.assertAlmostEqual(s.variance, variance(nums), 2)

    def test_empty(self):
        s = Stat()
        self.assertIsNone(s.mean)
        self.assertIsNone(s.variance)


if __name__ == '__main__':
    unittest.main()
No Comments »
Filed under: Python