Online Variance Calculation in Python

Since I couldn’t easily find this code anywhere, I figured I’d post it here for quick reference:


"""
2012.1.25 CKS
Incremental (online, single-pass) calculation of both the mean and the
population variance, using Welford's update.
http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
"""
import unittest
 
## Dumb slow mean/variance formulas.
 
def mean(seq):
    """Return the arithmetic mean of ``seq`` as a float.

    ``seq`` is passed to both ``sum`` and ``len``, so it must be a sized
    collection of numbers. An empty ``seq`` raises ZeroDivisionError.
    """
    total = sum(seq)
    count = float(len(seq))
    return total / count
 
def variance(seq):
    """Return the population variance of ``seq`` as a float.

    This is the two-pass formula: compute the mean first, then average the
    squared deviations from it. The divisor is ``len(seq)`` (not
    ``len(seq) - 1``). ``seq`` is iterated more than once, so it must be a
    sized, re-iterable collection of numbers.
    """
    center = mean(seq)
    squared_deviations = [(value - center) ** 2 for value in seq]
    return sum(squared_deviations) / float(len(seq))
 
## Incremental mean/variance formulas.
 
class Stat(object):
    """Running mean and population variance of a stream of numbers.

    Values are fed in one at a time with ``stat += value``. The mean and
    variance of everything added so far can be read at any point through
    the ``mean`` and ``variance`` properties; individual values are not
    stored.
    """

    def __init__(self):
        # Sum and count of the values added so far; the mean is derived
        # from these two on demand.
        self.mean_sum = 0
        self.mean_count = 0
        # Despite its name, this holds the running SUM of squared
        # deviations from the mean; ``variance`` divides it by the count.
        self.last_variance = 0

    def __iadd__(self, value):
        """Fold ``value`` into the running statistics and return ``self``."""
        last_mean = self.mean
        self.mean_sum += value
        self.mean_count += 1
        # The very first value has no previous mean and contributes no
        # deviation, so the accumulator is left at 0 in that case.
        if last_mean is not None:
            # Welford's update: the product of the deviation from the old
            # mean and the deviation from the new mean is this value's
            # contribution to the sum of squared deviations.
            self.last_variance += (value - last_mean) * (value - self.mean)
        return self

    @property
    def mean(self):
        """Mean of the values added so far, or None if there are none."""
        if not self.mean_count:
            return None
        return self.mean_sum / float(self.mean_count)

    @property
    def variance(self):
        """Population variance of the values added so far.

        Returns None when no values have been added, mirroring ``mean``,
        rather than failing with a division by zero.
        """
        if not self.mean_count:
            return None
        return self.last_variance / float(self.mean_count)
 
class Test(unittest.TestCase):
    """Checks the incremental ``Stat`` against the batch formulas."""

    def test(self):
        """Feed 1..6 into a ``Stat``, comparing with the batch results after every value."""
        nums = range(1, 7)
        seen = []
        s = Stat()
        for n in nums:
            s += n
            seen.append(n)
            # Single-argument, parenthesised print: produces the same text
            # as the old print statement on Python 2 and is also valid on
            # Python 3.
            print('mean: %s' % s.mean)
            print('variance: %s' % s.variance)
            # Assert the intermediate values instead of only printing them,
            # so a wrong update step cannot hide behind a correct total.
            self.assertAlmostEqual(s.mean, mean(seen))
            self.assertAlmostEqual(s.variance, variance(seen))
        # Default precision (7 places); 1-2 places was loose enough to let
        # real errors through.
        self.assertAlmostEqual(s.mean, mean(nums))
        self.assertAlmostEqual(s.variance, variance(nums))
 
# Run the unit tests when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

Leave a Reply