Mercurial Hosting > traffic-intelligence
changeset 85:7f1e54234f96
added empirical discrete distribution, modified class organization and names
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 18 Apr 2011 19:31:53 -0400 |
parents | 731df2fa0010 |
children | f03ec4697a09 |
files | python/utils.py |
diffstat | 1 files changed, 29 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/python/utils.py Tue Mar 29 01:47:00 2011 -0400
+++ b/python/utils.py Mon Apr 18 19:31:53 2011 -0400
@@ -23,7 +23,35 @@
         result += ((e-o)*(e-o))/e
     return result
 
-class empiricalDistribution:
+class EmpiricalDistribution:
+    def nSamples(self):
+        return sum(self.counts)
+
+
+class EmpiricalDiscreteDistribution(EmpiricalDistribution):
+    '''Class to represent a sample of a distribution for a discrete random variable
+    '''
+    def __init__(self, categories, counts):
+        self.categories = categories
+        self.counts = counts
+
+    def mean(self):
+        from numpy.core.fromnumeric import sum
+        result = [float(x*y) for x,y in zip(self.categories, self.counts)]
+        return sum(result)/self.nSamples()
+
+    def var(self, mean = None):
+        from numpy.core.fromnumeric import sum
+        if not mean:
+            m = self.mean()
+        else:
+            m = mean
+        result = 0.
+        squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)]
+        return sum(squares)/(self.nSamples()-1)
+
+
+class EmpiricalContinuousDistribution(EmpiricalDistribution):
     '''Class to represent a sample of a distribution for a continuous random variable
     with the number of observations for each interval
     intervals (categories variable) are defined by their left limits, the last one being the right limit
@@ -49,9 +77,6 @@
             result += self.counts[i]*(mid - m)*(mid - m)
         return result/(self.nSamples()-1)
 
-    def nSamples(self):
-        return sum(self.counts)
-
     def referenceCounts(self, cdf):
         '''cdf is a cumulative distribution function
         returning the probability of the variable being less that x'''
```