Mercurial Hosting > traffic-intelligence
changeset 76:64fde2b1f96d
simplified intervals in empiricalDistribution
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Thu, 10 Feb 2011 22:15:54 -0500 |
parents | 46ec876ce90e |
children | 5e6cd36a991c |
files | python/utils.py |
diffstat | 1 files changed, 10 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/python/utils.py Thu Feb 10 20:34:16 2011 -0500 +++ b/python/utils.py Thu Feb 10 22:15:54 2011 -0500 @@ -25,16 +25,18 @@ class empiricalDistribution: '''Class to represent a sample of a distribution for a continuous random variable - with the number of observations for each interval''' + with the number of observations for each interval + intervals (categories variable) are defined by their left limits, the last one being the right limit + categories contain therefore one more element than the counts''' def __init__(self, categories, counts): self.categories = categories self.counts = counts def mean(self): result = 0. - for i,c in zip(self.categories, self.counts): - result += c*(i[1]+i[0])/2 - return result/sum(self.counts) + for i in range(len(self.counts)-1): + result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2 + return result/self.nSamples() def var(self, mean = None): if not mean: @@ -42,9 +44,9 @@ else: m = mean result = 0. - for i,c in zip(self.categories, self.counts): - mid = (i[1]+i[0])/2 - result += c*(mid - m)*(mid - m) + for i in range(len(self.counts)-1): + mid = (self.categories[i]+self.categories[i+1])/2 + result += self.counts[i]*(mid - m)*(mid - m) return result/(self.nSamples()-1) def nSamples(self): @@ -56,7 +58,7 @@ # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] # for inter in self.categories: # refCumulativeCounts.append(cdf(inter[1])) - refCumulativeCounts = [cdf(inter[1]) for inter in self.categories[:-1]] + refCumulativeCounts = [cdf(x) for x in self.categories[1:-1]] refProba = [refCumulativeCounts[0]] for i in xrange(1,len(refCumulativeCounts)):