Mercurial Hosting > traffic-intelligence
changeset 35:8cafee54466f
forgotten update of histogram class
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 12 Apr 2010 11:18:19 -0400 |
parents | 388a5a25fe92 |
children | 571b11304ec9 |
files | python/utils.py |
diffstat | 1 files changed, 38 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
# --- a/python/utils.py Sun Apr 11 02:23:48 2010 -0400
# +++ b/python/utils.py Mon Apr 12 11:18:19 2010 -0400
# @@ -24,8 +24,45 @@
class histogram:
    '''Class to represent a sample of a distribution for a continuous random variable
    with the number of observations for each interval

    categories is a sequence of intervals, each indexable as
    (lower bound, upper bound); counts is the sequence of the numbers of
    observations, in the same order as categories.'''

    def __init__(self, categories, counts):
        self.categories = categories
        self.counts = counts

    def mean(self):
        '''Returns the mean of the sample, each observation being
        placed at the midpoint of its interval

        Raises ZeroDivisionError if the histogram has no observation.'''
        result = 0.
        for inter, count in zip(self.categories, self.counts):
            # divide by a float so that integer bounds are not truncated
            # by integer division (Python 2)
            result += count*(inter[1]+inter[0])/2.
        return result/self.nSamples()

    def var(self, mean = None):
        '''Returns the unbiased sample variance (division by n-1),
        each observation being placed at the midpoint of its interval

        mean is an optional precomputed mean; it is computed with
        self.mean() only if left to None, so that an explicit mean of 0
        is used as is.
        Raises ZeroDivisionError if there is exactly one observation.'''
        if mean is None:
            m = self.mean()
        else:
            m = mean
        result = 0.
        for inter, count in zip(self.categories, self.counts):
            mid = (inter[1]+inter[0])/2.
            result += count*(mid - m)*(mid - m)
        return result/(self.nSamples()-1)

    def nSamples(self):
        '''Returns the total number of observations'''
        return sum(self.counts)

    def referenceCounts(self, cdf):
        '''cdf is a cumulative distribution function
        returning the probability of the variable being less than x

        Returns the tuple (refCounts, refProba) of the expected counts and
        probabilities under cdf for each category. The first category
        receives all the probability mass below its upper bound and the
        last one all the remaining mass, so that the probabilities sum
        to 1. Both lists are empty if there is no category.'''
        if len(self.categories) == 0:
            return [], []

        # cumulative probability at the upper bound of each category but
        # the last, framed by 0 and 1 so that successive differences give
        # the probability of each category (including a single one)
        refCumulativeCounts = [cdf(inter[1]) for inter in self.categories[:-1]]
        bounds = [0.] + refCumulativeCounts + [1.]
        refProba = [upper - lower for lower, upper in zip(bounds[:-1], bounds[1:])]

        nSamples = self.nSamples()
        refCounts = [p*nSamples for p in refProba]

        return refCounts, refProba

#########################
# maths section