diff python/utils.py @ 35:8cafee54466f

forgotten update of histogram class
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 12 Apr 2010 11:18:19 -0400
parents 48e56179c39e
children 1a2ac2d4f53a
line wrap: on
line diff
--- a/python/utils.py	Sun Apr 11 02:23:48 2010 -0400
+++ b/python/utils.py	Mon Apr 12 11:18:19 2010 -0400
@@ -24,8 +24,45 @@
 class histogram:
     '''Class to represent a sample of a distribution for a continuous random variable
     with the number of observations for each interval'''
+    def __init__(self, categories, counts):
+        self.categories = categories
+        self.counts = counts
 
-    
+    def mean(self):
+        result = 0.
+        for i,c in zip(self.categories, self.counts):
+            result += c*(i[1]+i[0])/2
+        return result/sum(self.counts)
+
+    def var(self, mean = None):
+        if not mean:
+            m = self.mean()
+        else:
+            m = mean
+        result = 0.
+        for i,c in zip(self.categories, self.counts):
+            mid = (i[1]+i[0])/2
+            result += c*(mid - m)*(mid - m)
+        return result/(self.nSamples()-1)
+
+    def nSamples(self):
+        return sum(self.counts)
+
+    def referenceCounts(self, cdf):
+        '''cdf is a cumulative distribution function
+        returning the probability of the variable being less than x'''
+        # refCumulativeCounts = [0]#[cdf(self.categories[0][0])]
+#         for inter in self.categories:
+#             refCumulativeCounts.append(cdf(inter[1]))
+        refCumulativeCounts = [cdf(inter[1]) for inter in self.categories[:-1]]
+
+        refProba = [refCumulativeCounts[0]]
+        for i in xrange(1,len(refCumulativeCounts)):
+            refProba.append(refCumulativeCounts[i]-refCumulativeCounts[i-1])
+        refProba.append(1-refCumulativeCounts[-1])
+        refCounts = [p*self.nSamples() for p in refProba]
+        
+        return refCounts, refProba
 
 #########################
 # maths section