changeset 76:64fde2b1f96d

simplified intervals in empiricalDistribution
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 10 Feb 2011 22:15:54 -0500
parents 46ec876ce90e
children 5e6cd36a991c
files python/utils.py
diffstat 1 files changed, 10 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/python/utils.py	Thu Feb 10 20:34:16 2011 -0500
+++ b/python/utils.py	Thu Feb 10 22:15:54 2011 -0500
@@ -25,16 +25,18 @@
 
 class empiricalDistribution:
     '''Class to represent a sample of a distribution for a continuous random variable
-    with the number of observations for each interval'''
+    with the number of observations for each interval
+    intervals (categories variable) are defined by their left limits, the last one being the right limit
+    categories contain therefore one more element than the counts'''
     def __init__(self, categories, counts):
         self.categories = categories
         self.counts = counts
 
     def mean(self):
         result = 0.
-        for i,c in zip(self.categories, self.counts):
-            result += c*(i[1]+i[0])/2
-        return result/sum(self.counts)
+        for i in range(len(self.counts)-1):
+            result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2
+        return result/self.nSamples()
 
     def var(self, mean = None):
         if not mean:
@@ -42,9 +44,9 @@
         else:
             m = mean
         result = 0.
-        for i,c in zip(self.categories, self.counts):
-            mid = (i[1]+i[0])/2
-            result += c*(mid - m)*(mid - m)
+        for i in range(len(self.counts)-1):
+            mid = (self.categories[i]+self.categories[i+1])/2
+            result += self.counts[i]*(mid - m)*(mid - m)
         return result/(self.nSamples()-1)
 
     def nSamples(self):
@@ -56,7 +58,7 @@
         # refCumulativeCounts = [0]#[cdf(self.categories[0][0])]
 #         for inter in self.categories:
 #             refCumulativeCounts.append(cdf(inter[1]))
-        refCumulativeCounts = [cdf(inter[1]) for inter in self.categories[:-1]]
+        refCumulativeCounts = [cdf(x) for x in self.categories[1:-1]]
 
         refProba = [refCumulativeCounts[0]]
         for i in xrange(1,len(refCumulativeCounts)):