comparison python/utils.py @ 85:7f1e54234f96

added empirical discrete distribution, modified class organization and names
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 18 Apr 2011 19:31:53 -0400
parents 5e6cd36a991c
children f03ec4697a09
comparison
equal deleted inserted replaced
84:731df2fa0010 85:7f1e54234f96
21 result = 0. 21 result = 0.
22 for e, o in zip(expected, observed): 22 for e, o in zip(expected, observed):
23 result += ((e-o)*(e-o))/e 23 result += ((e-o)*(e-o))/e
24 return result 24 return result
25 25
26 class empiricalDistribution: 26 class EmpiricalDistribution:
27 def nSamples(self):
28 return sum(self.counts)
29
30
31 class EmpiricalDiscreteDistribution(EmpiricalDistribution):
32 '''Class to represent a sample of a distribution for a discrete random variable
33 '''
34 def __init__(self, categories, counts):
35 self.categories = categories
36 self.counts = counts
37
38 def mean(self):
39 from numpy.core.fromnumeric import sum
40 result = [float(x*y) for x,y in zip(self.categories, self.counts)]
41 return sum(result)/self.nSamples()
42
43 def var(self, mean = None):
44 from numpy.core.fromnumeric import sum
45 if not mean:
46 m = self.mean()
47 else:
48 m = mean
49 result = 0.
50 squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)]
51 return sum(squares)/(self.nSamples()-1)
52
53
54 class EmpiricalContinuousDistribution(EmpiricalDistribution):
27 '''Class to represent a sample of a distribution for a continuous random variable 55 '''Class to represent a sample of a distribution for a continuous random variable
28 with the number of observations for each interval 56 with the number of observations for each interval
29 intervals (categories variable) are defined by their left limits, the last one being the right limit 57 intervals (categories variable) are defined by their left limits, the last one being the right limit
30 categories therefore contains one more element than counts''' 58 categories therefore contains one more element than counts'''
31 def __init__(self, categories, counts): 59 def __init__(self, categories, counts):
46 result = 0. 74 result = 0.
47 for i in range(len(self.counts)-1): 75 for i in range(len(self.counts)-1):
48 mid = (self.categories[i]+self.categories[i+1])/2 76 mid = (self.categories[i]+self.categories[i+1])/2
49 result += self.counts[i]*(mid - m)*(mid - m) 77 result += self.counts[i]*(mid - m)*(mid - m)
50 return result/(self.nSamples()-1) 78 return result/(self.nSamples()-1)
51
52 def nSamples(self):
53 return sum(self.counts)
54 79
55 def referenceCounts(self, cdf): 80 def referenceCounts(self, cdf):
56 '''cdf is a cumulative distribution function 81 '''cdf is a cumulative distribution function
57 returning the probability of the variable being less than x''' 82 returning the probability of the variable being less than x'''
58 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] 83 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])]