comparison python/utils.py @ 749:10dbab1e871d dev

modifications in samples and distributions
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 20 Oct 2015 00:03:25 -0400
parents fe71639f1ee7
children e01cabca4c55
comparison
equal deleted inserted replaced
747:d45ab817ee11 749:10dbab1e871d
72 result = 0. 72 result = 0.
73 for e, o in zip(expected, observed): 73 for e, o in zip(expected, observed):
74 result += ((e-o)*(e-o))/e 74 result += ((e-o)*(e-o))/e
75 return result 75 return result
76 76
77 class EmpiricalDistribution(object): 77 class DistributionSample(object):
78 def nSamples(self): 78 def nSamples(self):
79 return sum(self.counts) 79 return sum(self.counts)
80 80
81 def cumulativeDensityFunction(sample, normalized = False): 81 def cumulativeDensityFunction(sample, normalized = False):
82 '''Returns the cumulative density function of the sample of a random variable''' 82 '''Returns the cumulative density function of the sample of a random variable'''
84 counts = arange(1,len(sample)+1) # dtype = float 84 counts = arange(1,len(sample)+1) # dtype = float
85 if normalized: 85 if normalized:
86 counts /= float(len(sample)) 86 counts /= float(len(sample))
87 return xaxis, counts 87 return xaxis, counts
88 88
89 class EmpiricalDiscreteDistribution(EmpiricalDistribution): 89 class DiscreteDistributionSample(DistributionSample):
90 '''Class to represent a sample of a distribution for a discrete random variable 90 '''Class to represent a sample of a distribution for a discrete random variable'''
91 '''
92 def __init__(self, categories, counts): 91 def __init__(self, categories, counts):
93 self.categories = categories 92 self.categories = categories
94 self.counts = counts 93 self.counts = counts
95 94
96 def mean(self): 95 def mean(self):
111 refProba = [probability(c) for c in self.categories] 110 refProba = [probability(c) for c in self.categories]
112 refProba[-1] = 1-npsum(refProba[:-1]) 111 refProba[-1] = 1-npsum(refProba[:-1])
113 refCounts = [r*self.nSamples() for r in refProba] 112 refCounts = [r*self.nSamples() for r in refProba]
114 return refCounts, refProba 113 return refCounts, refProba
115 114
116 class EmpiricalContinuousDistribution(EmpiricalDistribution): 115 class ContinuousDistributionSample(DistributionSample):
117 '''Class to represent a sample of a distribution for a continuous random variable 116 '''Class to represent a sample of a distribution for a continuous random variable
118 with the number of observations for each interval 117 with the number of observations for each interval
119 intervals (categories variable) are defined by their left limits, the last one being the right limit 118 intervals (categories variable) are defined by their left limits, the last one being the right limit
120 categories contain therefore one more element than the counts''' 119 categories contain therefore one more element than the counts'''
121 def __init__(self, categories, counts): 120 def __init__(self, categories, counts):
122 # todo add samples for initialization and everything to None? (or setSamples?) 121 # todo add samples for initialization and everything to None? (or setSamples?)
123 self.categories = categories 122 self.categories = categories
124 self.counts = counts 123 self.counts = counts
124
125 @staticmethod
126 def generate(sample, categories):
127 if min(sample) < min(categories):
128 print('Sample has lower min than proposed categories ({}, {})'.format(min(sample), min(categories)))
129 if max(sample) > max(categories):
130 print('Sample has higher max than proposed categories ({}, {})'.format(max(sample), max(categories)))
131 dist = ContinuousDistributionSample(sorted(categories), [0]*(len(categories)-1))
132 for s in sample:
133 i = 0
134 while i<len(dist.categories) and dist.categories[i] <= s:
135 i += 1
136 if i <= len(dist.counts):
137 dist.counts[i-1] += 1
138 #print('{} in {} {}'.format(s, dist.categories[i-1], dist.categories[i]))
139 else:
140 print('Element {} is not in the categories'.format(s))
141 return dist
125 142
126 def mean(self): 143 def mean(self):
127 result = 0. 144 result = 0.
128 for i in range(len(self.counts)-1): 145 for i in range(len(self.counts)-1):
129 result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2 146 result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2