Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 749:10dbab1e871d dev
modifications in samples and distributions
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 20 Oct 2015 00:03:25 -0400 |
parents | fe71639f1ee7 |
children | e01cabca4c55 |
comparison
equal
deleted
inserted
replaced
747:d45ab817ee11 | 749:10dbab1e871d |
---|---|
72 result = 0. | 72 result = 0. |
73 for e, o in zip(expected, observed): | 73 for e, o in zip(expected, observed): |
74 result += ((e-o)*(e-o))/e | 74 result += ((e-o)*(e-o))/e |
75 return result | 75 return result |
76 | 76 |
77 class EmpiricalDistribution(object): | 77 class DistributionSample(object): |
78 def nSamples(self): | 78 def nSamples(self): |
79 return sum(self.counts) | 79 return sum(self.counts) |
80 | 80 |
81 def cumulativeDensityFunction(sample, normalized = False): | 81 def cumulativeDensityFunction(sample, normalized = False): |
82 '''Returns the cumulative density function of the sample of a random variable''' | 82 '''Returns the cumulative density function of the sample of a random variable''' |
84 counts = arange(1,len(sample)+1) # dtype = float | 84 counts = arange(1,len(sample)+1) # dtype = float |
85 if normalized: | 85 if normalized: |
86 counts /= float(len(sample)) | 86 counts /= float(len(sample)) |
87 return xaxis, counts | 87 return xaxis, counts |
88 | 88 |
89 class EmpiricalDiscreteDistribution(EmpiricalDistribution): | 89 class DiscreteDistributionSample(DistributionSample): |
90 '''Class to represent a sample of a distribution for a discrete random variable | 90 '''Class to represent a sample of a distribution for a discrete random variable''' |
91 ''' | |
92 def __init__(self, categories, counts): | 91 def __init__(self, categories, counts): |
93 self.categories = categories | 92 self.categories = categories |
94 self.counts = counts | 93 self.counts = counts |
95 | 94 |
96 def mean(self): | 95 def mean(self): |
111 refProba = [probability(c) for c in self.categories] | 110 refProba = [probability(c) for c in self.categories] |
112 refProba[-1] = 1-npsum(refProba[:-1]) | 111 refProba[-1] = 1-npsum(refProba[:-1]) |
113 refCounts = [r*self.nSamples() for r in refProba] | 112 refCounts = [r*self.nSamples() for r in refProba] |
114 return refCounts, refProba | 113 return refCounts, refProba |
115 | 114 |
116 class EmpiricalContinuousDistribution(EmpiricalDistribution): | 115 class ContinuousDistributionSample(DistributionSample): |
117 '''Class to represent a sample of a distribution for a continuous random variable | 116 '''Class to represent a sample of a distribution for a continuous random variable |
118 with the number of observations for each interval | 117 with the number of observations for each interval |
119 intervals (categories variable) are defined by their left limits, the last one being the right limit | 118 intervals (categories variable) are defined by their left limits, the last one being the right limit |
120 categories contain therefore one more element than the counts''' | 119 categories contain therefore one more element than the counts''' |
121 def __init__(self, categories, counts): | 120 def __init__(self, categories, counts): |
122 # todo add samples for initialization and everything to None? (or setSamples?) | 121 # todo add samples for initialization and everything to None? (or setSamples?) |
123 self.categories = categories | 122 self.categories = categories |
124 self.counts = counts | 123 self.counts = counts |
124 | |
125 @staticmethod | |
126 def generate(sample, categories): | |
127 if min(sample) < min(categories): | |
128 print('Sample has lower min than proposed categories ({}, {})'.format(min(sample), min(categories))) | |
129 if max(sample) > max(categories): | |
130 print('Sample has higher max than proposed categories ({}, {})'.format(max(sample), max(categories))) | |
131 dist = ContinuousDistributionSample(sorted(categories), [0]*(len(categories)-1)) | |
132 for s in sample: | |
133 i = 0 | |
134 while i<len(dist.categories) and dist.categories[i] <= s: | |
135 i += 1 | |
136 if i <= len(dist.counts): | |
137 dist.counts[i-1] += 1 | |
138 #print('{} in {} {}'.format(s, dist.categories[i-1], dist.categories[i])) | |
139 else: | |
140 print('Element {} is not in the categories'.format(s)) | |
141 return dist | |
125 | 142 |
126 def mean(self): | 143 def mean(self): |
127 result = 0. | 144 result = 0. |
128 for i in range(len(self.counts)-1): | 145 for i in range(len(self.counts)-1): |
129 result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2 | 146 result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2 |