Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 672:5473b7460375
moved and rationalized imports in modules
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 26 May 2015 13:53:40 +0200 |
parents | 849f5f8bf4b9 |
children | 01b89182891a |
comparison
equal
deleted
inserted
replaced
671:849f5f8bf4b9 | 672:5473b7460375 |
---|---|
3 | 3 |
4 import matplotlib.pyplot as plt | 4 import matplotlib.pyplot as plt |
5 from datetime import time, datetime | 5 from datetime import time, datetime |
6 from math import sqrt, ceil, floor | 6 from math import sqrt, ceil, floor |
7 from scipy.stats import kruskal, shapiro | 7 from scipy.stats import kruskal, shapiro |
| 8 from numpy import zeros, array, exp, sum as npsum, arange, cumsum |
8 | 9 |
9 datetimeFormat = "%Y-%m-%d %H:%M:%S" | 10 datetimeFormat = "%Y-%m-%d %H:%M:%S" |
10 | 11 |
11 ######################### | 12 ######################### |
12 # Enumerations | 13 # Enumerations |
58 def nSamples(self): | 59 def nSamples(self): |
59 return sum(self.counts) | 60 return sum(self.counts) |
60 | 61 |
61 def cumulativeDensityFunction(sample, normalized = False): | 62 def cumulativeDensityFunction(sample, normalized = False): |
62 '''Returns the cumulative density function of the sample of a random variable''' | 63 '''Returns the cumulative density function of the sample of a random variable''' |
63 from numpy import arange, cumsum | |
64 xaxis = sorted(sample) | 64 xaxis = sorted(sample) |
65 counts = arange(1,len(sample)+1) # dtype = float | 65 counts = arange(1,len(sample)+1) # dtype = float |
66 if normalized: | 66 if normalized: |
67 counts /= float(len(sample)) | 67 counts /= float(len(sample)) |
68 return xaxis, counts | 68 return xaxis, counts |
69 | 69 |
70 class EmpiricalDiscreteDistribution(EmpiricalDistribution): | 70 class EmpiricalDiscreteDistribution(EmpiricalDistribution): |
71 '''Class to represent a sample of a distribution for a discrete random variable | 71 '''Class to represent a sample of a distribution for a discrete random variable |
72 ''' | 72 ''' |
73 from numpy.core.fromnumeric import sum | |
74 | |
75 def __init__(self, categories, counts): | 73 def __init__(self, categories, counts): |
76 self.categories = categories | 74 self.categories = categories |
77 self.counts = counts | 75 self.counts = counts |
78 | 76 |
79 def mean(self): | 77 def mean(self): |
80 result = [float(x*y) for x,y in zip(self.categories, self.counts)] | 78 result = [float(x*y) for x,y in zip(self.categories, self.counts)] |
81 return sum(result)/self.nSamples() | 79 return npsum(result)/self.nSamples() |
82 | 80 |
83 def var(self, mean = None): | 81 def var(self, mean = None): |
84 if not mean: | 82 if not mean: |
85 m = self.mean() | 83 m = self.mean() |
86 else: | 84 else: |
87 m = mean | 85 m = mean |
88 result = 0. | 86 result = 0. |
89 squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)] | 87 squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)] |
90 return sum(squares)/(self.nSamples()-1) | 88 return npsum(squares)/(self.nSamples()-1) |
91 | 89 |
92 def referenceCounts(self, probability): | 90 def referenceCounts(self, probability): |
93 '''probability is a function that returns the probability of the random variable for the category values''' | 91 '''probability is a function that returns the probability of the random variable for the category values''' |
94 refProba = [probability(c) for c in self.categories] | 92 refProba = [probability(c) for c in self.categories] |
95 refProba[-1] = 1-sum(refProba[:-1]) | 93 refProba[-1] = 1-npsum(refProba[:-1]) |
96 refCounts = [r*self.nSamples() for r in refProba] | 94 refCounts = [r*self.nSamples() for r in refProba] |
97 return refCounts, refProba | 95 return refCounts, refProba |
98 | 96 |
99 class EmpiricalContinuousDistribution(EmpiricalDistribution): | 97 class EmpiricalContinuousDistribution(EmpiricalDistribution): |
100 '''Class to represent a sample of a distribution for a continuous random variable | 98 '''Class to represent a sample of a distribution for a continuous random variable |
166 # return smoothed | 164 # return smoothed |
167 | 165 |
168 def kernelSmoothing(x, X, Y, weightFunc, halfwidth): | 166 def kernelSmoothing(x, X, Y, weightFunc, halfwidth): |
169 '''Returns the smoothed estimate of (X,Y) at x | 167 '''Returns the smoothed estimate of (X,Y) at x |
170 Sum_x weight(sample_x,x) * y(x)''' | 168 Sum_x weight(sample_x,x) * y(x)''' |
171 from numpy import zeros, array | |
172 weights = array([weightFunc(x,observedx, halfwidth) for observedx in X]) | 169 weights = array([weightFunc(x,observedx, halfwidth) for observedx in X]) |
173 if sum(weights)>0: | 170 if sum(weights)>0: |
174 return sum(weights*Y)/sum(weights) | 171 return sum(weights*Y)/sum(weights) |
175 else: | 172 else: |
176 return 0 | 173 return 0 |
180 return 1. | 177 return 1. |
181 else: | 178 else: |
182 return 0. | 179 return 0. |
183 | 180 |
184 def gaussian(center, x, halfwidth): | 181 def gaussian(center, x, halfwidth): |
185 from numpy import exp | |
186 return exp(-((center-x)/halfwidth)**2/2) | 182 return exp(-((center-x)/halfwidth)**2/2) |
187 | 183 |
188 def epanechnikov(center, x, halfwidth): | 184 def epanechnikov(center, x, halfwidth): |
189 diff = abs(center-x) | 185 diff = abs(center-x) |
190 if diff<halfwidth: | 186 if diff<halfwidth: |