Mercurial Hosting > traffic-intelligence
comparison trafficintelligence/utils.py @ 1031:045cb04ad7b8
corrected bug in distribution
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 19 Jun 2018 17:07:35 -0400 |
parents | aafbc0bab925 |
children | 4069d8545922 |
comparison
equal
deleted
inserted
replaced
1030:aafbc0bab925 | 1031:045cb04ad7b8 |
---|---|
8 from copy import deepcopy, copy | 8 from copy import deepcopy, copy |
9 | 9 |
10 from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t | 10 from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t |
11 from scipy.spatial import distance | 11 from scipy.spatial import distance |
12 from scipy.sparse import dok_matrix | 12 from scipy.sparse import dok_matrix |
13 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit | 13 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit, float as npfloat |
14 from numpy.random import permutation as nppermutation | 14 from numpy.random import permutation as nppermutation |
15 from pandas import DataFrame, concat | 15 from pandas import DataFrame, concat |
16 import matplotlib.pyplot as plt | 16 import matplotlib.pyplot as plt |
17 | 17 |
18 datetimeFormat = "%Y-%m-%d %H:%M:%S" | 18 datetimeFormat = "%Y-%m-%d %H:%M:%S" |
133 | 133 |
134 def computeChi2(expected, observed): | 134 def computeChi2(expected, observed): |
135 '''Returns the Chi2 statistics''' | 135 '''Returns the Chi2 statistics''' |
136 return sum([((e-o)*(e-o))/float(e) for e, o in zip(expected, observed)]) | 136 return sum([((e-o)*(e-o))/float(e) for e, o in zip(expected, observed)]) |
137 | 137 |
138 class generateDistribution(rv_continuous): | 138 class EmpiricalContinuousDistribution(rv_continuous): |
139 def __init__(self, values, probabilities): | 139 def __init__(self, values, probabilities, **kwargs): |
140 '''The values (and corresponding probabilities) are supposed to be sorted by value | 140 '''The values (and corresponding probabilities) are supposed to be sorted by value |
141 for v, p in zip(values, probabilities): P(X<=v)=p''' | 141 for v, p in zip(values, probabilities): P(X<=v)=p''' |
142 assert probabilities[0]==0 | 142 assert probabilities[0]==0 |
| | 143 super(EmpiricalContinuousDistribution, self).__init__(**kwargs) |
143 self.values = values | 144 self.values = values |
144 self.probabilities = probabilities | 145 self.probabilities = probabilities |
145 | 146 |
146 def _cdf(self, x): | 147 def _cdf(self, x): |
147 if x < self.values[0]: | 148 if x < self.values[0]: |
151 while i+1<len(self.values) and self.values[i+1] < x: | 152 while i+1<len(self.values) and self.values[i+1] < x: |
152 i += 1 | 153 i += 1 |
153 if i == len(self.values)-1: | 154 if i == len(self.values)-1: |
154 return self.probabilities[-1] | 155 return self.probabilities[-1] |
155 else: | 156 else: |
156 return (self.probabilities[i+1]-self.probabilities[i])/(self.values[i+1]-self.values[i]) | 157 return self.probabilities[i]+(x-self.values[i])*float(self.probabilities[i+1]-self.probabilities[i])/float(self.values[i+1]-self.values[i]) |
157 | 158 |
158 class DistributionSample(object): | 159 class DistributionSample(object): |
159 def nSamples(self): | 160 def nSamples(self): |
160 return sum(self.counts) | 161 return sum(self.counts) |
161 | 162 |
162 def cumulativeDensityFunction(sample, normalized = False): | 163 def cumulativeDensityFunction(sample, normalized = False): |
163 '''Returns the cumulative density function of the sample of a random variable''' | 164 '''Returns the cumulative density function of the sample of a random variable''' |
164 xaxis = sorted(sample) | 165 xaxis = sorted(sample) |
165 counts = arange(1,len(sample)+1) # dtype = float | 166 counts = arange(1,len(sample)+1) # dtype = float |
166 if normalized: | 167 if normalized: |
167 counts /= float(len(sample)) | 168 counts = counts.astype(float)/float(len(sample)) |
168 return xaxis, counts | 169 return xaxis, counts |
169 | 170 |
170 class DiscreteDistributionSample(DistributionSample): | 171 class DiscreteDistributionSample(DistributionSample): |
171 '''Class to represent a sample of a distribution for a discrete random variable''' | 172 '''Class to represent a sample of a distribution for a discrete random variable''' |
172 def __init__(self, categories, counts): | 173 def __init__(self, categories, counts): |