comparison trafficintelligence/utils.py @ 1031:045cb04ad7b8

corrected bug in distribution
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 19 Jun 2018 17:07:35 -0400
parents aafbc0bab925
children 4069d8545922
comparison
equal deleted inserted replaced
1030:aafbc0bab925 1031:045cb04ad7b8
8 from copy import deepcopy, copy 8 from copy import deepcopy, copy
9 9
10 from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t 10 from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t
11 from scipy.spatial import distance 11 from scipy.spatial import distance
12 from scipy.sparse import dok_matrix 12 from scipy.sparse import dok_matrix
13 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit 13 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit, float as npfloat
14 from numpy.random import permutation as nppermutation 14 from numpy.random import permutation as nppermutation
15 from pandas import DataFrame, concat 15 from pandas import DataFrame, concat
16 import matplotlib.pyplot as plt 16 import matplotlib.pyplot as plt
17 17
18 datetimeFormat = "%Y-%m-%d %H:%M:%S" 18 datetimeFormat = "%Y-%m-%d %H:%M:%S"
133 133
134 def computeChi2(expected, observed): 134 def computeChi2(expected, observed):
135 '''Returns the Chi2 statistics''' 135 '''Returns the Chi2 statistics'''
136 return sum([((e-o)*(e-o))/float(e) for e, o in zip(expected, observed)]) 136 return sum([((e-o)*(e-o))/float(e) for e, o in zip(expected, observed)])
137 137
138 class generateDistribution(rv_continuous): 138 class EmpiricalContinuousDistribution(rv_continuous):
139 def __init__(self, values, probabilities): 139 def __init__(self, values, probabilities, **kwargs):
140 '''The values (and corresponding probabilities) are supposed to be sorted by value 140 '''The values (and corresponding probabilities) are supposed to be sorted by value
141 for v, p in zip(values, probabilities): P(X<=v)=p''' 141 for v, p in zip(values, probabilities): P(X<=v)=p'''
142 assert probabilities[0]==0 142 assert probabilities[0]==0
143 super(EmpiricalContinuousDistribution, self).__init__(**kwargs)
143 self.values = values 144 self.values = values
144 self.probabilities = probabilities 145 self.probabilities = probabilities
145 146
146 def _cdf(self, x): 147 def _cdf(self, x):
147 if x < self.values[0]: 148 if x < self.values[0]:
151 while i+1<len(self.values) and self.values[i+1] < x: 152 while i+1<len(self.values) and self.values[i+1] < x:
152 i += 1 153 i += 1
153 if i == len(self.values)-1: 154 if i == len(self.values)-1:
154 return self.probabilities[-1] 155 return self.probabilities[-1]
155 else: 156 else:
156 return (self.probabilities[i+1]-self.probabilities[i])/(self.values[i+1]-self.values[i]) 157 return self.probabilities[i]+(x-self.values[i])*float(self.probabilities[i+1]-self.probabilities[i])/float(self.values[i+1]-self.values[i])
157 158
158 class DistributionSample(object): 159 class DistributionSample(object):
159 def nSamples(self): 160 def nSamples(self):
160 return sum(self.counts) 161 return sum(self.counts)
161 162
162 def cumulativeDensityFunction(sample, normalized = False): 163 def cumulativeDensityFunction(sample, normalized = False):
163 '''Returns the cumulative density function of the sample of a random variable''' 164 '''Returns the cumulative density function of the sample of a random variable'''
164 xaxis = sorted(sample) 165 xaxis = sorted(sample)
165 counts = arange(1,len(sample)+1) # dtype = float 166 counts = arange(1,len(sample)+1) # dtype = float
166 if normalized: 167 if normalized:
167 counts /= float(len(sample)) 168 counts = counts.astype(float)/float(len(sample))
168 return xaxis, counts 169 return xaxis, counts
169 170
170 class DiscreteDistributionSample(DistributionSample): 171 class DiscreteDistributionSample(DistributionSample):
171 '''Class to represent a sample of a distribution for a discrete random variable''' 172 '''Class to represent a sample of a distribution for a discrete random variable'''
172 def __init__(self, categories, counts): 173 def __init__(self, categories, counts):