Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 670:f72ed51c6b65
corrected other missing imports
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 26 May 2015 11:39:36 +0200 |
parents | df6be882f325 |
children | 849f5f8bf4b9 |
comparison
equal
deleted
inserted
replaced
669:df6be882f325 | 670:f72ed51c6b65 |
---|---|
1 #! /usr/bin/env python | 1 #! /usr/bin/env python |
2 ''' Generic utilities.''' | 2 ''' Generic utilities.''' |
3 | 3 |
4 import matplotlib.pyplot as plt | 4 import matplotlib.pyplot as plt |
5 from datetime import time, datetime | 5 from datetime import time, datetime |
6 from math import sqrt | 6 from math import sqrt, ceil, floor |
7 from scipy.stats import kruskal, shapiro | 7 from scipy.stats import kruskal, shapiro |
8 | 8 |
9 datetimeFormat = "%Y-%m-%d %H:%M:%S" | 9 datetimeFormat = "%Y-%m-%d %H:%M:%S" |
10 | 10 |
11 ######################### | 11 ######################### |
35 | 35 |
36 Use otherwise t.interval or norm.interval | 36 Use otherwise t.interval or norm.interval |
37 ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11)) | 37 ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11)) |
38 t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples)) | 38 t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples)) |
39 loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)''' | 39 loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)''' |
40 from math import sqrt | |
41 from scipy.stats.distributions import norm, t | 40 from scipy.stats.distributions import norm, t |
42 if trueStd: | 41 if trueStd: |
43 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200. | 42 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200. |
44 else: # use Student | 43 else: # use Student |
45 k = round(t.ppf(0.5+percentConfidence/200., nSamples-1)*100)/100. | 44 k = round(t.ppf(0.5+percentConfidence/200., nSamples-1)*100)/100. |
209 return max(d, key=d.get) | 208 return max(d, key=d.get) |
210 | 209 |
211 def framesToTime(nFrames, frameRate, initialTime = time()): | 210 def framesToTime(nFrames, frameRate, initialTime = time()): |
212 '''returns a datetime.time for the time in hour, minutes and seconds | 211 '''returns a datetime.time for the time in hour, minutes and seconds |
213 initialTime is a datetime.time''' | 212 initialTime is a datetime.time''' |
214 from math import floor | |
215 seconds = int(floor(float(nFrames)/float(frameRate))+initialTime.hour*3600+initialTime.minute*60+initialTime.second) | 213 seconds = int(floor(float(nFrames)/float(frameRate))+initialTime.hour*3600+initialTime.minute*60+initialTime.second) |
216 h = int(floor(seconds/3600.)) | 214 h = int(floor(seconds/3600.)) |
217 seconds = seconds - h*3600 | 215 seconds = seconds - h*3600 |
218 m = int(floor(seconds/60)) | 216 m = int(floor(seconds/60)) |
219 seconds = seconds - m*60 | 217 seconds = seconds - m*60 |
231 return xsorted, [D[x] for x in xsorted] | 229 return xsorted, [D[x] for x in xsorted] |
232 | 230 |
233 def ceilDecimals(v, nDecimals): | 231 def ceilDecimals(v, nDecimals): |
234 '''Rounds the number at the nth decimal | 232 '''Rounds the number at the nth decimal |
235 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' | 233 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' |
236 from math import ceil,pow | 234 tens = 10**nDecimals |
237 tens = pow(10,nDecimals) | |
238 return ceil(v*tens)/tens | 235 return ceil(v*tens)/tens |
239 | 236 |
240 def inBetween(bound1, bound2, x): | 237 def inBetween(bound1, bound2, x): |
241 return bound1 <= x <= bound2 or bound2 <= x <= bound1 | 238 return bound1 <= x <= bound2 or bound2 <= x <= bound1 |
242 | 239 |
421 experiments.loc[i,'nobs'] = int(results.nobs) | 418 experiments.loc[i,'nobs'] = int(results.nobs) |
422 return experiments | 419 return experiments |
423 | 420 |
424 def generateExperiments(independentVariables): | 421 def generateExperiments(independentVariables): |
425 '''Generates all possible models for including or not each independent variable''' | 422 '''Generates all possible models for including or not each independent variable''' |
 | 423 from numpy import nan |
 | 424 from pandas import DataFrame |
426 experiments = {} | 425 experiments = {} |
427 nIndependentVariables = len(independentVariables) | 426 nIndependentVariables = len(independentVariables) |
428 if nIndependentVariables != len(set(independentVariables)): | 427 if nIndependentVariables != len(set(independentVariables)): |
429 print("Duplicate variables. Exiting") | 428 print("Duplicate variables. Exiting") |
430 import sys | 429 import sys |
431 sys.exit() | 430 sys.exit() |
432 nModels = 2**nIndependentVariables | 431 nModels = 2**nIndependentVariables |
433 for i,var in enumerate(independentVariables): | 432 for i,var in enumerate(independentVariables): |
434 pattern = [False]*(2**i)+[True]*(2**i) | 433 pattern = [False]*(2**i)+[True]*(2**i) |
435 experiments[var] = pattern*(2**(nIndependentVariables-i-1)) | 434 experiments[var] = pattern*(2**(nIndependentVariables-i-1)) |
436 experiments = pd.DataFrame(experiments) | 435 experiments = DataFrame(experiments) |
437 experiments['r2adj'] = 0. | 436 experiments['r2adj'] = 0. |
438 experiments['condNum'] = np.nan | 437 experiments['condNum'] = nan |
439 experiments['shapiroP'] = -1 | 438 experiments['shapiroP'] = -1 |
440 experiments['nobs'] = -1 | 439 experiments['nobs'] = -1 |
441 return experiments | 440 return experiments |
442 | 441 |
443 def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1): | 442 def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1): |
444 '''Generates all possible model with the independentVariables | 443 '''Generates all possible model with the independentVariables |
445 and runs them, saving the results in experiments | 444 and runs them, saving the results in experiments |
446 with multiprocess option''' | 445 with multiprocess option''' |
 | 446 from pandas import concat |
447 experiments = generateExperiments(independentVariables) | 447 experiments = generateExperiments(independentVariables) |
448 nModels = len(experiments) | 448 nModels = len(experiments) |
449 print("Running {} models with {} processes".format(nModels, nProcesses)) | 449 print("Running {} models with {} processes".format(nModels, nProcesses)) |
450 if nProcesses == 1: | 450 if nProcesses == 1: |
451 return runModels(experiments, data, dependentVariable, independentVariables, regressionType) | 451 return runModels(experiments, data, dependentVariable, independentVariables, regressionType) |
452 else: | 452 else: |
453 pool = Pool(processes = nProcesses) | 453 pool = Pool(processes = nProcesses) |
454 chunkSize = int(np.ceil(nModels/nProcesses)) | 454 chunkSize = int(ceil(nModels/nProcesses)) |
455 jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)] | 455 jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)] |
456 return pd.concat([job.get() for job in jobs]) | 456 return concat([job.get() for job in jobs]) |
457 | 457 |
458 def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None): | 458 def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None): |
459 '''Forward search for best model (based on adjusted R2) | 459 '''Forward search for best model (based on adjusted R2) |
460 Randomly starting with one variable and adding randomly variables | 460 Randomly starting with one variable and adding randomly variables |
461 if they improve the model | 461 if they improve the model |
462 | 462 |
463 The results are added to experiments if provided as argument | 463 The results are added to experiments if provided as argument |
464 Storing in experiment relies on the index being the number equal | 464 Storing in experiment relies on the index being the number equal |
465 to the binary code derived from the independent variables''' | 465 to the binary code derived from the independent variables''' |
 | 466 from numpy.random import permutation as nppermutation |
466 if experiments is None: | 467 if experiments is None: |
467 experiments = generateExperiments(independentVariables) | 468 experiments = generateExperiments(independentVariables) |
468 nIndependentVariables = len(independentVariables) | 469 nIndependentVariables = len(independentVariables) |
469 permutation = np.random.permutation(range(nIndependentVariables)).tolist() | 470 permutation = nppermutation(range(nIndependentVariables)).tolist() |
470 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)} | 471 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)} |
471 print('Tested variables '+', '.join([variableMapping[i] for i in xrange(nIndependentVariables)])) | 472 print('Tested variables '+', '.join([variableMapping[i] for i in xrange(nIndependentVariables)])) |
472 bestModel = [False]*nIndependentVariables | 473 bestModel = [False]*nIndependentVariables |
473 currentVarNum = 0 | 474 currentVarNum = 0 |
474 currentR2Adj = 0. | 475 currentR2Adj = 0. |