comparison python/utils.py @ 670:f72ed51c6b65

corrected other missing imports
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 26 May 2015 11:39:36 +0200
parents df6be882f325
children 849f5f8bf4b9
comparison
equal deleted inserted replaced
669:df6be882f325 670:f72ed51c6b65
1 #! /usr/bin/env python 1 #! /usr/bin/env python
2 ''' Generic utilities.''' 2 ''' Generic utilities.'''
3 3
4 import matplotlib.pyplot as plt 4 import matplotlib.pyplot as plt
5 from datetime import time, datetime 5 from datetime import time, datetime
6 from math import sqrt 6 from math import sqrt, ceil, floor
7 from scipy.stats import kruskal, shapiro 7 from scipy.stats import kruskal, shapiro
8 8
9 datetimeFormat = "%Y-%m-%d %H:%M:%S" 9 datetimeFormat = "%Y-%m-%d %H:%M:%S"
10 10
11 ######################### 11 #########################
35 35
36 Use otherwise t.interval or norm.interval 36 Use otherwise t.interval or norm.interval
37 ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11)) 37 ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11))
38 t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples)) 38 t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples))
39 loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)''' 39 loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)'''
40 from math import sqrt
41 from scipy.stats.distributions import norm, t 40 from scipy.stats.distributions import norm, t
42 if trueStd: 41 if trueStd:
43 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200. 42 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200.
44 else: # use Student 43 else: # use Student
45 k = round(t.ppf(0.5+percentConfidence/200., nSamples-1)*100)/100. 44 k = round(t.ppf(0.5+percentConfidence/200., nSamples-1)*100)/100.
209 return max(d, key=d.get) 208 return max(d, key=d.get)
210 209
211 def framesToTime(nFrames, frameRate, initialTime = time()): 210 def framesToTime(nFrames, frameRate, initialTime = time()):
212 '''returns a datetime.time for the time in hour, minutes and seconds 211 '''returns a datetime.time for the time in hour, minutes and seconds
213 initialTime is a datetime.time''' 212 initialTime is a datetime.time'''
214 from math import floor
215 seconds = int(floor(float(nFrames)/float(frameRate))+initialTime.hour*3600+initialTime.minute*60+initialTime.second) 213 seconds = int(floor(float(nFrames)/float(frameRate))+initialTime.hour*3600+initialTime.minute*60+initialTime.second)
216 h = int(floor(seconds/3600.)) 214 h = int(floor(seconds/3600.))
217 seconds = seconds - h*3600 215 seconds = seconds - h*3600
218 m = int(floor(seconds/60)) 216 m = int(floor(seconds/60))
219 seconds = seconds - m*60 217 seconds = seconds - m*60
231 return xsorted, [D[x] for x in xsorted] 229 return xsorted, [D[x] for x in xsorted]
232 230
233 def ceilDecimals(v, nDecimals): 231 def ceilDecimals(v, nDecimals):
234 '''Rounds the number at the nth decimal 232 '''Rounds the number at the nth decimal
235 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' 233 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3'''
236 from math import ceil,pow 234 tens = 10**nDecimals
237 tens = pow(10,nDecimals)
238 return ceil(v*tens)/tens 235 return ceil(v*tens)/tens
239 236
240 def inBetween(bound1, bound2, x): 237 def inBetween(bound1, bound2, x):
241 return bound1 <= x <= bound2 or bound2 <= x <= bound1 238 return bound1 <= x <= bound2 or bound2 <= x <= bound1
242 239
421 experiments.loc[i,'nobs'] = int(results.nobs) 418 experiments.loc[i,'nobs'] = int(results.nobs)
422 return experiments 419 return experiments
423 420
424 def generateExperiments(independentVariables): 421 def generateExperiments(independentVariables):
425 '''Generates all possible models for including or not each independent variable''' 422 '''Generates all possible models for including or not each independent variable'''
423 from numpy import nan
424 from pandas import DataFrame
426 experiments = {} 425 experiments = {}
427 nIndependentVariables = len(independentVariables) 426 nIndependentVariables = len(independentVariables)
428 if nIndependentVariables != len(set(independentVariables)): 427 if nIndependentVariables != len(set(independentVariables)):
429 print("Duplicate variables. Exiting") 428 print("Duplicate variables. Exiting")
430 import sys 429 import sys
431 sys.exit() 430 sys.exit()
432 nModels = 2**nIndependentVariables 431 nModels = 2**nIndependentVariables
433 for i,var in enumerate(independentVariables): 432 for i,var in enumerate(independentVariables):
434 pattern = [False]*(2**i)+[True]*(2**i) 433 pattern = [False]*(2**i)+[True]*(2**i)
435 experiments[var] = pattern*(2**(nIndependentVariables-i-1)) 434 experiments[var] = pattern*(2**(nIndependentVariables-i-1))
436 experiments = pd.DataFrame(experiments) 435 experiments = DataFrame(experiments)
437 experiments['r2adj'] = 0. 436 experiments['r2adj'] = 0.
438 experiments['condNum'] = np.nan 437 experiments['condNum'] = nan
439 experiments['shapiroP'] = -1 438 experiments['shapiroP'] = -1
440 experiments['nobs'] = -1 439 experiments['nobs'] = -1
441 return experiments 440 return experiments
442 441
443 def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1): 442 def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1):
444 '''Generates all possible model with the independentVariables 443 '''Generates all possible model with the independentVariables
445 and runs them, saving the results in experiments 444 and runs them, saving the results in experiments
446 with multiprocess option''' 445 with multiprocess option'''
446 from pandas import concat
447 experiments = generateExperiments(independentVariables) 447 experiments = generateExperiments(independentVariables)
448 nModels = len(experiments) 448 nModels = len(experiments)
449 print("Running {} models with {} processes".format(nModels, nProcesses)) 449 print("Running {} models with {} processes".format(nModels, nProcesses))
450 if nProcesses == 1: 450 if nProcesses == 1:
451 return runModels(experiments, data, dependentVariable, independentVariables, regressionType) 451 return runModels(experiments, data, dependentVariable, independentVariables, regressionType)
452 else: 452 else:
453 pool = Pool(processes = nProcesses) 453 pool = Pool(processes = nProcesses)
454 chunkSize = int(np.ceil(nModels/nProcesses)) 454 chunkSize = int(ceil(nModels/nProcesses))
455 jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)] 455 jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)]
456 return pd.concat([job.get() for job in jobs]) 456 return concat([job.get() for job in jobs])
457 457
458 def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None): 458 def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None):
459 '''Forward search for best model (based on adjusted R2) 459 '''Forward search for best model (based on adjusted R2)
460 Randomly starting with one variable and adding randomly variables 460 Randomly starting with one variable and adding randomly variables
461 if they improve the model 461 if they improve the model
462 462
463 The results are added to experiments if provided as argument 463 The results are added to experiments if provided as argument
464 Storing in experiment relies on the index being the number equal 464 Storing in experiment relies on the index being the number equal
465 to the binary code derived from the independent variables''' 465 to the binary code derived from the independent variables'''
466 from numpy.random import permutation as nppermutation
466 if experiments is None: 467 if experiments is None:
467 experiments = generateExperiments(independentVariables) 468 experiments = generateExperiments(independentVariables)
468 nIndependentVariables = len(independentVariables) 469 nIndependentVariables = len(independentVariables)
469 permutation = np.random.permutation(range(nIndependentVariables)).tolist() 470 permutation = nppermutation(range(nIndependentVariables)).tolist()
470 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)} 471 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)}
471 print('Tested variables '+', '.join([variableMapping[i] for i in xrange(nIndependentVariables)])) 472 print('Tested variables '+', '.join([variableMapping[i] for i in xrange(nIndependentVariables)]))
472 bestModel = [False]*nIndependentVariables 473 bestModel = [False]*nIndependentVariables
473 currentVarNum = 0 474 currentVarNum = 0
474 currentR2Adj = 0. 475 currentR2Adj = 0.