comparison python/utils.py @ 997:4f3387a242a1

updated utils to python 3
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Fri, 25 May 2018 18:15:18 -0400
parents 94bee7b604eb
children 16932cefabc1
comparison
equal deleted inserted replaced
996:add667153087 997:4f3387a242a1
299 return 1 299 return 1
300 300
301 def sortByLength(instances, reverse = False): 301 def sortByLength(instances, reverse = False):
302 '''Returns a new list with the instances sorted by length (method __len__) 302 '''Returns a new list with the instances sorted by length (method __len__)
303 reverse is passed to sorted''' 303 reverse is passed to sorted'''
304 return sorted(instances, cmp = compareLengthForSort, reverse = reverse) 304 return sorted(instances, key = len, reverse = reverse)
305 305
306 def ceilDecimals(v, nDecimals): 306 def ceilDecimals(v, nDecimals):
307 '''Rounds the number up at the nth decimal 307 '''Rounds the number up at the nth decimal
308 e.g. 1.23 at 0 decimals is 2, at 1 decimal is 1.3''' 308 e.g. 1.23 at 0 decimals is 2, at 1 decimal is 1.3'''
309 tens = 10**nDecimals 309 tens = 10**nDecimals
404 newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','') 404 newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','')
405 data[newVariable] = (data[var] == val) 405 data[newVariable] = (data[var] == val)
406 newVariables.append(newVariable) 406 newVariables.append(newVariable)
407 return newVariables 407 return newVariables
408 408
409 def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, renameVariables = lambda s: s, kwCaption = u''): 409 def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, renameVariables = lambda s: s, kwCaption = ''):
410 '''Studies the influence of (nominal) independent variable over the dependent variable 410 '''Studies the influence of (nominal) independent variable over the dependent variable
411 411
412 Tests whether the conditional distributions are normal 412 Tests whether the conditional distributions are normal
413 using the Shapiro-Wilk test (in which case ANOVA could be used) 413 using the Shapiro-Wilk test (in which case ANOVA could be used)
414 Uses the non-parametric Kruskal-Wallis test''' 414 Uses the non-parametric Kruskal-Wallis test'''
510 return result 510 return result
511 511
512 def saveDokMatrix(filename, m, lowerTriangle = False): 512 def saveDokMatrix(filename, m, lowerTriangle = False):
513 'Saves a dok_matrix using savez' 513 'Saves a dok_matrix using savez'
514 if lowerTriangle: 514 if lowerTriangle:
515 keys = [k for k in m.keys() if k[0] > k[1]] 515 keys = [k for k in m if k[0] > k[1]]
516 savez(filename, shape = m.shape, keys = keys, values = [m[k[0],k[1]] for k in keys]) 516 savez(filename, shape = m.shape, keys = keys, values = [m[k[0],k[1]] for k in keys])
517 else: 517 else:
518 savez(filename, shape = m.shape, keys = m.keys(), values = m.values()) 518 savez(filename, shape = m.shape, keys = list(m.keys()), values = list(m.values()))
519 519
520 def loadDokMatrix(filename): 520 def loadDokMatrix(filename):
521 'Loads a dok_matrix saved using the above saveDokMatrix' 521 'Loads a dok_matrix saved using the above saveDokMatrix'
522 data = npload(filename) 522 data = npload(filename)
523 m = dok_matrix(tuple(data['shape'])) 523 m = dok_matrix(tuple(data['shape']))
610 to the binary code derived from the independent variables''' 610 to the binary code derived from the independent variables'''
611 from numpy.random import permutation as nppermutation 611 from numpy.random import permutation as nppermutation
612 if experiments is None: 612 if experiments is None:
613 experiments = generateExperiments(independentVariables) 613 experiments = generateExperiments(independentVariables)
614 nIndependentVariables = len(independentVariables) 614 nIndependentVariables = len(independentVariables)
615 permutation = nppermutation(range(nIndependentVariables)).tolist() 615 permutation = nppermutation(list(range(nIndependentVariables)))
616 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)} 616 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)}
617 print('Tested variables '+', '.join([variableMapping[i] for i in xrange(nIndependentVariables)])) 617 print('Tested variables '+', '.join([variableMapping[i] for i in range(nIndependentVariables)]))
618 bestModel = [False]*nIndependentVariables 618 bestModel = [False]*nIndependentVariables
619 currentVarNum = 0 619 currentVarNum = 0
620 currentR2Adj = 0. 620 currentR2Adj = 0.
621 for currentVarNum in xrange(nIndependentVariables): 621 for currentVarNum in range(nIndependentVariables):
622 currentModel = [i for i in bestModel] 622 currentModel = [i for i in bestModel]
623 currentModel[currentVarNum] = True 623 currentModel[currentVarNum] = True
624 rowIdx = sum([0]+[2**i for i in xrange(nIndependentVariables) if currentModel[permutation[i]]]) 624 rowIdx = sum([0]+[2**i for i in range(nIndependentVariables) if currentModel[permutation[i]]])
625 #print currentVarNum, sum(currentModel), ', '.join([independentVariables[i] for i in xrange(nIndependentVariables) if currentModel[permutation[i]]]) 625 #print currentVarNum, sum(currentModel), ', '.join([independentVariables[i] for i in range(nIndependentVariables) if currentModel[permutation[i]]])
626 if experiments.loc[rowIdx, 'shapiroP'] < 0: 626 if experiments.loc[rowIdx, 'shapiroP'] < 0:
627 modelStr = modelString(experiments.loc[rowIdx], dependentVariable, independentVariables) 627 modelStr = modelString(experiments.loc[rowIdx], dependentVariable, independentVariables)
628 model = modelFunc(modelStr, data = data) 628 model = modelFunc(modelStr, data = data)
629 results = model.fit() 629 results = model.fit()
630 experiments.loc[rowIdx, 'r2adj'] = results.rsquared_adj 630 experiments.loc[rowIdx, 'r2adj'] = results.rsquared_adj
750 def similarities(self, l1, l2, jshift=0): 750 def similarities(self, l1, l2, jshift=0):
751 n1 = len(l1) 751 n1 = len(l1)
752 n2 = len(l2) 752 n2 = len(l2)
753 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) 753 self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
754 if self.similarityFunc is not None: 754 if self.similarityFunc is not None:
755 for i in xrange(1,n1+1): 755 for i in range(1,n1+1):
756 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): 756 for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
757 if self.similarityFunc(l1[i-1], l2[j-1]): 757 if self.similarityFunc(l1[i-1], l2[j-1]):
758 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 758 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
759 else: 759 else:
760 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) 760 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
761 elif self.metric is not None: 761 elif self.metric is not None:
762 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon 762 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
763 for i in xrange(1,n1+1): 763 for i in range(1,n1+1):
764 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): 764 for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
765 if similarElements[i-1, j-1]: 765 if similarElements[i-1, j-1]:
766 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 766 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
767 else: 767 else:
768 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) 768 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
769 769
800 n2 = len(l2) 800 n2 = len(l2)
801 801
802 if self.aligned: 802 if self.aligned:
803 lcssValues = {} 803 lcssValues = {}
804 similarityTables = {} 804 similarityTables = {}
805 for i in xrange(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2 805 for i in range(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2
806 self.similarities(l1, l2, i) 806 self.similarities(l1, l2, i)
807 lcssValues[i] = self.similarityTable.max() 807 lcssValues[i] = self.similarityTable.max()
808 similarityTables[i] = self.similarityTable 808 similarityTables[i] = self.similarityTable
809 #print self.similarityTable 809 #print self.similarityTable
810 alignmentShift = argmaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance 810 alignmentShift = argmaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
892 monochrome = (cycler('color', ['k']) * cycler('linestyle', ['-', '--', ':', '-.'])) 892 monochrome = (cycler('color', ['k']) * cycler('linestyle', ['-', '--', ':', '-.']))
893 plt.rc('axes', prop_cycle=monochrome) 893 plt.rc('axes', prop_cycle=monochrome)
894 894
895 def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1): 895 def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1):
896 from matplotlib.pyplot import pcolor 896 from matplotlib.pyplot import pcolor
897 coords = array(indicatorMap.keys()) 897 coords = array(list(indicatorMap.keys()))
898 minX = min(coords[:,0]) 898 minX = min(coords[:,0])
899 minY = min(coords[:,1]) 899 minY = min(coords[:,1])
900 X = arange(minX, max(coords[:,0])+1.1)*squareSize 900 X = arange(minX, max(coords[:,0])+1.1)*squareSize
901 Y = arange(minY, max(coords[:,1])+1.1)*squareSize 901 Y = arange(minY, max(coords[:,1])+1.1)*squareSize
902 C = defaultValue*ones((len(Y), len(X))) 902 C = defaultValue*ones((len(Y), len(X)))
903 for k,v in indicatorMap.iteritems(): 903 for k,v in indicatorMap.items():
904 C[k[1]-minY,k[0]-minX] = v 904 C[k[1]-minY,k[0]-minX] = v
905 if masked: 905 if masked:
906 pcolor(X, Y, ma.masked_where(C==defaultValue,C)) 906 pcolor(X, Y, ma.masked_where(C==defaultValue,C))
907 else: 907 else:
908 pcolor(X, Y, C) 908 pcolor(X, Y, C)
924 To get hourly data for 2009 and 2012, January, March and October, downloadECWeather(10761, [2009,2012], [1,3,10], '/tmp') 924 To get hourly data for 2009 and 2012, January, March and October, downloadECWeather(10761, [2009,2012], [1,3,10], '/tmp')
925 925
926 for annee in `seq 2016 2017`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&timeframe=2&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done 926 for annee in `seq 2016 2017`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&timeframe=2&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done
927 for annee in `seq 2016 2017`;do for mois in `seq 1 12`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&Month=${mois}&timeframe=1&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done;done 927 for annee in `seq 2016 2017`;do for mois in `seq 1 12`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&Month=${mois}&timeframe=1&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done;done
928 ''' 928 '''
929 import urllib2 929 import urllib.request
930 if english: 930 if english:
931 language = 'e' 931 language = 'e'
932 else: 932 else:
933 language = 'f' 933 language = 'f'
934 if len(months) == 0: 934 if len(months) == 0:
937 else: 937 else:
938 timeFrame = 1 938 timeFrame = 1
939 939
940 for year in years: 940 for year in years:
941 for month in months: 941 for month in months:
942 url = urllib2.urlopen('http://climate.weather.gc.ca/climate_data/bulk_data_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit= Download+Data'.format(language, stationID, year, month, timeFrame))
943 #http://climat.meteo.gc.ca/climateData/bulkdata_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es
944 data = url.read()
945 outFilename = '{}/{}-{}'.format(outputDirectoryname, stationID, year) 942 outFilename = '{}/{}-{}'.format(outputDirectoryname, stationID, year)
946 if timeFrame == 1: 943 if timeFrame == 1:
947 outFilename += '-{}-hourly'.format(month) 944 outFilename += '-{}-hourly'.format(month)
948 else: 945 else:
949 outFilename += '-daily' 946 outFilename += '-daily'
950 outFilename += '.csv' 947 outFilename += '.csv'
951 out = open(outFilename, 'w') 948 url = urllib.request.urlretrieve('http://climate.weather.gc.ca/climate_data/bulk_data_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit=Download+Data'.format(language, stationID, year, month, timeFrame), outFilename)
952 out.write(data)
953 out.close()
954 949
955 ######################### 950 #########################
956 # File I/O 951 # File I/O
957 ######################### 952 #########################
958 953
1009 return [float(x) for x in l.split(separator)] 1004 return [float(x) for x in l.split(separator)]
1010 1005
1011 def line2Ints(l, separator=' '): 1006 def line2Ints(l, separator=' '):
1012 '''Returns the list of ints corresponding to the string''' 1007 '''Returns the list of ints corresponding to the string'''
1013 return [int(x) for x in l.split(separator)] 1008 return [int(x) for x in l.split(separator)]
1014
1015 #########################
1016 # CLI utils
1017 #########################
1018
1019 def parseCLIOptions(helpMessage, options, cliArgs, optionalOptions=[]):
1020 ''' Simple function to handle similar argument parsing
1021 Returns the dictionary of options and their values
1022
1023 * cliArgs are most likely directly sys.argv
1024 (only the elements after the first one are considered)
1025
1026 * options should be a list of strings for getopt options,
1027 eg ['frame=','correspondences=','video=']
1028 A value must be provided for each option, or the program quits'''
1029 import sys, getopt
1030 from numpy.core.fromnumeric import all
1031 optionValues, args = getopt.getopt(cliArgs[1:], 'h', ['help']+options+optionalOptions)
1032 optionValues = dict(optionValues)
1033
1034 if '--help' in optionValues.keys() or '-h' in optionValues.keys():
1035 print(helpMessage+
1036 '\n - Compulsory options: '+' '.join([opt.replace('=','') for opt in options])+
1037 '\n - Non-compulsory options: '+' '.join([opt.replace('=','') for opt in optionalOptions]))
1038 sys.exit()
1039
1040 missingArgument = [('--'+opt.replace('=','') in optionValues.keys()) for opt in options]
1041 if not all(missingArgument):
1042 print('Missing argument')
1043 print(optionValues)
1044 sys.exit()
1045
1046 return optionValues
1047
1048 1009
1049 ######################### 1010 #########################
1050 # Profiling 1011 # Profiling
1051 ######################### 1012 #########################
1052 1013