repo/traffic-intelligence: python/utils.py comparison

comparison python/utils.py @ 676:58b9ac2f262f

fine tuning

author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Wed, 27 May 2015 04:08:19 +0200
parents	ab3fdff42624
children	ae07c7b4cf87

comparison

equal deleted inserted replaced

-:ab3fdff42624
+:58b9ac2f262f
 newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','')
 data[newVariable] = (data[var] == val)
 newVariables.append(newVariable)
 return newVariables
-def frenchify(s, displayNames):
+def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, translate = lambda s: s, kwCaption = u''):
-return s
-def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, displayNames = {}):
 '''Studies the influence of a (nominal) independent variable on the dependent variable
 Tests whether the conditional distributions are normal
 using the Shapiro-Wilk test (in which case ANOVA could be used)
 Uses the non-parametric Kruskal-Wallis test'''
 plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues)
 plt.title('{} vs {}'.format(dependentVariable, independentVariable))
 if filenamePrefix is not None:
 plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
 table = tmp.groupby([independentVariable])[dependentVariable].describe().unstack().sort(['50%'], ascending = False)
+table['count'] = table['count'].astype(int)
+#table.index.rename(translate(table.index.name), inplace = True)
+testResult = kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues])
 if saveLatex:
-out.write('\\begin{table}[htp!]\n')
+out.write(translate('\\begin{minipage}{\\linewidth}\n'
-out.write(frenchify(table.to_latex(), displayNames))
++'\\centering\n'
-out.write('\caption{Test}\n'
++'\\captionof{table}{'+(kwCaption.format(dependentVariable, independentVariable, *testResult))+'}\n'
-+'\end{table}[htp!]')
++table.to_latex(float_format = lambda x: '{:.2f}'.format(x)).encode('ascii')+'\n'
++'\\end{minipage}\n'
++'\\vspace{0.5cm}\n'))
 else:
 print table
-return kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues])
+return testResult
 else:
 return None
 def prepareRegression(data, dependentVariable, independentVariables, maxCorrelationThreshold, correlations, maxCorrelationP, correlationFunc):
 '''Removes variables from candidate independent variables if
 if currentR2Adj < experiments.loc[rowIdx, 'r2adj']:
 currentR2Adj = experiments.loc[rowIdx, 'r2adj']
 bestModel[currentVarNum] = True
 return experiments
-def displayModelResults(results, model = None):
+def displayModelResults(results, model = None, plotFigures = True, filenamePrefix = None, figureFileType = 'pdf'):
 import statsmodels.api as sm
 '''Displays some model results'''
-print results.summary()
+print(results.summary())
 print('Shapiro-Wilk normality test for residuals: {}'.format(shapiro(results.resid)))
-if model is not None:
+if plotFigures:
+if model is not None:
+plt.figure()
+plt.plot(results.predict(), model.endog, 'x')
+x=plt.xlim()
+y=plt.ylim()
+plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r')
+plt.title('true vs predicted')
+if filenamePrefix is not None:
+plt.savefig(filenamePrefix+'-true-predicted.'+figureFileType)
 plt.figure()
-plt.plot(results.predict(), model.endog, 'x')
+plt.plot(results.predict(), results.resid, 'x')
-x=plt.xlim()
+if filenamePrefix is not None:
-y=plt.ylim()
+plt.savefig(filenamePrefix+'-residuals.'+figureFileType)
-plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r')
+plt.title('residuals vs predicted')
-plt.title('true vs predicted')
+sm.qqplot(results.resid, fit = True, line = '45')
-plt.figure()
+if filenamePrefix is not None:
-plt.plot(results.predict(), results.resid, 'x')
+plt.savefig(filenamePrefix+'-qq.'+figureFileType)
-plt.title('residuals vs predicted')
-sm.qqplot(results.resid, fit = True, line = '45')
 #########################
 # iterable section
 #########################

Mercurial Hosting > traffic-intelligence

comparison python/utils.py @ 676:58b9ac2f262f