Mercurial Hosting > traffic-intelligence
diff python/utils.py @ 676:58b9ac2f262f
fine tuning
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Wed, 27 May 2015 04:08:19 +0200 |
parents | ab3fdff42624 |
children | ae07c7b4cf87 |
line wrap: on
line diff
--- a/python/utils.py Tue May 26 23:55:22 2015 +0200
+++ b/python/utils.py Wed May 27 04:08:19 2015 +0200
@@ -318,10 +318,7 @@
             newVariables.append(newVariable)
     return newVariables
 
-def frenchify(s, displayNames):
-    return s
-
-def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, displayNames = {}):
+def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, translate = lambda s: s, kwCaption = u''):
     '''Studies the influence of (nominal) independent variable over the dependent variable
 
     Makes tests if the conditional distributions are normal
@@ -347,14 +344,19 @@
             if filenamePrefix is not None:
                 plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
         table = tmp.groupby([independentVariable])[dependentVariable].describe().unstack().sort(['50%'], ascending = False)
+        table['count'] = table['count'].astype(int)
+        #table.index.rename(translate(table.index.name), inplace = True)
+        testResult = kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues])
         if saveLatex:
-            out.write('\\begin{table}[htp!]\n')
-            out.write(frenchify(table.to_latex(), displayNames))
-            out.write('\caption{Test}\n'
-                      +'\end{table}[htp!]')
+            out.write(translate('\\begin{minipage}{\\linewidth}\n'
+                                +'\\centering\n'
+                                +'\\captionof{table}{'+(kwCaption.format(dependentVariable, independentVariable, *testResult))+'}\n'
+                                +table.to_latex(float_format = lambda x: '{:.2f}'.format(x)).encode('ascii')+'\n'
+                                +'\\end{minipage}\n'
+                                +'\\vspace{0.5cm}\n'))
         else:
             print table
-        return kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues])
+        return testResult
     else:
         return None
 
@@ -500,22 +502,29 @@
             bestModel[currentVarNum] = True
     return experiments
 
-def displayModelResults(results, model = None):
+def displayModelResults(results, model = None, plotFigures = True, filenamePrefix = None, figureFileType = 'pdf'):
     import statsmodels.api as sm
     '''Displays some model results'''
-    print results.summary()
+    print(results.summary())
     print('Shapiro-Wilk normality test for residuals: {}'.format(shapiro(results.resid)))
-    if model is not None:
+    if plotFigures:
+        if model is not None:
+            plt.figure()
+            plt.plot(results.predict(), model.endog, 'x')
+            x=plt.xlim()
+            y=plt.ylim()
+            plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r')
+            plt.title('true vs predicted')
+            if filenamePrefix is not None:
+                plt.savefig(filenamePrefix+'-true-predicted.'+figureFileType)
         plt.figure()
-        plt.plot(results.predict(), model.endog, 'x')
-        x=plt.xlim()
-        y=plt.ylim()
-        plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r')
-        plt.title('true vs predicted')
-    plt.figure()
-    plt.plot(results.predict(), results.resid, 'x')
-    plt.title('residuals vs predicted')
-    sm.qqplot(results.resid, fit = True, line = '45')
+        plt.plot(results.predict(), results.resid, 'x')
+        if filenamePrefix is not None:
+            plt.savefig(filenamePrefix+'-residuals.'+figureFileType)
+        plt.title('residuals vs predicted')
+        sm.qqplot(results.resid, fit = True, line = '45')
+        if filenamePrefix is not None:
+            plt.savefig(filenamePrefix+'-qq.'+figureFileType)
 
 
 #########################