Mercurial Hosting > traffic-intelligence

--- a/python/utils.py	Tue May 26 23:55:22 2015 +0200
+++ b/python/utils.py	Wed May 27 04:08:19 2015 +0200
@@ -318,10 +318,7 @@
                     newVariables.append(newVariable)
     return newVariables

-def frenchify(s, displayNames):
-    return s
-
-def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, displayNames = {}):
+def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, translate = lambda s: s, kwCaption = u''):
     '''Studies the influence of (nominal) independent variable over the dependent variable

     Makes tests if the conditional distributions are normal
@@ -347,14 +344,19 @@
             if filenamePrefix is not None:
                 plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
         table = tmp.groupby([independentVariable])[dependentVariable].describe().unstack().sort(['50%'], ascending = False)
+        table['count'] = table['count'].astype(int)
+        #table.index.rename(translate(table.index.name), inplace = True)
+        testResult = kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues])
         if saveLatex:
-            out.write('\\begin{table}[htp!]\n')
-            out.write(frenchify(table.to_latex(), displayNames))
-            out.write('\caption{Test}\n'
-                      +'\end{table}[htp!]')
+            out.write(translate('\\begin{minipage}{\\linewidth}\n'
+                                +'\\centering\n'
+                                +'\\captionof{table}{'+(kwCaption.format(dependentVariable, independentVariable, *testResult))+'}\n'
+                                +table.to_latex(float_format = lambda x: '{:.2f}'.format(x)).encode('ascii')+'\n'
+                                +'\\end{minipage}\n'
+                                +'\\vspace{0.5cm}\n'))
         else:
             print table
-        return kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues])
+        return testResult
     else:
         return None

@@ -500,22 +502,37 @@
             bestModel[currentVarNum] = True
     return experiments

-def displayModelResults(results, model = None):
+def displayModelResults(results, model = None, plotFigures = True, filenamePrefix = None, figureFileType = 'pdf'):
+    '''Displays some model results
+
+    Prints the summary of results and the Shapiro-Wilk normality test of the residuals
+    If plotFigures is True, plots the residuals vs the predicted values, the Q-Q plot of the residuals
+    and, if model is not None, the true values (model.endog) vs the predicted values
+    If filenamePrefix is not None, the figures are saved in files named filenamePrefix
+    followed by -true-predicted, -residuals or -qq and the extension figureFileType'''
     import statsmodels.api as sm
-    '''Displays some model results'''
-    print results.summary()
+    print(results.summary())
     print('Shapiro-Wilk normality test for residuals: {}'.format(shapiro(results.resid)))
-    if model is not None:
+    if plotFigures:
+        if model is not None:
+            plt.figure()
+            plt.plot(results.predict(), model.endog, 'x')
+            x=plt.xlim()
+            y=plt.ylim()
+            plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r')
+            plt.title('true vs predicted')
+            if filenamePrefix is not None:
+                plt.savefig(filenamePrefix+'-true-predicted.'+figureFileType)
         plt.figure()
-        plt.plot(results.predict(), model.endog, 'x')
-        x=plt.xlim()
-        y=plt.ylim()
-        plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r')
-        plt.title('true vs predicted')
-    plt.figure()
-    plt.plot(results.predict(), results.resid, 'x')
-    plt.title('residuals vs predicted')
-    sm.qqplot(results.resid, fit = True, line = '45')
+        plt.plot(results.predict(), results.resid, 'x')
+        # the title is set before saving so that it appears in the saved figure
+        plt.title('residuals vs predicted')
+        if filenamePrefix is not None:
+            plt.savefig(filenamePrefix+'-residuals.'+figureFileType)
+        sm.qqplot(results.resid, fit = True, line = '45')
+        # saves the current figure (presumably the one sm.qqplot just created -- to confirm)
+        if filenamePrefix is not None:
+            plt.savefig(filenamePrefix+'-qq.'+figureFileType)


 #########################