diff python/utils.py @ 674:01b89182891a

corrected bug for intersection of lines (thanks to Paul for finding)
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 26 May 2015 18:16:51 +0200
parents 5473b7460375
children ab3fdff42624
line wrap: on
line diff
--- a/python/utils.py	Tue May 26 15:02:47 2015 +0200
+++ b/python/utils.py	Tue May 26 18:16:51 2015 +0200
@@ -318,7 +318,10 @@
                     newVariables.append(newVariable)
     return newVariables
 
-def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, figureFilenamePrefix = None, figureFileType = 'pdf'):
+def frenchify(s, displayNames):  # Pass-through stub: displayNames is accepted but not used.
+    return s  # Returns s unchanged. NOTE(review): presumably meant to substitute display names later -- confirm.
+
+def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False):
     '''Studies the influence of (nominal) independent variable over the dependent variable
 
     Makes tests if the conditional distributions are normal
@@ -327,6 +330,9 @@
     tmp = data[data[independentVariable].notnull()]
     independentVariableValues = sorted(tmp[independentVariable].unique().tolist())
     if len(independentVariableValues) >= 2:
+        if saveLatex:
+            from storage import openCheck
+            out = openCheck(filenamePrefix+'-{}-{}.tex'.format(dependentVariable, independentVariable), 'w')
         for x in independentVariableValues:
             print('Shapiro-Wilk normality test for {} when {}={}: {} obs'.format(dependentVariable,independentVariable, x, len(tmp.loc[tmp[independentVariable] == x, dependentVariable])))
             if len(tmp.loc[tmp[independentVariable] == x, dependentVariable]) >= 3:
@@ -338,11 +344,15 @@
             #plt.ylim(ymax = q75+1.5*(q75-q25))
             plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues)
             plt.title('{} vs {}'.format(dependentVariable, independentVariable))
-            if figureFilenamePrefix is not None:
-                plt.savefig(figureFilenamePrefix+'{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
-        #else:
-        # TODO formatter le tableau (html?)
-        print tmp.groupby([independentVariable])[dependentVariable].describe().unstack().sort(['50%'], ascending = False)
+            if filenamePrefix is not None:
+                plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
+        table = tmp.groupby([independentVariable])[dependentVariable].describe().unstack().sort(['50%'], ascending = False)
+        if saveLatex:
+            out.write('\begin{table}[htp!]')
+            out.write(frenchify(table.to_latex(), displayNames))
+            out.write('\end{table}[htp!]')
+        else:
+            print table
         return kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues])
     else:
         return None
@@ -444,6 +454,7 @@
     experiments = generateExperiments(independentVariables)
     nModels = len(experiments)
     print("Running {} models with {} processes".format(nModels, nProcesses))
+    print("IndependentVariables: {}".format(independentVariables))
     if nProcesses == 1:
         return runModels(experiments, data, dependentVariable, independentVariables, regressionType)
     else: