Mercurial Hosting > traffic-intelligence
diff python/utils.py @ 674:01b89182891a
Corrected a bug in the intersection of lines (thanks to Paul for finding it)
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 26 May 2015 18:16:51 +0200 |
parents | 5473b7460375 |
children | ab3fdff42624 |
line wrap: on
line diff
--- a/python/utils.py Tue May 26 15:02:47 2015 +0200
+++ b/python/utils.py Tue May 26 18:16:51 2015 +0200
@@ -318,7 +318,10 @@
         newVariables.append(newVariable)
     return newVariables
 
-def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, figureFilenamePrefix = None, figureFileType = 'pdf'):
+def frenchify(s, displayNames):
+    return s
+
+def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False):
     '''Studies the influence of (nominal) independent variable over the dependent variable
 
     Makes tests if the conditional distributions are normal
@@ -327,6 +330,9 @@
     tmp = data[data[independentVariable].notnull()]
     independentVariableValues = sorted(tmp[independentVariable].unique().tolist())
     if len(independentVariableValues) >= 2:
+        if saveLatex:
+            from storage import openCheck
+            out = openCheck(filenamePrefix+'-{}-{}.tex'.format(dependentVariable, independentVariable), 'w')
         for x in independentVariableValues:
             print('Shapiro-Wilk normality test for {} when {}={}: {} obs'.format(dependentVariable,independentVariable, x, len(tmp.loc[tmp[independentVariable] == x, dependentVariable])))
             if len(tmp.loc[tmp[independentVariable] == x, dependentVariable]) >= 3:
@@ -338,11 +344,15 @@
             #plt.ylim(ymax = q75+1.5*(q75-q25))
             plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues)
             plt.title('{} vs {}'.format(dependentVariable, independentVariable))
-            if figureFilenamePrefix is not None:
-                plt.savefig(figureFilenamePrefix+'{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
-        #else:
-        # TODO formatter le tableau (html?)
-        print tmp.groupby([independentVariable])[dependentVariable].describe().unstack().sort(['50%'], ascending = False)
+            if filenamePrefix is not None:
+                plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
+        table = tmp.groupby([independentVariable])[dependentVariable].describe().unstack().sort(['50%'], ascending = False)
+        if saveLatex:
+            out.write('\begin{table}[htp!]')
+            out.write(frenchify(table.to_latex(), displayNames))
+            out.write('\end{table}[htp!]')
+        else:
+            print table
         return kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues])
     else:
         return None
@@ -444,6 +454,7 @@
     experiments = generateExperiments(independentVariables)
     nModels = len(experiments)
     print("Running {} models with {} processes".format(nModels, nProcesses))
+    print("IndependentVariables: {}".format(independentVariables))
     if nProcesses == 1:
         return runModels(experiments, data, dependentVariable, independentVariables, regressionType)
     else: