Mercurial Hosting > traffic-intelligence
diff trafficintelligence/utils.py @ 1029:c6cf75a2ed08
reorganization of imports
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 18 Jun 2018 22:50:59 -0400 |
parents | cc5cb04b04b0 |
children | aafbc0bab925 |
line wrap: on
line diff
--- a/trafficintelligence/utils.py Fri Jun 15 11:19:10 2018 -0400 +++ b/trafficintelligence/utils.py Mon Jun 18 22:50:59 2018 -0400 @@ -1,17 +1,21 @@ #! /usr/bin/env python -# -*- coding: utf-8 -*- ''' Generic utilities.''' -import matplotlib.pyplot as plt from datetime import time, datetime from argparse import ArgumentTypeError from pathlib import Path from math import sqrt, ceil, floor -from scipy.stats import rv_continuous, kruskal, shapiro, lognorm +from copy import deepcopy, copy + +from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t from scipy.spatial import distance from scipy.sparse import dok_matrix -from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log +from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit +from numpy.random import permutation as nppermutation +from pandas import DataFrame, concat +import matplotlib.pyplot as plt +from trafficintelligence.storage import openCheck datetimeFormat = "%Y-%m-%d %H:%M:%S" @@ -67,7 +71,6 @@ return log(scale), shape def sampleSize(stdev, tolerance, percentConfidence, nRoundingDigits = None, printLatex = False): - from scipy.stats.distributions import norm if nRoundingDigits is None: k = round(norm.ppf(0.5+percentConfidence/200., 0, 1), 2) # 1.-(100-percentConfidence)/200. else: @@ -84,7 +87,6 @@ Use otherwise t.interval or norm.interval for the boundaries ex: norm.interval(0.95) t.interval(0.95, nSamples-1)''' - from scipy.stats.distributions import norm, t if trueStd: k = round(norm.ppf(0.5+percentConfidence/200., 0, 1), 2) else: # use Student @@ -344,7 +346,6 @@ def cat_mvgavg(cat_list, halfWidth): ''' Return a list of categories/values smoothed according to a window. halfWidth is the search radius on either side''' - from copy import deepcopy smoothed = deepcopy(cat_list) for point in range(len(cat_list)): lower_bound_check = max(0,point-halfWidth) @@ -366,8 +367,6 @@ def linearRegression(x, y, deg = 1, plotData = False): '''returns the least square estimation of the linear regression of y = ax+b as well as the plot''' - from numpy.lib.polynomial import polyfit - from numpy.core.multiarray import arange coef = polyfit(x, y, deg) if plotData: def poly(x): @@ -437,7 +436,6 @@ independentVariableValues = sorted(tmp[independentVariable].unique().tolist()) if len(independentVariableValues) >= 2: if saveLatex: - from storage import openCheck out = openCheck(filenamePrefix+'-{}-{}.tex'.format(dependentVariable, independentVariable), 'w') for x in independentVariableValues: print('Shapiro-Wilk normality test for {} when {}={}: {} obs'.format(dependentVariable,independentVariable, x, len(tmp.loc[tmp[independentVariable] == x, dependentVariable]))) @@ -476,8 +474,6 @@ text is the template to display for the two types of printout (see default): 3 elements if no saving to latex file, 8 otherwise TODO: pass the dummies for nominal variables and remove if all dummies are correlated, or none is correlated with the dependentvariable''' - from copy import copy - from pandas import DataFrame result = copy(independentVariables) table1 = '' table2 = {} @@ -516,7 +512,6 @@ table2['Valeurs p'].append(p) if saveFiles: - from storage import openCheck out = openCheck(filenamePrefix+'-removed-variables.tex', 'w') out.write(latexHeader) out.write(table1) @@ -598,7 +593,6 @@ def generateExperiments(independentVariables): '''Generates all possible models for including or not each independent variable''' - from pandas import DataFrame experiments = {} nIndependentVariables = len(independentVariables) if nIndependentVariables != len(set(independentVariables)): @@ -620,8 +614,6 @@ '''Generates all possible model with the independentVariables and runs them, saving the results in experiments with multiprocess option''' - from pandas import concat - from multiprocessing import Pool experiments = generateExperiments(independentVariables) nModels = len(experiments) print("Running {} models with {} processes".format(nModels, nProcesses)) @@ -642,7 +634,6 @@ The results are added to experiments if provided as argument Storing in experiment relies on the index being the number equal to the binary code derived from the independent variables''' - from numpy.random import permutation as nppermutation if experiments is None: experiments = generateExperiments(independentVariables) nIndependentVariables = len(independentVariables) @@ -712,7 +703,6 @@ plt.ylabel(text['qqplot.ylabel']) plt.tight_layout() if filenamePrefix is not None: - from storage import openCheck out = openCheck(filenamePrefix+'-coefficients.html', 'w') out.write(results.summary().as_html()) plt.savefig(filenamePrefix+'-model-results.'+figureFileType) @@ -885,9 +875,8 @@ def plotPolygon(poly, options = '', **kwargs): 'Plots shapely polygon poly' - from matplotlib.pyplot import plot x,y = poly.exterior.xy - plot(x, y, options, **kwargs) + plt.plot(x, y, options, **kwargs) def stepPlot(X, firstX, lastX, initialCount = 0, increment = 1): '''for each value in X, increment by increment the initial count @@ -927,7 +916,6 @@ plt.rc('axes', prop_cycle=monochrome) def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1): - from matplotlib.pyplot import pcolor coords = array(list(indicatorMap.keys())) minX = min(coords[:,0]) minY = min(coords[:,1]) @@ -937,9 +925,9 @@ for k,v in indicatorMap.items(): C[k[1]-minY,k[0]-minX] = v if masked: - pcolor(X, Y, ma.masked_where(C==defaultValue,C)) + plt.pcolor(X, Y, ma.masked_where(C==defaultValue,C)) else: - pcolor(X, Y, C) + plt.pcolor(X, Y, C) ######################### # Data download