repo/traffic-intelligence: trafficintelligence/utils.py comparison

comparison trafficintelligence/utils.py @ 1029:c6cf75a2ed08

reorganization of imports

author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Mon, 18 Jun 2018 22:50:59 -0400
parents	cc5cb04b04b0
children	aafbc0bab925

comparison

equal deleted inserted replaced

-:cc5cb04b04b0
+:c6cf75a2ed08
 #! /usr/bin/env python
-# -*- coding: utf-8 -*-
 ''' Generic utilities.'''
-import matplotlib.pyplot as plt
 from datetime import time, datetime
 from argparse import ArgumentTypeError
 from pathlib import Path
 from math import sqrt, ceil, floor
-from scipy.stats import rv_continuous, kruskal, shapiro, lognorm
+from copy import deepcopy, copy
+from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t
 from scipy.spatial import distance
 from scipy.sparse import dok_matrix
-from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve,  dtype, isnan, NaN, ma, isinf, savez, load as npload, log
+from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve,  dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit
+from numpy.random import permutation as nppermutation
+from pandas import DataFrame, concat
+import matplotlib.pyplot as plt
+from trafficintelligence.storage import openCheck
 datetimeFormat = "%Y-%m-%d %H:%M:%S"
 sjcamDatetimeFormat = "%Y_%m%d_%H%M%S"#2017_0626_143720
 'returns the fitted location and scale of the lognormal (general definition)'
 shape, loc, scale = lognorm.fit(x, floc=0.)
 return log(scale), shape
 def sampleSize(stdev, tolerance, percentConfidence, nRoundingDigits = None, printLatex = False):
-from scipy.stats.distributions import norm
 if nRoundingDigits is None:
 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1), 2) # 1.-(100-percentConfidence)/200.
 else:
 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1), nRoundingDigits)
 stdev = round(stdev, nRoundingDigits)
 '''if trueStd, use normal distribution, otherwise, Student
 Use otherwise t.interval or norm.interval for the boundaries
 ex: norm.interval(0.95)
 t.interval(0.95, nSamples-1)'''
-from scipy.stats.distributions import norm, t
 if trueStd:
 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1), 2)
 else: # use Student
 k = round(t.ppf(0.5+percentConfidence/200., nSamples-1), 2)
 e = k*stdev/sqrt(nSamples)
 return l1[0]*l2[1]-l1[1]*l2[0]
 def cat_mvgavg(cat_list, halfWidth):
 ''' Return a list of categories/values smoothed according to a window.
 halfWidth is the search radius on either side'''
-from copy import deepcopy
 smoothed = deepcopy(cat_list)
 for point in range(len(cat_list)):
 lower_bound_check = max(0,point-halfWidth)
 upper_bound_check = min(len(cat_list)-1,point+halfWidth+1)
 window_values = cat_list[lower_bound_check:upper_bound_check]
 return result
 def linearRegression(x, y, deg = 1, plotData = False):
 '''returns the least squares estimate of the linear regression of y = ax+b
 as well as the plot'''
-from numpy.lib.polynomial import polyfit
-from numpy.core.multiarray import arange
 coef = polyfit(x, y, deg)
 if plotData:
 def poly(x):
 result = 0
 for i in range(len(coef)):
 Uses the non-parametric Kruskal-Wallis test'''
 tmp = data[data[independentVariable].notnull()]
 independentVariableValues = sorted(tmp[independentVariable].unique().tolist())
 if len(independentVariableValues) >= 2:
 if saveLatex:
-from storage import openCheck
 out = openCheck(filenamePrefix+'-{}-{}.tex'.format(dependentVariable, independentVariable), 'w')
 for x in independentVariableValues:
 print('Shapiro-Wilk normality test for {} when {}={}: {} obs'.format(dependentVariable,independentVariable, x, len(tmp.loc[tmp[independentVariable] == x, dependentVariable])))
 if len(tmp.loc[tmp[independentVariable] == x, dependentVariable]) >= 3:
 print(shapiro(tmp.loc[tmp[independentVariable] == x, dependentVariable]))
 correlationFunc is spearmanr or pearsonr from scipy.stats
 text is the template to display for the two types of printout (see default): 3 elements if no saving to latex file, 8 otherwise
 TODO: pass the dummies for nominal variables and remove if all dummies are correlated, or none is correlated with the dependentvariable'''
-from copy import copy
-from pandas import DataFrame
 result = copy(independentVariables)
 table1 = ''
 table2 = {}
 # constant variables
 for var in independentVariables:
 else:
 table2['Correlations'].append(cor)
 table2['Valeurs p'].append(p)
 if saveFiles:
-from storage import openCheck
 out = openCheck(filenamePrefix+'-removed-variables.tex', 'w')
 out.write(latexHeader)
 out.write(table1)
 out.write(latexFooter)
 out.close()
 experiments.loc[i,'nobs'] = int(results.nobs)
 return experiments
 def generateExperiments(independentVariables):
 '''Generates all possible models for including or not each independent variable'''
-from pandas import DataFrame
 experiments = {}
 nIndependentVariables = len(independentVariables)
 if nIndependentVariables != len(set(independentVariables)):
 print("Duplicate variables. Exiting")
 import sys
 def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1):
 '''Generates all possible models with the independentVariables
 and runs them, saving the results in experiments
 with multiprocess option'''
-from pandas import concat
-from multiprocessing import Pool
 experiments = generateExperiments(independentVariables)
 nModels = len(experiments)
 print("Running {} models with {} processes".format(nModels, nProcesses))
 print("IndependentVariables: {}".format(independentVariables))
 if nProcesses == 1:
 if they improve the model
 The results are added to experiments if provided as argument
 Storing in experiment relies on the index being the number equal
 to the binary code derived from the independent variables'''
-from numpy.random import permutation as nppermutation
 if experiments is None:
 experiments = generateExperiments(independentVariables)
 nIndependentVariables = len(independentVariables)
 permutation = nppermutation(list(range(nIndependentVariables)))
 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)}
 if text is not None and 'qqplot.xlabel' in text:
 plt.xlabel(text['qqplot.xlabel'])
 plt.ylabel(text['qqplot.ylabel'])
 plt.tight_layout()
 if filenamePrefix is not None:
-from storage import openCheck
 out = openCheck(filenamePrefix+'-coefficients.html', 'w')
 out.write(results.summary().as_html())
 plt.savefig(filenamePrefix+'-model-results.'+figureFileType)
 #########################
 # plotting section
 #########################
 def plotPolygon(poly, options = '', **kwargs):
 'Plots shapely polygon poly'
-from matplotlib.pyplot import plot
 x,y = poly.exterior.xy
-plot(x, y, options, **kwargs)
+plt.plot(x, y, options, **kwargs)
 def stepPlot(X, firstX, lastX, initialCount = 0, increment = 1):
 '''for each value in X, increment by increment the initial count
 returns the lists that can be plotted
 to obtain a step plot increasing by one for each value in x, from first to last value
 else:
 monochrome = (cycler('color', ['k']) * cycler('linestyle', ['-', '--', ':', '-.']))
 plt.rc('axes', prop_cycle=monochrome)
 def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1):
-from matplotlib.pyplot import pcolor
 coords = array(list(indicatorMap.keys()))
 minX = min(coords[:,0])
 minY = min(coords[:,1])
 X = arange(minX, max(coords[:,0])+1.1)*squareSize
 Y = arange(minY, max(coords[:,1])+1.1)*squareSize
 C = defaultValue*ones((len(Y), len(X)))
 for k,v in indicatorMap.items():
 C[k[1]-minY,k[0]-minX] = v
 if masked:
-pcolor(X, Y, ma.masked_where(C==defaultValue,C))
+plt.pcolor(X, Y, ma.masked_where(C==defaultValue,C))
 else:
-pcolor(X, Y, C)
+plt.pcolor(X, Y, C)
 #########################
 # Data download
 #########################

Mercurial Hosting > traffic-intelligence

comparison trafficintelligence/utils.py @ 1029:c6cf75a2ed08