Mercurial Hosting > traffic-intelligence
diff python/utils.py @ 1023:a13f47c8931d
work on processing large datasets (generate speed data)
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Wed, 06 Jun 2018 16:51:15 -0400 |
parents | b7689372c0ec |
children |
line wrap: on
line diff
```diff
--- a/python/utils.py Wed Jun 06 10:35:06 2018 -0400
+++ b/python/utils.py Wed Jun 06 16:51:15 2018 -0400
@@ -10,7 +10,7 @@
 from scipy.stats import kruskal, shapiro, lognorm
 from scipy.spatial import distance
 from scipy.sparse import dok_matrix
-from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma, isinf, savez, load as npload, log
+from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log
 
 datetimeFormat = "%Y-%m-%d %H:%M:%S"
 
@@ -524,6 +524,21 @@
         m[tuple(k)] = v
     return m
 
+def aggregationFunction(funcStr, centile = 50):
+    '''return the numpy function corresponding to funcStr
+    centile can be a list of centiles to compute at once, eg [25, 50, 75] for the 3 quartiles'''
+    if funcStr == 'median':
+        return median
+    elif funcStr == 'mean':
+        return mean
+    elif funcStr == 'centile':
+        return lambda x: percentile(x, centile)
+    elif funcStr == '85centile':
+        return lambda x: percentile(x, 85)
+    else:
+        print('Unknown aggregation method: {}'.format(funcStr))
+        return None
+
 #########################
 # regression analysis using statsmodels (and pandas)
 #########################
```