diff python/utils.py @ 1023:a13f47c8931d

work on processing large datasets (generate speed data)
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 06 Jun 2018 16:51:15 -0400
parents b7689372c0ec
children
line wrap: on
line diff
--- a/python/utils.py	Wed Jun 06 10:35:06 2018 -0400
+++ b/python/utils.py	Wed Jun 06 16:51:15 2018 -0400
@@ -10,7 +10,7 @@
 from scipy.stats import kruskal, shapiro, lognorm
 from scipy.spatial import distance
 from scipy.sparse import dok_matrix
-from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve,  dtype, isnan, NaN, mean, ma, isinf, savez, load as npload, log
+from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve,  dtype, isnan, NaN, ma, isinf, savez, load as npload, log
 
 
 datetimeFormat = "%Y-%m-%d %H:%M:%S"
@@ -524,6 +524,21 @@
         m[tuple(k)] = v
     return m
 
+def aggregationFunction(funcStr, centile = 50):
+    '''Returns the numpy function selected by funcStr: 'median', 'mean', 'centile' or '85centile' (for any other value, prints a message and returns None)
+    centile is used only for 'centile' and is passed as is to numpy percentile: it can be a list of centiles to compute at once, eg [25, 50, 75] for the 3 quartiles'''
+    if funcStr == 'median':
+        return median
+    elif funcStr == 'mean':
+        return mean
+    elif funcStr == 'centile':
+        return lambda x: percentile(x, centile) # wrapper taking only the data x, with centile fixed by the call to aggregationFunction
+    elif funcStr == '85centile':
+        return lambda x: percentile(x, 85) # fixed 85th centile, the centile argument is ignored
+    else:
+        print('Unknown aggregation method: {}'.format(funcStr))
+        return None # no exception is raised for an unknown funcStr
+
 #########################
 # regression analysis using statsmodels (and pandas)
 #########################