comparison python/utils.py @ 1023:a13f47c8931d

work on processing large datasets (generate speed data)
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 06 Jun 2018 16:51:15 -0400
parents b7689372c0ec
children
comparison
equal deleted inserted replaced
1022:b7689372c0ec 1023:a13f47c8931d
8 from pathlib import Path 8 from pathlib import Path
9 from math import sqrt, ceil, floor 9 from math import sqrt, ceil, floor
10 from scipy.stats import kruskal, shapiro, lognorm 10 from scipy.stats import kruskal, shapiro, lognorm
11 from scipy.spatial import distance 11 from scipy.spatial import distance
12 from scipy.sparse import dok_matrix 12 from scipy.sparse import dok_matrix
13 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma, isinf, savez, load as npload, log 13 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log
14 14
15 15
16 datetimeFormat = "%Y-%m-%d %H:%M:%S" 16 datetimeFormat = "%Y-%m-%d %H:%M:%S"
17 17
18 sjcamDatetimeFormat = "%Y_%m%d_%H%M%S"#2017_0626_143720 18 sjcamDatetimeFormat = "%Y_%m%d_%H%M%S"#2017_0626_143720
521 data = npload(filename) 521 data = npload(filename)
522 m = dok_matrix(tuple(data['shape'])) 522 m = dok_matrix(tuple(data['shape']))
523 for k, v in zip(data['keys'], data['values']): 523 for k, v in zip(data['keys'], data['values']):
524 m[tuple(k)] = v 524 m[tuple(k)] = v
525 return m 525 return m
526
def aggregationFunction(funcStr, centile = 50):
    '''return the numpy function corresponding to funcStr

    funcStr can be:
    - 'median' or 'mean': the numpy function of the same name is returned
    - 'centile': a function computing the centile(s) given by the centile argument
    - '<p>centile' where <p> is a number between 0 and 100 (eg '85centile'):
      a function computing that centile (the centile argument is ignored)

    centile can be a list of centiles to compute at once, eg [25, 50, 75] for the 3 quartiles

    a message is printed and None is returned if funcStr is not recognized'''
    suffix = 'centile'
    if funcStr == 'median':
        return median
    elif funcStr == 'mean':
        return mean
    elif funcStr == suffix:
        return lambda x: percentile(x, centile)
    elif isinstance(funcStr, str) and funcStr.endswith(suffix):
        # generalization of the former hard-coded '85centile' case:
        # the text before the suffix is the centile to compute
        try:
            p = float(funcStr[:-len(suffix)])
        except ValueError:
            p = None
        # the range test also rejects nan and inf, which float() accepts
        if p is not None and 0 <= p <= 100:
            return lambda x: percentile(x, p)
    print('Unknown aggregation method: {}'.format(funcStr))
    return None
526 541
527 ######################### 542 #########################
528 # regression analysis using statsmodels (and pandas) 543 # regression analysis using statsmodels (and pandas)
529 ######################### 544 #########################
530 545