Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 1023:a13f47c8931d
work on processing large datasets (generate speed data)
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Wed, 06 Jun 2018 16:51:15 -0400 |
parents | b7689372c0ec |
children |
comparison
legend: equal | deleted | inserted | replaced
1022:b7689372c0ec | 1023:a13f47c8931d |
---|---|
8 from pathlib import Path | 8 from pathlib import Path |
9 from math import sqrt, ceil, floor | 9 from math import sqrt, ceil, floor |
10 from scipy.stats import kruskal, shapiro, lognorm | 10 from scipy.stats import kruskal, shapiro, lognorm |
11 from scipy.spatial import distance | 11 from scipy.spatial import distance |
12 from scipy.sparse import dok_matrix | 12 from scipy.sparse import dok_matrix |
13 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma, isinf, savez, load as npload, log | 13 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log |
14 | 14 |
15 | 15 |
16 datetimeFormat = "%Y-%m-%d %H:%M:%S" | 16 datetimeFormat = "%Y-%m-%d %H:%M:%S" |
17 | 17 |
18 sjcamDatetimeFormat = "%Y_%m%d_%H%M%S"#2017_0626_143720 | 18 sjcamDatetimeFormat = "%Y_%m%d_%H%M%S"#2017_0626_143720 |
521 data = npload(filename) | 521 data = npload(filename) |
522 m = dok_matrix(tuple(data['shape'])) | 522 m = dok_matrix(tuple(data['shape'])) |
523 for k, v in zip(data['keys'], data['values']): | 523 for k, v in zip(data['keys'], data['values']): |
524 m[tuple(k)] = v | 524 m[tuple(k)] = v |
525 return m | 525 return m |
526 | |
527 def aggregationFunction(funcStr, centile = 50): | |
528 '''return the numpy function corresponding to funcStr | |
529 centile can be a list of centiles to compute at once, eg [25, 50, 75] for the 3 quartiles''' | |
530 if funcStr == 'median': | |
531 return median | |
532 elif funcStr == 'mean': | |
533 return mean | |
534 elif funcStr == 'centile': | |
535 return lambda x: percentile(x, centile) | |
536 elif funcStr == '85centile': | |
537 return lambda x: percentile(x, 85) | |
538 else: | |
539 print('Unknown aggregation method: {}'.format(funcStr)) | |
540 return None | |
526 | 541 |
527 ######################### | 542 ######################### |
528 # regression analysis using statsmodels (and pandas) | 543 # regression analysis using statsmodels (and pandas) |
529 ######################### | 544 ######################### |
530 | 545 |