Mercurial Hosting > traffic-intelligence
changeset 1059:a87b3072bd26
working version
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Wed, 11 Jul 2018 01:48:42 -0400 |
parents | 16575ca4537d |
children | c04550f957ab |
files | scripts/process.py trafficintelligence/utils.py |
diffstat | 2 files changed, 34 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/process.py Tue Jul 10 17:16:38 2018 -0400 +++ b/scripts/process.py Wed Jul 11 01:48:42 2018 -0400 @@ -8,7 +8,7 @@ #atplotlib.use('Agg') import matplotlib.pyplot as plt import numpy as np -from pandas import DataFrame +import pandas as pd from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml from trafficintelligence.metadata import * @@ -60,7 +60,7 @@ # analysis options parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) -parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = float, default = 15.) +parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15) parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data') @@ -213,7 +213,7 @@ # user speeds, accelerations # aggregation per site data = [] # list of observation per site-user with time - headers = ['sites', 'date', 'time', 'user_type'] + headers = ['site', 'date', 'time', 'user_type'] aggFunctions, tmpheaders = utils.aggregationMethods(arg.aggMethods, args.aggCentiles) headers.extend(tmpheaders) for vs in videoSequences: @@ -233,7 +233,7 @@ else: row.append(aggSpeeds) data.append(row) - data = DataFrame(data, columns = 
headers) + data = pd.DataFrame(data, columns = headers) if args.output == 'figure': for name in headers[4:]: plt.ioff() @@ -276,10 +276,30 @@ plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) plt.close() -if args.analyze == 'event': # aggregate event data by 15 min interval (arg.intervalDuration) - data = pd.read_csv(arg.eventFilename) +if args.analyze == 'event': # aggregate event data by 15 min interval (arg.intervalDuration), count events with thresholds + data = pd.read_csv(args.eventFilename, parse_dates = [2]) + #data = pd.read_csv('./speeds.csv', converters = {'time': lambda s: datetime.datetime.strptime(s, "%H:%M:%S").time()}, nrows = 5000) # create time for end of each 15 min, then group by, using the agg method for each data column - headers = ['sites', 'date', 'intervalend15'] - # add n road users (by type?) - aggFunctions, tmpheaders = utils.aggregationMethods(arg.aggMethods, args.aggCentiles) - headers.extend(tmpheaders) + headers = ['sites', 'date', 'intervalend15', 'duration', 'count'] + aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) + dataColumns = list(data.columns[4:]) + for h in dataColumns: + for h2 in tmpheaders: + headers.append(h+'-'+h2) + data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time()) + outputData = [] + for name, group in data.groupby(['site', 'date', 'intervalend15']): + # get duration as intervalend15-min(time), apply agg methods to each centile + row = [] + row.extend(name) + row.append((name[2].minute-group.time.min().minute) % 60) + row.append(len(group)) + for h in dataColumns: + for method,func in aggFunctions.items(): + aggregated = func(group[h]) + if method == 'centile': + row.extend(aggregated) + else: + row.append(aggregated) + outputData.append(row) + pd.DataFrame(outputData, columns = 
headers).to_csv('aggregated-speeds.csv', index = False)
--- a/trafficintelligence/utils.py Tue Jul 10 17:16:38 2018 -0400
+++ b/trafficintelligence/utils.py Wed Jul 11 01:48:42 2018 -0400
@@ -342,6 +342,10 @@
 def timeToFrames(t, frameRate):
     return frameRate*(t.hour*3600+t.minute*60+t.second)
 
+def timeModulo(t, duration):
+    'returns the time modulo the duration in min'
+    return time(t.hour, t.minute//duration, t.second)
+
 def sortXY(X,Y):
     'returns the sorted (x, Y(x)) sorted on X'
     D = {}