Mercurial Hosting > traffic-intelligence
comparison scripts/process.py @ 1059:a87b3072bd26
working version
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Wed, 11 Jul 2018 01:48:42 -0400 |
parents | 16575ca4537d |
children | 671426ce0f3e |
comparison
equal
deleted
inserted
replaced
1058:16575ca4537d | 1059:a87b3072bd26 |
---|---|
6 | 6 |
7 #import matplotlib | 7 #import matplotlib |
8 #matplotlib.use('Agg') | 8 #matplotlib.use('Agg') |
9 import matplotlib.pyplot as plt | 9 import matplotlib.pyplot as plt |
10 import numpy as np | 10 import numpy as np |
11 from pandas import DataFrame | 11 import pandas as pd |
12 | 12 |
13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml | 13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml |
14 from trafficintelligence.metadata import * | 14 from trafficintelligence.metadata import * |
15 | 15 |
16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') | 16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') |
58 | 58 |
59 | 59 |
60 # analysis options | 60 # analysis options |
61 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) | 61 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) |
62 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'minimum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) | 62 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'minimum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) |
63 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = float, default = 15.) | 63 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15) |
64 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) | 64 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) |
65 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) | 65 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) |
66 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data') | 66 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data') |
67 dpi = 150 | 67 dpi = 150 |
68 # unit of analysis: site - camera-view | 68 # unit of analysis: site - camera-view |
211 ################################# | 211 ################################# |
212 if args.analyze == 'object': | 212 if args.analyze == 'object': |
213 # user speeds, accelerations | 213 # user speeds, accelerations |
214 # aggregation per site | 214 # aggregation per site |
215 data = [] # list of observation per site-user with time | 215 data = [] # list of observation per site-user with time |
216 headers = ['sites', 'date', 'time', 'user_type'] | 216 headers = ['site', 'date', 'time', 'user_type'] |
217 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) | 217 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) |
218 headers.extend(tmpheaders) | 218 headers.extend(tmpheaders) |
219 for vs in videoSequences: | 219 for vs in videoSequences: |
220 d = vs.startTime.date() | 220 d = vs.startTime.date() |
221 t1 = vs.startTime.time() | 221 t1 = vs.startTime.time() |
231 if method == 'centile': | 231 if method == 'centile': |
232 row += aggSpeeds.tolist() | 232 row += aggSpeeds.tolist() |
233 else: | 233 else: |
234 row.append(aggSpeeds) | 234 row.append(aggSpeeds) |
235 data.append(row) | 235 data.append(row) |
236 data = DataFrame(data, columns = headers) | 236 data = pd.DataFrame(data, columns = headers) |
237 if args.output == 'figure': | 237 if args.output == 'figure': |
238 for name in headers[4:]: | 238 for name in headers[4:]: |
239 plt.ioff() | 239 plt.ioff() |
240 plt.figure() | 240 plt.figure() |
241 plt.boxplot([data.loc[data['sites']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds]) | 241 plt.boxplot([data.loc[data['sites']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds]) |
274 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators]) | 274 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators]) |
275 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')') | 275 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')') |
276 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) | 276 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) |
277 plt.close() | 277 plt.close() |
278 | 278 |
279 if args.analyze == 'event': # aggregate event data by 15 min interval (args.intervalDuration) | 279 if args.analyze == 'event': # aggregate event data by 15 min interval (args.intervalDuration), count events with thresholds |
280 data = pd.read_csv(arg.eventFilename) | 280 data = pd.read_csv(args.eventFilename, parse_dates = [2]) |
281 #data = pd.read_csv('./speeds.csv', converters = {'time': lambda s: datetime.datetime.strptime(s, "%H:%M:%S").time()}, nrows = 5000) | |
281 # create time for end of each 15 min, then group by, using the agg method for each data column | 282 # create time for end of each 15 min, then group by, using the agg method for each data column |
282 headers = ['sites', 'date', 'intervalend15'] | 283 headers = ['sites', 'date', 'intervalend15', 'duration', 'count'] |
283 # add n road users (by type?) | 284 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) |
284 aggFunctions, tmpheaders = utils.aggregationMethods(arg.aggMethods, args.aggCentiles) | 285 dataColumns = list(data.columns[4:]) |
285 headers.extend(tmpheaders) | 286 for h in dataColumns: |
287 for h2 in tmpheaders: | |
288 headers.append(h+'-'+h2) | |
289 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time()) | |
290 outputData = [] | |
291 for name, group in data.groupby(['site', 'date', 'intervalend15']): | |
292 # get duration as intervalend15-min(time), apply agg methods to each centile | |
293 row = [] | |
294 row.extend(name) | |
295 row.append((name[2].minute-group.time.min().minute) % 60) | |
296 row.append(len(group)) | |
297 for h in dataColumns: | |
298 for method,func in aggFunctions.items(): | |
299 aggregated = func(group[h]) | |
300 if method == 'centile': | |
301 row.extend(aggregated) | |
302 else: | |
303 row.append(aggregated) | |
304 outputData.append(row) | |
305 pd.DataFrame(outputData, columns = headers).to_csv('aggregated-speeds.csv', index = False) |