comparison scripts/process.py @ 1059:a87b3072bd26

working version
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 11 Jul 2018 01:48:42 -0400
parents 16575ca4537d
children 671426ce0f3e
comparison
equal deleted inserted replaced
1058:16575ca4537d 1059:a87b3072bd26
6 6
7 #import matplotlib 7 #import matplotlib
8 #atplotlib.use('Agg') 8 #atplotlib.use('Agg')
9 import matplotlib.pyplot as plt 9 import matplotlib.pyplot as plt
10 import numpy as np 10 import numpy as np
11 from pandas import DataFrame 11 import pandas as pd
12 12
13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml 13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml
14 from trafficintelligence.metadata import * 14 from trafficintelligence.metadata import *
15 15
16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') 16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.')
58 58
59 59
60 # analysis options 60 # analysis options
61 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) 61 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event'])
62 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) 62 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1)
63 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = float, default = 15.) 63 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15)
64 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) 64 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median'])
65 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) 65 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
66 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data') 66 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data')
67 dpi = 150 67 dpi = 150
68 # unit of analysis: site - camera-view 68 # unit of analysis: site - camera-view
211 ################################# 211 #################################
212 if args.analyze == 'object': 212 if args.analyze == 'object':
213 # user speeds, accelerations 213 # user speeds, accelerations
214 # aggregation per site 214 # aggregation per site
215 data = [] # list of observation per site-user with time 215 data = [] # list of observation per site-user with time
216 headers = ['sites', 'date', 'time', 'user_type'] 216 headers = ['site', 'date', 'time', 'user_type']
217	    aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)	217	    aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)
218 headers.extend(tmpheaders) 218 headers.extend(tmpheaders)
219 for vs in videoSequences: 219 for vs in videoSequences:
220 d = vs.startTime.date() 220 d = vs.startTime.date()
221 t1 = vs.startTime.time() 221 t1 = vs.startTime.time()
231 if method == 'centile': 231 if method == 'centile':
232 row += aggSpeeds.tolist() 232 row += aggSpeeds.tolist()
233 else: 233 else:
234 row.append(aggSpeeds) 234 row.append(aggSpeeds)
235 data.append(row) 235 data.append(row)
236 data = DataFrame(data, columns = headers) 236 data = pd.DataFrame(data, columns = headers)
237 if args.output == 'figure': 237 if args.output == 'figure':
238 for name in headers[4:]: 238 for name in headers[4:]:
239 plt.ioff() 239 plt.ioff()
240 plt.figure() 240 plt.figure()
241	        plt.boxplot([data.loc[data['sites']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds])	241	        plt.boxplot([data.loc[data['site']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds])
274 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators]) 274 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators])
275 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')') 275 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')')
276 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) 276 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150)
277 plt.close() 277 plt.close()
278 278
if args.analyze == 'event': # aggregate event data by time interval (args.intervalDuration, in minutes)
    # Load the per-user event data produced by the 'object' analysis;
    # column 2 is the observation time, parsed to pandas Timestamps
    data = pd.read_csv(args.eventFilename, parse_dates = [2])
    # Output columns: grouping keys, observed duration and user count per interval,
    # then one column per (data column, aggregation method) pair
    headers = ['site', 'date', 'intervalend15', 'duration', 'count']
    aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)
    dataColumns = list(data.columns[4:]) # columns 0-3 are identifiers, the rest are indicators to aggregate
    for h in dataColumns:
        for h2 in tmpheaders:
            headers.append(h+'-'+h2)
    # End time of the aggregation interval containing each observation:
    # round the minute down to a multiple of intervalDuration, then add one interval
    # (was hard-coded to 15 min; now consistent with the rounding step)
    data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day, hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = args.intervalDuration)).time())
    outputData = []
    for name, group in data.groupby(['site', 'date', 'intervalend15']):
        row = []
        row.extend(name)
        # Observed duration within the interval (minutes): interval end minus earliest observation
        # (modulo 60 handles the interval end falling on the next hour, e.g. end :00 after obs :50)
        row.append((name[2].minute-group.time.min().minute) % 60)
        row.append(len(group)) # number of observations (road users) in the interval
        for h in dataColumns:
            for method, func in aggFunctions.items():
                aggregated = func(group[h])
                if method == 'centile': # centile aggregation returns one value per requested centile
                    row.extend(aggregated)
                else:
                    row.append(aggregated)
        outputData.append(row)
    pd.DataFrame(outputData, columns = headers).to_csv('aggregated-speeds.csv', index = False)