comparison scripts/process.py @ 1083:5b597b021aed

added function to aggregate interactions
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 23 Jul 2018 20:17:27 -0400
parents 706034a4c6cd
children 1a7e0b2c858b

--- scripts/process.py @ 1082:706034a4c6cd
+++ scripts/process.py @ 1083:5b597b021aed
@@ -22,11 +22,11 @@
 # main function
 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction'])
 parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'prototype', 'interaction'])
 parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction'])
 parser.add_argument('--progress', dest = 'progress', help = 'information about the progress of processing', action = 'store_true')
-parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction', 'event'])
+parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction', 'event-speed', 'event-interaction'])
 
 # common options
 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file')
 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int)
 parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['feature', 'object'], default = 'feature')
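
This hunk splits the former 'event' analysis mode into 'event-speed' and 'event-interaction', which the branches further down dispatch on. A minimal, self-contained sketch of that dispatch pattern, using only the argument shown in the hunk above (the real script defines many more options):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)',
                        choices = ['feature', 'object', 'classification', 'interaction', 'event-speed', 'event-interaction'])
    args = parser.parse_args(['--analyze', 'event-speed'])  # simulate a command line

    # dispatch on the selected analysis mode, as the script does below
    if args.analyze == 'event-speed':
        print('aggregating speed events by time interval')
    elif args.analyze == 'event-interaction':
        print('aggregating interaction events by time interval')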
@@ -333,23 +333,24 @@
 #            plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150)
 #            plt.close()
     elif args.output == 'event':
         data.to_csv(args.eventFilename, index = False)
 
-if args.analyze == 'event': # aggregate event data by 15 min interval (args.intervalDuration), count events with thresholds
-    data = pd.read_csv(args.eventFilename, parse_dates = [2])
+if args.analyze == 'event-speed': # aggregate event data by 15 min interval (args.intervalDuration), count events with thresholds
+    data = pd.read_csv(args.eventFilename, parse_dates = [2], nrows = 10000)
     #data = pd.read_csv('./speeds.csv', converters = {'time': lambda s: datetime.datetime.strptime(s, "%H:%M:%S").time()}, nrows = 5000)
     # create time for end of each 15 min, then group by, using the agg method for each data column
     headers = ['site', 'date', 'intervalend15', 'duration', 'count']
     aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)
     dataColumns = list(data.columns[4:])
     for h in dataColumns:
         for h2 in tmpheaders:
             headers.append(h+'-'+h2)
-    for h in dataColumns:
-        for t in args.eventThresholds:
-            headers.append('n-{}-{}'.format(h, t))
+    if args.eventThresholds is not None:
+        for h in dataColumns:
+            for t in args.eventThresholds:
+                headers.append('n-{}-{}'.format(h, t))
     data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time())
     outputData = []
     for name, group in data.groupby(['site', 'date', 'intervalend15']):
         row = []
         row.extend(name)
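
The 'intervalend15' column computed above floors each event time to the start of its args.intervalDuration-minute bin and labels the bin with its end time. A standalone sketch of that computation, assuming pandas and a 15 min duration (note the code adds a fixed pd.Timedelta(minutes = 15), so the label matches the bin end only when args.intervalDuration is 15):

    import pandas as pd

    intervalDuration = 15  # stands in for args.intervalDuration

    def intervalEnd15(t):
        # floor the timestamp to the start of its interval, then label with the interval end
        floored = pd.Timestamp(year = t.year, month = t.month, day = t.day, hour = t.hour,
                               minute = (t.minute // intervalDuration)*intervalDuration)
        return (floored + pd.Timedelta(minutes = 15)).time()

    print(intervalEnd15(pd.Timestamp('2018-07-23 20:17:27')))  # 20:30:00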
@@ -362,10 +363,11 @@
                 aggregated = func(group[h])
                 if method == 'centile':
                     row.extend(aggregated)
                 else:
                     row.append(aggregated)
-        for h in dataColumns:
-            for t in args.eventThresholds:
-                row.append((group[h] > t).sum())
+        if args.eventThresholds is not None:
+            for h in dataColumns:
+                for t in args.eventThresholds:
+                    row.append((group[h] > t).sum())
         outputData.append(row)
     pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False)
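
Both branches rely on utils.aggregationMethods to turn --aggMethods/--aggCentiles into callables. As used here, each method maps to a function over a pandas Series, and 'centile' returns one value per requested centile (hence row.extend for 'centile' versus row.append for the rest). A hypothetical stand-in with that contract, for illustration only; the real utils.aggregationMethods in the project may be implemented differently:

    import numpy as np

    def aggregationMethods(methods, centiles):
        # hypothetical stand-in: map each method name to a callable over a Series
        funcs = {}
        headers = []
        for m in methods:
            if m == 'centile':
                funcs[m] = lambda s: np.percentile(s, centiles)  # one value per centile
                headers.extend(['centile{}'.format(c) for c in centiles])
            else:
                funcs[m] = getattr(np, m)  # e.g. 'mean', 'median', 'max'
                headers.append(m)
        return funcs, headers

    funcs, headers = aggregationMethods(['mean', 'centile'], [15, 50, 85])
    print(headers)  # ['mean', 'centile15', 'centile50', 'centile85']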
@@ -371,0 +374,41 @@
+
+elif args.analyze == 'event-interaction': # aggregate event data by 15 min interval (args.intervalDuration), count events with thresholds
+    data = pd.read_csv(args.eventFilename, parse_dates = [2], nrows = 20000)
+    headers = ['site', 'date', 'intervalend15', 'duration', 'count']
+    aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)
+    dataColumns = list(data.columns[3:])
+    for h in dataColumns:
+        if not 'speed' in h.lower(): # proximity indicators are reversed: the 85th centile of the negated column is the negated 15th centile (restored with np.abs below)
+            data[h] = -data[h]
+    for h in dataColumns:
+        for h2 in tmpheaders:
+            headers.append(h+'-'+h2)
+    for h, t in zip(dataColumns, args.eventThresholds): # each threshold in this case applies to one indicator
+        headers.append('n-{}-{}'.format(h, t))
+    data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time())
+    outputData = []
+    for name, group in data.groupby(['site', 'date', 'intervalend15']):
+        row = []
+        row.extend(name)
+        groupStartTime = group.time.min()
+        groupEndTime = group.time.max()
+        row.append((groupEndTime.minute+1-groupStartTime.minute) % 60) #(name[2].minute*60+name[2].second-groupStartTime.minute*60+groupStartTime.second) % 3600)
+        row.append(len(group))
+        for h in dataColumns:
+            for method, func in aggFunctions.items():
+                tmp = group.loc[~group[h].isna(), h]
+                if len(tmp) > 0:
+                    aggregated = func(tmp) # todo: invert if the resulting stat is negative
+                    if method == 'centile':
+                        row.extend(np.abs(aggregated))
+                    else:
+                        row.append(np.abs(aggregated))
+                else:
+                    row.extend([None]*len(aggFunctions))
+        for h, t in zip(dataColumns, args.eventThresholds): # each threshold in this case applies to one indicator
+            if 'speed' in h.lower():
+                row.append((group[h] > t).sum())
+            else:
+                row.append((group[h] > -t).sum()) # count values larger than the negated threshold for proximity indicators
+        outputData.append(row)
+    pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False)
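
The sign flip for proximity indicators works because percentiles commute with negation: the 85th centile of -x equals the negated 15th centile of x, so a single 'centile' aggregation serves both speed and proximity indicators, with np.abs restoring the magnitude (valid while the aggregated values keep one sign, as the 'todo' comment above acknowledges). A quick check of the identity, assuming numpy:

    import numpy as np

    x = np.array([0.5, 1.2, 2.0, 3.3, 4.1, 5.8])  # e.g. proximity indicator values
    print(np.percentile(-x, 85))   # -1.025: 85th centile of the negated values
    print(-np.percentile(x, 15))   # -1.025: negated 15th centile of the original values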