comparison scripts/process.py @ 1069:9ee5c7636640

added severe event counting
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 17 Jul 2018 00:44:51 -0400
parents e6b791ad7f85
children 0154133e77df
comparison of 1068:e6b791ad7f85 with 1069:9ee5c7636640
@@ -61,11 +61,12 @@
 # analysis options
 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event'])
 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1)
 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15)
 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median'])
-parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
+parser.add_argument('--aggregation-centiles', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
+parser.add_argument('--event-thresholds', dest = 'eventThresholds', help = 'threshold to count severe situations', nargs = '*', type = float)
 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data')
 dpi = 150
 # unit of analysis: site - camera-view
 
 # need way of selecting sites as similar as possible to sql alchemy syntax
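
Note: this hunk renames --aggregation-centile to --aggregation-centiles and introduces --event-thresholds, which collects zero or more floats. A minimal sketch of just these two options (the real parser in process.py defines many more arguments) shows what argparse hands back:

import argparse

# only the two options touched by this changeset
parser = argparse.ArgumentParser()
parser.add_argument('--aggregation-centiles', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
parser.add_argument('--event-thresholds', dest = 'eventThresholds', help = 'threshold to count severe situations', nargs = '*', type = float)

args = parser.parse_args(['--aggregation-centiles', '15', '85', '--event-thresholds', '1.5', '3'])
print(args.aggCentiles)        # [15, 85]
print(args.eventThresholds)    # [1.5, 3.0]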
@@ -314,22 +315,29 @@
 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)
 dataColumns = list(data.columns[4:])
 for h in dataColumns:
     for h2 in tmpheaders:
         headers.append(h+'-'+h2)
+for h in dataColumns:
+    for t in args.eventThresholds:
+        headers.append('n-{}-{}'.format(h, t))
 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time())
 outputData = []
 for name, group in data.groupby(['site', 'date', 'intervalend15']):
-    # get duration as intervalend15-min(time), apply agg methods to each centile
     row = []
     row.extend(name)
-    row.append((name[2].minute-group.time.min().minute) % 60)
+    groupStartTime = group.time.min()
+    groupEndTime = group.time.max()
+    row.append((groupEndTime.minute-groupStartTime.minute) % 60)#(name[2].minute*60+name[2].second-groupStartTime.minute*60+groupStartTime.second) % 3600)
     row.append(len(group))
     for h in dataColumns:
         for method,func in aggFunctions.items():
             aggregated = func(group[h])
             if method == 'centile':
                 row.extend(aggregated)
             else:
                 row.append(aggregated)
+    for h in dataColumns:
+        for t in args.eventThresholds:
+            row.append((group[h] > t).sum())
     outputData.append(row)
 pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False)
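
For context on the grouping key and the revised duration: intervalend15 rounds each timestamp down to its intervalDuration bin and labels it with the end of the 15 min interval, and the new duration is the span in minutes from the first to the last observation of the group, modulo 60. A toy sketch with made-up timestamps (assuming data.time holds pandas Timestamps, as the attribute access suggests):

import pandas as pd

intervalDuration = 15
times = pd.to_datetime(['2018-07-17 08:03:10', '2018-07-17 08:07:45', '2018-07-17 08:13:02'])

def intervalEnd15(t):
    # round down to the start of the current interval, then label with its end (+15 min)
    return (pd.Timestamp(year = t.year, month = t.month, day = t.day, hour = t.hour,
                         minute = (t.minute // intervalDuration)*intervalDuration)
            + pd.Timedelta(minutes = 15)).time()

print([intervalEnd15(t) for t in times])                  # all three map to 08:15:00

groupStartTime = times.min()
groupEndTime = times.max()
print((groupEndTime.minute-groupStartTime.minute) % 60)   # 10, minutes covered by the group

The severe event counts themselves are plain boolean sums: for every indicator column and every value passed with --event-thresholds, the number of observations above the threshold in the interval is appended under an 'n-<column>-<threshold>' header. A standalone sketch on made-up data (in the script the headers are built once before the groupby, and dataColumns comes from the event file):

import pandas as pd

group = pd.DataFrame({'speed': [32.1, 48.7, 55.2, 61.0]})   # hypothetical indicator column
dataColumns = ['speed']
eventThresholds = [50., 60.]                                # as passed with --event-thresholds

headers = []
row = []
for h in dataColumns:
    for t in eventThresholds:
        headers.append('n-{}-{}'.format(h, t))
        row.append((group[h] > t).sum())                    # boolean mask summed = count above threshold
print(dict(zip(headers, row)))                              # 2 observations above 50.0, 1 above 60.0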
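
Design note: because the count columns are appended after the aggregation columns in both the header loop and the per-group loop, the row layout stays aligned with headers; the aggregated CSV written next to args.eventFilename simply gains one 'n-<column>-<threshold>' column per indicator/threshold pair.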