Mercurial Hosting > traffic-intelligence
comparison scripts/process.py @ 1069:9ee5c7636640
added severe event counting
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 17 Jul 2018 00:44:51 -0400 |
parents | e6b791ad7f85 |
children | 0154133e77df |
comparison
equal
deleted
inserted
replaced
1068:e6b791ad7f85 | 1069:9ee5c7636640 |
---|---|
61 # analysis options | 61 # analysis options |
62 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) | 62 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) |
63 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) | 63 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) |
64 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15) | 64 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15) |
65 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) | 65 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) |
66 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) | 66 parser.add_argument('--aggregation-centiles', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) |
| | 67 parser.add_argument('--event-thresholds', dest = 'eventThresholds', help = 'threshold to count severe situations', nargs = '*', type = float) |
67 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data') | 68 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data') |
68 dpi = 150 | 69 dpi = 150 |
69 # unit of analysis: site - camera-view | 70 # unit of analysis: site - camera-view |
70 | 71 |
71 # need way of selecting sites as similar as possible to sql alchemy syntax | 72 # need way of selecting sites as similar as possible to sql alchemy syntax |
314 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) | 315 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) |
315 dataColumns = list(data.columns[4:]) | 316 dataColumns = list(data.columns[4:]) |
316 for h in dataColumns: | 317 for h in dataColumns: |
317 for h2 in tmpheaders: | 318 for h2 in tmpheaders: |
318 headers.append(h+'-'+h2) | 319 headers.append(h+'-'+h2) |
| | 320 for h in dataColumns: |
| | 321 for t in args.eventThresholds: |
| | 322 headers.append('n-{}-{}'.format(h, t)) |
319 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time()) | 323 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time()) |
320 outputData = [] | 324 outputData = [] |
321 for name, group in data.groupby(['site', 'date', 'intervalend15']): | 325 for name, group in data.groupby(['site', 'date', 'intervalend15']): |
322 # get duration as intervalend15-min(time), apply agg methods to each centile | |
323 row = [] | 326 row = [] |
324 row.extend(name) | 327 row.extend(name) |
325 row.append((name[2].minute-group.time.min().minute) % 60) | 328 groupStartTime = group.time.min() |
| | 329 groupEndTime = group.time.max() |
| | 330 row.append((groupEndTime.minute-groupStartTime.minute) % 60)#(name[2].minute*60+name[2].second-groupStartTime.minute*60+groupStartTime.second) % 3600) |
326 row.append(len(group)) | 331 row.append(len(group)) |
327 for h in dataColumns: | 332 for h in dataColumns: |
328 for method,func in aggFunctions.items(): | 333 for method,func in aggFunctions.items(): |
329 aggregated = func(group[h]) | 334 aggregated = func(group[h]) |
330 if method == 'centile': | 335 if method == 'centile': |
331 row.extend(aggregated) | 336 row.extend(aggregated) |
332 else: | 337 else: |
333 row.append(aggregated) | 338 row.append(aggregated) |
| | 339 for h in dataColumns: |
| | 340 for t in args.eventThresholds: |
| | 341 row.append((group[h] > t).sum()) |
334 outputData.append(row) | 342 outputData.append(row) |
335 pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False) | 343 pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False) |