Mercurial Hosting > traffic-intelligence
changeset 1069:9ee5c7636640
added severe event counting
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 17 Jul 2018 00:44:51 -0400 |
parents | e6b791ad7f85 |
children | 0154133e77df |
files | scripts/process.py |
diffstat | 1 files changed, 11 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/scripts/process.py Mon Jul 16 22:53:11 2018 -0400
+++ b/scripts/process.py Tue Jul 17 00:44:51 2018 -0400
@@ -63,7 +63,8 @@
 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1)
 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15)
 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median'])
-parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
+parser.add_argument('--aggregation-centiles', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
+parser.add_argument('--event-thresholds', dest = 'eventThresholds', help = 'threshold to count severe situations', nargs = '*', type = float)
 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data')
 dpi = 150
 # unit of analysis: site - camera-view
@@ -316,13 +317,17 @@
     for h in dataColumns:
         for h2 in tmpheaders:
             headers.append(h+'-'+h2)
+    for h in dataColumns:
+        for t in args.eventThresholds:
+            headers.append('n-{}-{}'.format(h, t))
     data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time())
     outputData = []
     for name, group in data.groupby(['site', 'date', 'intervalend15']):
-        # get duration as intervalend15-min(time), apply agg methods to each centile
         row = []
         row.extend(name)
-        row.append((name[2].minute-group.time.min().minute) % 60)
+        groupStartTime = group.time.min()
+        groupEndTime = group.time.max()
+        row.append((groupEndTime.minute-groupStartTime.minute) % 60)#(name[2].minute*60+name[2].second-groupStartTime.minute*60+groupStartTime.second) % 3600)
         row.append(len(group))
         for h in dataColumns:
             for method,func in aggFunctions.items():
@@ -331,5 +336,8 @@
                     row.extend(aggregated)
                 else:
                     row.append(aggregated)
+        for h in dataColumns:
+            for t in args.eventThresholds:
+                row.append((group[h] > t).sum())
         outputData.append(row)
     pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False)
```