changeset 1069:9ee5c7636640

added severe event counting
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 17 Jul 2018 00:44:51 -0400
parents e6b791ad7f85
children 0154133e77df
files scripts/process.py
diffstat 1 files changed, 11 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/process.py	Mon Jul 16 22:53:11 2018 -0400
+++ b/scripts/process.py	Tue Jul 17 00:44:51 2018 -0400
@@ -63,7 +63,8 @@
 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1)
 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15)
 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median'])
-parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
+parser.add_argument('--aggregation-centiles', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
+parser.add_argument('--event-thresholds', dest = 'eventThresholds', help = 'threshold to count severe situations', nargs = '*', type = float)
 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data')
 dpi = 150
 # unit of analysis: site - camera-view
@@ -316,13 +317,17 @@
     for h in dataColumns:
         for h2 in tmpheaders:
             headers.append(h+'-'+h2)
+    for h in dataColumns:
+        for t in args.eventThresholds:
+            headers.append('n-{}-{}'.format(h, t))
     data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time())
     outputData = []
     for name, group in data.groupby(['site', 'date', 'intervalend15']):
-        # get duration as intervalend15-min(time), apply agg methods to each centile
         row = []
         row.extend(name)
-        row.append((name[2].minute-group.time.min().minute) % 60)
+        groupStartTime = group.time.min()
+        groupEndTime = group.time.max()
+        row.append((groupEndTime.minute-groupStartTime.minute) % 60)#(name[2].minute*60+name[2].second-groupStartTime.minute*60+groupStartTime.second) % 3600)
         row.append(len(group))
         for h in dataColumns:
             for method,func in aggFunctions.items():
@@ -331,5 +336,8 @@
                     row.extend(aggregated)
                 else:
                     row.append(aggregated)
+        for h in dataColumns:
+            for t in args.eventThresholds:
+                row.append((group[h] > t).sum())
         outputData.append(row)
     pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False)