changeset 1059:a87b3072bd26

working version
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 11 Jul 2018 01:48:42 -0400
parents 16575ca4537d
children c04550f957ab
files scripts/process.py trafficintelligence/utils.py
diffstat 2 files changed, 34 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/process.py	Tue Jul 10 17:16:38 2018 -0400
+++ b/scripts/process.py	Wed Jul 11 01:48:42 2018 -0400
@@ -8,7 +8,7 @@
 #atplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
-from pandas import DataFrame
+import pandas as pd
 
 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml
 from trafficintelligence.metadata import *
@@ -60,7 +60,7 @@
 # analysis options
 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event'])
 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1)
-parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = float, default = 15.)
+parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15)
 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median'])
 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data')
@@ -213,7 +213,7 @@
     # user speeds, accelerations
     # aggregation per site
     data = [] # list of observation per site-user with time
-    headers = ['sites', 'date', 'time', 'user_type']
+    headers = ['site', 'date', 'time', 'user_type']
     aggFunctions, tmpheaders = utils.aggregationMethods(arg.aggMethods, args.aggCentiles)
     headers.extend(tmpheaders)
     for vs in videoSequences:
@@ -233,7 +233,7 @@
                     else:
                         row.append(aggSpeeds)
             data.append(row)
-    data = DataFrame(data, columns = headers)
+    data = pd.DataFrame(data, columns = headers)
     if args.output == 'figure':
         for name in headers[4:]:
             plt.ioff()
@@ -276,10 +276,30 @@
         plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150)
         plt.close()
 
-if args.analyze == 'event': # aggregate event data by 15 min interval (arg.intervalDuration)
-    data = pd.read_csv(arg.eventFilename)
+if args.analyze == 'event': # aggregate event data per interval of args.intervalDuration min: count, observed duration and aggregated data columns (no threshold is applied here)
+    data = pd.read_csv(args.eventFilename, parse_dates = [2])
+    # parse_dates = [2] parses column index 2 as datetimes (presumably the 'time' column of a site, date, time, user_type file; data.time is read below)
     # create time for end of each 15 min, then group by, using the agg method for each data column
-    headers = ['sites', 'date', 'intervalend15']
-    # add n road users (by type?)
-    aggFunctions, tmpheaders = utils.aggregationMethods(arg.aggMethods, args.aggCentiles)
-    headers.extend(tmpheaders)
+    headers = ['site', 'date', 'intervalend15', 'duration', 'count']
+    aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)
+    dataColumns = list(data.columns[4:])
+    for h in dataColumns:
+        for h2 in tmpheaders:
+            headers.append(h+'-'+h2)
+    data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = args.intervalDuration)).time())
+    outputData = []
+    for name, group in data.groupby(['site', 'date', 'intervalend15']):
+        # duration = minutes from the earliest observation of the group to the interval end (modulo 60), then the aggregates of each data column for each method
+        row = []
+        row.extend(name)
+        row.append((name[2].minute-group.time.min().minute) % 60)
+        row.append(len(group))
+        for h in dataColumns:
+            for method,func in aggFunctions.items():
+                aggregated = func(group[h])
+                if method == 'centile':
+                    row.extend(aggregated)
+                else:
+                    row.append(aggregated)
+        outputData.append(row)
+    pd.DataFrame(outputData, columns = headers).to_csv('aggregated-speeds.csv', index = False)
--- a/trafficintelligence/utils.py	Tue Jul 10 17:16:38 2018 -0400
+++ b/trafficintelligence/utils.py	Wed Jul 11 01:48:42 2018 -0400
@@ -342,6 +342,10 @@
 def timeToFrames(t, frameRate):
     return frameRate*(t.hour*3600+t.minute*60+t.second)
 
+def timeModulo(t, duration):
+    'returns t with its minutes rounded down to a multiple of duration (in min), hour and second unchanged'
+    return time(t.hour, (t.minute//duration)*duration, t.second)
+
 def sortXY(X,Y):
     'returns the sorted (x, Y(x)) sorted on X'
     D = {}