comparison scripts/process.py @ 1069:9ee5c7636640

added severe event counting
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 17 Jul 2018 00:44:51 -0400
parents e6b791ad7f85
children 0154133e77df
comparison of 1068:e6b791ad7f85 with 1069:9ee5c7636640
@@ -61,11 +61,12 @@
 # analysis options
 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event'])
 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1)
 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15)
 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median'])
-parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
+parser.add_argument('--aggregation-centiles', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
+parser.add_argument('--event-thresholds', dest = 'eventThresholds', help = 'threshold to count severe situations', nargs = '*', type = float)
 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data')
 dpi = 150
 # unit of analysis: site - camera-view
 
 # need way of selecting sites as similar as possible to sql alchemy syntax
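
Note: this hunk renames --aggregation-centile to --aggregation-centiles and introduces --event-thresholds, which collects zero or more floats. A minimal sketch of just these two options (the real parser in process.py defines many more arguments) shows what argparse hands back:

import argparse

# only the two options touched by this changeset
parser = argparse.ArgumentParser()
parser.add_argument('--aggregation-centiles', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
parser.add_argument('--event-thresholds', dest = 'eventThresholds', help = 'threshold to count severe situations', nargs = '*', type = float)

args = parser.parse_args(['--aggregation-centiles', '15', '85', '--event-thresholds', '1.5', '3'])
print(args.aggCentiles)        # [15, 85]
print(args.eventThresholds)    # [1.5, 3.0]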
@@ -314,22 +315,29 @@
 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)
 dataColumns = list(data.columns[4:])
 for h in dataColumns:
     for h2 in tmpheaders:
         headers.append(h+'-'+h2)
+for h in dataColumns:
+    for t in args.eventThresholds:
+        headers.append('n-{}-{}'.format(h, t))
 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time())
 outputData = []
 for name, group in data.groupby(['site', 'date', 'intervalend15']):
-    # get duration as intervalend15-min(time), apply agg methods to each centile
     row = []
     row.extend(name)
-    row.append((name[2].minute-group.time.min().minute) % 60)
+    groupStartTime = group.time.min()
+    groupEndTime = group.time.max()
+    row.append((groupEndTime.minute-groupStartTime.minute) % 60)#(name[2].minute*60+name[2].second-groupStartTime.minute*60+groupStartTime.second) % 3600)
     row.append(len(group))
     for h in dataColumns:
         for method,func in aggFunctions.items():
             aggregated = func(group[h])
             if method == 'centile':
                 row.extend(aggregated)
             else:
                 row.append(aggregated)
+    for h in dataColumns:
+        for t in args.eventThresholds:
+            row.append((group[h] > t).sum())
     outputData.append(row)
 pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False)
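
For context on the grouping key and the revised duration: intervalend15 rounds each timestamp down to its intervalDuration bin and labels it with the end of the 15 min interval, and the new duration is the span in minutes from the first to the last observation of the group, modulo 60. A toy sketch with made-up timestamps (assuming data.time holds pandas Timestamps, as the attribute access suggests):

import pandas as pd

intervalDuration = 15
times = pd.to_datetime(['2018-07-17 08:03:10', '2018-07-17 08:07:45', '2018-07-17 08:13:02'])

def intervalEnd15(t):
    # round down to the start of the current interval, then label with its end (+15 min)
    return (pd.Timestamp(year = t.year, month = t.month, day = t.day, hour = t.hour,
                         minute = (t.minute // intervalDuration)*intervalDuration)
            + pd.Timedelta(minutes = 15)).time()

print([intervalEnd15(t) for t in times])                  # all three map to 08:15:00

groupStartTime = times.min()
groupEndTime = times.max()
print((groupEndTime.minute-groupStartTime.minute) % 60)   # 10, minutes covered by the group

The severe event counts themselves are plain boolean sums: for every indicator column and every value passed with --event-thresholds, the number of observations above the threshold in the interval is appended under an 'n-<column>-<threshold>' header. A standalone sketch on made-up data (in the script the headers are built once before the groupby, and dataColumns comes from the event file):

import pandas as pd

group = pd.DataFrame({'speed': [32.1, 48.7, 55.2, 61.0]})   # hypothetical indicator column
dataColumns = ['speed']
eventThresholds = [50., 60.]                                # as passed with --event-thresholds

headers = []
row = []
for h in dataColumns:
    for t in eventThresholds:
        headers.append('n-{}-{}'.format(h, t))
        row.append((group[h] > t).sum())                    # boolean mask summed = count above threshold
print(dict(zip(headers, row)))                              # 2 observations above 50.0, 1 above 60.0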
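
Design note: because the count columns are appended after the aggregation columns in both the header loop and the per-group loop, the row layout stays aligned with headers; the aggregated CSV written next to args.eventFilename simply gains one 'n-<column>-<threshold>' column per indicator/threshold pair.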