comparison scripts/process.py @ 1023:a13f47c8931d

work on processing large datasets (generate speed data)
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 06 Jun 2018 16:51:15 -0400
parents 16932cefabc1
children 73b124160911
comparison
equal deleted inserted replaced
1022:b7689372c0ec 1023:a13f47c8931d
2 2
3 import sys, argparse 3 import sys, argparse
4 from pathlib import Path 4 from pathlib import Path
5 from multiprocessing.pool import Pool 5 from multiprocessing.pool import Pool
6 6
7 import matplotlib 7 #import matplotlib
8 matplotlib.use('Agg') 8 #matplotlib.use('Agg')
9 import matplotlib.pyplot as plt 9 import matplotlib.pyplot as plt
10 from numpy import percentile 10 from numpy import percentile
11 11 from pandas import DataFrame
12 import storage, events, prediction, cvutils 12
13 import storage, events, prediction, cvutils, utils
13 from metadata import * 14 from metadata import *
14 15
15 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') 16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.')
17 # input
16 parser.add_argument('--db', dest = 'metadataFilename', help = 'name of the metadata file', required = True) 18 parser.add_argument('--db', dest = 'metadataFilename', help = 'name of the metadata file', required = True)
17 parser.add_argument('--videos', dest = 'videoIds', help = 'indices of the video sequences', nargs = '*', type = int) 19 parser.add_argument('--videos', dest = 'videoIds', help = 'indices of the video sequences', nargs = '*', type = int)
18 parser.add_argument('--sites', dest = 'siteIds', help = 'indices of the video sequences', nargs = '*', type = int) 20 parser.add_argument('--sites', dest = 'siteIds', help = 'indices of the video sequences', nargs = '*', type = int)
19 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') 21
20 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int) 22 # main function
21 parser.add_argument('--prediction-method', dest = 'predictionMethod', help = 'prediction method (constant velocity (cvd: vector computation (approximate); cve: equation solving; cv: discrete time (approximate)), normal adaptation, point set prediction)', choices = ['cvd', 'cve', 'cv', 'na', 'ps', 'mp'])
22 parser.add_argument('--pet', dest = 'computePET', help = 'computes PET', action = 'store_true')
23 # override other tracking config, erase sqlite?
24 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction']) 23 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction'])
25 parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'interaction']) 24 parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'interaction'])
26 parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction']) 25 parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction'])
27 parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction']) 26 parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction'])
27
28 # common options
29 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file')
30 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int)
28 parser.add_argument('--dry', dest = 'dryRun', help = 'dry run of processing', action = 'store_true') 31 parser.add_argument('--dry', dest = 'dryRun', help = 'dry run of processing', action = 'store_true')
29 parser.add_argument('--nthreads', dest = 'nProcesses', help = 'number of processes to run in parallel', type = int, default = 1) 32 parser.add_argument('--nthreads', dest = 'nProcesses', help = 'number of processes to run in parallel', type = int, default = 1)
33
34 # analysis options
35 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event'])
36 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1)
37 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = float, default = 15.)
38 parser.add_argument('--aggregation', dest = 'aggMethod', help = 'aggregation method per user/event and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median'])
39 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
40 dpi = 150
41 # unit of analysis: site or video sequence?
42
43 # safety analysis
44 parser.add_argument('--prediction-method', dest = 'predictionMethod', help = 'prediction method (constant velocity (cvd: vector computation (approximate); cve: equation solving; cv: discrete time (approximate)), normal adaptation, point set prediction)', choices = ['cvd', 'cve', 'cv', 'na', 'ps', 'mp'])
45 parser.add_argument('--pet', dest = 'computePET', help = 'computes PET', action = 'store_true')
46 # override other tracking config, erase sqlite?
30 47
31 # need way of selecting sites as similar as possible to sql alchemy syntax 48 # need way of selecting sites as similar as possible to sql alchemy syntax
32 # override tracking.cfg from db 49 # override tracking.cfg from db
33 # manage cfg files, overwrite them (or a subset of parameters) 50 # manage cfg files, overwrite them (or a subset of parameters)
34 # delete sqlite files 51 # delete sqlite files
50 for cv in site.cameraViews: 67 for cv in site.cameraViews:
51 videoSequences += cv.videoSequences 68 videoSequences += cv.videoSequences
52 else: 69 else:
53 print('No video/site to process') 70 print('No video/site to process')
54 71
72 if args.nProcesses > 1:
73 pool = Pool(args.nProcesses)
74
55 ################################# 75 #################################
56 # Delete 76 # Delete
57 ################################# 77 #################################
58 if args.delete is not None: 78 if args.delete is not None:
59 if args.delete == 'feature': 79 if args.delete == 'feature':
79 else: 99 else:
80 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), True, vs.cameraView.cameraType.intrinsicCameraMatrix, vs.cameraView.cameraType.distortionCoefficients, args.dryRun) 100 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), True, vs.cameraView.cameraType.intrinsicCameraMatrix, vs.cameraView.cameraType.distortionCoefficients, args.dryRun)
81 else: 101 else:
82 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename())) 102 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename()))
83 else: 103 else:
84 pool = Pool(args.nProcesses)
85 for vs in videoSequences: 104 for vs in videoSequences:
86 if not (parentPath/vs.getDatabaseFilename()).exists() or args.process == 'object': 105 if not (parentPath/vs.getDatabaseFilename()).exists() or args.process == 'object':
87 if args.configFilename is None: 106 if args.configFilename is None:
88 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename()) 107 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename())
89 else: 108 else:
123 # pool.close() 142 # pool.close()
124 143
125 ################################# 144 #################################
126 # Analyze 145 # Analyze
127 ################################# 146 #################################
128 if args.analyze == 'object': # user speed for now 147 if args.analyze == 'object':
129 medianSpeeds = {} 148 # user speeds, accelerations
130 speeds85 = {} 149 # aggregation per site
131 minLength = 2*30 150 data = [] # list of observations per site-user with time
151 headers = ['sites', 'date', 'time', 'user_type']
152 aggFunctions = {}
153 for method in args.aggMethod:
154 if method == 'centile':
155 aggFunctions[method] = utils.aggregationFunction(method, args.aggCentiles)
156 for c in args.aggCentiles:
157 headers.append('{}{}'.format(method,c))
158 else:
159 aggFunctions[method] = utils.aggregationFunction(method)
160 headers.append(method)
132 for vs in videoSequences: 161 for vs in videoSequences:
133 if not vs.cameraView.siteIdx in medianSpeeds: 162 d = vs.startTime.date()
134 medianSpeeds[vs.cameraView.siteIdx] = [] 163 t1 = vs.startTime.time()
135 speeds85[vs.cameraView.siteIdx] = [] 164 minUserDuration = args.minUserDuration*vs.cameraView.cameraType.frameRate
136 print('Extracting speed from '+vs.getDatabaseFilename()) 165 print('Extracting speed from '+vs.getDatabaseFilename())
137 objects = storage.loadTrajectoriesFromSqlite(str(parentPath/vs.getDatabaseFilename()), 'object') 166 objects = storage.loadTrajectoriesFromSqlite(str(parentPath/vs.getDatabaseFilename()), 'object', args.nObjects)
138 for o in objects: 167 for o in objects:
139 if o.length() > minLength: 168 if o.length() > minUserDuration:
140 speeds = 30*3.6*percentile(o.getSpeeds(), [50, 85]) 169 row = [vs.cameraView.siteIdx, d, utils.framesToTime(o.getFirstInstant(), vs.cameraView.cameraType.frameRate, t1), o.getUserType()]
141 medianSpeeds[vs.cameraView.siteIdx].append(speeds[0]) 170 tmp = o.getSpeeds()
142 speeds85[vs.cameraView.siteIdx].append(speeds[1]) 171 for method,func in aggFunctions.items():
143 for speeds, name in zip([medianSpeeds, speeds85], ['Median', '85th Centile']): 172 aggSpeeds = vs.cameraView.cameraType.frameRate*3.6*func(tmp)
144 plt.ioff() 173 if method == 'centile':
145 plt.figure() 174 row += aggSpeeds.tolist()
146 plt.boxplot(list(speeds.values()), labels = [session.query(Site).get(siteId).name for siteId in speeds]) 175 else:
147 plt.ylabel(name+' Speeds (km/h)') 176 row.append(aggSpeeds)
148 plt.savefig(name.lower()+'-speeds.png', dpi=150) 177 data.append(row)
149 plt.close() 178 data = DataFrame(data, columns = headers)
150 179 if args.siteIds is None:
180 siteIds = set([vs.cameraView.siteIdx for vs in videoSequences])
181 else:
182 siteIds = set(args.siteIds)
183 if args.output == 'figure':
184 for name in headers[4:]:
185 plt.ioff()
186 plt.figure()
187 plt.boxplot([data.loc[data['sites']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds])
188 plt.ylabel(name+' Speeds (km/h)')
189 plt.savefig(name.lower()+'-speeds.png', dpi=dpi)
190 plt.close()
191 elif args.output == 'event':
192 data.to_csv('speeds.csv', index = False)
151 if args.analyze == 'interaction': 193 if args.analyze == 'interaction':
152 indicatorIds = [2,5,7,10] 194 indicatorIds = [2,5,7,10]
153 conversionFactors = {2: 1., 5: 30.*3.6, 7:1./30, 10:1./30} 195 conversionFactors = {2: 1., 5: 30.*3.6, 7:1./30, 10:1./30}
154 maxIndicatorValue = {2: float('inf'), 5: float('inf'), 7:10., 10:10.} 196 maxIndicatorValue = {2: float('inf'), 5: float('inf'), 7:10., 10:10.}
155 indicators = {} 197 indicators = {}