Mercurial Hosting > traffic-intelligence
changeset 1023:a13f47c8931d
work on processing large datasets (generate speed data)
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Wed, 06 Jun 2018 16:51:15 -0400 |
parents | b7689372c0ec |
children | acb4f6f6545d |
files | python/utils.py scripts/classify-objects.py scripts/process.py |
diffstat | 3 files changed, 90 insertions(+), 39 deletions(-) [+] |
line wrap: on
line diff
--- a/python/utils.py Wed Jun 06 10:35:06 2018 -0400 +++ b/python/utils.py Wed Jun 06 16:51:15 2018 -0400 @@ -10,7 +10,7 @@ from scipy.stats import kruskal, shapiro, lognorm from scipy.spatial import distance from scipy.sparse import dok_matrix -from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma, isinf, savez, load as npload, log +from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log datetimeFormat = "%Y-%m-%d %H:%M:%S" @@ -524,6 +524,21 @@ m[tuple(k)] = v return m +def aggregationFunction(funcStr, centile = 50): + '''return the numpy function corresponding to funcStr + centile can be a list of centiles to compute at once, eg [25, 50, 75] for the 3 quartiles''' + if funcStr == 'median': + return median + elif funcStr == 'mean': + return mean + elif funcStr == 'centile': + return lambda x: percentile(x, centile) + elif funcStr == '85centile': + return lambda x: percentile(x, 85) + else: + print('Unknown aggregation method: {}'.format(funcStr)) + return None + ######################### # regression analysis using statsmodels (and pandas) #########################
--- a/scripts/classify-objects.py Wed Jun 06 10:35:06 2018 -0400 +++ b/scripts/classify-objects.py Wed Jun 06 16:51:15 2018 -0400 @@ -1,6 +1,6 @@ #! /usr/bin/env python3 -import cvutils, moving, ml, storage +import cvutils, moving, ml, storage, utils import numpy as np import sys, argparse @@ -25,14 +25,8 @@ classifierParams = storage.ClassifierParameters(params.classifierFilename) classifierParams.convertToFrames(params.videoFrameRate, 3.6) # conversion from km/h to m/frame -if classifierParams.speedAggregationMethod == 'median': - speedAggregationFunc = np.median -elif classifierParams.speedAggregationMethod == 'mean': - speedAggregationFunc = np.mean -elif classifierParams.speedAggregationMethod == 'centile': - speedAggregationFunc = lambda speeds: np.percentile(speeds, args.speedAggregationCentile) -else: - print('Unknown speed aggregation method: {}. Exiting'.format(classifierParams.speedAggregationMethod)) +speedAggregationFunc = utils.aggregationFunction(classifierParams.speedAggregationMethod) +if speedAggregationFunc is None: sys.exit() pedBikeCarSVM = ml.SVM_load(classifierParams.pedBikeCarSVMFilename)
--- a/scripts/process.py Wed Jun 06 10:35:06 2018 -0400 +++ b/scripts/process.py Wed Jun 06 16:51:15 2018 -0400 @@ -4,30 +4,47 @@ from pathlib import Path from multiprocessing.pool import Pool -import matplotlib -matplotlib.use('Agg') +#import matplotlib +#matplotlib.use('Agg') import matplotlib.pyplot as plt from numpy import percentile +from pandas import DataFrame -import storage, events, prediction, cvutils +import storage, events, prediction, cvutils, utils from metadata import * parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') +# input parser.add_argument('--db', dest = 'metadataFilename', help = 'name of the metadata file', required = True) parser.add_argument('--videos', dest = 'videoIds', help = 'indices of the video sequences', nargs = '*', type = int) parser.add_argument('--sites', dest = 'siteIds', help = 'indices of the video sequences', nargs = '*', type = int) -parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') -parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int) -parser.add_argument('--prediction-method', dest = 'predictionMethod', help = 'prediction method (constant velocity (cvd: vector computation (approximate); cve: equation solving; cv: discrete time (approximate)), normal adaptation, point set prediction)', choices = ['cvd', 'cve', 'cv', 'na', 'ps', 'mp']) -parser.add_argument('--pet', dest = 'computePET', help = 'computes PET', action = 'store_true') -# override other tracking config, erase sqlite? 
+ +# main function parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction']) parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'interaction']) parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction']) parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction']) + +# common options +parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') +parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int) parser.add_argument('--dry', dest = 'dryRun', help = 'dry run of processing', action = 'store_true') parser.add_argument('--nthreads', dest = 'nProcesses', help = 'number of processes to run in parallel', type = int, default = 1) +# analysis options +parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) +parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'minimum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) +parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = float, default = 15.) +parser.add_argument('--aggregation', dest = 'aggMethod', help = 'aggregation method per user/event and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) +parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) +dpi = 150 +# unit of analysis: site or video sequence? 
+ +# safety analysis +parser.add_argument('--prediction-method', dest = 'predictionMethod', help = 'prediction method (constant velocity (cvd: vector computation (approximate); cve: equation solving; cv: discrete time (approximate)), normal adaptation, point set prediction)', choices = ['cvd', 'cve', 'cv', 'na', 'ps', 'mp']) +parser.add_argument('--pet', dest = 'computePET', help = 'computes PET', action = 'store_true') +# override other tracking config, erase sqlite? + # need way of selecting sites as similar as possible to sql alchemy syntax # override tracking.cfg from db # manage cfg files, overwrite them (or a subset of parameters) @@ -52,6 +69,9 @@ else: print('No video/site to process') +if args.nProcesses > 1: + pool = Pool(args.nProcesses) + ################################# # Delete ################################# @@ -81,7 +101,6 @@ else: print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename())) else: - pool = Pool(args.nProcesses) for vs in videoSequences: if not (parentPath/vs.getDatabaseFilename()).exists() or args.process == 'object': if args.configFilename is None: @@ -125,29 +144,52 @@ ################################# # Analyze ################################# -if args.analyze == 'object': # user speed for now - medianSpeeds = {} - speeds85 = {} - minLength = 2*30 +if args.analyze == 'object': + # user speeds, accelerations + # aggregation per site + data = [] # list of observation per site-user with time + headers = ['sites', 'date', 'time', 'user_type'] + aggFunctions = {} + for method in args.aggMethod: + if method == 'centile': + aggFunctions[method] = utils.aggregationFunction(method, args.aggCentiles) + for c in args.aggCentiles: + headers.append('{}{}'.format(method,c)) + else: + aggFunctions[method] = utils.aggregationFunction(method) + headers.append(method) for vs in videoSequences: - if not vs.cameraView.siteIdx in medianSpeeds: - medianSpeeds[vs.cameraView.siteIdx] = [] - speeds85[vs.cameraView.siteIdx] = [] + d 
= vs.startTime.date() + t1 = vs.startTime.time() + minUserDuration = args.minUserDuration*vs.cameraView.cameraType.frameRate print('Extracting speed from '+vs.getDatabaseFilename()) - objects = storage.loadTrajectoriesFromSqlite(str(parentPath/vs.getDatabaseFilename()), 'object') + objects = storage.loadTrajectoriesFromSqlite(str(parentPath/vs.getDatabaseFilename()), 'object', args.nObjects) for o in objects: - if o.length() > minLength: - speeds = 30*3.6*percentile(o.getSpeeds(), [50, 85]) - medianSpeeds[vs.cameraView.siteIdx].append(speeds[0]) - speeds85[vs.cameraView.siteIdx].append(speeds[1]) - for speeds, name in zip([medianSpeeds, speeds85], ['Median', '85th Centile']): - plt.ioff() - plt.figure() - plt.boxplot(list(speeds.values()), labels = [session.query(Site).get(siteId).name for siteId in speeds]) - plt.ylabel(name+' Speeds (km/h)') - plt.savefig(name.lower()+'-speeds.png', dpi=150) - plt.close() - + if o.length() > minUserDuration: + row = [vs.cameraView.siteIdx, d, utils.framesToTime(o.getFirstInstant(), vs.cameraView.cameraType.frameRate, t1), o.getUserType()] + tmp = o.getSpeeds() + for method,func in aggFunctions.items(): + aggSpeeds = vs.cameraView.cameraType.frameRate*3.6*func(tmp) + if method == 'centile': + row += aggSpeeds.tolist() + else: + row.append(aggSpeeds) + data.append(row) + data = DataFrame(data, columns = headers) + if args.siteIds is None: + siteIds = set([vs.cameraView.siteIdx for vs in videoSequences]) + else: + siteIds = set(args.siteIds) + if args.output == 'figure': + for name in headers[4:]: + plt.ioff() + plt.figure() + plt.boxplot([data.loc[data['sites']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds]) + plt.ylabel(name+' Speeds (km/h)') + plt.savefig(name.lower()+'-speeds.png', dpi=dpi) + plt.close() + elif args.output == 'event': + data.to_csv('speeds.csv', index = False) if args.analyze == 'interaction': indicatorIds = [2,5,7,10] conversionFactors = {2: 1., 5: 
30.*3.6, 7:1./30, 10:1./30}