changeset 1054:d13f9bfbf3ff

Retry
author Wendlasida
date Fri, 06 Jul 2018 18:42:58 -0400
parents 60cc87e824c4 (current diff) c9c03c97ed9f (diff)
children 9e4e80fc5943 67144f26609e
files trafficintelligence/storage.py trafficintelligence/tests/tutorials.py
diffstat 8 files changed, 165 insertions(+), 155 deletions(-)
--- a/scripts/learn-motion-patterns.py	Thu Jul 05 22:24:31 2018 -0400
+++ b/scripts/learn-motion-patterns.py	Fri Jul 06 18:42:58 2018 -0400
@@ -5,25 +5,26 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-from trafficintelligence import ml, utils, storage, moving
+from trafficintelligence import ml, utils, storage, moving, processing
 
-parser = argparse.ArgumentParser(description='''The program clusters trajectories, each cluster being represented by a trajectory. It can either work on the same dataset (database) or different ones, but only does learning or assignment at a time to avoid issues (the minimum cluster size argument is not used for now as it may change prototypes when assigning other trajectories)''') #, epilog = ''
+parser = argparse.ArgumentParser(description='''The program clusters trajectories, each cluster being represented by a trajectory. It can either work on the same dataset (database) or different ones, but only does learning or assignment at a time to avoid issues''') #, epilog = ''
 #parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file')
 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file', required = True)
 parser.add_argument('-o', dest = 'outputPrototypeDatabaseFilename', help = 'name of the Sqlite database file to save prototypes')
 parser.add_argument('-i', dest = 'inputPrototypeDatabaseFilename', help = 'name of the Sqlite database file for prototypes to start the algorithm with')
-parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories to learn from', choices = ['objectfeature', 'feature', 'object'], default = 'objectfeatures')
-parser.add_argument('--max-nobjectfeatures', dest = 'maxNObjectFeatures', help = 'maximum number of features per object to load', type = int, default = 1)
-parser.add_argument('-n', dest = 'nTrajectories', help = 'number of the object or feature trajectories to load', type = int, default = None)
+parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories to process', choices = ['feature', 'object'], default = 'feature')
+parser.add_argument('--nfeatures-per-object', dest = 'nLongestFeaturesPerObject', help = 'maximum number of features per object to load', type = int)
+parser.add_argument('-n', dest = 'nObjects', help = 'number of the object or feature trajectories to load', type = int, default = None)
 parser.add_argument('-e', dest = 'epsilon', help = 'distance for the similarity of trajectory points', type = float, required = True)
 parser.add_argument('--metric', dest = 'metric', help = 'metric for the similarity of trajectory points', default = 'cityblock') # default is manhattan distance
 parser.add_argument('-s', dest = 'minSimilarity', help = 'minimum similarity to put a trajectory in a cluster', type = float, required = True)
-parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = 0)
+#parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = 0)
 parser.add_argument('--learn', dest = 'learn', help = 'learn', action = 'store_true')
 parser.add_argument('--optimize', dest = 'optimizeCentroid', help = 'recompute centroid at each assignment', action = 'store_true')
 parser.add_argument('--random', dest = 'randomInitialization', help = 'random initialization of clustering algorithm', action = 'store_true')
 parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int)
 parser.add_argument('--display', dest = 'display', help = 'display trajectories', action = 'store_true')
+parser.add_argument('--similarities-filename', dest = 'similaritiesFilename', help = 'filename to load and save the similarities')
 parser.add_argument('--save-similarities', dest = 'saveSimilarities', help = 'save computed similarities (in addition to prototypes)', action = 'store_true')
 parser.add_argument('--save-assignments', dest = 'saveAssignments', help = 'saves the assignments of the objects to the prototypes', action = 'store_true')
 parser.add_argument('--assign', dest = 'assign', help = 'assigns the objects to the prototypes and saves the assignments', action = 'store_true')
@@ -39,62 +40,41 @@
 # TODO add possibility to cluster with velocities
 # TODO add possibility to load all trajectories and use minclustersize
 
-# load trajectories to cluster or assign
-if args.trajectoryType == 'objectfeature':
-    trajectoryType = 'feature'
-    objectFeatureNumbers = storage.loadObjectFeatureFrameNumbers(args.databaseFilename, objectNumbers = args.nTrajectories)
-    featureNumbers = []
-    for numbers in objectFeatureNumbers.values():
-        featureNumbers += numbers[:min(len(numbers), args.maxNObjectFeatures)]
-    objects = storage.loadTrajectoriesFromSqlite(args.databaseFilename, 'feature', objectNumbers = featureNumbers, timeStep = args.positionSubsamplingRate)
+if args.learn and args.assign:
+    print('Cannot learn and assign simultaneously')
+    sys.exit(0)
+
+objects = storage.loadTrajectoriesFromSqlite(args.databaseFilename, args.trajectoryType, args.nObjects, timeStep = args.positionSubsamplingRate, nLongestFeaturesPerObject = args.nLongestFeaturesPerObject)
+if args.trajectoryType == 'object' and args.nLongestFeaturesPerObject is not None:
+    objectsWithFeatures = objects
+    objects = [f for o in objectsWithFeatures for f in o.getFeatures()]
+    prototypeType = 'feature'
 else:
-    trajectoryType = args.trajectoryType
-    objects = storage.loadTrajectoriesFromSqlite(args.databaseFilename, trajectoryType, objectNumbers = args.nTrajectories, timeStep = args.positionSubsamplingRate)
-
-trajectories = [o.getPositions().asArray().T for o in objects]
+    prototypeType = args.trajectoryType
 
 # load initial prototypes, if any    
 if args.inputPrototypeDatabaseFilename is not None:
     initialPrototypes = storage.loadPrototypesFromSqlite(args.inputPrototypeDatabaseFilename, True)
-    trajectories = [p.getMovingObject().getPositions().asArray().T for p in initialPrototypes]+trajectories
-    if len(initialPrototypes) > 0:
-        initialPrototypeIndices = list(range(len(initialPrototypes)))
-    else:
-        initialPrototypeIndices = None
 else:
     initialPrototypes = []
-    initialPrototypeIndices = None
 
 lcss = utils.LCSS(metric = args.metric, epsilon = args.epsilon)
-nTrajectories = len(trajectories)
-
-similarities = -np.ones((nTrajectories, nTrajectories))
 similarityFunc = lambda x,y : lcss.computeNormalized(x, y)
-# the next line can be called again without reinitializing similarities
-if args.learn:
-    prototypeIndices = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, similarityFunc, args.optimizeCentroid, args.randomInitialization, initialPrototypeIndices)
-else:
-    prototypeIndices = initialPrototypeIndices
+nTrajectories = len(initialPrototypes)+len(objects)
+if args.similaritiesFilename is not None:
+    similarities = np.loadtxt(args.similaritiesFilename)
+if args.similaritiesFilename is None or similarities.shape[0] != nTrajectories or similarities.shape[1] != nTrajectories:
+    similarities = -np.ones((nTrajectories, nTrajectories))
 
-if args.assign: # TODO don't touch initial prototypes if not from same db as trajectories
-    #if not args.learn and args.minClusterSize >= 1: # allow only 
-    #   print('Warning: you did not learn the prototypes and you are using minimum cluster size of {}, which may lead to removing prototypes and assigning them to others'.format(args.minClusterSize))
-    # if args.minClusterSize >= 1:
-    #     if initialPrototypeIndices is None:
-    #         prototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc, args.minClusterSize)
-    #     else:
-    #         print('Not assigning with non-zero minimum cluster size and initial prototypes (would remove initial prototypes based on other trajectories')
-    # else:
-    #     prototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc)
-    assignedPrototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc)
+prototypeIndices, labels = processing.learnAssignMotionPatterns(args.learn, args.assign, objects, similarities, args.minSimilarity, similarityFunc, 0, args.optimizeCentroid, args.randomInitialization, False, initialPrototypes)
 
-if args.learn and not args.assign:
+if args.learn:# and not args.assign:
     prototypes = []
     for i in prototypeIndices:
         if i<len(initialPrototypes):
             prototypes.append(initialPrototypes[i])
         else:
-            prototypes.append(moving.Prototype(args.databaseFilename, objects[i-len(initialPrototypes)].getNum(), trajectoryType))
+            prototypes.append(moving.Prototype(args.databaseFilename, objects[i-len(initialPrototypes)].getNum(), prototypeType))
 
     if args.outputPrototypeDatabaseFilename is None:
         outputPrototypeDatabaseFilename = args.databaseFilename
@@ -110,10 +90,10 @@
         plt.axis('equal')
         plt.show()
 
-if not args.learn and args.assign: # no modification to prototypes, can work with initialPrototypes
+if args.assign: # not args.learn and  no modification to prototypes, can work with initialPrototypes
     clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1)
     for i in prototypeIndices:
-        nMatchings = clusterSizes[i]-1
+        nMatchings = clusterSizes[i]-1 # external prototypes
         if initialPrototypes[i].nMatchings is None:
             initialPrototypes[i].nMatchings = nMatchings
         else:
@@ -124,19 +104,25 @@
         outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename
     storage.setPrototypeMatchingsInSqlite(outputPrototypeDatabaseFilename, initialPrototypes)
     if args.saveAssignments:
-        if args.trajectoryType == 'objectfeature': # consider that the object is assigned through its longest features
+        if args.trajectoryType == 'object' and args.nLongestFeaturesPerObject is not None:
+            # consider that the object is assigned through its longest features
+            # issues are inconsistencies in the number of matchings per prototype and display (will display features, not objects)
             objectNumbers = []
             objectLabels = []
-            for objNum, objFeatureNumbers in objectFeatureNumbers.items():
+            i = 0
+            for obj in objectsWithFeatures:
                 objLabels = []
-                for i, o in enumerate(objects):
-                    if o.getNum() in objFeatureNumbers:
+                for f in obj.getFeatures():
+                    if f == objects[i]:
                         objLabels.append(labels[i+len(initialPrototypes)])
+                        i += 1
+                    else:
+                        print('Issue with obj {} and feature {} (trajectory {})'.format(obj.getNum(), f.getNum(), i))
                 objectLabels.append(utils.mostCommon(objLabels))
-                objectNumbers.append(objNum)
+                objectNumbers.append(obj.getNum())
             storage.savePrototypeAssignmentsToSqlite(args.databaseFilename, objectNumbers, 'object', objectLabels, initialPrototypes)
         else:
-            storage.savePrototypeAssignmentsToSqlite(args.databaseFilename, [obj.getNum() for obj in objects], trajectoryType, labels[len(initialPrototypes):], initialPrototypes)
+            storage.savePrototypeAssignmentsToSqlite(args.databaseFilename, [obj.getNum() for obj in objects], args.trajectoryType, labels[len(initialPrototypes):], initialPrototypes)
     if args.display:
         plt.figure()
         for i,o in enumerate(objects):
@@ -150,4 +136,7 @@
         plt.show()
 
 if (args.learn or args.assign) and args.saveSimilarities:
-    np.savetxt(utils.removeExtension(args.databaseFilename)+'-prototype-similarities.txt.gz', similarities, '%.4f')
+    if args.similaritiesFilename is not None:
+        np.savetxt(args.similaritiesFilename, similarities, '%.4f')
+    else:
+        np.savetxt(utils.removeExtension(args.databaseFilename)+'-prototype-similarities.txt.gz', similarities, '%.4f')
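The new --similarities-filename option lets repeated runs reuse the LCSS computations: the saved matrix is reloaded only if its shape still matches the number of trajectories, since -1 entries mark pairs not yet computed. A minimal sketch of that reload-or-reinitialize pattern (function name hypothetical):

    import numpy as np

    def loadOrInitSimilarities(filename, nTrajectories):
        '''Reloads a saved similarity matrix if its shape still matches,
        otherwise starts over from -1 ("not computed yet")'''
        if filename is not None:
            try:
                similarities = np.loadtxt(filename)
                if similarities.shape == (nTrajectories, nTrajectories):
                    return similarities
            except OSError:
                pass
        return -np.ones((nTrajectories, nTrajectories))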
--- a/scripts/process.py	Thu Jul 05 22:24:31 2018 -0400
+++ b/scripts/process.py	Fri Jul 06 18:42:58 2018 -0400
@@ -7,17 +7,17 @@
 #import matplotlib
 #matplotlib.use('Agg')
 import matplotlib.pyplot as plt
-from numpy import percentile
+import numpy as np
 from pandas import DataFrame
 
-from trafficintelligence import storage, events, prediction, cvutils, utils
+from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml
 from trafficintelligence.metadata import *
 
 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.')
 # input
 parser.add_argument('--db', dest = 'metadataFilename', help = 'name of the metadata file', required = True)
 parser.add_argument('--videos', dest = 'videoIds', help = 'indices of the video sequences', nargs = '*', type = int)
-parser.add_argument('--sites', dest = 'siteIds', help = 'indices of the video sequences', nargs = '*', type = int)
+parser.add_argument('--sites', dest = 'siteIds', help = 'indices of the sites', nargs = '*')
 
 # main function
 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction'])
@@ -28,8 +28,34 @@
 # common options
 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file')
 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int)
+parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['feature', 'object'], default = 'feature')
 parser.add_argument('--dry', dest = 'dryRun', help = 'dry run of processing', action = 'store_true')
 parser.add_argument('--nthreads', dest = 'nProcesses', help = 'number of processes to run in parallel', type = int, default = 1)
+parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int)
+
+### process options
+# motion pattern learning and assignment
+parser.add_argument('--prototype-filename', dest = 'outputPrototypeDatabaseFilename', help = 'name of the Sqlite database file to save prototypes', default = 'prototypes.sqlite')
+#parser.add_argument('-i', dest = 'inputPrototypeDatabaseFilename', help = 'name of the Sqlite database file for prototypes to start the algorithm with')
+parser.add_argument('--nobjects-mp', dest = 'nMPObjects', help = 'number of objects to use for motion pattern learning', type = int)
+parser.add_argument('--nfeatures-per-object', dest = 'nLongestFeaturesPerObject', help = 'maximum number of features per object to load', type = int)
+parser.add_argument('--epsilon', dest = 'epsilon', help = 'distance for the similarity of trajectory points', type = float)
+parser.add_argument('--metric', dest = 'metric', help = 'metric for the similarity of trajectory points', default = 'cityblock') # default is manhattan distance
+parser.add_argument('--minsimil', dest = 'minSimilarity', help = 'minimum similarity to put a trajectory in a cluster', type = float)
+parser.add_argument('--min-cluster-size', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = 0)
+#parser.add_argument('--learn', dest = 'learn', help = 'learn', action = 'store_true')
+parser.add_argument('--optimize', dest = 'optimizeCentroid', help = 'recompute centroid at each assignment', action = 'store_true')
+parser.add_argument('--random', dest = 'randomInitialization', help = 'random initialization of clustering algorithm', action = 'store_true')
+#parser.add_argument('--similarities-filename', dest = 'similaritiesFilename', help = 'filename of the similarities')
+parser.add_argument('--save-similarities', dest = 'saveSimilarities', help = 'save computed similarities (in addition to prototypes)', action = 'store_true')
+parser.add_argument('--save-assignments', dest = 'saveAssignments', help = 'saves the assignments of the objects to the prototypes', action = 'store_true')
+parser.add_argument('--assign', dest = 'assign', help = 'assigns the objects to the prototypes and saves the assignments', action = 'store_true')
+
+# safety analysis
+parser.add_argument('--prediction-method', dest = 'predictionMethod', help = 'prediction method (constant velocity (cvd: vector computation (approximate); cve: equation solving; cv: discrete time (approximate)), normal adaptation, point set prediction)', choices = ['cvd', 'cve', 'cv', 'na', 'ps', 'mp'])
+parser.add_argument('--pet', dest = 'computePET', help = 'computes PET', action = 'store_true')
+# override other tracking config, erase sqlite?
+
 
 # analysis options
 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event'])
@@ -40,11 +66,6 @@
 dpi = 150
 # unit of analysis: site or video sequence?
 
-# safety analysis
-parser.add_argument('--prediction-method', dest = 'predictionMethod', help = 'prediction method (constant velocity (cvd: vector computation (approximate); cve: equation solving; cv: discrete time (approximate)), normal adaptation, point set prediction)', choices = ['cvd', 'cve', 'cv', 'na', 'ps', 'mp'])
-parser.add_argument('--pet', dest = 'computePET', help = 'computes PET', action = 'store_true')
-# override other tracking config, erase sqlite?
-
 # need way of selecting sites as similar as possible to sql alchemy syntax
 # override tracking.cfg from db
 # manage cfg files, overwrite them (or a subset of parameters)
@@ -59,13 +80,18 @@
 session = connectDatabase(args.metadataFilename)
 parentPath = Path(args.metadataFilename).parent # files are relative to metadata location
 videoSequences = []
+sites = []
 if args.videoIds is not None:
     videoSequences = [session.query(VideoSequence).get(videoId) for videoId in args.videoIds]
+    siteIds = set([vs.cameraView.siteIdx for vs in videoSequences])
 elif args.siteIds is not None:
-    for siteId in args.siteIds:
-        for site in getSite(session, siteId):
+    siteIds = set(args.siteIds)
+    for siteId in siteIds:
+        tmpsites = getSite(session, siteId)
+        sites.extend(tmpsites)
+        for site in tmpsites:
             for cv in site.cameraViews:
-                videoSequences += cv.videoSequences
+                videoSequences.extend(cv.videoSequences)
 else:
     print('No video/site to process')
 
@@ -121,7 +147,40 @@
         pool.join()
 
 elif args.process == 'prototype': # motion pattern learning
-    pass
+    # learn by site by default -> group videos by site (or by camera view? TODO add cameraviews)
+    # by default, load all objects, learn and then assign (BUT not save the assignments)
+    for site in sites:
+        print('Learning motion patterns for site {} ({})'.format(site.idx, site.name))
+        objects = {}
+        object2VideoSequences = {}
+        for cv in site.cameraViews:
+            for vs in cv.videoSequences:
+                print('Loading '+vs.getDatabaseFilename())
+                objects[vs.idx] = storage.loadTrajectoriesFromSqlite(str(parentPath/vs.getDatabaseFilename()), args.trajectoryType, args.nObjects, timeStep = args.positionSubsamplingRate, nLongestFeaturesPerObject = args.nLongestFeaturesPerObject)
+                if args.trajectoryType == 'object' and args.nLongestFeaturesPerObject is not None:
+                    objectsWithFeatures = objects[vs.idx]
+                    objects[vs.idx] = [f for o in objectsWithFeatures for f in o.getFeatures()]
+                    prototypeType = 'feature'
+                else:
+                    prototypeType = args.trajectoryType
+                for obj in objects[vs.idx]:
+                    object2VideoSequences[obj] = vs
+        lcss = utils.LCSS(metric = args.metric, epsilon = args.epsilon)
+        similarityFunc = lambda x,y : lcss.computeNormalized(x, y)
+        trainingObjects = [o for tmpobjects in objects.values() for o in tmpobjects]
+        if args.nMPObjects is not None and args.nMPObjects < len(trainingObjects):
+            m = int(np.floor(float(len(trainingObjects))/args.nMPObjects))
+            trainingObjects = trainingObjects[::m]
+        similarities = -np.ones((len(trainingObjects), len(trainingObjects)))
+        prototypeIndices, labels = processing.learnAssignMotionPatterns(True, True, trainingObjects, similarities, args.minSimilarity, similarityFunc, args.minClusterSize, args.optimizeCentroid, args.randomInitialization, True, [])
+        if args.outputPrototypeDatabaseFilename is None:
+            outputPrototypeDatabaseFilename = args.databaseFilename
+        else:
+            outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename
+        # TODO maintain mapping from object prototype to db filename + compute nmatchings before
+        clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1)
+        storage.savePrototypesToSqlite(str(parentPath/site.getPath()/outputPrototypeDatabaseFilename), [moving.Prototype(object2VideoSequences[trainingObjects[i]].getDatabaseFilename(False), trainingObjects[i].getNum(), prototypeType, clusterSizes[i]) for i in prototypeIndices])
+
 
 elif args.process == 'interaction':
     # safety analysis TODO make function in safety analysis script
@@ -183,10 +242,6 @@
                         row.append(aggSpeeds)
             data.append(row)
     data = DataFrame(data, columns = headers)
-    if args.siteIds is None:
-        siteIds = set([vs.cameraView.siteIdx for vs in videoSequences])
-    else:
-        siteIds = set(args.siteIds)
     if args.output == 'figure':
         for name in headers[4:]:
             plt.ioff()
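The --nobjects-mp cap in the prototype block above does not truncate the training set; it keeps every m-th trajectory so the sample spans all loaded video sequences. A toy illustration (values invented):

    import numpy as np

    trainingObjects = list(range(10))  # stand-in for the loaded trajectories
    nMPObjects = 4
    if nMPObjects < len(trainingObjects):
        m = int(np.floor(float(len(trainingObjects)) / nMPObjects))
        trainingObjects = trainingObjects[::m]
    print(trainingObjects)  # [0, 2, 4, 6, 8], spread over the whole set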
--- a/trafficintelligence/ml.py	Thu Jul 05 22:24:31 2018 -0400
+++ b/trafficintelligence/ml.py	Fri Jul 06 18:42:58 2018 -0400
@@ -150,16 +150,13 @@
     code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
     return code,sigma
 
-def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0):
+def assignToPrototypeClusters(instances, initialPrototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize = 0):
     '''Assigns instances to prototypes 
     if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters
     and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
 
     labels are indices in the prototypeIndices'''
-    if similarityFunc is None:
-        print('similarityFunc is None')
-        return None
-
+    prototypeIndices = copy(initialPrototypeIndices)
     indices = [i for i in range(len(instances)) if i not in prototypeIndices]
     labels = [-1]*len(instances)
     assign = True
@@ -184,7 +181,7 @@
             indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
     return prototypeIndices, labels
 
-def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
     '''Finds exemplar (prototype) instance that represent each cluster
     Returns the prototype indices (in the instances list)
 
@@ -205,22 +202,12 @@
     if len(instances) == 0:
         print('no instances to cluster (empty list)')
         return None
-    if similarityFunc is None:
-        print('similarityFunc is None')
-        return None
 
     # sort instances based on length
     indices = list(range(len(instances)))
     if randomInitialization or optimizeCentroid:
         indices = np.random.permutation(indices).tolist()
     else:
-        def compare(i, j):
-            if len(instances[i]) > len(instances[j]):
-                return -1
-            elif len(instances[i]) == len(instances[j]):
-                return 0
-            else:
-                return 1
         indices.sort(key=lambda i: len(instances[i]))
     # initialize clusters
     clusters = []
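With the None guards removed, similarityFunc is now a required argument to both functions. A minimal calling sketch with toy point arrays (parameter values assumed; the scripts pass trajectories as (nPoints, 2) arrays from getPositions().asArray().T):

    import numpy as np
    from trafficintelligence import ml, utils

    trajectories = [np.random.rand(n, 2) for n in (20, 25, 30)]  # toy trajectories
    lcss = utils.LCSS(metric = 'cityblock', epsilon = 0.1)
    similarityFunc = lambda x,y : lcss.computeNormalized(x, y)
    similarities = -np.ones((len(trajectories), len(trajectories)))
    prototypeIndices = ml.prototypeCluster(trajectories, similarities, 0.5, similarityFunc)
    prototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, 0.5, similarityFunc)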
--- a/trafficintelligence/moving.py	Thu Jul 05 22:24:31 2018 -0400
+++ b/trafficintelligence/moving.py	Fri Jul 06 18:42:58 2018 -0400
@@ -1368,7 +1368,7 @@
             tmp = utils.sortByLength(self.getFeatures(), reverse = True)
             return tmp[:min(len(tmp), nFeatures)]                                        
         
-    def getFeatureNumbers(self):
+    def getFeatureNumbersOverTime(self):
         '''Returns the number of features at each instant
         dict instant -> number of features'''
         if self.hasFeatures():
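The renamed method returns a dict mapping each instant to the number of features present at that instant, and requires the features to be loaded. A usage sketch (database name assumed):

    from trafficintelligence import storage

    objects = storage.loadTrajectoriesFromSqlite('tracking.sqlite', 'object', withFeatures = True)
    print(objects[0].getFeatureNumbersOverTime())  # e.g. {1001: 3, 1002: 4, ...}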
--- a/trafficintelligence/processing.py	Thu Jul 05 22:24:31 2018 -0400
+++ b/trafficintelligence/processing.py	Fri Jul 06 18:42:58 2018 -0400
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from trafficintelligence import moving
+from trafficintelligence import ml
 
 def extractSpeeds(objects, zone):
     speeds = {}
@@ -17,3 +17,31 @@
         else:
             objectsNotInZone.append(o)
     return speeds, objectsNotInZone
+
+def learnAssignMotionPatterns(learn, assign, objects, similarities, minSimilarity, similarityFunc, minClusterSize = 0, optimizeCentroid = False, randomInitialization = False, removePrototypesAfterAssignment = False, initialPrototypes = []):
+    '''Learns motion patterns
+
+    During assignments, if using minClusterSize > 0, prototypes can change (be removed)
+    The argument removePrototypesAfterAssignment indicates whether the prototypes are removed or not'''
+    if len(initialPrototypes) > 0:
+        initialPrototypeIndices = list(range(len(initialPrototypes)))
+        trajectories = [p.getMovingObject().getPositions().asArray().T for p in initialPrototypes]
+    else:
+        initialPrototypeIndices = None
+        trajectories = []
+    trajectories.extend([o.getPositions().asArray().T for o in objects])
+
+    if learn:
+        prototypeIndices = ml.prototypeCluster(trajectories, similarities, minSimilarity, similarityFunc, optimizeCentroid, randomInitialization, initialPrototypeIndices)
+    else:
+        prototypeIndices = initialPrototypeIndices
+
+    if assign:
+        assignedPrototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize)
+        if minClusterSize > 0 and removePrototypesAfterAssignment: # use prototypeIndices anyway
+            prototypeIndices = assignedPrototypeIndices
+    else:
+        labels = None
+
+    return prototypeIndices, labels
+    
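A usage sketch of the new helper, mirroring how learn-motion-patterns.py calls it (database name and thresholds assumed):

    import numpy as np
    from trafficintelligence import processing, storage, utils

    objects = storage.loadTrajectoriesFromSqlite('tracking.sqlite', 'feature', 100)
    lcss = utils.LCSS(metric = 'cityblock', epsilon = 2.0)
    similarities = -np.ones((len(objects), len(objects)))
    # learn prototypes and assign every trajectory to one, in a single call
    prototypeIndices, labels = processing.learnAssignMotionPatterns(True, True, objects, similarities, 0.6, lambda x,y : lcss.computeNormalized(x, y))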
--- a/trafficintelligence/run-tests.sh	Thu Jul 05 22:24:31 2018 -0400
+++ b/trafficintelligence/run-tests.sh	Fri Jul 06 18:42:58 2018 -0400
@@ -4,7 +4,3 @@
 do
     python3 $f
 done
-for f in ./tests/*.py
-do
-    python3 $f
-done
--- a/trafficintelligence/storage.py	Thu Jul 05 22:24:31 2018 -0400
+++ b/trafficintelligence/storage.py	Fri Jul 06 18:42:58 2018 -0400
@@ -7,7 +7,7 @@
 from copy import copy
 import sqlite3, logging
 
-from numpy import log, min as npmin, max as npmax, round as npround, array, sum as npsum, loadtxt, floor as npfloor, ceil as npceil, linalg
+from numpy import log, min as npmin, max as npmax, round as npround, array, sum as npsum, loadtxt, floor as npfloor, ceil as npceil, linalg, int32, int64
 from pandas import read_csv, merge
 
 from trafficintelligence import utils, moving, events, indicators
@@ -22,6 +22,9 @@
               'object': 'objects',
               'objectfeatures': 'positions'}
 
+sqlite3.register_adapter(int64, lambda val: int(val))
+sqlite3.register_adapter(int32, lambda val: int(val))
+
 #########################
 # Sqlite
 #########################
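The adapters registered above are needed because sqlite3 rejects numpy integer scalars as bind parameters. A standalone illustration:

    import sqlite3
    from numpy import int64

    sqlite3.register_adapter(int64, lambda val: int(val))
    with sqlite3.connect(':memory:') as connection:
        connection.execute('CREATE TABLE t (n INTEGER)')
        # without the adapter, binding int64(3) raises sqlite3.InterfaceError
        connection.execute('INSERT INTO t VALUES (?)', (int64(3),))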
@@ -247,24 +250,18 @@
             attributes[row[0]] = row[1]
     return attributes
 
-def loadTrajectoriesFromSqlite(filename, trajectoryType, objectNumbers = None, withFeatures = False, timeStep = None, maxNObjectFeatures = 1):
+def loadTrajectoriesFromSqlite(filename, trajectoryType, objectNumbers = None, withFeatures = False, timeStep = None, nLongestFeaturesPerObject = None):
     '''Loads the trajectories (in the general sense, 
-    either features, objects (feature groups), longest features per object, or bounding box series) 
+    either features, objects (feature groups), longest features per object, or bounding box series)
+    the only types are 'feature' and 'object'
+    if 'object', features can be loaded with withFeatures, or nLongestFeaturesPerObject used to select the n longest features per object
 
     The number loaded is either the first objectNumbers objects,
     or the indices in objectNumbers from the database'''
     objects = []
     with sqlite3.connect(filename) as connection:
-        if trajectoryType == 'objectfeature':
-            objectFeatureNumbers = loadObjectFeatureFrameNumbers(filename, objectNumbers)
-            featureNumbers = []
-            for numbers in objectFeatureNumbers.values():
-                featureNumbers += numbers[:min(len(numbers), maxNObjectFeatures)]
-            objects = loadTrajectoriesFromTable(connection, 'positions', 'feature', featureNumbers, timeStep)
-            objectVelocities = loadTrajectoriesFromTable(connection, 'velocities', 'feature', featureNumbers, timeStep)
-        else:
-            objects = loadTrajectoriesFromTable(connection, 'positions', trajectoryType, objectNumbers, timeStep)
-            objectVelocities = loadTrajectoriesFromTable(connection, 'velocities', trajectoryType, objectNumbers, timeStep)
+        objects = loadTrajectoriesFromTable(connection, 'positions', trajectoryType, objectNumbers, timeStep)
+        objectVelocities = loadTrajectoriesFromTable(connection, 'velocities', trajectoryType, objectNumbers, timeStep)
 
         if len(objectVelocities) > 0:
             for o,v in zip(objects, objectVelocities):
@@ -283,7 +280,7 @@
                     queryStatement += ' WHERE object_id '+getObjectCriteria(objectNumbers)
                 queryStatement += ' ORDER BY object_id' # order is important to group all features per object
                 logging.debug(queryStatement)
-                cursor.execute(queryStatement) 
+                cursor.execute(queryStatement)
 
                 featureNumbers = {}
                 for row in cursor:
@@ -303,13 +300,16 @@
                     obj.setUserType(userType)
                     obj.setNObjects(nObjects)
 
+                # add features
                 if withFeatures:
-                    nFeatures = 0
                     for obj in objects:
-                        nFeatures = max(nFeatures, max(obj.featureNumbers))
-                    features = loadTrajectoriesFromSqlite(filename, 'feature', nFeatures+1, timeStep = timeStep)
+                        obj.features = loadTrajectoriesFromSqlite(filename, 'feature', obj.featureNumbers, timeStep = timeStep)
+                elif nLongestFeaturesPerObject is not None:
                     for obj in objects:
-                        obj.setFeatures(features)
+                        queryStatement = 'SELECT trajectory_id, max(frame_number)-min(frame_number) AS length FROM positions WHERE trajectory_id '+getObjectCriteria(obj.featureNumbers)+' GROUP BY trajectory_id ORDER BY length DESC'
+                        logging.debug(queryStatement)
+                        cursor.execute(queryStatement)
+                        obj.features = loadTrajectoriesFromSqlite(filename, 'feature', [row[0] for i,row in enumerate(cursor) if i<nLongestFeaturesPerObject], timeStep = timeStep)
 
             except sqlite3.OperationalError as error:
                 printDBError(error)
@@ -338,12 +338,6 @@
             printDBError(error)
             return None
 
-def loadObjectTrajectoriesFromSqlite():
-    '''Loads object trajectories 
-    either simply objects or features (defaults to loadTrajectoriesFromSqlite) 
-    or the longest features for each object '''
-        
-
 def addCurvilinearTrajectoriesFromSqlite(filename, objects):
     '''Adds curvilinear positions (s_coordinate, y_coordinate, lane)
     from a database to an existing MovingObject dict (indexed by each object's num)'''
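A sketch of the revised loading options for objects (database name assumed): either all their features, or only the n longest ones selected by the new SQL query above.

    from trafficintelligence import storage

    # all features of each object
    objects = storage.loadTrajectoriesFromSqlite('tracking.sqlite', 'object', withFeatures = True)
    # only the two longest features of each object
    objects = storage.loadTrajectoriesFromSqlite('tracking.sqlite', 'object', nLongestFeaturesPerObject = 2)
    print([len(o.getFeatures()) for o in objects])  # at most 2 each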
--- a/trafficintelligence/tests/tutorials.py	Thu Jul 05 22:24:31 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-import unittest
-
-class TestNGSIM(unittest.TestCase):
-    'Tutorial example for NGSIM data'
-
-    def test_ex1(self):
-        from trafficintelligence import storage
-        objects = storage.loadTrajectoriesFromNgsimFile('../samples/trajectories-0400-0415.txt',100)
-        for o in objects: o.plot()
-
-class TestTrajectoryLoading(unittest.TestCase):
-    'Tutorial example for NGSIM data'
-
-    def test_ex1(self):
-        from trafficintelligence import storage
-        objects = storage.loadTrajectoriesFromSqlite('../samples/laurier.sqlite', 'object')
-
-        speed = objects[0].getVelocityAtInstant(10).norm2()
-        timeInterval = objects[0].getTimeInterval()
-        speeds = [objects[0].getVelocityAtInstant(t).norm2() for t in range(timeInterval.first, timeInterval.last)]
-        speeds = [v.norm2() for v in objects[0].getVelocities()]
-
-        from matplotlib.pyplot import plot, close, axis
-        plot(range(timeInterval.first, timeInterval.last+1), speeds)
-
-        close('all')
-        objects[0].plot()
-        axis('equal')
-
-        features = storage.loadTrajectoriesFromSqlite('../samples/laurier.sqlite', 'feature')
-        objects[0].setFeatures(features)
-
-        for f in objects[0].features:
-            f.plot()
-        axis('equal')
-
-
-if __name__ == '__main__':
-    unittest.main()