changeset 1246:2397de73770d

dltrack saves after projecting coordinates
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Fri, 09 Feb 2024 17:47:33 -0500
parents 371c718e57d7
children 439207b6c146
files scripts/classify-objects.py scripts/display-trajectories.py scripts/dltrack.py scripts/extract-appearance-images.py trafficintelligence/storage.py
diffstat 5 files changed, 61 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/classify-objects.py	Thu Feb 08 16:10:54 2024 -0500
+++ b/scripts/classify-objects.py	Fri Feb 09 17:47:33 2024 -0500
@@ -29,7 +29,7 @@
 parser.add_argument('--verbose', dest = 'verbose', help = 'verbose information', action = 'store_true')
 
 args = parser.parse_args()
-params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args)
+params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args)
 classifierParams = storage.ClassifierParameters(params.classifierFilename)
 classifierParams.convertToFrames(params.videoFrameRate, 3.6) # conversion from km/h to m/frame
 
--- a/scripts/display-trajectories.py	Thu Feb 08 16:10:54 2024 -0500
+++ b/scripts/display-trajectories.py	Fri Feb 09 17:47:33 2024 -0500
@@ -27,10 +27,10 @@
 parser.add_argument('--nzeros', dest = 'nZerosFilenameArg', help = 'number of digits in filenames', type = int)
 
 args = parser.parse_args()
-params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args)
+params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args)
 
 if args.homographyFilename is not None:
-    invHomography = inv(loadtxt(args.homographyFilename))            
+    invHomography = inv(loadtxt(args.homographyFilename))
 if args.intrinsicCameraMatrixFilename is not None:
     intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename)
 if args.distortionCoefficients is not None:
--- a/scripts/dltrack.py	Thu Feb 08 16:10:54 2024 -0500
+++ b/scripts/dltrack.py	Fri Feb 09 17:47:33 2024 -0500
@@ -1,6 +1,7 @@
 #! /usr/bin/env python3
 # from https://docs.ultralytics.com/modes/track/
 import sys, argparse
+from math import inf
 from copy import copy
 from collections import Counter
 import numpy as np
@@ -12,14 +13,20 @@
 
 from trafficintelligence import cvutils, moving, storage, utils
 
-parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trakcers.')
+parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trackers.',
+                                 epilog= '''The models can be found in the Ultralytics model zoo, 
+                                 eg YOLOv8 (https://docs.ultralytics.com/models/yolov8/).
+                                 The tracking models can be found also online 
+                                 (https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers).
+                                 The choice is to project the middle of the bottom line for persons, 
+                                 and the bounding box center otherwise.''')
 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file')
 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)')
 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)')
 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True)
 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True)
-parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix', default = 'homography.txt')
-parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file')
+parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix')
+#parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file')
 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true')
 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file')
 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float)
@@ -27,15 +34,17 @@
 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true')
 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0)
 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf'))
-parser.add_argument('--conf', dest = 'confindence', help = 'object confidence threshold for detection', type = float, default = 0.25)
+parser.add_argument('--conf', dest = 'confidence', help = 'object confidence threshold for detection', type = float, default = 0.25)
 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2)
 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15)
 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3)
 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05)
 
 args = parser.parse_args()
-params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args)
+params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args)
 
+if args.homographyFilename is not None:
+    homography = np.loadtxt(args.homographyFilename)
 if args.intrinsicCameraMatrixFilename is not None:
     intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename)
 if args.distortionCoefficients is not None:
@@ -44,8 +53,13 @@
     firstFrameNum = args.firstFrameNum
 if args.lastFrameNum is not None:
     lastFrameNum = args.lastFrameNum
+elif args.configFilename is not None:
+    lastFrameNum = params.lastFrameNum
+else:
+    lastFrameNum = inf
 
-# TODO add option to refine position with mask for vehicles
+# TODO use mask (the -k mask file option is currently commented out)
+# TODO add option to refine position with mask for vehicles, to save different positions
 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html
 
 # use 2 x bytetrack track buffer to remove objects from existing ones
@@ -53,43 +67,40 @@
 # Load a model
 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt
 # seg could be used on cropped image... if can be loaded and kept in memory
-# model = YOLO('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get'
+# model = YOLOX('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get'
 
 # Track with the model
 if args.display:
     windowName = 'frame'
     cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
 
-capture = cv2.VideoCapture(args.videoFilename)
+capture = cv2.VideoCapture(videoFilename)
 objects = {}
 featureNum = 1
-frameNum = args.firstFrameNum
+frameNum = firstFrameNum
 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum)
-lastFrameNum = args.lastFrameNum
 
 success, frame = capture.read()
 if not success:
     print('Input {} could not be read. Exiting'.format(args.videoFilename))
     import sys; sys.exit()
 
-results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), persist=True, verbose=False)
+results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), conf = args.confidence, persist=True, verbose=False)
 while capture.isOpened() and success and frameNum <= lastFrameNum:
     result = results[0]
     if frameNum %10 == 0:
         print(frameNum, len(result.boxes), 'objects')
     for box in result.boxes:
-        #print(box.cls, box.id, box.xyxy)
         if box.id is not None: # None are objects with low confidence
             num = int(box.id.item())
-            #xyxy = box.xyxy[0].tolist()
             if num in objects:
                 objects[num].timeInterval.last = frameNum
                 objects[num].features[0].timeInterval.last = frameNum
                 objects[num].features[1].timeInterval.last = frameNum
                 objects[num].bboxes[frameNum] = copy(box.xyxy)
                 objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())])
-                objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item())
-                objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item())
+                objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) # min
+                objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) # max
             else:
                 inter = moving.TimeInterval(frameNum, frameNum)
                 objects[num] = moving.MovingObject(num, inter)
@@ -113,7 +124,7 @@
 for num, obj in objects.items():
     obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed?
 
-# add quality control: avoid U-turns
+# TODO add quality control: avoid U-turns
     
 # merge bikes and people
 twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])]
@@ -188,17 +199,37 @@
         del objects[pedestrians[pedInd]]
         #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples)
 
-# interpolate and generate velocity (?) for the features (bboxes) before saving
+# interpolate and save image coordinates
 for num, obj in objects.items():
-    #obj.features[1].timeInterval = copy(obj.getTimeInterval())
     for f in obj.getFeatures():
         if f.length() != len(f.tmpPositions): # interpolate
             f.positions = moving.Trajectory.fromPointDict(f.tmpPositions)
-            #obj.features[1].positions = moving.Trajectory.fromPointDict(obj.features[1].tmpPositions)
         else:
             f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values()))
-            #obj.features[1].positions = moving.Trajectory.fromPointList(list(obj.features[1].tmpPositions.values()))
-
+if not args.notSavingImageCoordinates:
+    storage.saveTrajectoriesToSqlite(utils.removeExtension(args.databaseFilename)+'-bb.sqlite', list(objects.values()), 'object')
+# project to world coordinates and save (TODO: smoothing is not applied yet)
+for num, obj in objects.items():
+    features = obj.getFeatures()
+    if moving.userTypeNames[obj.getUserType()] == 'pedestrian':
+        assert len(features) == 2
+        t1 = features[0].getPositions()
+        t2 = features[1].getPositions()
+        t = [[(p1.x+p2.x)/2., max(p1.y, p2.y)] for p1, p2 in zip(t1, t2)]
+    else:
+        t = []
+        for instant in obj.getTimeInterval():
+            points = []
+            for f in features:
+                if f.existsAtInstant(instant):
+                    points.append(f.getPositionAtInstant(instant))
+            t.append(moving.Point.agg(points, np.mean).aslist())
+        #t = sum([f.getPositions().asArray() for f in features])/len(features)
+        #t = (moving.Trajectory.add(t1, t2)*0.5).asArray()
+    projected = cvutils.imageToWorldProject(np.array(t).T, intrinsicCameraMatrix, distortionCoefficients, homography)
+    featureNum = features[0].getNum()
+    obj.features=[moving.MovingObject(featureNum, obj.getTimeInterval(), moving.Trajectory(projected.tolist()))]
+    obj.featureNumbers = [featureNum]
 storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object')
 
 # todo save bbox and mask to study localization / representation
--- a/scripts/extract-appearance-images.py	Thu Feb 08 16:10:54 2024 -0500
+++ b/scripts/extract-appearance-images.py	Fri Feb 09 17:47:33 2024 -0500
@@ -23,7 +23,7 @@
 parser.add_argument('--compute-speed-distributions', dest = 'computeSpeedDistribution', help = 'computes the distribution of the road users of each type and fits parameters to each', action = 'store_true')
 
 args = parser.parse_args()
-params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args)
+params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args)
 classifierParams = storage.ClassifierParameters(params.classifierFilename)
 
 classificationAnnotations = read_csv(args.classificationAnnotationFilename, index_col=0, delimiter = args.classificationAnnotationFilenameDelimiter, names = ["object_num", "road_user_type"])
--- a/trafficintelligence/storage.py	Thu Feb 08 16:10:54 2024 -0500
+++ b/trafficintelligence/storage.py	Fri Feb 09 17:47:33 2024 -0500
@@ -1652,6 +1652,7 @@
         self.minFeatureEigThreshold = config.getfloat(self.sectionHeader, 'min-feature-eig-threshold')
         self.minFeatureTime = config.getint(self.sectionHeader, 'min-feature-time')
         self.minFeatureDisplacement = config.getfloat(self.sectionHeader, 'min-feature-displacement')
+        self.smoothingHalfWidth = config.getfloat(self.sectionHeader, 'smoothing-halfwidth')
         #self.updateTimer = config.getint(self.sectionHeader, 'tracker-reload-time')
         
 
@@ -1672,8 +1673,10 @@
         videoFilename = params.videoFilename
         databaseFilename = params.databaseFilename
         if params.homography is not None:
+            homography = params.homography
             invHomography = linalg.inv(params.homography)
         else:
+            homography = None
             invHomography = None
         intrinsicCameraMatrix = params.intrinsicCameraMatrix
         distortionCoefficients = array(params.distortionCoefficients)
@@ -1682,6 +1685,7 @@
         firstFrameNum = params.firstFrameNum
     else:
         params = None
+        homography = None
         invHomography = None
         undistort = False
         intrinsicCameraMatrix = None
@@ -1700,7 +1704,7 @@
     else:
         databaseFilename = params.databaseFilename
 
-    return params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum
+    return params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum
     
 # deprecated
 class SceneParameters(object):