traffic-intelligence: comparison of scripts/dltrack.py @ 1246:2397de73770d
dltrack saves after projecting coordinates
author: Nicolas Saunier <nicolas.saunier@polymtl.ca>
date: Fri, 09 Feb 2024 17:47:33 -0500
parents: 371c718e57d7
children: 439207b6c146
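What the commit does, in short: trajectories are now projected from image to world coordinates before being saved to the database, and the raw image-coordinate results go to a separate '-bb.sqlite' file unless --no-image-coordinates is given. A minimal sketch of such a projection step, assuming a plain 3x3 homography and no lens distortion (the script itself calls cvutils.imageToWorldProject, which additionally takes the intrinsic camera matrix and distortion coefficients):

```python
import numpy as np
import cv2

def image_to_world(points, homography):
    '''Project (x, y) pixel coordinates to world coordinates
    with a 3x3 homography matrix.'''
    pts = np.asarray(points, dtype=np.float64).reshape(-1, 1, 2)
    return cv2.perspectiveTransform(pts, homography).reshape(-1, 2)

homography = np.loadtxt('homography.txt')       # file passed with -o
trajectory = [(320., 540.), (324., 536.)]       # pixel positions over two frames
world = image_to_world(trajectory, homography)  # what now gets saved
print(world)
```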
1245:371c718e57d7 (before) | 1246:2397de73770d (after) |
---|---|
1 #! /usr/bin/env python3 | 1 #! /usr/bin/env python3 |
2 # from https://docs.ultralytics.com/modes/track/ | 2 # from https://docs.ultralytics.com/modes/track/ |
3 import sys, argparse | 3 import sys, argparse |
| 4 from math import inf |
4 from copy import copy | 5 from copy import copy |
5 from collections import Counter | 6 from collections import Counter |
6 import numpy as np | 7 import numpy as np |
7 from scipy.optimize import linear_sum_assignment | 8 from scipy.optimize import linear_sum_assignment |
8 from ultralytics import YOLO | 9 from ultralytics import YOLO |
10 from torchvision.ops import box_iou | 11 from torchvision.ops import box_iou |
11 import cv2 | 12 import cv2 |
12 | 13 |
13 from trafficintelligence import cvutils, moving, storage, utils | 14 from trafficintelligence import cvutils, moving, storage, utils |
14 | 15 |
15 parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trakcers.') | 16 parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trackers.', |
| 17 epilog= '''The models can be found in the Ultralytics model zoo, |
| 18 eg YOLOv8 (https://docs.ultralytics.com/models/yolov8/). |
| 19 The tracking models can be found also online |
| 20 (https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). |
| 21 The choice is to project the middle of the bottom line for persons, |
| 22 and the bounding box center otherwise.''') |
16 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') | 23 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') |
17 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)') | 24 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)') |
18 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)') | 25 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)') |
19 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True) | 26 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True) |
20 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True) | 27 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True) |
21 parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix', default = 'homography.txt') | 28 parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix') |
22 parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file') | 29 #parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file') |
23 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true') | 30 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true') |
24 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file') | 31 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file') |
25 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float) | 32 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float) |
26 parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true') | 33 parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true') |
27 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true') | 34 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true') |
28 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0) | 35 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0) |
29 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf')) | 36 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf')) |
30 parser.add_argument('--conf', dest = 'confindence', help = 'object confidence threshold for detection', type = float, default = 0.25) | 37 parser.add_argument('--conf', dest = 'confidence', help = 'object confidence threshold for detection', type = float, default = 0.25) |
31 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2) | 38 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2) |
32 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15) | 39 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15) |
33 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3) | 40 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3) |
34 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05) | 41 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05) |
35 | 42 |
36 args = parser.parse_args() | 43 args = parser.parse_args() |
37 params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) | 44 params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) |
38 | 45 |
| 46 if args.homographyFilename is not None: |
| 47 homography = np.loadtxt(args.homographyFilename) |
39 if args.intrinsicCameraMatrixFilename is not None: | 48 if args.intrinsicCameraMatrixFilename is not None: |
40 intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename) | 49 intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename) |
41 if args.distortionCoefficients is not None: | 50 if args.distortionCoefficients is not None: |
42 distortionCoefficients = args.distortionCoefficients | 51 distortionCoefficients = args.distortionCoefficients |
43 if args.firstFrameNum is not None: | 52 if args.firstFrameNum is not None: |
44 firstFrameNum = args.firstFrameNum | 53 firstFrameNum = args.firstFrameNum |
45 if args.lastFrameNum is not None: | 54 if args.lastFrameNum is not None: |
46 lastFrameNum = args.lastFrameNum | 55 lastFrameNum = args.lastFrameNum |
47 | 56 elif args.configFilename is not None: |
48 # TODO add option to refine position with mask for vehicles | 57 lastFrameNum = params.lastFrameNum |
| 58 else: |
| 59 lastFrameNum = inf |
| 60 |
| 61 # TODO use mask |
| 62 # TODO add option to refine position with mask for vehicles, to save different positions |
49 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html | 63 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html |
50 | 64 |
51 # use 2 x bytetrack track buffer to remove objects from existing ones | 65 # use 2 x bytetrack track buffer to remove objects from existing ones |
52 | 66 |
53 # Load a model | 67 # Load a model |
54 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt | 68 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt |
55 # seg could be used on cropped image... if can be loaded and kept in memory | 69 # seg could be used on cropped image... if can be loaded and kept in memory |
56 # model = YOLO('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get' | 70 # model = YOLOX('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get' |
57 | 71 |
58 # Track with the model | 72 # Track with the model |
59 if args.display: | 73 if args.display: |
60 windowName = 'frame' | 74 windowName = 'frame' |
61 cv2.namedWindow(windowName, cv2.WINDOW_NORMAL) | 75 cv2.namedWindow(windowName, cv2.WINDOW_NORMAL) |
62 | 76 |
63 capture = cv2.VideoCapture(args.videoFilename) | 77 capture = cv2.VideoCapture(videoFilename) |
64 objects = {} | 78 objects = {} |
65 featureNum = 1 | 79 featureNum = 1 |
66 frameNum = args.firstFrameNum | 80 frameNum = firstFrameNum |
67 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum) | 81 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum) |
68 lastFrameNum = args.lastFrameNum | |
69 | 82 |
70 success, frame = capture.read() | 83 success, frame = capture.read() |
71 if not success: | 84 if not success: |
72 print('Input {} could not be read. Exiting'.format(args.videoFilename)) | 85 print('Input {} could not be read. Exiting'.format(args.videoFilename)) |
73 import sys; sys.exit() | 86 import sys; sys.exit() |
74 | 87 |
75 results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), persist=True, verbose=False) | 88 results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), conf = args.confidence, persist=True, verbose=False) |
76 while capture.isOpened() and success and frameNum <= lastFrameNum: | 89 while capture.isOpened() and success and frameNum <= lastFrameNum: |
77 result = results[0] | 90 result = results[0] |
78 if frameNum %10 == 0: | 91 if frameNum %10 == 0: |
79 print(frameNum, len(result.boxes), 'objects') | 92 print(frameNum, len(result.boxes), 'objects') |
80 for box in result.boxes: | 93 for box in result.boxes: |
81 #print(box.cls, box.id, box.xyxy) | |
82 if box.id is not None: # None are objects with low confidence | 94 if box.id is not None: # None are objects with low confidence |
83 num = int(box.id.item()) | 95 num = int(box.id.item()) |
84 #xyxy = box.xyxy[0].tolist() | |
85 if num in objects: | 96 if num in objects: |
86 objects[num].timeInterval.last = frameNum | 97 objects[num].timeInterval.last = frameNum |
87 objects[num].features[0].timeInterval.last = frameNum | 98 objects[num].features[0].timeInterval.last = frameNum |
88 objects[num].features[1].timeInterval.last = frameNum | 99 objects[num].features[1].timeInterval.last = frameNum |
89 objects[num].bboxes[frameNum] = copy(box.xyxy) | 100 objects[num].bboxes[frameNum] = copy(box.xyxy) |
90 objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())]) | 101 objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())]) |
91 objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) | 102 objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) # min |
92 objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) | 103 objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) # max |
93 else: | 104 else: |
94 inter = moving.TimeInterval(frameNum, frameNum) | 105 inter = moving.TimeInterval(frameNum, frameNum) |
95 objects[num] = moving.MovingObject(num, inter) | 106 objects[num] = moving.MovingObject(num, inter) |
96 objects[num].bboxes = {frameNum: copy(box.xyxy)} | 107 objects[num].bboxes = {frameNum: copy(box.xyxy)} |
97 objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]] | 108 objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]] |
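The hunk above keeps two features per track id, holding the top-left (min) and bottom-right (max) bounding box corners at each frame. Stripped of that bookkeeping, the core ultralytics tracking loop is roughly the following sketch (the model weights and file names are placeholders, not from the script):

```python
import cv2
from ultralytics import YOLO

model = YOLO('yolov8x.pt')               # placeholder detector weights
capture = cv2.VideoCapture('video.mp4')  # placeholder video file
success, frame = capture.read()
while success:
    # persist=True carries tracker state across calls, so box.id is a
    # stable track identifier rather than a per-frame detection index
    results = model.track(frame, tracker='bytetrack.yaml', persist=True, verbose=False)
    for box in results[0].boxes:
        if box.id is not None:  # None for detections the tracker did not confirm
            print(int(box.id.item()), int(box.cls.item()), box.xyxy[0].tolist())
    success, frame = capture.read()
capture.release()
```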
111 | 122 |
112 # classification | 123 # classification |
113 for num, obj in objects.items(): | 124 for num, obj in objects.items(): |
114 obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed? | 125 obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed? |
115 | 126 |
116 # add quality control: avoid U-turns | 127 # TODO add quality control: avoid U-turns |
117 | 128 |
118 # merge bikes and people | 129 # merge bikes and people |
119 twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])] | 130 twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])] |
120 pedestrians = [num for num, obj in objects.items() if obj.getUserType() == moving.userType2Num['pedestrian']] | 131 pedestrians = [num for num, obj in objects.items() if obj.getUserType() == moving.userType2Num['pedestrian']] |
121 | 132 |
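The association code itself falls in the part of the file this view skips, though its conclusion (mergeObjects(tw, ped)) is visible in the next hunk. Given the box_iou and linear_sum_assignment imports and the --cyclist-iou / --cyclist-match-prop options, it presumably matches two-wheel and pedestrian tracks by bounding box overlap. A rough single-frame sketch of that kind of matching (the helper name and exact cost construction are assumptions):

```python
import torch
from scipy.optimize import linear_sum_assignment
from torchvision.ops import box_iou

def match_bikes_to_pedestrians(bike_boxes, ped_boxes, iou_threshold=0.15):
    '''bike_boxes (N, 4) and ped_boxes (M, 4) in xyxy format, for one frame.
    Returns (bike index, pedestrian index) pairs above the IoU threshold.'''
    ious = box_iou(bike_boxes, ped_boxes).numpy()
    rows, cols = linear_sum_assignment(1. - ious)  # Hungarian algorithm minimizes cost
    return [(r, c) for r, c in zip(rows, cols) if ious[r, c] >= iou_threshold]

bikes = torch.tensor([[10., 10., 50., 80.]])
peds = torch.tensor([[15., 5., 55., 70.], [200., 200., 240., 280.]])
print(match_bikes_to_pedestrians(bikes, peds))  # [(0, 0)]
```

In the script, the default threshold is 0.15 (--cyclist-iou), and matches are presumably accumulated over the frames where both tracks exist and compared to --cyclist-match-prop before a bike and a pedestrian get merged into one cyclist.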
186 ped = objects[pedestrians[pedInd]] | 197 ped = objects[pedestrians[pedInd]] |
187 mergeObjects(tw, ped) | 198 mergeObjects(tw, ped) |
188 del objects[pedestrians[pedInd]] | 199 del objects[pedestrians[pedInd]] |
189 #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples) | 200 #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples) |
190 | 201 |
191 # interpolate and generate velocity (?) for the features (bboxes) before saving | 202 # interpolate and save image coordinates |
192 for num, obj in objects.items(): | 203 for num, obj in objects.items(): |
193 #obj.features[1].timeInterval = copy(obj.getTimeInterval()) | |
194 for f in obj.getFeatures(): | 204 for f in obj.getFeatures(): |
195 if f.length() != len(f.tmpPositions): # interpolate | 205 if f.length() != len(f.tmpPositions): # interpolate |
196 f.positions = moving.Trajectory.fromPointDict(f.tmpPositions) | 206 f.positions = moving.Trajectory.fromPointDict(f.tmpPositions) |
197 #obj.features[1].positions = moving.Trajectory.fromPointDict(obj.features[1].tmpPositions) | |
198 else: | 207 else: |
199 f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values())) | 208 f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values())) |
200 #obj.features[1].positions = moving.Trajectory.fromPointList(list(obj.features[1].tmpPositions.values())) | 209 if not args.notSavingImageCoordinates: |
201 | 210 storage.saveTrajectoriesToSqlite(utils.removeExtension(args.databaseFilename)+'-bb.sqlite', list(objects.values()), 'object') |
| 211 # project, smooth and save |
| 212 for num, obj in objects.items(): |
| 213 features = obj.getFeatures() |
| 214 if moving.userTypeNames[obj.getUserType()] == 'pedestrian': |
| 215 assert len(features) == 2 |
| 216 t1 = features[0].getPositions() |
| 217 t2 = features[1].getPositions() |
| 218 t = [[(p1.x+p2.x)/2., max(p1.y, p2.y)] for p1, p2 in zip(t1, t2)] |
| 219 else: |
| 220 t = [] |
| 221 for instant in obj.getTimeInterval(): |
| 222 points = [] |
| 223 for f in features: |
| 224 if f.existsAtInstant(instant): |
| 225 points.append(f.getPositionAtInstant(instant)) |
| 226 t.append(moving.Point.agg(points, np.mean).aslist()) |
| 227 #t = sum([f.getPositions().asArray() for f in features])/len(features) |
| 228 #t = (moving.Trajectory.add(t1, t2)*0.5).asArray() |
| 229 projected = cvutils.imageToWorldProject(np.array(t).T, intrinsicCameraMatrix, distortionCoefficients, homography) |
| 230 featureNum = features[0].getNum() |
| 231 obj.features=[moving.MovingObject(featureNum, obj.getTimeInterval(), moving.Trajectory(projected.tolist()))] |
| 232 obj.featureNumbers = [featureNum] |
202 storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object') | 233 storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object') |
203 | 234 |
204 # todo save bbox and mask to study localization / representation | 235 # todo save bbox and mask to study localization / representation |
205 # apply quality checks deviation and acceleration bounds? | 236 # apply quality checks deviation and acceleration bounds? |
206 | 237 |
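Finally, the point that gets projected per object (stated in the new argparse epilog and implemented in the projection hunk above): pedestrians use the middle of the bounding box's bottom edge, a common proxy for the ground contact point, while other classes average the two corner features. The pedestrian case, in isolation:

```python
def pedestrian_ground_point(xmin, ymin, xmax, ymax):
    '''Middle of the bounding box bottom edge; image y grows downward,
    so the bottom edge has the larger y coordinate.'''
    return ((xmin + xmax) / 2., max(ymin, ymax))

print(pedestrian_ground_point(100., 50., 140., 170.))  # (120.0, 170.0)
```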