comparison scripts/dltrack.py @ 1246:2397de73770d

dltrack saves after projecting coordinates
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Fri, 09 Feb 2024 17:47:33 -0500
parents 371c718e57d7
children 439207b6c146
comparison
equal deleted inserted replaced
1245:371c718e57d7 1246:2397de73770d
1 #! /usr/bin/env python3 1 #! /usr/bin/env python3
2 # from https://docs.ultralytics.com/modes/track/ 2 # from https://docs.ultralytics.com/modes/track/
3 import sys, argparse 3 import sys, argparse
4 from math import inf
4 from copy import copy 5 from copy import copy
5 from collections import Counter 6 from collections import Counter
6 import numpy as np 7 import numpy as np
7 from scipy.optimize import linear_sum_assignment 8 from scipy.optimize import linear_sum_assignment
8 from ultralytics import YOLO 9 from ultralytics import YOLO
10 from torchvision.ops import box_iou 11 from torchvision.ops import box_iou
11 import cv2 12 import cv2
12 13
13 from trafficintelligence import cvutils, moving, storage, utils 14 from trafficintelligence import cvutils, moving, storage, utils
14 15
15 parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trakcers.') 16 parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trackers.',
17 epilog= '''The models can be found in the Ultralytics model zoo,
18 eg YOLOv8 (https://docs.ultralytics.com/models/yolov8/).
19 The tracking models can be found also online
20 (https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers).
21 The choice is to project the middle of the bottom line for persons,
22 and the bounding box center otherwise.''')
16 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') 23 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file')
17 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)') 24 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)')
18 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)') 25 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)')
19 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True) 26 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True)
20 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True) 27 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True)
21 parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix', default = 'homography.txt') 28 parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix')
22 parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file') 29 #parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file')
23 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true') 30 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true')
24 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file') 31 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file')
25 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float) 32 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float)
26 parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true') 33 parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true')
27 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true') 34 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true')
28 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0) 35 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0)
29 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf')) 36 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf'))
30 parser.add_argument('--conf', dest = 'confindence', help = 'object confidence threshold for detection', type = float, default = 0.25) 37 parser.add_argument('--conf', dest = 'confidence', help = 'object confidence threshold for detection', type = float, default = 0.25)
31 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2) 38 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2)
32 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15) 39 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15)
33 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3) 40 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3)
34 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05) 41 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05)
35 42
36 args = parser.parse_args() 43 args = parser.parse_args()
37 params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) 44 params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args)
38 45
46 if args.homographyFilename is not None:
47 homography = np.loadtxt(args.homographyFilename)
39 if args.intrinsicCameraMatrixFilename is not None: 48 if args.intrinsicCameraMatrixFilename is not None:
40 intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename) 49 intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename)
41 if args.distortionCoefficients is not None: 50 if args.distortionCoefficients is not None:
42 distortionCoefficients = args.distortionCoefficients 51 distortionCoefficients = args.distortionCoefficients
43 if args.firstFrameNum is not None: 52 if args.firstFrameNum is not None:
44 firstFrameNum = args.firstFrameNum 53 firstFrameNum = args.firstFrameNum
45 if args.lastFrameNum is not None: 54 if args.lastFrameNum is not None:
46 lastFrameNum = args.lastFrameNum 55 lastFrameNum = args.lastFrameNum
47 56 elif args.configFilename is not None:
48 # TODO add option to refine position with mask for vehicles 57 lastFrameNum = params.lastFrameNum
58 else:
59 lastFrameNum = inf
60
61 # TODO use mask
62 # TODO add option to refine position with mask for vehicles, to save different positions
49 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html 63 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html
50 64
51 # use 2 x bytetrack track buffer to remove objects from existing ones 65 # use 2 x bytetrack track buffer to remove objects from existing ones
52 66
53 # Load a model 67 # Load a model
54 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt 68 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt
55 # seg could be used on cropped image... if can be loaded and kept in memory 69 # seg could be used on cropped image... if can be loaded and kept in memory
56 # model = YOLO('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get' 70 # model = YOLOX('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get'
57 71
58 # Track with the model 72 # Track with the model
59 if args.display: 73 if args.display:
60 windowName = 'frame' 74 windowName = 'frame'
61 cv2.namedWindow(windowName, cv2.WINDOW_NORMAL) 75 cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
62 76
63 capture = cv2.VideoCapture(args.videoFilename) 77 capture = cv2.VideoCapture(videoFilename)
64 objects = {} 78 objects = {}
65 featureNum = 1 79 featureNum = 1
66 frameNum = args.firstFrameNum 80 frameNum = firstFrameNum
67 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum) 81 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum)
68 lastFrameNum = args.lastFrameNum
69 82
70 success, frame = capture.read() 83 success, frame = capture.read()
71 if not success: 84 if not success:
72 print('Input {} could not be read. Exiting'.format(args.videoFilename)) 85 print('Input {} could not be read. Exiting'.format(args.videoFilename))
73 import sys; sys.exit() 86 import sys; sys.exit()
74 87
75 results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), persist=True, verbose=False) 88 results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), conf = args.confidence, persist=True, verbose=False)
76 while capture.isOpened() and success and frameNum <= lastFrameNum: 89 while capture.isOpened() and success and frameNum <= lastFrameNum:
77 result = results[0] 90 result = results[0]
78 if frameNum %10 == 0: 91 if frameNum %10 == 0:
79 print(frameNum, len(result.boxes), 'objects') 92 print(frameNum, len(result.boxes), 'objects')
80 for box in result.boxes: 93 for box in result.boxes:
81 #print(box.cls, box.id, box.xyxy)
82 if box.id is not None: # None are objects with low confidence 94 if box.id is not None: # None are objects with low confidence
83 num = int(box.id.item()) 95 num = int(box.id.item())
84 #xyxy = box.xyxy[0].tolist()
85 if num in objects: 96 if num in objects:
86 objects[num].timeInterval.last = frameNum 97 objects[num].timeInterval.last = frameNum
87 objects[num].features[0].timeInterval.last = frameNum 98 objects[num].features[0].timeInterval.last = frameNum
88 objects[num].features[1].timeInterval.last = frameNum 99 objects[num].features[1].timeInterval.last = frameNum
89 objects[num].bboxes[frameNum] = copy(box.xyxy) 100 objects[num].bboxes[frameNum] = copy(box.xyxy)
90 objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())]) 101 objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())])
91 objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) 102 objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) # min
92 objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) 103 objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) # max
93 else: 104 else:
94 inter = moving.TimeInterval(frameNum, frameNum) 105 inter = moving.TimeInterval(frameNum, frameNum)
95 objects[num] = moving.MovingObject(num, inter) 106 objects[num] = moving.MovingObject(num, inter)
96 objects[num].bboxes = {frameNum: copy(box.xyxy)} 107 objects[num].bboxes = {frameNum: copy(box.xyxy)}
97 objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]] 108 objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]]
111 122
112 # classification 123 # classification
113 for num, obj in objects.items(): 124 for num, obj in objects.items():
114 obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed? 125 obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed?
115 126
116 # add quality control: avoid U-turns 127 # TODO add quality control: avoid U-turns
117 128
118 # merge bikes and people 129 # merge bikes and people
119 twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])] 130 twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])]
120 pedestrians = [num for num, obj in objects.items() if obj.getUserType() == moving.userType2Num['pedestrian']] 131 pedestrians = [num for num, obj in objects.items() if obj.getUserType() == moving.userType2Num['pedestrian']]
121 132
186 ped = objects[pedestrians[pedInd]] 197 ped = objects[pedestrians[pedInd]]
187 mergeObjects(tw, ped) 198 mergeObjects(tw, ped)
188 del objects[pedestrians[pedInd]] 199 del objects[pedestrians[pedInd]]
189 #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples) 200 #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples)
190 201
191 # interpolate and generate velocity (?) for the features (bboxes) before saving 202 # interpolate and save image coordinates
192 for num, obj in objects.items(): 203 for num, obj in objects.items():
193 #obj.features[1].timeInterval = copy(obj.getTimeInterval())
194 for f in obj.getFeatures(): 204 for f in obj.getFeatures():
195 if f.length() != len(f.tmpPositions): # interpolate 205 if f.length() != len(f.tmpPositions): # interpolate
196 f.positions = moving.Trajectory.fromPointDict(f.tmpPositions) 206 f.positions = moving.Trajectory.fromPointDict(f.tmpPositions)
197 #obj.features[1].positions = moving.Trajectory.fromPointDict(obj.features[1].tmpPositions)
198 else: 207 else:
199 f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values())) 208 f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values()))
200 #obj.features[1].positions = moving.Trajectory.fromPointList(list(obj.features[1].tmpPositions.values())) 209 if not args.notSavingImageCoordinates:
201 210 storage.saveTrajectoriesToSqlite(utils.removeExtension(args.databaseFilename)+'-bb.sqlite', list(objects.values()), 'object')
211 # project, smooth and save
212 for num, obj in objects.items():
213 features = obj.getFeatures()
214 if moving.userTypeNames[obj.getUserType()] == 'pedestrian':
215 assert len(features) == 2
216 t1 = features[0].getPositions()
217 t2 = features[1].getPositions()
218 t = [[(p1.x+p2.x)/2., max(p1.y, p2.y)] for p1, p2 in zip(t1, t2)]
219 else:
220 t = []
221 for instant in obj.getTimeInterval():
222 points = []
223 for f in features:
224 if f.existsAtInstant(instant):
225 points.append(f.getPositionAtInstant(instant))
226 t.append(moving.Point.agg(points, np.mean).aslist())
227 #t = sum([f.getPositions().asArray() for f in features])/len(features)
228 #t = (moving.Trajectory.add(t1, t2)*0.5).asArray()
229 projected = cvutils.imageToWorldProject(np.array(t).T, intrinsicCameraMatrix, distortionCoefficients, homography)
230 featureNum = features[0].getNum()
231 obj.features=[moving.MovingObject(featureNum, obj.getTimeInterval(), moving.Trajectory(projected.tolist()))]
232 obj.featureNumbers = [featureNum]
202 storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object') 233 storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object')
203 234
204 # todo save bbox and mask to study localization / representation 235 # todo save bbox and mask to study localization / representation
205 # apply quality checks deviation and acceleration bounds? 236 # apply quality checks deviation and acceleration bounds?
206 237