traffic-intelligence: comparison of scripts/dltrack.py @ 1246:2397de73770d
dltrack saves after projecting coordinates
author: Nicolas Saunier <nicolas.saunier@polymtl.ca>
date: Fri, 09 Feb 2024 17:47:33 -0500
parents: 371c718e57d7
children: 439207b6c146
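What the commit does, in short: trajectories are now projected from image to world coordinates before being saved to the database, and the raw image-coordinate results go to a separate '-bb.sqlite' file unless --no-image-coordinates is given. A minimal sketch of such a projection step, assuming a plain 3x3 homography and no lens distortion (the script itself calls cvutils.imageToWorldProject, which additionally takes the intrinsic camera matrix and distortion coefficients):

```python
import numpy as np
import cv2

def image_to_world(points, homography):
    '''Project (x, y) pixel coordinates to world coordinates
    with a 3x3 homography matrix.'''
    pts = np.asarray(points, dtype=np.float64).reshape(-1, 1, 2)
    return cv2.perspectiveTransform(pts, homography).reshape(-1, 2)

homography = np.loadtxt('homography.txt')       # file passed with -o
trajectory = [(320., 540.), (324., 536.)]       # pixel positions over two frames
world = image_to_world(trajectory, homography)  # what now gets saved
print(world)
```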
1245:371c718e57d7 (before) | 1246:2397de73770d (after) |
---|---|
1 #! /usr/bin/env python3 | 1 #! /usr/bin/env python3 |
2 # from https://docs.ultralytics.com/modes/track/ | 2 # from https://docs.ultralytics.com/modes/track/ |
3 import sys, argparse | 3 import sys, argparse |
| 4 from math import inf |
4 from copy import copy | 5 from copy import copy |
5 from collections import Counter | 6 from collections import Counter |
6 import numpy as np | 7 import numpy as np |
7 from scipy.optimize import linear_sum_assignment | 8 from scipy.optimize import linear_sum_assignment |
8 from ultralytics import YOLO | 9 from ultralytics import YOLO |
10 from torchvision.ops import box_iou | 11 from torchvision.ops import box_iou |
11 import cv2 | 12 import cv2 |
12 | 13 |
13 from trafficintelligence import cvutils, moving, storage, utils | 14 from trafficintelligence import cvutils, moving, storage, utils |
14 | 15 |
15 parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trakcers.') | 16 parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trackers.', |
| 17 epilog= '''The models can be found in the Ultralytics model zoo, |
| 18 eg YOLOv8 (https://docs.ultralytics.com/models/yolov8/). |
| 19 The tracking models can be found also online |
| 20 (https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). |
| 21 The choice is to project the middle of the bottom line for persons, |
| 22 and the bounding box center otherwise.''') |
16 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') | 23 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') |
17 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)') | 24 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)') |
18 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)') | 25 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)') |
19 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True) | 26 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True) |
20 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True) | 27 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True) |
21 parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix', default = 'homography.txt') | 28 parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix') |
22 parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file') | 29 #parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file') |
23 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true') | 30 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true') |
24 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file') | 31 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file') |
25 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float) | 32 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float) |
26 parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true') | 33 parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true') |
27 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true') | 34 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true') |
28 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0) | 35 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0) |
29 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf')) | 36 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf')) |
30 parser.add_argument('--conf', dest = 'confindence', help = 'object confidence threshold for detection', type = float, default = 0.25) | 37 parser.add_argument('--conf', dest = 'confidence', help = 'object confidence threshold for detection', type = float, default = 0.25) |
31 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2) | 38 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2) |
32 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15) | 39 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15) |
33 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3) | 40 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3) |
34 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05) | 41 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05) |
35 | 42 |
36 args = parser.parse_args() | 43 args = parser.parse_args() |
37 params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) | 44 params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) |
38 | 45 |
| 46 if args.homographyFilename is not None: |
| 47 homography = np.loadtxt(args.homographyFilename) |
39 if args.intrinsicCameraMatrixFilename is not None: | 48 if args.intrinsicCameraMatrixFilename is not None: |
40 intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename) | 49 intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename) |
41 if args.distortionCoefficients is not None: | 50 if args.distortionCoefficients is not None: |
42 distortionCoefficients = args.distortionCoefficients | 51 distortionCoefficients = args.distortionCoefficients |
43 if args.firstFrameNum is not None: | 52 if args.firstFrameNum is not None: |
44 firstFrameNum = args.firstFrameNum | 53 firstFrameNum = args.firstFrameNum |
45 if args.lastFrameNum is not None: | 54 if args.lastFrameNum is not None: |
46 lastFrameNum = args.lastFrameNum | 55 lastFrameNum = args.lastFrameNum |
47 | 56 elif args.configFilename is not None: |
48 # TODO add option to refine position with mask for vehicles | 57 lastFrameNum = params.lastFrameNum |
| 58 else: |
| 59 lastFrameNum = inf |
| 60 |
| 61 # TODO use mask |
| 62 # TODO add option to refine position with mask for vehicles, to save different positions |
49 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html | 63 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html |
50 | 64 |
51 # use 2 x bytetrack track buffer to remove objects from existing ones | 65 # use 2 x bytetrack track buffer to remove objects from existing ones |
52 | 66 |
53 # Load a model | 67 # Load a model |
54 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt | 68 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt |
55 # seg could be used on cropped image... if can be loaded and kept in memory | 69 # seg could be used on cropped image... if can be loaded and kept in memory |
56 # model = YOLO('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get' | 70 # model = YOLOX('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get' |
57 | 71 |
58 # Track with the model | 72 # Track with the model |
59 if args.display: | 73 if args.display: |
60 windowName = 'frame' | 74 windowName = 'frame' |
61 cv2.namedWindow(windowName, cv2.WINDOW_NORMAL) | 75 cv2.namedWindow(windowName, cv2.WINDOW_NORMAL) |
62 | 76 |
63 capture = cv2.VideoCapture(args.videoFilename) | 77 capture = cv2.VideoCapture(videoFilename) |
64 objects = {} | 78 objects = {} |
65 featureNum = 1 | 79 featureNum = 1 |
66 frameNum = args.firstFrameNum | 80 frameNum = firstFrameNum |
67 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum) | 81 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum) |
68 lastFrameNum = args.lastFrameNum | |
69 | 82 |
70 success, frame = capture.read() | 83 success, frame = capture.read() |
71 if not success: | 84 if not success: |
72 print('Input {} could not be read. Exiting'.format(args.videoFilename)) | 85 print('Input {} could not be read. Exiting'.format(args.videoFilename)) |
73 import sys; sys.exit() | 86 import sys; sys.exit() |
74 | 87 |
75 results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), persist=True, verbose=False) | 88 results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), conf = args.confidence, persist=True, verbose=False) |
76 while capture.isOpened() and success and frameNum <= lastFrameNum: | 89 while capture.isOpened() and success and frameNum <= lastFrameNum: |
77 result = results[0] | 90 result = results[0] |
78 if frameNum %10 == 0: | 91 if frameNum %10 == 0: |
79 print(frameNum, len(result.boxes), 'objects') | 92 print(frameNum, len(result.boxes), 'objects') |
80 for box in result.boxes: | 93 for box in result.boxes: |
81 #print(box.cls, box.id, box.xyxy) | |
82 if box.id is not None: # None are objects with low confidence | 94 if box.id is not None: # None are objects with low confidence |
83 num = int(box.id.item()) | 95 num = int(box.id.item()) |
84 #xyxy = box.xyxy[0].tolist() | |
85 if num in objects: | 96 if num in objects: |
86 objects[num].timeInterval.last = frameNum | 97 objects[num].timeInterval.last = frameNum |
87 objects[num].features[0].timeInterval.last = frameNum | 98 objects[num].features[0].timeInterval.last = frameNum |
88 objects[num].features[1].timeInterval.last = frameNum | 99 objects[num].features[1].timeInterval.last = frameNum |
89 objects[num].bboxes[frameNum] = copy(box.xyxy) | 100 objects[num].bboxes[frameNum] = copy(box.xyxy) |
90 objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())]) | 101 objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())]) |
91 objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) | 102 objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) # min |
92 objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) | 103 objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) # max |
93 else: | 104 else: |
94 inter = moving.TimeInterval(frameNum, frameNum) | 105 inter = moving.TimeInterval(frameNum, frameNum) |
95 objects[num] = moving.MovingObject(num, inter) | 106 objects[num] = moving.MovingObject(num, inter) |
96 objects[num].bboxes = {frameNum: copy(box.xyxy)} | 107 objects[num].bboxes = {frameNum: copy(box.xyxy)} |
97 objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]] | 108 objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]] |
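The hunk above keeps two features per track id, holding the top-left (min) and bottom-right (max) bounding box corners at each frame. Stripped of that bookkeeping, the core ultralytics tracking loop is roughly the following sketch (the model weights and file names are placeholders, not from the script):

```python
import cv2
from ultralytics import YOLO

model = YOLO('yolov8x.pt')               # placeholder detector weights
capture = cv2.VideoCapture('video.mp4')  # placeholder video file
success, frame = capture.read()
while success:
    # persist=True carries tracker state across calls, so box.id is a
    # stable track identifier rather than a per-frame detection index
    results = model.track(frame, tracker='bytetrack.yaml', persist=True, verbose=False)
    for box in results[0].boxes:
        if box.id is not None:  # None for detections the tracker did not confirm
            print(int(box.id.item()), int(box.cls.item()), box.xyxy[0].tolist())
    success, frame = capture.read()
capture.release()
```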
111 | 122 |
112 # classification | 123 # classification |
113 for num, obj in objects.items(): | 124 for num, obj in objects.items(): |
114 obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed? | 125 obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed? |
115 | 126 |
116 # add quality control: avoid U-turns | 127 # TODO add quality control: avoid U-turns |
117 | 128 |
118 # merge bikes and people | 129 # merge bikes and people |
119 twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])] | 130 twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])] |
120 pedestrians = [num for num, obj in objects.items() if obj.getUserType() == moving.userType2Num['pedestrian']] | 131 pedestrians = [num for num, obj in objects.items() if obj.getUserType() == moving.userType2Num['pedestrian']] |
121 | 132 |
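The association code itself falls in the part of the file this view skips, though its conclusion (mergeObjects(tw, ped)) is visible in the next hunk. Given the box_iou and linear_sum_assignment imports and the --cyclist-iou / --cyclist-match-prop options, it presumably matches two-wheel and pedestrian tracks by bounding box overlap. A rough single-frame sketch of that kind of matching (the helper name and exact cost construction are assumptions):

```python
import torch
from scipy.optimize import linear_sum_assignment
from torchvision.ops import box_iou

def match_bikes_to_pedestrians(bike_boxes, ped_boxes, iou_threshold=0.15):
    '''bike_boxes (N, 4) and ped_boxes (M, 4) in xyxy format, for one frame.
    Returns (bike index, pedestrian index) pairs above the IoU threshold.'''
    ious = box_iou(bike_boxes, ped_boxes).numpy()
    rows, cols = linear_sum_assignment(1. - ious)  # Hungarian algorithm minimizes cost
    return [(r, c) for r, c in zip(rows, cols) if ious[r, c] >= iou_threshold]

bikes = torch.tensor([[10., 10., 50., 80.]])
peds = torch.tensor([[15., 5., 55., 70.], [200., 200., 240., 280.]])
print(match_bikes_to_pedestrians(bikes, peds))  # [(0, 0)]
```

In the script, the default threshold is 0.15 (--cyclist-iou), and matches are presumably accumulated over the frames where both tracks exist and compared to --cyclist-match-prop before a bike and a pedestrian get merged into one cyclist.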
186 ped = objects[pedestrians[pedInd]] | 197 ped = objects[pedestrians[pedInd]] |
187 mergeObjects(tw, ped) | 198 mergeObjects(tw, ped) |
188 del objects[pedestrians[pedInd]] | 199 del objects[pedestrians[pedInd]] |
189 #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples) | 200 #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples) |
190 | 201 |
191 # interpolate and generate velocity (?) for the features (bboxes) before saving | 202 # interpolate and save image coordinates |
192 for num, obj in objects.items(): | 203 for num, obj in objects.items(): |
193 #obj.features[1].timeInterval = copy(obj.getTimeInterval()) | |
194 for f in obj.getFeatures(): | 204 for f in obj.getFeatures(): |
195 if f.length() != len(f.tmpPositions): # interpolate | 205 if f.length() != len(f.tmpPositions): # interpolate |
196 f.positions = moving.Trajectory.fromPointDict(f.tmpPositions) | 206 f.positions = moving.Trajectory.fromPointDict(f.tmpPositions) |
197 #obj.features[1].positions = moving.Trajectory.fromPointDict(obj.features[1].tmpPositions) | |
198 else: | 207 else: |
199 f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values())) | 208 f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values())) |
200 #obj.features[1].positions = moving.Trajectory.fromPointList(list(obj.features[1].tmpPositions.values())) | 209 if not args.notSavingImageCoordinates: |
201 | 210 storage.saveTrajectoriesToSqlite(utils.removeExtension(args.databaseFilename)+'-bb.sqlite', list(objects.values()), 'object') |
| 211 # project, smooth and save |
| 212 for num, obj in objects.items(): |
| 213 features = obj.getFeatures() |
| 214 if moving.userTypeNames[obj.getUserType()] == 'pedestrian': |
| 215 assert len(features) == 2 |
| 216 t1 = features[0].getPositions() |
| 217 t2 = features[1].getPositions() |
| 218 t = [[(p1.x+p2.x)/2., max(p1.y, p2.y)] for p1, p2 in zip(t1, t2)] |
| 219 else: |
| 220 t = [] |
| 221 for instant in obj.getTimeInterval(): |
| 222 points = [] |
| 223 for f in features: |
| 224 if f.existsAtInstant(instant): |
| 225 points.append(f.getPositionAtInstant(instant)) |
| 226 t.append(moving.Point.agg(points, np.mean).aslist()) |
| 227 #t = sum([f.getPositions().asArray() for f in features])/len(features) |
| 228 #t = (moving.Trajectory.add(t1, t2)*0.5).asArray() |
| 229 projected = cvutils.imageToWorldProject(np.array(t).T, intrinsicCameraMatrix, distortionCoefficients, homography) |
| 230 featureNum = features[0].getNum() |
| 231 obj.features=[moving.MovingObject(featureNum, obj.getTimeInterval(), moving.Trajectory(projected.tolist()))] |
| 232 obj.featureNumbers = [featureNum] |
202 storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object') | 233 storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object') |
203 | 234 |
204 # todo save bbox and mask to study localization / representation | 235 # todo save bbox and mask to study localization / representation |
205 # apply quality checks deviation and acceleration bounds? | 236 # apply quality checks deviation and acceleration bounds? |
206 | 237 |
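Finally, the point that gets projected per object (stated in the new argparse epilog and implemented in the projection hunk above): pedestrians use the middle of the bounding box's bottom edge, a common proxy for the ground contact point, while other classes average the two corner features. The pedestrian case, in isolation:

```python
def pedestrian_ground_point(xmin, ymin, xmax, ymax):
    '''Middle of the bounding box bottom edge; image y grows downward,
    so the bottom edge has the larger y coordinate.'''
    return ((xmin + xmax) / 2., max(ymin, ymax))

print(pedestrian_ground_point(100., 50., 140., 170.))  # (120.0, 170.0)
```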