comparison scripts/dltrack.py @ 1238:b684135d817f

version 1 of dltrack without coordinate projection
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 03 Oct 2023 16:51:39 -0400
parents 31a441efca6c
children bb14f919d1cb

comparing 1237:31a441efca6c with 1238:b684135d817f

 from torchvision.ops import box_iou
 import cv2
 
 from trafficintelligence import cvutils, moving, storage, utils
 
-parser = argparse.ArgumentParser(description='The program tracks objects following the ultralytics yolo executable.')#, epilog = 'Either the configuration filename or the other parameters (at least video and database filenames) need to be provided.')
+parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trackers.')#, epilog = 'Either the configuration filename or the other parameters (at least video and database filenames) need to be provided.')
 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file', required = True)
 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file', required = True)
 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True)
 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True)
-parser.add_argument('--display', dest = 'display', help = 'show the results (careful with long videos, risk of running out of memory)', action = 'store_true')
+parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix', default = 'homography.txt')
+parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file')
+parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true')
+parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file')
+parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float)
+parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true')
 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of the first frame to process', type = int, default = 0)
 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of the last frame to process', type = int, default = float('Inf'))
 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person must be classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2)
 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15)
 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3)
-# mask!!
+parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with the same pedestrian', type = float, default = 0.05)
 args = parser.parse_args()
 
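The camera-related options above are only declared in this changeset; a minimal sketch, not part of the changeset, of how they could feed OpenCV's undistortion. The filenames and coefficients are taken from the commented-out defaults removed just below, and the assumption that the intrinsic matrix is a 3x3 plain-text file loadable with numpy is mine:

    import numpy as np
    import cv2

    # assumed: 3x3 intrinsic matrix in a plain-text file, coefficients as listed in the removed config block
    intrinsicCameraMatrix = np.loadtxt('intrinsic-camera.txt')
    distortionCoefficients = np.array([-0.11759321, 0.0148536, 0.00030756, -0.00020578, -0.00091816])
    capture = cv2.VideoCapture('laurier.avi')
    width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # compute the pixel remapping once, then apply it to every frame read from the video
    map1, map2 = cv2.initUndistortRectifyMap(intrinsicCameraMatrix, distortionCoefficients, None,
                                             intrinsicCameraMatrix, (width, height), cv2.CV_32FC1)
    success, frame = capture.read()
    if success:
        undistortedFrame = cv2.remap(frame, map1, map2, interpolation=cv2.INTER_LINEAR)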
-# required functionality?
-# # filename of the video to process (can be images, eg image%04d.png)
-# video-filename = laurier.avi
-# # filename of the database where results are saved
-# database-filename = laurier.sqlite
-# # filename of the homography matrix
-# homography-filename = laurier-homography.txt
-# # filename of the camera intrinsic matrix
-# intrinsic-camera-filename = intrinsic-camera.txt
-# # -0.11759321 0.0148536 0.00030756 -0.00020578 -0.00091816
-# distortion-coefficients = -0.11759321
-# distortion-coefficients = 0.0148536
-# distortion-coefficients = 0.00030756
-# distortion-coefficients = -0.00020578
-# distortion-coefficients = -0.00091816
-# # undistorted image multiplication
-# undistorted-size-multiplication = 1.31
-# # Interpolation method for remapping image when correcting for distortion: 0 for INTER_NEAREST - a nearest-neighbor interpolation; 1 for INTER_LINEAR - a bilinear interpolation (used by default); 2 for INTER_CUBIC - a bicubic interpolation over 4x4 pixel neighborhood; 3 for INTER_LANCZOS4
-# interpolation-method = 1
-# # filename of the mask image (where features are detected)
-# mask-filename = none
-# # undistort the video for feature tracking
-# undistort = false
-# # load features from database
-# load-features = false
-# # display trajectories on the video
-# display = false
-# # original video frame rate (number of frames/s)
-# video-fps = 29.97
-# # number of digits of precision for all measurements derived from video
-# # measurement-precision = 3
-# # first frame to process
-# frame1 = 0
-# # number of frame to process: 0 means processing all frames
-# nframes = 0
-
 # TODO add option to refine position with mask for vehicles
 
 # use 2 x bytetrack track buffer to remove objects from existing ones
-
-
-# check if one can go to specific frame https://docs.ultralytics.com/modes/track/#persisting-tracks-loop
 
 # Load a model
 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt
 # seg could be used on cropped image... if can be loaded and kept in memory
 # model = YOLO('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get'
[...]
 if args.display:
     windowName = 'frame'
     cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
 
 capture = cv2.VideoCapture(args.videoFilename)
-#results = model.track(source=args.videoFilename, tracker="/home/nicolas/Research/Data/classification-models/bytetrack.yaml", classes=list(moving.cocoTypeNames.keys()), stream=True)
-objects = []
-currentObjects = {}
-featureNum = 0
-
+objects = {}
+featureNum = 1
 frameNum = args.firstFrameNum
 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum)
 lastFrameNum = args.lastFrameNum
 
 success, frame = capture.read()
 results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), persist=True, verbose=False)
 # create object with user type and list of 3 features (bottom ones and middle) + projection
 while capture.isOpened() and success and frameNum <= lastFrameNum:
     #for frameNum, result in enumerate(results):
     result = results[0]
-    print(frameNum, len(result.boxes), 'objects')
+    if frameNum %10 == 0:
+        print(frameNum, len(result.boxes), 'objects')
     for box in result.boxes:
         #print(box.cls, box.id, box.xyxy)
         if box.id is not None: # None are objects with low confidence
             num = int(box.id.item())
             #xyxy = box.xyxy[0].tolist()
-            if num in currentObjects:
-                currentObjects[num].timeInterval.last = frameNum
-                currentObjects[num].bboxes[frameNum] = copy(box.xyxy)
-                currentObjects[num].userTypes.append(moving.coco2Types[int(box.cls.item())])
-                currentObjects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item())
-                currentObjects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item())
+            if num in objects:
+                objects[num].timeInterval.last = frameNum
+                objects[num].features[0].timeInterval.last = frameNum
+                objects[num].features[1].timeInterval.last = frameNum
+                objects[num].bboxes[frameNum] = copy(box.xyxy)
+                objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())])
+                objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item())
+                objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item())
             else:
-                inter = moving.TimeInterval(frameNum,frameNum)
-                currentObjects[num] = moving.MovingObject(num, inter)
-                currentObjects[num].bboxes = {frameNum: copy(box.xyxy)}
-                currentObjects[num].userTypes = [moving.coco2Types[int(box.cls.item())]]
-                currentObjects[num].features = [moving.MovingObject(featureNum), moving.MovingObject(featureNum+1)]
-                currentObjects[num].featureNumbers = [featureNum, featureNum+1]
-                currentObjects[num].features[0].tmpPositions = {frameNum: moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item())}
-                currentObjects[num].features[1].tmpPositions = {frameNum: moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item())}
+                inter = moving.TimeInterval(frameNum, frameNum)
+                objects[num] = moving.MovingObject(num, inter)
+                objects[num].bboxes = {frameNum: copy(box.xyxy)}
+                objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]]
+                objects[num].features = [moving.MovingObject(featureNum, copy(inter)), moving.MovingObject(featureNum+1, copy(inter))]
+                objects[num].featureNumbers = [featureNum, featureNum+1]
+                objects[num].features[0].tmpPositions = {frameNum: moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item())}
+                objects[num].features[1].tmpPositions = {frameNum: moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item())}
             featureNum += 2
     if args.display:
         cvutils.cvImshow(windowName, result.plot()) # original image in orig_img
         key = cv2.waitKey()
         if cvutils.quitKey(key):
[...]
     frameNum += 1
     success, frame = capture.read()
     results = model.track(frame, persist=True)
 
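For orientation, a hypothetical illustration, not part of the changeset, of what the loop above accumulates for one tracked object (an object numbered 3, assumed to be the first one created, seen at frames 100-102; all values are made up):

    # objects[3].timeInterval              -> moving.TimeInterval(100, 102)
    # objects[3].bboxes                    -> {100: tensor([[x1, y1, x2, y2]]), 101: ..., 102: ...}
    # objects[3].userTypes                 -> [2, 2, 4]  (one moving.coco2Types entry per detection)
    # objects[3].features[0].tmpPositions  -> {100: moving.Point(x1, y1), ...}  top-left corners
    # objects[3].features[1].tmpPositions  -> {100: moving.Point(x2, y2), ...}  bottom-right corners
    # objects[3].featureNumbers            -> [1, 2]  (global feature counter, starting at 1 in this revision)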
 # classification
-for num, obj in currentObjects.items():
-    #obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed?
-    userTypeStats = Counter(obj.userTypes)
-    if (4 in userTypeStats or (3 in userTypeStats and 4 in userTypeStats and userTypeStats[3]<=userTypeStats[4])) and userTypeStats[3]+userTypeStats[4] > args.bikeProportion*userTypeStats.total(): # 3 is motorcycle and 4 is cyclist (verif if not turning all motorbike into cyclists)
-        obj.setUserType(4)
-    else:
-        obj.setUserType(userTypeStats.most_common()[0][0])
-
+for num, obj in objects.items():
+    obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed?
+
+# add quality control: avoid U-turns
+
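The cyclist heuristic removed here reappears further down for pedestrians without a matching bike; a toy worked example, not part of the changeset, of the same condition with made-up counts, the default bikeProportion of 0.2, and the type numbers from the script's comments (2 pedestrian, 3 motorcycle, 4 cyclist):

    from collections import Counter

    # an object detected as a person 15 times and as a bicycle 5 times over 20 frames (made-up counts)
    userTypeStats = Counter({2: 15, 4: 5})
    bikeProportion = 0.2
    isCyclist = (4 in userTypeStats or (3 in userTypeStats and 4 in userTypeStats and userTypeStats[3] <= userTypeStats[4])) \
                and userTypeStats[3]+userTypeStats[4] > bikeProportion*userTypeStats.total()
    print(isCyclist)  # True: 5 of 20 frames (25%) exceed the 20% threshold, so the object becomes a cyclist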
 # merge bikes and people
-twowheels = [num for num, obj in currentObjects.items() if obj.getUserType() in (3,4)]
-pedestrians = [num for num, obj in currentObjects.items() if obj.getUserType() == 2]
+twowheels = [num for num, obj in objects.items() if obj.getUserType() in (3,4)]
+pedestrians = [num for num, obj in objects.items() if obj.getUserType() == 2]
 
+def mergeObjects(obj1, obj2):
+    obj1.features = obj1.features+obj2.features
+    obj1.featureNumbers = obj1.featureNumbers+obj2.featureNumbers
+    obj1.timeInterval = moving.TimeInterval(min(obj1.getFirstInstant(), obj2.getFirstInstant()), max(obj1.getLastInstant(), obj2.getLastInstant()))
+
 costs = []
 for twInd in twowheels:
-    tw = currentObjects[twInd]
+    tw = objects[twInd]
+    tw.nBBoxes = len(tw.bboxes)
     twCost = []
     for pedInd in pedestrians:
-        ped = currentObjects[pedInd]
+        ped = objects[pedInd]
         nmatches = 0
         for t in tw.bboxes:
             if t in ped.bboxes:
                 #print(tw.num, ped.num, t, box_iou(tw.bboxes[t], ped.bboxes[t]))
-                if box_iou(tw.bboxes[t], ped.bboxes[t]).item() > args.cyclistIou:
+                if not tw.commonTimeInterval(ped).empty() and box_iou(tw.bboxes[t], ped.bboxes[t]).item() > args.cyclistIou:
                     nmatches += 1
-        twCost.append(nmatches/len(tw.bboxes))
+        twCost.append(nmatches/tw.nBBoxes)
     costs.append(twCost)
 
 costs = -np.array(costs)
+
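For reference, a toy example, not part of the changeset, of torchvision's box_iou as used in the cost loop above, on made-up (x1, y1, x2, y2) boxes; the proportions are negated because the assignment step further down minimizes total cost:

    import torch
    from torchvision.ops import box_iou

    bikeBox = torch.tensor([[100., 100., 200., 300.]])  # made-up bike bounding box
    pedBox = torch.tensor([[120., 110., 210., 310.]])   # made-up pedestrian bounding box
    print(box_iou(bikeBox, pedBox).item())  # ~0.67, above the default cyclistIou of 0.15, so this frame counts as a match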
 # before matching, scan for pedestrians with good non-overlapping temporal match with different bikes
-for pedInd in costs.shape[1]:
-    if sum(costs[:,pedInd] < -args.cyclistMatchingProportion) >1:
-        twIndices = np.nonzero(costs[:,pedInd] < -args.cyclistMatchingProportion)
-        # we have to compute temporal overlaps with everyone else, then remove the ones with the most overlap (sum over column) one by one until there is little left
-        temporalOverlaps = np.zeros((len(twIndices),len(twIndices)))
-
+for pedInd in range(costs.shape[1]):
+    nMatchedBikes = (costs[:,pedInd] < -args.cyclistMatchingProportion).sum()
+    if nMatchedBikes == 0: # peds that have no bike matching: see if they have been classified as bikes sometimes
+        obj = objects[pedestrians[pedInd]] # the pedestrian being examined
+        userTypeStats = Counter(obj.userTypes)
+        if (4 in userTypeStats or (3 in userTypeStats and 4 in userTypeStats and userTypeStats[3]<=userTypeStats[4])) and userTypeStats[3]+userTypeStats[4] > args.bikeProportion*userTypeStats.total(): # 3 is motorcycle and 4 is cyclist (verif if not turning all motorbike into cyclists)
+            obj.setUserType(4)
+    elif nMatchedBikes > 1: # try to merge bikes first
+        twIndices = np.nonzero(costs[:,pedInd] < -args.cyclistMatchingProportion)[0]
+        # we have to compute temporal overlaps of all 2 wheels among themselves, then remove the ones with the most overlap (sum over column) one by one until there is little left
+        nTwoWheels = len(twIndices)
+        twTemporalOverlaps = np.zeros((nTwoWheels,nTwoWheels))
+        for i in range(nTwoWheels):
+            for j in range(i):
+                twi = objects[twowheels[twIndices[i]]]
+                twj = objects[twowheels[twIndices[j]]]
+                twTemporalOverlaps[i,j] = len(set(twi.bboxes).intersection(set(twj.bboxes)))/max(len(twi.bboxes), len(twj.bboxes))
+                #twTemporalOverlaps[j,i] = twTemporalOverlaps[i,j]
+        tw2merge = list(range(nTwoWheels))
+        while len(tw2merge)>0 and (twTemporalOverlaps[np.ix_(tw2merge, tw2merge)] > args.maxTemporalOverlap).sum(0).max() >= 2:
+            i = (twTemporalOverlaps[np.ix_(tw2merge, tw2merge)] > args.maxTemporalOverlap).sum(0).argmax()
+            del tw2merge[i]
+        twIndices = [twIndices[i] for i in tw2merge]
+        tw1 = objects[twowheels[twIndices[0]]]
+        twCost = costs[twIndices[0],:]*tw1.nBBoxes
+        nBBoxes = tw1.nBBoxes
+        for twInd in twIndices[1:]:
+            mergeObjects(tw1, objects[twowheels[twInd]])
+            twCost = twCost + costs[twInd,:]*objects[twowheels[twInd]].nBBoxes
+            nBBoxes += objects[twowheels[twInd]].nBBoxes
+        twIndicesToKeep = list(range(costs.shape[0]))
+        for twInd in twIndices[1:]:
+            twIndicesToKeep.remove(twInd)
+            del objects[twowheels[twInd]]
+        twowheels = [twowheels[i] for i in twIndicesToKeep]
+        costs = costs[twIndicesToKeep,:]
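A toy illustration, not part of the changeset, of the greedy removal above: np.ix_ extracts the pairwise-overlap submatrix for the candidates still in tw2merge, and the column sums count how many other candidates each one overlaps with (overlap values are made up):

    import numpy as np

    # made-up pairwise temporal overlaps between 3 candidate bikes (only the lower triangle is filled, as above)
    twTemporalOverlaps = np.array([[0. , 0. , 0. ],
                                   [0.8, 0. , 0. ],
                                   [0.1, 0.7, 0. ]])
    tw2merge = [0, 1, 2]
    counts = (twTemporalOverlaps[np.ix_(tw2merge, tw2merge)] > 0.05).sum(0)
    print(counts)  # [2 1 0]: candidate 0 overlaps two others and is dropped first; the loop stops once no column sum reaches 2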
 
 twIndices, matchingPedIndices = linear_sum_assignment(costs)
 for twInd, pedInd in zip(twIndices, matchingPedIndices): # caution indices in the cost matrix
     if -costs[twInd, pedInd] >= args.cyclistMatchingProportion:
-        tw = currentObjects[twowheels[twInd]]
-        ped = currentObjects[pedestrians[pedInd]]
-        timeInstants = set(tw.bboxes).union(set(ped.bboxes))
-        for t in timeInstants:
-            if t in tw.bboxes and t in ped.bboxes:
-                tw.features[0].tmpPositions[t] = moving.Point(min(tw.features[0].tmpPositions[t].x, ped.features[0].tmpPositions[t].x),
-                                                              min(tw.features[0].tmpPositions[t].y, ped.features[0].tmpPositions[t].y))
-                tw.features[1].tmpPositions[t] = moving.Point(max(tw.features[1].tmpPositions[t].x, ped.features[1].tmpPositions[t].x),
-                                                              max(tw.features[1].tmpPositions[t].y, ped.features[1].tmpPositions[t].y))
-            elif t in ped.bboxes:
-                tw.features[0].tmpPositions[t] = ped.features[0].tmpPositions[t]
-                tw.features[1].tmpPositions[t] = ped.features[1].tmpPositions[t]
-        tw.timeInterval = moving.TimeInterval(min(tw.getFirstInstant(), ped.getFirstInstant()), max(tw.getLastInstant(), ped.getLastInstant()))
-        del currentObjects[pedestrians[pedInd]]
-        # check pedestrian/bike overlap: if long outside the overlap, change mode (find examples)
-
-# interpolate and generate velocity (?) before saving
-for num, obj in currentObjects.items():
-    obj.features[0].timeInterval = copy(obj.getTimeInterval())
-    obj.features[1].timeInterval = copy(obj.getTimeInterval())
-    if obj.length() != len(obj.features[0].tmpPositions): # interpolate
-        obj.features[0].positions = moving.Trajectory.fromPointDict(obj.features[0].tmpPositions)
-        obj.features[1].positions = moving.Trajectory.fromPointDict(obj.features[1].tmpPositions)
-    else:
-        obj.features[0].positions = moving.Trajectory.fromPointList(list(obj.features[0].tmpPositions.values()))
-        obj.features[1].positions = moving.Trajectory.fromPointList(list(obj.features[1].tmpPositions.values()))
-
-storage.saveTrajectoriesToSqlite(args.databaseFilename, list(currentObjects.values()), 'object')
+        tw = objects[twowheels[twInd]]
+        ped = objects[pedestrians[pedInd]]
+        mergeObjects(tw, ped)
+        del objects[pedestrians[pedInd]]
+        #TODO check pedestrian/bike overlap: if long outside the overlap, change mode (find examples)
+
+# interpolate and generate velocity (?) for the features (bboxes) before saving
+for num, obj in objects.items():
+    #obj.features[1].timeInterval = copy(obj.getTimeInterval())
+    for f in obj.getFeatures():
+        if f.length() != len(f.tmpPositions): # interpolate
+            f.positions = moving.Trajectory.fromPointDict(f.tmpPositions)
+            #obj.features[1].positions = moving.Trajectory.fromPointDict(obj.features[1].tmpPositions)
+        else:
+            f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values()))
+            #obj.features[1].positions = moving.Trajectory.fromPointList(list(obj.features[1].tmpPositions.values()))
+
+storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object')
 
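A toy example, not part of the changeset, of the assignment step above, assuming linear_sum_assignment comes from scipy.optimize (its import lies outside the compared lines); rows are bikes, columns are pedestrians, and entries are minus the proportion of matched frames:

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    costs = -np.array([[0.9, 0.1],   # made-up proportions: bike 0 overlaps ped 0 in 90% of its frames
                       [0.0, 0.6]])  # bike 1 overlaps ped 1 in 60% of its frames
    twIndices, matchingPedIndices = linear_sum_assignment(costs)
    print(twIndices, matchingPedIndices)  # [0 1] [0 1]: bike 0 <-> ped 0, bike 1 <-> ped 1
    # only pairs with -costs[twInd, pedInd] >= cyclistMatchingProportion (default 0.3) are merged above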
 # todo save bbox and mask to study localization / representation
 # apply quality checks deviation and acceleration bounds?
+
+# def mergeBBoxes(tw, ped):
+#     'merges ped into tw (2nd obj into first obj)'
+#     timeInstants = set(tw.bboxes).union(set(ped.bboxes))
+#     for t in timeInstants:
+#         if t in tw.bboxes and t in ped.bboxes:
+#             tw.features[0].tmpPositions[t] = moving.Point(min(tw.features[0].tmpPositions[t].x, ped.features[0].tmpPositions[t].x),
+#                                                           min(tw.features[0].tmpPositions[t].y, ped.features[0].tmpPositions[t].y))
+#             tw.features[1].tmpPositions[t] = moving.Point(max(tw.features[1].tmpPositions[t].x, ped.features[1].tmpPositions[t].x),
+#                                                           max(tw.features[1].tmpPositions[t].y, ped.features[1].tmpPositions[t].y))
+#         elif t in ped.bboxes:
+#             tw.features[0].tmpPositions[t] = ped.features[0].tmpPositions[t]
+#             tw.features[1].tmpPositions[t] = ped.features[1].tmpPositions[t]
+#     tw.timeInterval = moving.TimeInterval(min(tw.getFirstInstant(), ped.getFirstInstant()), max(tw.getLastInstant(), ped.getLastInstant()))