comparison scripts/dltrack.py @ 1249:2aa56b101041

added mask functionality for dltrack
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 15 Feb 2024 14:09:52 -0500
parents 439207b6c146
children 77fbd0e2ba7d
--- scripts/dltrack.py	(1248:c4c50678c856)
+++ scripts/dltrack.py	(1249:2aa56b101041)
@@ -24,18 +24,18 @@
 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)')
 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)')
 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True)
 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True)
 parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix')
-#parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file')
+parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file')
 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true')
 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file')
 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float)
 parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true')
 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true')
 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0)
-parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf'))
+parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = inf)
 parser.add_argument('--conf', dest = 'confidence', help = 'object confidence threshold for detection', type = float, default = 0.25)
 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2)
 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15)
 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3)
 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05)
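For reference, a hypothetical invocation exercising the newly enabled -k mask option (all file names below are placeholders, not files from the repository):

    python dltrack.py -i video.mp4 -d tracking.sqlite -m yolov8x.pt -t bytetrack.yaml -k mask.png --display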
@@ -54,11 +54,17 @@
 if args.lastFrameNum is not None:
     lastFrameNum = args.lastFrameNum
 elif args.configFilename is not None:
     lastFrameNum = params.lastFrameNum
 else:
-    lastFrameNum = inf
+    lastFrameNum = args.lastFrameNum
+if args.maskFilename is not None:
+    mask = cv2.imread(args.maskFilename, cv2.IMREAD_GRAYSCALE)
+elif params.maskFilename is not None:
+    mask = cv2.imread(params.maskFilename, cv2.IMREAD_GRAYSCALE)
+else:
+    mask = None
 
 # TODO use mask, remove short objects, smooth
 
 # TODO add option to refine position with mask for vehicles, to save different positions
 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html
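The block added above loads an optional grayscale mask; further down, detections are kept only if the center of their bounding box falls on a nonzero mask pixel. A minimal standalone sketch of that test, assuming OpenCV (keepDetection and mask.png are hypothetical names used only for illustration):

    import cv2

    def keepDetection(mask, xyxy):
        '''Returns True if the center of the (xmin, ymin, xmax, ymax) box
        lies on a nonzero pixel of the grayscale mask (None disables masking)'''
        if mask is None:
            return True
        cx = int((xyxy[0]+xyxy[2])/2.)
        cy = int((xyxy[1]+xyxy[3])/2.)
        return mask[cy, cx] > 0 # NumPy indexing is row (y) first, column (x) second

    mask = cv2.imread('mask.png', cv2.IMREAD_GRAYSCALE) # None if the file cannot be read
    print(keepDetection(mask, (100., 50., 140., 90.))) # tests mask pixel (120, 70)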
@@ -84,44 +90,51 @@
 success, frame = capture.read()
 if not success:
     print('Input {} could not be read. Exiting'.format(args.videoFilename))
     import sys; sys.exit()
 
-results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), conf = args.confidence, persist=True, verbose=False)
+results = model.track(source=frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), conf=args.confidence, persist=True, verbose=False)
 while capture.isOpened() and success and frameNum <= lastFrameNum:
     result = results[0]
     if frameNum %10 == 0:
         print(frameNum, len(result.boxes), 'objects')
     for box in result.boxes:
         if box.id is not None: # None are objects with low confidence
-            num = int(box.id.item())
-            if num in objects:
-                objects[num].timeInterval.last = frameNum
-                objects[num].features[0].timeInterval.last = frameNum
-                objects[num].features[1].timeInterval.last = frameNum
-                objects[num].bboxes[frameNum] = copy(box.xyxy)
-                objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())])
-                objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) # min
-                objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) # max
-            else:
-                inter = moving.TimeInterval(frameNum, frameNum)
-                objects[num] = moving.MovingObject(num, inter)
-                objects[num].bboxes = {frameNum: copy(box.xyxy)}
-                objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]]
-                objects[num].features = [moving.MovingObject(featureNum, copy(inter)), moving.MovingObject(featureNum+1, copy(inter))]
-                objects[num].featureNumbers = [featureNum, featureNum+1]
-                objects[num].features[0].tmpPositions = {frameNum: moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item())}
-                objects[num].features[1].tmpPositions = {frameNum: moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item())}
-                featureNum += 2
+            xyxy = copy(box.xyxy)
+            minPoint = moving.Point(xyxy[0,0].item(), xyxy[0,1].item())
+            maxPoint = moving.Point(xyxy[0,2].item(), xyxy[0,3].item())
+            center = (minPoint+maxPoint).divide(2.).asint()
+            if mask is None or mask[center.y, center.x] > 0:
+                num = int(box.id.item())
+                if num in objects:
+                    objects[num].timeInterval.last = frameNum
+                    objects[num].features[0].timeInterval.last = frameNum
+                    objects[num].features[1].timeInterval.last = frameNum
+                    objects[num].bboxes[frameNum] = xyxy
+                    objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())])
+                    objects[num].features[0].tmpPositions[frameNum] = minPoint # min
+                    objects[num].features[1].tmpPositions[frameNum] = maxPoint # max
+                else:
+                    inter = moving.TimeInterval(frameNum, frameNum)
+                    objects[num] = moving.MovingObject(num, inter)
+                    objects[num].bboxes = {frameNum: copy(xyxy)}
+                    objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]]
+                    objects[num].features = [moving.MovingObject(featureNum, copy(inter)), moving.MovingObject(featureNum+1, copy(inter))]
+                    objects[num].featureNumbers = [featureNum, featureNum+1]
+                    objects[num].features[0].tmpPositions = {frameNum: minPoint}
+                    objects[num].features[1].tmpPositions = {frameNum: maxPoint}
+                    featureNum += 2
     if args.display:
         cvutils.cvImshow(windowName, result.plot()) # original image in orig_img
         key = cv2.waitKey()
         if cvutils.quitKey(key):
             break
     frameNum += 1
     success, frame = capture.read()
-    results = model.track(frame, persist=True)
+    results = model.track(source=frame, persist=True)
+capture.release()
+cv2.destroyAllWindows()
 
 # classification
 for num, obj in objects.items():
     obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed?
 
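The loop above follows the usual per-frame Ultralytics pattern: model.track is called once per frame with persist=True so that tracker state, and hence object ids, carry over between calls. A minimal sketch of that pattern on its own (yolov8x.pt and video.mp4 are placeholder names):

    import cv2
    from ultralytics import YOLO

    model = YOLO('yolov8x.pt')
    capture = cv2.VideoCapture('video.mp4')
    success, frame = capture.read()
    while success:
        results = model.track(source=frame, persist=True, verbose=False)
        for box in results[0].boxes:
            if box.id is not None: # unconfirmed detections have no id
                print(int(box.id.item()), box.xyxy.tolist(), int(box.cls.item()))
        success, frame = capture.read()
    capture.release()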
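The classification step assigns each object the most frequent of its per-frame class labels; utils.mostCommon presumably implements such a majority vote, for which collections.Counter is a minimal equivalent:

    from collections import Counter

    def mostCommon(elements):
        '''Returns the most frequent element of the sequence (majority vote)'''
        return Counter(elements).most_common(1)[0][0]

    print(mostCommon(['car', 'truck', 'car', 'car'])) # car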
@@ -219,14 +232,11 @@
     t2 = features[1].getPositions()
     t = [[(p1.x+p2.x)/2., max(p1.y, p2.y)] for p1, p2 in zip(t1, t2)]
 else:
     t = []
     for instant in obj.getTimeInterval():
-        points = []
-        for f in features:
-            if f.existsAtInstant(instant):
-                points.append(f.getPositionAtInstant(instant))
+        points = [f.getPositionAtInstant(instant) for f in features if f.existsAtInstant(instant)]
         t.append(moving.Point.agg(points, np.mean).aslist())
 #t = sum([f.getPositions().asArray() for f in features])/len(features)
 #t = (moving.Trajectory.add(t1, t2)*0.5).asArray()
 projected = cvutils.imageToWorldProject(np.array(t).T, intrinsicCameraMatrix, distortionCoefficients, homography)
 featureNum = features[0].getNum()
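cvutils.imageToWorldProject presumably undistorts the image points and applies the homography to map them to ground-plane coordinates; the homography step on its own can be sketched with plain OpenCV (the matrix below is a placeholder, not a calibrated one):

    import numpy as np
    import cv2

    homography = np.array([[0.05, 0., -10.], # hypothetical 3x3 image-to-world matrix
                           [0., 0.05, -5.],
                           [0., 0., 1.]])
    imagePoints = np.array([[[120., 70.]], [[130., 75.]]], dtype=np.float32) # shape (N, 1, 2)
    worldPoints = cv2.perspectiveTransform(imagePoints, homography)
    print(worldPoints.reshape(-1, 2)) # projected world coordinates, e.g. in meters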