Mercurial Hosting > traffic-intelligence
changeset 1246:2397de73770d
dltrack saves after projecting coordinates
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Fri, 09 Feb 2024 17:47:33 -0500 |
parents | 371c718e57d7 |
children | 439207b6c146 |
files | scripts/classify-objects.py scripts/display-trajectories.py scripts/dltrack.py scripts/extract-appearance-images.py trafficintelligence/storage.py |
diffstat | 5 files changed, 61 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/classify-objects.py Thu Feb 08 16:10:54 2024 -0500 +++ b/scripts/classify-objects.py Fri Feb 09 17:47:33 2024 -0500 @@ -29,7 +29,7 @@ parser.add_argument('--verbose', dest = 'verbose', help = 'verbose information', action = 'store_true') args = parser.parse_args() -params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) +params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) classifierParams = storage.ClassifierParameters(params.classifierFilename) classifierParams.convertToFrames(params.videoFrameRate, 3.6) # conversion from km/h to m/frame
--- a/scripts/display-trajectories.py Thu Feb 08 16:10:54 2024 -0500 +++ b/scripts/display-trajectories.py Fri Feb 09 17:47:33 2024 -0500 @@ -27,10 +27,10 @@ parser.add_argument('--nzeros', dest = 'nZerosFilenameArg', help = 'number of digits in filenames', type = int) args = parser.parse_args() -params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) +params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) if args.homographyFilename is not None: - invHomography = inv(loadtxt(args.homographyFilename)) + invHomography = inv(loadtxt(args.homographyFilename)) if args.intrinsicCameraMatrixFilename is not None: intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename) if args.distortionCoefficients is not None:
--- a/scripts/dltrack.py Thu Feb 08 16:10:54 2024 -0500 +++ b/scripts/dltrack.py Fri Feb 09 17:47:33 2024 -0500 @@ -1,6 +1,7 @@ #! /usr/bin/env python3 # from https://docs.ultralytics.com/modes/track/ import sys, argparse +from math import inf from copy import copy from collections import Counter import numpy as np @@ -12,14 +13,20 @@ from trafficintelligence import cvutils, moving, storage, utils -parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trakcers.') +parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trackers.', + epilog= '''The models can be found in the Ultralytics model zoo, + eg YOLOv8 (https://docs.ultralytics.com/models/yolov8/). + The tracking models can be found also online + (https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). + The choice is to project the middle of the bottom line for persons, + and the bounding box center otherwise.''') parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)') parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)') parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True) parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True) -parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix', default = 'homography.txt') -parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file') +parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix') +#parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file') parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true') parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file') parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float) @@ -27,15 +34,17 @@ parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true') parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0) parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf')) -parser.add_argument('--conf', dest = 'confindence', help = 'object confidence threshold for detection', type = float, default = 0.25) +parser.add_argument('--conf', dest = 'confidence', help = 'object confidence threshold for detection', type = float, default = 0.25) parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2) parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15) parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3) parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05) args = parser.parse_args() -params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) +params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) +if args.homographyFilename is not None: + homography = np.loadtxt(args.homographyFilename) if args.intrinsicCameraMatrixFilename is not None: intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename) if args.distortionCoefficients is not None: @@ -44,8 +53,13 @@ firstFrameNum = args.firstFrameNum if args.lastFrameNum is not None: lastFrameNum = args.lastFrameNum +elif args.configFilename is not None: + lastFrameNum = params.lastFrameNum +else: + lastFrameNum = inf -# TODO add option to refine position with mask for vehicles +# TODO use mask +# TODO add option to refine position with mask for vehicles, to save different positions # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html # use 2 x bytetrack track buffer to remove objects from existing ones @@ -53,43 +67,40 @@ # Load a model model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt # seg could be used on cropped image... if can be loaded and kept in memory -# model = YOLO('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get' +# model = YOLOX('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get' # Track with the model if args.display: windowName = 'frame' cv2.namedWindow(windowName, cv2.WINDOW_NORMAL) -capture = cv2.VideoCapture(args.videoFilename) +capture = cv2.VideoCapture(videoFilename) objects = {} featureNum = 1 -frameNum = args.firstFrameNum +frameNum = firstFrameNum capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum) -lastFrameNum = args.lastFrameNum success, frame = capture.read() if not success: print('Input {} could not be read. Exiting'.format(args.videoFilename)) import sys; sys.exit() -results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), persist=True, verbose=False) +results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), conf = args.confidence, persist=True, verbose=False) while capture.isOpened() and success and frameNum <= lastFrameNum: result = results[0] if frameNum %10 == 0: print(frameNum, len(result.boxes), 'objects') for box in result.boxes: - #print(box.cls, box.id, box.xyxy) if box.id is not None: # None are objects with low confidence num = int(box.id.item()) - #xyxy = box.xyxy[0].tolist() if num in objects: objects[num].timeInterval.last = frameNum objects[num].features[0].timeInterval.last = frameNum objects[num].features[1].timeInterval.last = frameNum objects[num].bboxes[frameNum] = copy(box.xyxy) objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())]) - objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) - objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) + objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) # min + objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) # max else: inter = moving.TimeInterval(frameNum, frameNum) objects[num] = moving.MovingObject(num, inter) @@ -113,7 +124,7 @@ for num, obj in objects.items(): obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed? -# add quality control: avoid U-turns +# TODO add quality control: avoid U-turns # merge bikes and people twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])] @@ -188,17 +199,37 @@ del objects[pedestrians[pedInd]] #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples) -# interpolate and generate velocity (?) for the features (bboxes) before saving +# interpolate and save image coordinates for num, obj in objects.items(): - #obj.features[1].timeInterval = copy(obj.getTimeInterval()) for f in obj.getFeatures(): if f.length() != len(f.tmpPositions): # interpolate f.positions = moving.Trajectory.fromPointDict(f.tmpPositions) - #obj.features[1].positions = moving.Trajectory.fromPointDict(obj.features[1].tmpPositions) else: f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values())) - #obj.features[1].positions = moving.Trajectory.fromPointList(list(obj.features[1].tmpPositions.values())) - +if not args.notSavingImageCoordinates: + storage.saveTrajectoriesToSqlite(utils.removeExtension(args.databaseFilename)+'-bb.sqlite', list(objects.values()), 'object') +# project, smooth and save +for num, obj in objects.items(): + features = obj.getFeatures() + if moving.userTypeNames[obj.getUserType()] == 'pedestrian': + assert len(features) == 2 + t1 = features[0].getPositions() + t2 = features[1].getPositions() + t = [[(p1.x+p2.x)/2., max(p1.y, p2.y)] for p1, p2 in zip(t1, t2)] + else: + t = [] + for instant in obj.getTimeInterval(): + points = [] + for f in features: + if f.existsAtInstant(instant): + points.append(f.getPositionAtInstant(instant)) + t.append(moving.Point.agg(points, np.mean).aslist()) + #t = sum([f.getPositions().asArray() for f in features])/len(features) + #t = (moving.Trajectory.add(t1, t2)*0.5).asArray() + projected = cvutils.imageToWorldProject(np.array(t).T, intrinsicCameraMatrix, distortionCoefficients, homography) + featureNum = features[0].getNum() + obj.features=[moving.MovingObject(featureNum, obj.getTimeInterval(), moving.Trajectory(projected.tolist()))] + obj.featureNumbers = [featureNum] storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object') # todo save bbox and mask to study localization / representation
--- a/scripts/extract-appearance-images.py Thu Feb 08 16:10:54 2024 -0500 +++ b/scripts/extract-appearance-images.py Fri Feb 09 17:47:33 2024 -0500 @@ -23,7 +23,7 @@ parser.add_argument('--compute-speed-distributions', dest = 'computeSpeedDistribution', help = 'computes the distribution of the road users of each type and fits parameters to each', action = 'store_true') args = parser.parse_args() -params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) +params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) classifierParams = storage.ClassifierParameters(params.classifierFilename) classificationAnnotations = read_csv(args.classificationAnnotationFilename, index_col=0, delimiter = args.classificationAnnotationFilenameDelimiter, names = ["object_num", "road_user_type"])
--- a/trafficintelligence/storage.py Thu Feb 08 16:10:54 2024 -0500 +++ b/trafficintelligence/storage.py Fri Feb 09 17:47:33 2024 -0500 @@ -1652,6 +1652,7 @@ self.minFeatureEigThreshold = config.getfloat(self.sectionHeader, 'min-feature-eig-threshold') self.minFeatureTime = config.getint(self.sectionHeader, 'min-feature-time') self.minFeatureDisplacement = config.getfloat(self.sectionHeader, 'min-feature-displacement') + self.smoothingHalfWidth = config.getfloat(self.sectionHeader, 'smoothing-halfwidth') #self.updateTimer = config.getint(self.sectionHeader, 'tracker-reload-time') @@ -1672,8 +1673,10 @@ videoFilename = params.videoFilename databaseFilename = params.databaseFilename if params.homography is not None: + homography = params.homography invHomography = linalg.inv(params.homography) else: + homography = None invHomography = None intrinsicCameraMatrix = params.intrinsicCameraMatrix distortionCoefficients = array(params.distortionCoefficients) @@ -1682,6 +1685,7 @@ firstFrameNum = params.firstFrameNum else: params = None + homography = None invHomography = None undistort = False intrinsicCameraMatrix = None @@ -1700,7 +1704,7 @@ else: databaseFilename = params.databaseFilename - return params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum + return params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum # deprecated class SceneParameters(object):