view scripts/dltrack.py @ 1231:6487ef10c0e0

work in progress
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 24 Aug 2023 17:06:16 -0400
parents c582b272108f
children d5695e0b59d9

#! /usr/bin/env python3
# from https://docs.ultralytics.com/modes/track/
import sys, argparse

from trafficintelligence import cvutils, moving, storage
from ultralytics import YOLO
import cv2

parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics YOLO models and trackers.')#, epilog = 'Either the configuration filename or the other parameters (at least video and database filenames) need to be provided.')
parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)')
# detect model
# tracker model
parser.add_argument('--display', dest = 'display', help = 'show the results (careful with long videos, risk of running out of memory)', action = 'store_true')
args = parser.parse_args()

# required functionality? (a configuration-loading sketch follows the list of options below)
# # filename of the video to process (can be images, eg image%04d.png)
# video-filename = laurier.avi
# # filename of the database where results are saved
# database-filename = laurier.sqlite
# # filename of the homography matrix
# homography-filename = laurier-homography.txt
# # filename of the camera intrinsic matrix
# intrinsic-camera-filename = intrinsic-camera.txt
# # -0.11759321 0.0148536 0.00030756 -0.00020578 -0.00091816
# distortion-coefficients = -0.11759321
# distortion-coefficients = 0.0148536
# distortion-coefficients = 0.00030756 
# distortion-coefficients = -0.00020578 
# distortion-coefficients = -0.00091816
# # undistorted image multiplication
# undistorted-size-multiplication = 1.31
# # Interpolation method for remapping image when correcting for distortion: 0 for INTER_NEAREST - a nearest-neighbor interpolation; 1 for INTER_LINEAR - a bilinear interpolation (used by default); 2 for INTER_CUBIC - a bicubic interpolation over 4x4 pixel neighborhood; 3 for INTER_LANCZOS4
# interpolation-method = 1
# # filename of the mask image (where features are detected)
# mask-filename = none
# # undistort the video for feature tracking
# undistort = false
# # load features from database
# load-features = false
# # display trajectories on the video
# display = false
# # original video frame rate (number of frames/s)
# video-fps = 29.97
# # number of digits of precision for all measurements derived from video
# # measurement-precision = 3
# # first frame to process
# frame1 = 0
# # number of frames to process: 0 means processing all frames
# nframes = 0
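
# A minimal sketch of how such a configuration file could be loaded if that functionality
# is added (the helper name is hypothetical and the existing Traffic Intelligence
# configuration-loading code could be reused instead): key = value lines, '#' comments,
# and repeated keys (like distortion-coefficients) collected into lists.
def loadTrackingConfig(filename):
    'Returns a dict mapping option names to their value, or to a list of values for repeated keys'
    config = {}
    with open(filename) as f:
        for line in f:
            line = line.split('#')[0].strip() # drop comments and surrounding whitespace
            if '=' in line:
                key, value = [s.strip() for s in line.split('=', 1)]
                if key in config: # repeated key: accumulate the values in a list
                    if not isinstance(config[key], list):
                        config[key] = [config[key]]
                    config[key].append(value)
                else:
                    config[key] = value
    return config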

# TODO add option to refine position with mask for vehicles

# use 2 x the bytetrack track buffer (in frames) to decide when to remove objects from the set of currently tracked ones (a sketch follows)
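
# A minimal sketch of that idea (not wired into the tracking loop below): assuming the
# tracker's track_buffer is 30 frames (the default value in ultralytics' bytetrack.yaml),
# an object whose last instant is more than 2 x track_buffer frames old will not be
# matched again by the tracker and can be considered finished.
trackBuffer = 30 # should match track_buffer in the tracker configuration file
def removeFinishedObjects(frameNum, currentObjects, finishedObjects, maxLag = 2*trackBuffer):
    'Moves the objects of currentObjects (dict) not updated in the last maxLag frames to finishedObjects (list)'
    for num in list(currentObjects):
        if frameNum - currentObjects[num].timeInterval.last > maxLag:
            finishedObjects.append(currentObjects.pop(num))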

# Load a model
model = YOLO('/home/nicolas/Research/Data/classification-models/yolov8x.pt') # seg yolov8x-seg.pt
# a segmentation model could be used on the cropped image... if it can be loaded and kept in memory (see the sketch below)
# model = YOLO('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get'
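
# A minimal sketch of that idea (hypothetical helper, not used below): a segmentation
# model, eg yolov8x-seg.pt, loaded once with YOLO() and kept in memory, is run on the
# bounding box crop of a frame; the resulting masks could help refine the vehicle
# position (see the TODO above).
def segmentCrop(segModel, frame, xyxy):
    'Runs segModel on the crop of frame (image array) defined by the xyxy box and returns the ultralytics Results'
    x1, y1, x2, y2 = [int(round(c)) for c in xyxy]
    return segModel(frame[y1:y2, x1:x2])[0] # pixel masks, if any, are in the masks attribute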

# Track with the model
if args.display: # let ultralytics run the tracker and display the results itself (no moving object is built)
    results = model.track(source=args.videoFilename, tracker="/home/nicolas/Research/Data/classification-models/bytetrack.yaml", classes=list(moving.cocoTypeNames.keys()), show=True) # , save_txt=True 
else: # process the results frame by frame to build Traffic Intelligence moving objects (each frame is also displayed and advanced by pressing a key)
    windowName = 'frame'
    cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
    
    results = model.track(source=args.videoFilename, tracker="/home/nicolas/Research/Data/classification-models/bytetrack.yaml", classes=list(moving.cocoTypeNames.keys()), stream=True)
    objects = [] # not used yet: intended for the objects whose tracking is finished (see the note on the track buffer above)
    currentObjects = {} # objects currently being tracked, indexed by track id
    featureNum = 0 # running counter to number the bounding box corner features
    # create object with user type and list of 3 features (the bottom ones and the middle) + projection (see the projection sketch after the loop)
    for frameNum, result in enumerate(results):
        print(frameNum, len(result.boxes))
        for box in result.boxes:
            if box.id is None: # detections without a track id are ignored
                continue
            num = int(box.id)
            xyxy = box.xyxy[0].tolist()
            if num in currentObjects:
                currentObjects[num].timeInterval.last = frameNum
                features = currentObjects[num].features
                features[0].timeInterval.last = frameNum # keep the feature time intervals in sync with their trajectories
                features[1].timeInterval.last = frameNum
                features[0].getPositions().addPositionXY(xyxy[0],xyxy[1])
                features[1].getPositions().addPositionXY(xyxy[2],xyxy[3])
            else:
                currentObjects[num] = moving.MovingObject(num, moving.TimeInterval(frameNum,frameNum), userType = moving.coco2Types[int(box.cls)])
                currentObjects[num].features = [moving.MovingObject(featureNum, moving.TimeInterval(frameNum, frameNum), moving.Trajectory([[xyxy[0]],[xyxy[1]]])),
                                                moving.MovingObject(featureNum+1, moving.TimeInterval(frameNum, frameNum), moving.Trajectory([[xyxy[2]],[xyxy[3]]]))]
                currentObjects[num].featureNumbers = [featureNum, featureNum+1]
                featureNum += 2
            print(box.cls, box.xyxy)
        cvutils.cvImshow(windowName, result.plot()) # original image in orig_img
        key = cv2.waitKey()
        if cvutils.quitKey(key):
            break
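
    # A minimal sketch of the projection step mentioned before the loop (the homography
    # filename is hypothetical and the code only runs if the file exists): the image
    # positions of the first corner feature of each object are projected to the ground
    # plane with cv2.perspectiveTransform, assuming a 3x3 image-to-world homography
    # saved as a text file.
    import os
    import numpy as np
    if os.path.exists('homography.txt'): # hypothetical filename
        homography = np.loadtxt('homography.txt')
        for obj in currentObjects.values():
            corner = obj.features[0].getPositions() # image trajectory of the first bounding box corner
            imagePoints = np.array([list(zip(corner.getXCoordinates(), corner.getYCoordinates()))]) # shape (1, n, 2)
            groundPoints = cv2.perspectiveTransform(imagePoints, homography) # same shape, in world units
            print(obj.getNum(), groundPoints[0, -1]) # last projected position of the object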

if not args.display: # moving objects are only built in the frame by frame branch above
    storage.saveTrajectoriesToSqlite('test.sqlite', list(currentObjects.values()), 'object')

# todo save bbox and mask to study localization / representation
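
# A minimal sketch of that idea (hypothetical helper): write each tracked box of an
# iterable of ultralytics Results (one per frame) to a CSV row with its frame number,
# track id, class and corner coordinates; masks, when a segmentation model is used,
# could be saved similarly from the masks attribute of each result.
import csv
def saveBoxesToCsv(filename, results):
    'Saves the tracked bounding boxes of results (iterable of ultralytics Results, one per frame) to a CSV file'
    with open(filename, 'w', newline = '') as out:
        writer = csv.writer(out)
        writer.writerow(['frame', 'id', 'class', 'x1', 'y1', 'x2', 'y2'])
        for frameNum, result in enumerate(results):
            for box in result.boxes:
                if box.id is not None: # only boxes matched to a track are saved
                    writer.writerow([frameNum, int(box.id), int(box.cls)]+box.xyxy[0].tolist())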