diff python/storage.py @ 758:0a05883216cf

merge with dev
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 03 Nov 2015 13:48:56 -0500
parents a73f43aac00e
children a05b70f307dd
line wrap: on
line diff
--- a/python/storage.py	Fri Oct 02 11:30:15 2015 -0400
+++ b/python/storage.py	Tue Nov 03 13:48:56 2015 -0500
@@ -2,11 +2,12 @@
 # -*- coding: utf-8 -*-
 '''Various utilities to save and load data'''
 
-import utils, moving, events, indicators
+import utils, moving, events, indicators, shutil
 from base import VideoFilenameAddable
 
 import sqlite3, logging
 from numpy import log, min as npmin, max as npmax, round as npround, array, sum as npsum, loadtxt
+from pandas import read_csv, merge
 
 
 commentChar = '#'
@@ -163,20 +164,17 @@
 
     try:
         objectCriteria = getObjectCriteria(objectNumbers)
+        queryStatement = None
         if trajectoryType == 'feature':
             queryStatement = 'SELECT * from '+tableName
             if objectNumbers is not None:
-                queryStatement += ' where trajectory_id '+objectCriteria
+                queryStatement += ' WHERE trajectory_id '+objectCriteria
             queryStatement += ' ORDER BY trajectory_id, frame_number'
-            cursor.execute(queryStatement)
-            logging.debug(queryStatement)
         elif trajectoryType == 'object':
-            queryStatement = 'SELECT OF.object_id, P.frame_number, avg(P.x_coordinate), avg(P.y_coordinate) from '+tableName+' P, objects_features OF where P.trajectory_id = OF.trajectory_id'
+            queryStatement = 'SELECT OF.object_id, P.frame_number, avg(P.x_coordinate), avg(P.y_coordinate) from '+tableName+' P, objects_features OF WHERE P.trajectory_id = OF.trajectory_id'
             if objectNumbers is not None:
                 queryStatement += ' and OF.object_id '+objectCriteria
             queryStatement += ' GROUP BY OF.object_id, P.frame_number ORDER BY OF.object_id, P.frame_number'
-            cursor.execute(queryStatement)
-            logging.debug(queryStatement)
         elif trajectoryType in ['bbtop', 'bbbottom']:
             if trajectoryType == 'bbtop':
                 corner = 'top_left'
@@ -186,10 +184,11 @@
             if objectNumbers is not None:
                 queryStatement += ' WHERE object_id '+objectCriteria
             queryStatement += ' ORDER BY object_id, frame_number'
+        else:
+            print('no trajectory type was chosen')
+        if queryStatement is not None:
             cursor.execute(queryStatement)
             logging.debug(queryStatement)
-        else:
-            print('no trajectory type was chosen')
     except sqlite3.OperationalError as error:
         printDBError(error)
         return []
@@ -220,7 +219,7 @@
     objectCriteria = getObjectCriteria(objectNumbers)
     queryStatement = 'SELECT object_id, road_user_type from objects'
     if objectNumbers is not None:
-        queryStatement += ' where object_id '+objectCriteria
+        queryStatement += ' WHERE object_id '+objectCriteria
     cursor.execute(queryStatement)
     userTypes = {}
     for row in cursor:
@@ -247,7 +246,7 @@
         try:
             # attribute feature numbers to objects
             objectCriteria = getObjectCriteria(objectNumbers)
-            queryStatement = 'SELECT P.trajectory_id, OF.object_id from positions P, objects_features OF where P.trajectory_id = OF.trajectory_id'
+            queryStatement = 'SELECT P.trajectory_id, OF.object_id from positions P, objects_features OF WHERE P.trajectory_id = OF.trajectory_id'
             if objectNumbers is not None:
                 queryStatement += ' and OF.object_id '+objectCriteria
             queryStatement += ' group by P.trajectory_id order by OF.object_id' # order is important to group all features per object
@@ -395,7 +394,7 @@
     connection = sqlite3.connect(filename)
     cursor = connection.cursor()
     try:
-        cursor.execute('select INT.id, INT.object_id1, INT.object_id2, INT.first_frame_number, INT.last_frame_number, IND.indicator_type, IND.frame_number, IND.value from interactions INT, indicators IND where INT.id = IND.interaction_id ORDER BY INT.id, IND.indicator_type, IND.frame_number')
+        cursor.execute('select INT.id, INT.object_id1, INT.object_id2, INT.first_frame_number, INT.last_frame_number, IND.indicator_type, IND.frame_number, IND.value from interactions INT, indicators IND WHERE INT.id = IND.interaction_id ORDER BY INT.id, IND.indicator_type, IND.frame_number')
         interactionNum = -1
         indicatorTypeNum = -1
         tmpIndicators = {}
@@ -418,7 +417,7 @@
     connection.close()
     return interactions
 # load first and last object instants
-# CREATE TEMP TABLE IF NOT EXISTS object_instants AS SELECT OF.object_id, min(frame_number) as first_instant, max(frame_number) as last_instant from positions P, objects_features OF where P.trajectory_id = OF.trajectory_id group by OF.object_id order by OF.object_id
+# CREATE TEMP TABLE IF NOT EXISTS object_instants AS SELECT OF.object_id, min(frame_number) as first_instant, max(frame_number) as last_instant from positions P, objects_features OF WHERE P.trajectory_id = OF.trajectory_id group by OF.object_id order by OF.object_id
 
 def createBoundingBoxTable(filename, invHomography = None):
     '''Create the table to store the object bounding boxes in image space
@@ -429,7 +428,7 @@
         cursor.execute('CREATE TABLE IF NOT EXISTS bounding_boxes (object_id INTEGER, frame_number INTEGER, x_top_left REAL, y_top_left REAL, x_bottom_right REAL, y_bottom_right REAL,  PRIMARY KEY(object_id, frame_number))')
         cursor.execute('INSERT INTO bounding_boxes SELECT object_id, frame_number, min(x), min(y), max(x), max(y) from '
               '(SELECT object_id, frame_number, (x*{}+y*{}+{})/w as x, (x*{}+y*{}+{})/w as y from '
-              '(SELECT OF.object_id, P.frame_number, P.x_coordinate as x, P.y_coordinate as y, P.x_coordinate*{}+P.y_coordinate*{}+{} as w from positions P, objects_features OF where P.trajectory_id = OF.trajectory_id)) '.format(invHomography[0,0], invHomography[0,1], invHomography[0,2], invHomography[1,0], invHomography[1,1], invHomography[1,2], invHomography[2,0], invHomography[2,1], invHomography[2,2])+
+              '(SELECT OF.object_id, P.frame_number, P.x_coordinate as x, P.y_coordinate as y, P.x_coordinate*{}+P.y_coordinate*{}+{} as w from positions P, objects_features OF WHERE P.trajectory_id = OF.trajectory_id)) '.format(invHomography[0,0], invHomography[0,1], invHomography[0,2], invHomography[1,0], invHomography[1,1], invHomography[1,2], invHomography[2,0], invHomography[2,1], invHomography[2,2])+
               'GROUP BY object_id, frame_number')
     except sqlite3.OperationalError as error:
         printDBError(error)
@@ -647,8 +646,8 @@
     connection = sqlite3.connect(filename)
     cursor = connection.cursor()
     for obj in objects:
-        cursor.execute('update objects set startRouteID = {} where object_id = {}'.format(obj.startRouteID, obj.getNum()))
-        cursor.execute('update objects set endRouteID = {} where object_id = {}'.format(obj.endRouteID, obj.getNum()))        
+        cursor.execute('update objects set startRouteID = {} WHERE object_id = {}'.format(obj.startRouteID, obj.getNum()))
+        cursor.execute('update objects set endRouteID = {} WHERE object_id = {}'.format(obj.endRouteID, obj.getNum()))        
     connection.commit()
     connection.close()
 
@@ -658,7 +657,7 @@
     connection = sqlite3.connect(filename)
     cursor = connection.cursor()
     for obj in objects:
-        cursor.execute('update objects set road_user_type = {} where object_id = {}'.format(obj.getUserType(), obj.getNum()))
+        cursor.execute('update objects set road_user_type = {} WHERE object_id = {}'.format(obj.getUserType(), obj.getNum()))
     connection.commit()
     connection.close()
 
@@ -733,21 +732,51 @@
 def generatePDLaneColumn(data):
     data['LANE'] = data['LANE\LINK\NO'].astype(str)+'_'+data['LANE\INDEX'].astype(str)
 
-def loadTrajectoriesFromVissimFile(filename, simulationStepsPerTimeUnit, nObjects = -1, warmUpLastInstant = None, usePandas = False, nDecimals = 2, lowMemory = True):
+def convertTrajectoriesVissimToSqlite(filename):
+    '''Relies on a system call to sqlite3
+    sqlite3 [file.sqlite] < import_fzp.sql'''
+    sqlScriptFilename = "import_fzp.sql"
+    # create sql file
+    out = openCheck(sqlScriptFilename, "w")
+    out.write(".separator \";\"\n"+
+              "CREATE TABLE IF NOT EXISTS curvilinear_positions (t REAL, trajectory_id INTEGER, link_id INTEGER, lane_id INTEGER, s_coordinate REAL, y_coordinate REAL, speed REAL, PRIMARY KEY (t, trajectory_id));\n"+
+              ".import "+filename+" curvilinear_positions\n"+
+              "DELETE FROM curvilinear_positions WHERE trajectory_id IS NULL OR trajectory_id = \"NO\";\n")
+    out.close()
+    # system call
+    from subprocess import check_call
+    out = openCheck("err.log", "w")
+    check_call("sqlite3 "+utils.removeExtension(filename)+".sqlite < "+sqlScriptFilename, stderr = out, shell = True)
+    out.close()
+    shutil.os.remove(sqlScriptFilename)
+
+def loadObjectNumbersInLinkFromVissimFile(filename, linkIds):
+    '''Finds the ids of the objects that go through any of the link in the list linkIds'''
+    connection = sqlite3.connect(filename)
+    cursor = connection.cursor()
+    queryStatement = 'SELECT DISTINCT trajectory_id FROM curvilinear_positions where link_id IN ('+','.join([str(id) for id in linkIds])+')'
+    try:
+        cursor.execute(queryStatement)
+        return [row[0] for row in cursor]
+    except sqlite3.OperationalError as error:
+        printDBError(error)
+
+
+def loadTrajectoriesFromVissimFile(filename, simulationStepsPerTimeUnit, objectNumbers = None, warmUpLastInstant = None, usePandas = False, nDecimals = 2, lowMemory = True):
     '''Reads data from VISSIM .fzp trajectory file
-    simulationStepsPerTimeUnit is the number of simulation steps per unit of time used by VISSIM
-    for example, there seems to be 5 simulation steps per simulated second in VISSIM, 
-    so simulationStepsPerTimeUnit should be 5, 
+    simulationStepsPerTimeUnit is the number of simulation steps per unit of time used by VISSIM (second)
+    for example, there seems to be 10 simulation steps per simulated second in VISSIM, 
+    so simulationStepsPerTimeUnit should be 10, 
     so that all times correspond to the number of the simulation step (and can be stored as integers)
     
     Objects positions will be considered only after warmUpLastInstant 
     (if the object has no such position, it won't be loaded)
 
-    Assumed to be sorted over time'''
+    Assumed to be sorted over time
+    Warning: if reading from SQLite a limited number of objects, objectNumbers will be the maximum object id'''
     objects = {} # dictionary of objects index by their id
 
     if usePandas:
-        from pandas import read_csv
         data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1, low_memory = lowMemory)
         generatePDLaneColumn(data)
         data['TIME'] = data['$VEHICLE:SIMSEC']*simulationStepsPerTimeUnit
@@ -761,31 +790,55 @@
             objects[objNum] = moving.MovingObject(num = objNum, timeInterval = moving.TimeInterval(row['first'], row['last']))
             # positions should be rounded to nDecimals decimals only
             objects[objNum].curvilinearPositions = moving.CurvilinearTrajectory(S = npround(tmp['POS'].tolist(), nDecimals), Y = npround(tmp['POSLAT'].tolist(), nDecimals), lanes = tmp['LANE'].tolist())
-            if nObjects > 0 and len(objects) >= nObjects:
-                break
-        return objects.values()
+            if objectNumbers is not None and objectNumbers > 0 and len(objects) >= objectNumbers:
+                objects.values()
     else:
-        inputfile = openCheck(filename, quitting = True)
-        # data = pd.read_csv(filename, skiprows=15, delimiter=';')
-        # skip header: 15 lines + 1
-        line = readline(inputfile, '*$')
-        while len(line) > 0:#for line in inputfile:
-            data = line.strip().split(';')
-            objNum = int(data[1])
-            instant = int(float(data[0])*simulationStepsPerTimeUnit)
-            s = float(data[4])
-            y = float(data[5])
-            lane = data[2]+'_'+data[3]
-            if objNum not in objects:
-                if warmUpLastInstant is None or instant >= warmUpLastInstant:
-                    if nObjects < 0 or len(objects) < nObjects:
-                        objects[objNum] = moving.MovingObject(num = objNum, timeInterval = moving.TimeInterval(instant, instant))
-                        objects[objNum].curvilinearPositions = moving.CurvilinearTrajectory()
-            if (warmUpLastInstant is None or instant >= warmUpLastInstant) and objNum in objects:
-                objects[objNum].timeInterval.last = instant
-                objects[objNum].curvilinearPositions.addPositionSYL(s, y, lane)
+        if filename.endswith(".fzp"):
+            inputfile = openCheck(filename, quitting = True)
             line = readline(inputfile, '*$')
-
+            while len(line) > 0:#for line in inputfile:
+                data = line.strip().split(';')
+                objNum = int(data[1])
+                instant = float(data[0])*simulationStepsPerTimeUnit
+                s = float(data[4])
+                y = float(data[5])
+                lane = data[2]+'_'+data[3]
+                if objNum not in objects:
+                    if warmUpLastInstant is None or instant >= warmUpLastInstant:
+                        if objectNumbers is None or len(objects) < objectNumbers:
+                            objects[objNum] = moving.MovingObject(num = objNum, timeInterval = moving.TimeInterval(instant, instant))
+                            objects[objNum].curvilinearPositions = moving.CurvilinearTrajectory()
+                if (warmUpLastInstant is None or instant >= warmUpLastInstant) and objNum in objects:
+                    objects[objNum].timeInterval.last = instant
+                    objects[objNum].curvilinearPositions.addPositionSYL(s, y, lane)
+                line = readline(inputfile, '*$')
+        elif filename.endswith(".sqlite"):
+            connection = sqlite3.connect(filename)
+            cursor = connection.cursor()
+            queryStatement = 'SELECT t, trajectory_id, link_id, lane_id, s_coordinate, y_coordinate FROM curvilinear_positions'
+            if objectNumbers is not None:
+                queryStatement += ' WHERE trajectory_id '+getObjectCriteria(objectNumbers)
+            queryStatement += ' ORDER BY trajectory_id, t'
+            try:
+                cursor.execute(queryStatement)
+                for row in cursor:
+                    objNum = row[1]
+                    instant = row[0]*simulationStepsPerTimeUnit
+                    s = row[4]
+                    y = row[5]
+                    lane = '{}_{}'.format(row[2], row[3])
+                    if objNum not in objects:
+                        if warmUpLastInstant is None or instant >= warmUpLastInstant:
+                            if objectNumbers is None or len(objects) < objectNumbers:
+                                objects[objNum] = moving.MovingObject(num = objNum, timeInterval = moving.TimeInterval(instant, instant))
+                                objects[objNum].curvilinearPositions = moving.CurvilinearTrajectory()
+                    if (warmUpLastInstant is None or instant >= warmUpLastInstant) and objNum in objects:
+                        objects[objNum].timeInterval.last = instant
+                        objects[objNum].curvilinearPositions.addPositionSYL(s, y, lane)
+            except sqlite3.OperationalError as error:
+                printDBError(error)
+        else:
+            print("File type of "+filename+" not supported (only .sqlite and .fzp files)")
         return objects.values()
 
 def selectPDLanes(data, lanes = None):
@@ -810,22 +863,28 @@
     if more than proportionStationaryTime of their total time
     If lanes is not None, only the data for the selected lanes will be provided
     (format as string x_y where x is link index and y is lane index)'''
-    from pandas import read_csv
-    columns = ['NO', '$VEHICLE:SIMSEC', 'POS']
-    if lanes is not None:
-        columns += ['LANE\LINK\NO', 'LANE\INDEX']
-    data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1, usecols = columns, low_memory = lowMemory)
-    data = selectPDLanes(data, lanes)
-    data.sort(['$VEHICLE:SIMSEC'], inplace = True)
+    if filename.endswith(".fzp"):
+        columns = ['NO', '$VEHICLE:SIMSEC', 'POS']
+        if lanes is not None:
+            columns += ['LANE\LINK\NO', 'LANE\INDEX']
+        data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1, usecols = columns, low_memory = lowMemory)
+        data = selectPDLanes(data, lanes)
+        data.sort(['$VEHICLE:SIMSEC'], inplace = True)
 
-    nStationary = 0
-    nVehicles = 0
-    for name, group in data.groupby(['NO'], sort = False):
-        nVehicles += 1
-        positions = array(group['POS'])
-        diff = positions[1:]-positions[:-1]
-        if npsum(diff == 0.) >= proportionStationaryTime*len(positions):
-            nStationary += 1
+        nStationary = 0
+        nVehicles = 0
+        for name, group in data.groupby(['NO'], sort = False):
+            nVehicles += 1
+            positions = array(group['POS'])
+            diff = positions[1:]-positions[:-1]
+            if npsum(diff == 0.) >= proportionStationaryTime*(len(positions)-1):
+                nStationary += 1
+    elif filename.endswith(".sqlite"):
+        # select trajectory_id, t, s_coordinate, speed from curvilinear_positions where trajectory_id between 1860 and 1870 and speed < 0.1
+        # pb of the meaning of proportionStationaryTime in arterial network? Why proportion of existence time?
+        pass
+    else:
+        print("File type of "+filename+" not supported (only .sqlite and .fzp files)")
 
     return nStationary, nVehicles
 
@@ -837,7 +896,6 @@
     (if the time are closer than collisionTimeDifference)
     If lanes is not None, only the data for the selected lanes will be provided
     (format as string x_y where x is link index and y is lane index)'''
-    from pandas import read_csv, merge
     data = read_csv(filename, delimiter=';', comment='*', header=0, skiprows = 1, usecols = ['LANE\LINK\NO', 'LANE\INDEX', '$VEHICLE:SIMSEC', 'NO', 'POS'], low_memory = lowMemory)
     data = selectPDLanes(data, lanes)
     data = data.convert_objects(convert_numeric=True)
@@ -854,6 +912,10 @@
             yidx = diff[diff > 0].argmin()
             if abs(group.loc[xidx, '$VEHICLE:SIMSEC'] - group.loc[yidx, '$VEHICLE:SIMSEC']) <= collisionTimeDifference:
                 nCollisions += 1
+
+    # select TD1.link_id, TD1.lane_id from temp.diff_positions as TD1, temp.diff_positions as TD2 where TD1.link_id = TD2.link_id and TD1.lane_id = TD2.lane_id and TD1.id1 = TD2.id1 and TD1.id2 = TD2.id2 and TD1.t = TD2.t+0.1 and TD1.diff*TD2.diff < 0; # besoin de faire un group by??
+    # create temp table diff_positions as select CP1.t as t, CP1.link_id as link_id, CP1.lane_id as lane_id, CP1.trajectory_id as id1, CP2.trajectory_id as id2, CP1.s_coordinate - CP2.s_coordinate as diff from curvilinear_positions CP1, curvilinear_positions CP2 where CP1.link_id = CP2.link_id and CP1.lane_id = CP2.lane_id and CP1.t = CP2.t and CP1.trajectory_id > CP2.trajectory_id;
+    # SQL select link_id, lane_id, id1, id2, min(diff), max(diff) from (select CP1.t as t, CP1.link_id as link_id, CP1.lane_id as lane_id, CP1.trajectory_id as id1, CP2.trajectory_id as id2, CP1.s_coordinate - CP2.s_coordinate as diff from curvilinear_positions CP1, curvilinear_positions CP2 where CP1.link_id = CP2.link_id and CP1.lane_id = CP2.lane_id and CP1.t = CP2.t and CP1.trajectory_id > CP2.trajectory_id) group by link_id, lane_id, id1, id2 having min(diff)*max(diff) < 0
     return nCollisions
     
 def loadTrajectoriesFromNgsimFile(filename, nObjects = -1, sequenceNum = -1):