changeset 715:a05f79c74d6d dev

merged default with dev
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Sat, 25 Jul 2015 22:36:39 -0400
parents 29daabe094fe (current diff) d6c69d3d09e5 (diff)
children 85af65b6d531
files
diffstat 5 files changed, 261 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/c/feature-based-tracking.cpp	Wed Jul 22 14:18:06 2015 -0400
+++ b/c/feature-based-tracking.cpp	Sat Jul 25 22:36:39 2015 -0400
@@ -272,6 +272,8 @@
   unsigned int savedObjectId=0;
 
   trajectoryDB->createInstants("table");
+  //trajectoryDB->createIndex("positions","trajectory_id"); // does not seem to make loading features faster
+  //trajectoryDB->createIndex("velocities","trajectory_id");
 
   unsigned int maxTrajectoryLength = 0;
   success = trajectoryDB->maxTrajectoryLength(maxTrajectoryLength);
--- a/python/storage.py	Wed Jul 22 14:18:06 2015 -0400
+++ b/python/storage.py	Sat Jul 25 22:36:39 2015 -0400
@@ -34,6 +34,50 @@
     except sqlite3.OperationalError as error:
         printDBError(error)
 
+def createIndex(connection, tableName, columnName, unique = False):
+    '''Creates an index for the column in the table
+    I will make querying with a condition on this column faster'''
+    try:
+        #connection = sqlite3.connect(filename)
+        cursor = connection.cursor()
+        s = "CREATE "
+        if unique:
+            s += "UNIQUE "
+        cursor.execute(s+"INDEX IF NOT EXISTS "+tableName+"_"+columnName+"_index ON "+tableName+"("+columnName+")")
+        connection.commit()
+        #connection.close()
+    except sqlite3.OperationalError as error:
+        printDBError(error)
+
+def getNumberRowsTable(connection, tableName, columnName = None):
+    '''Returns the number of rows for the table
+    If columnName is not None, means we want the number of distinct values for that column
+    (otherwise, we can just count(*))'''
+    try:
+        cursor = connection.cursor()
+        if columnName is None:
+            cursor.execute("SELECT COUNT(*) from "+tableName)
+        else:
+            cursor.execute("SELECT COUNT(DISTINCT "+columnName+") from "+tableName)
+        return cursor.fetchone()[0]
+    except sqlite3.OperationalError as error:
+        printDBError(error)
+
+def getMinMax(connection, tableName, columnName, minmax):
+    '''Returns max/min or both for given column in table
+    minmax must be string max, min or minmax'''
+    try:
+        cursor = connection.cursor()
+        if minmax == 'min' or minmax == 'max':
+            cursor.execute("SELECT "+minmax+"("+columnName+") from "+tableName)
+        elif minmax == 'minmax':
+            cursor.execute("SELECT MIN("+columnName+"), MAX("+columnName+") from "+tableName)
+        else:
+            print("Argument minmax unknown: {}".format(minmax))
+        return cursor.fetchone()[0]
+    except sqlite3.OperationalError as error:
+        printDBError(error)
+
 # TODO: add test if database connection is open
 # IO to sqlite
 def writeTrajectoriesToSqlite(objects, outputFilename, trajectoryType, objectNumbers = -1):
@@ -322,7 +366,7 @@
             queryStatement = 'SELECT OF.object_id, P.frame_number, avg(P.x_coordinate), avg(P.y_coordinate) from '+tableName+' P, objects_features OF where P.trajectory_id = OF.trajectory_id'
             if objectNumbers is not None:
                 queryStatement += ' and OF.object_id '+objectCriteria
-            queryStatement += ' group by OF.object_id, P.frame_number ORDER BY OF.object_id, P.frame_number'
+            queryStatement += ' GROUP BY OF.object_id, P.frame_number ORDER BY OF.object_id, P.frame_number'
             cursor.execute(queryStatement)
             logging.debug(queryStatement)
         elif trajectoryType in ['bbtop', 'bbbottom']:
@@ -332,7 +376,7 @@
                 corner = 'bottom_right'
             queryStatement = 'SELECT object_id, frame_number, x_'+corner+', y_'+corner+' FROM '+tableName
             if objectNumbers is not None:
-                queryStatement += ' where object_id '+objectCriteria
+                queryStatement += ' WHERE object_id '+objectCriteria
             queryStatement += ' ORDER BY object_id, frame_number'
             cursor.execute(queryStatement)
             logging.debug(queryStatement)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samples/performances.txt	Sat Jul 25 22:36:39 2015 -0400
@@ -0,0 +1,58 @@
+Performance report using ../../Data/norway/2_video/20130606-0830.sqlite
+####
+test query positions
+####
+no index
+
+real	7m31.857s
+user	2m20.962s
+sys	5m23.521s
+with index on trajectory_id
+
+real	7m43.479s
+user	2m25.793s
+sys	5m27.585s
+with index on trajectory_id and frame_number
+
+real	7m36.917s
+user	2m21.628s
+sys	5m26.639s
+with index on the 2 columns trajectory_id
+
+real	7m28.447s
+user	2m15.028s
+sys	5m26.697s
+query all feature positions, order by id and frame_number (index/no index)
+
+real	0m16.219s
+user	0m15.988s
+sys	0m0.245s
+
+real	0m16.185s
+user	0m15.927s
+sys	0m0.272s
+####
+test query object positions
+####
+no index
+
+real	0m22.119s
+user	0m20.147s
+sys	0m1.406s
+index on trajectory_id
+
+real	0m21.850s
+user	0m19.834s
+sys	0m1.401s
+index on trajectory_id in positions and objects_features
+
+real	0m25.066s
+user	0m23.315s
+sys	0m1.265s
+Cleaning up!
+
+##############################
+indexed: positions et velocities on trajectory_id
+time feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg --database-filename tmp-notindexed.sqlite --gf >> log.txt && time feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg --database-filename tmp.sqlite --gf >> log-notindexed.txt
+feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg      19985.11s user 24.37s system 99% cpu 5:34:01.61 total
+feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg      20246.41s user 25.64s system 99% cpu 5:38:38.87 total
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/performance.py	Sat Jul 25 22:36:39 2015 -0400
@@ -0,0 +1,72 @@
+#! /usr/bin/env python
+
+import sys, shutil, os, sqlite3, timeit#, argparse
+
+import storage
+
+if len(sys.argv) >= 2:
+    dbFilename = sys.argv[1]
+else:
+    dbFilename="./samples/laurier.sqlite"
+
+print("Performance report using {}".format(dbFilename))
+shutil.copy(dbFilename, dbFilename+'.test')
+dbFilename=dbFilename+'.test'
+connection = sqlite3.connect(dbFilename)
+
+nFeatures=storage.getNumberRowsTable(connection, "positions", "trajectory_id")
+print dbFilename, nFeatures
+
+####
+# test query tmp trajectory instant table
+####
+def selectTrajectoryIdInstant(connection, lastInstant):
+    cursor = connection.cursor()
+    for i in xrange(lastInstant):
+	cursor.execute("select trajectory_id from trajectory_instants where last_instant = {}".format(lastInstant))
+        cursor.fetchall()
+
+def test2Setup(connection):
+    cursor = connection.cursor()
+    cursor.execute("CREATE TEMP TABLE IF NOT EXISTS trajectory_instants AS select trajectory_id, min(frame_number) as first_instant, max(frame_number) as last_instant, max(frame_number)-min(frame_number)+1 as length from positions group by trajectory_id")
+    return storage.getMinMax(connection, "trajectory_instants", "last_instant", "max")
+
+# lastInstant = test2Setup(connection)
+# print timeit.timeit("selectTrajectoryIdInstant(connection, lastInstant)", setup="from __main__ import selectTrajectoryIdInstant, connection, lastInstant", number = 100)
+# storage.createIndex(connection, "trajectory_instants", "last_instant")
+# print timeit.timeit("selectTrajectoryIdInstant(connection, lastInstant)", setup="from __main__ import selectTrajectoryIdInstant, connection, lastInstant", number = 100)
+# storage.createIndex(connection, "trajectory_instants", "trajectory_id", True)
+# print timeit.timeit("selectTrajectoryIdInstant(connection, lastInstant)", setup="from __main__ import selectTrajectoryIdInstant, connection, lastInstant", number = 100)
+
+
+####
+# test query positions/velocities in feature grouping
+####
+def selectTrajectories(connection, nFeatures):
+    cursor = connection.cursor()
+    for i in xrange(nFeatures):
+	cursor.execute("select * from positions where trajectory_id = {} order by frame_number".format(i))
+        cursor.fetchall()
+
+def dropIndices(connection):
+    cursor = connection.cursor()
+    cursor.execute("drop index if exists positions_trajectory_id_index")
+    #sqlite3 $dbFilename "drop index if exists frame_number_index"
+    #sqlite3 $dbFilename "drop index if exists tid_frame_index"
+    #sqlite3 $dbFilename "drop index if exists of_trajectory_id_index"
+    connection.commit()
+
+def test1Positions():
+    print('''####
+    test query positions
+    ####''')
+    print("no index")
+    dropIndices(connection)
+    print timeit.timeit("selectTrajectories(connection, nFeatures)", setup="from __main__ import selectTrajectories, connection, nFeatures", number = 100)
+    ####
+    print("with index on trajectory_id")
+    storage.createIndex(connection, "positions", "trajectory_id")#sqlite3 $dbFilename "create index trajectory_id_index on positions(trajectory_id)"
+    print timeit.timeit("selectTrajectories(connection, nFeatures)", setup="from __main__ import selectTrajectories, connection, nFeatures", number = 100)
+
+#### Cleanup
+os.remove(dbFilename)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/performance.sh	Sat Jul 25 22:36:39 2015 -0400
@@ -0,0 +1,83 @@
+#!/bin/bash
+if [ $# -ge 1 ];
+then
+    dbFilename=$1
+else
+    dbFilename="./samples/laurier.sqlite"
+fi
+echo "Performance report using "$dbFilename
+cp $dbFilename $dbFilename.test
+dbFilename=$dbFilename.test
+nFeatures=`sqlite3 $dbFilename "select count(distinct trajectory_id) from positions"`
+
+####
+# test query positions/velocities in feature grouping
+####
+echo "####"
+echo "test query positions"
+echo "####"
+
+selectTrajectories() {
+    for (( i=1; i<=$nFeatures; i++ ))
+    do
+	sqlite3 $dbFilename "select * from positions where trajectory_id = ${i} order by frame_number" > /dev/null
+    done
+}
+
+dropIndices() {
+    sqlite3 $dbFilename "drop index if exists trajectory_id_index"
+    sqlite3 $dbFilename "drop index if exists frame_number_index"
+    sqlite3 $dbFilename "drop index if exists tid_frame_index"
+    sqlite3 $dbFilename "drop index if exists of_trajectory_id_index"
+}
+
+echo "no index"
+dropIndices
+time selectTrajectories
+####
+echo "with index on trajectory_id"
+sqlite3 $dbFilename "create index trajectory_id_index on positions(trajectory_id)"
+time selectTrajectories
+####
+echo "with index on trajectory_id and frame_number"
+sqlite3 $dbFilename "create index frame_number_index on positions(frame_number)"
+time selectTrajectories
+####
+echo "with index on the 2 columns trajectory_id"
+dropIndices
+sqlite3 $dbFilename "create index tid_frame_index on positions(trajectory_id,frame_number)"
+time selectTrajectories
+####
+echo "query all feature positions, order by id and frame_number (index/no index)"
+time (sqlite3 $dbFilename "select * from positions order by trajectory_id, frame_number" > /dev/null)
+dropIndices
+time (sqlite3 $dbFilename "select * from positions order by trajectory_id, frame_number" > /dev/null)
+# no effect: probably because already ordered
+
+####
+# test query positions/velocities in feature grouping
+####
+echo "####"
+echo "test query object positions"
+echo "####"
+
+selectObjectTrajectories() {
+    sqlite3 $dbFilename "SELECT OF.object_id, P.frame_number, avg(P.x_coordinate), avg(P.y_coordinate) from positions P, objects_features OF where P.trajectory_id = OF.trajectory_id group by OF.object_id, P.frame_number ORDER BY OF.object_id, P.frame_number" > /dev/null
+}
+echo "no index"
+dropIndices
+time selectObjectTrajectories
+####
+echo "index on trajectory_id"
+sqlite3 $dbFilename "create index trajectory_id_index on positions(trajectory_id)"
+time selectObjectTrajectories
+####
+echo "index on trajectory_id in positions and objects_features"
+sqlite3 $dbFilename "create index of_trajectory_id_index on objects_features(trajectory_id)"
+time selectObjectTrajectories
+
+
+###
+echo "Cleaning up!"
+#dropIndices
+rm $dbFilename