changeset 714:d6c69d3d09e5

developing python performance tests for SQLite
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Fri, 24 Jul 2015 17:58:59 -0400
parents b0cb644950aa
children a05f79c74d6d 9d6cd4e8dca3
files python/storage.py samples/performances.txt scripts/performance.py
diffstat 3 files changed, 113 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/python/storage.py	Thu Jul 23 09:31:47 2015 -0400
+++ b/python/storage.py	Fri Jul 24 17:58:59 2015 -0400
@@ -34,19 +34,47 @@
     except sqlite3.OperationalError as error:
         printDBError(error)
 
-def createIndex(filename, tableName, columnName, createAnyway = False):
+def createIndex(connection, tableName, columnName, unique = False):
     '''Creates an index for the column in the table
     I will make querying with a condition on this column faster'''
     try:
-        connection = sqlite3.connect(filename)
+        #connection = sqlite3.connect(filename)
         cursor = connection.cursor()
-        if createAnyway:
-            notExists = ""
+        s = "CREATE "
+        if unique:
+            s += "UNIQUE "
+        cursor.execute(s+"INDEX IF NOT EXISTS "+tableName+"_"+columnName+"_index ON "+tableName+"("+columnName+")")
+        connection.commit()
+        #connection.close()
+    except sqlite3.OperationalError as error:
+        printDBError(error)
+
+def getNumberRowsTable(connection, tableName, columnName = None):
+    '''Returns the number of rows for the table
+    If columnName is not None, means we want the number of distinct values for that column
+    (otherwise, we can just count(*))'''
+    try:
+        cursor = connection.cursor()
+        if columnName is None:
+            cursor.execute("SELECT COUNT(*) from "+tableName)
         else:
-            notExists = "IF NOT EXISTS "
-        cursor.execute("CREATE INDEX "+notExists+tableName+"_"+columnName+"_index ON "+tableName+"("+columnName+")")
-        connection.commit()
-        connection.close()
+            cursor.execute("SELECT COUNT(DISTINCT "+columnName+") from "+tableName)
+        return cursor.fetchone()[0]
+    except sqlite3.OperationalError as error:
+        printDBError(error)
+
+def getMinMax(connection, tableName, columnName, minmax):
+    '''Returns max/min or both for given column in table
+    minmax must be string max, min or minmax'''
+    try:
+        cursor = connection.cursor()
+        if minmax == 'min' or minmax == 'max':
+            cursor.execute("SELECT "+minmax+"("+columnName+") from "+tableName)
+        elif minmax == 'minmax':
+            cursor.execute("SELECT MIN("+columnName+"), MAX("+columnName+") from "+tableName)
+        else:
+            print("Argument minmax unknown: {}".format(minmax))
+        return cursor.fetchone()[0]
     except sqlite3.OperationalError as error:
         printDBError(error)
 
--- a/samples/performances.txt	Thu Jul 23 09:31:47 2015 -0400
+++ b/samples/performances.txt	Fri Jul 24 17:58:59 2015 -0400
@@ -51,3 +51,8 @@
 sys	0m1.265s
 Cleaning up!
 
+##############################
+indexed: positions et velocities on trajectory_id
+time feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg --database-filename tmp-notindexed.sqlite --gf >> log.txt && time feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg --database-filename tmp.sqlite --gf >> log-notindexed.txt
+feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg      19985.11s user 24.37s system 99% cpu 5:34:01.61 total
+feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg      20246.41s user 25.64s system 99% cpu 5:38:38.87 total
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/performance.py	Fri Jul 24 17:58:59 2015 -0400
@@ -0,0 +1,72 @@
+#! /usr/bin/env python
+
+import sys, shutil, os, sqlite3, timeit#, argparse
+
+import storage
+
+if len(sys.argv) >= 2:
+    dbFilename = sys.argv[1]
+else:
+    dbFilename="./samples/laurier.sqlite"
+
+print("Performance report using {}".format(dbFilename))
+shutil.copy(dbFilename, dbFilename+'.test')
+dbFilename=dbFilename+'.test'
+connection = sqlite3.connect(dbFilename)
+
+nFeatures=storage.getNumberRowsTable(connection, "positions", "trajectory_id")
+print dbFilename, nFeatures
+
+####
+# test query tmp trajectory instant table
+####
+def selectTrajectoryIdInstant(connection, lastInstant):
+    cursor = connection.cursor()
+    for i in xrange(lastInstant):
+	cursor.execute("select trajectory_id from trajectory_instants where last_instant = {}".format(lastInstant))
+        cursor.fetchall()
+
+def test2Setup(connection):
+    cursor = connection.cursor()
+    cursor.execute("CREATE TEMP TABLE IF NOT EXISTS trajectory_instants AS select trajectory_id, min(frame_number) as first_instant, max(frame_number) as last_instant, max(frame_number)-min(frame_number)+1 as length from positions group by trajectory_id")
+    return storage.getMinMax(connection, "trajectory_instants", "last_instant", "max")
+
+# lastInstant = test2Setup(connection)
+# print timeit.timeit("selectTrajectoryIdInstant(connection, lastInstant)", setup="from __main__ import selectTrajectoryIdInstant, connection, lastInstant", number = 100)
+# storage.createIndex(connection, "trajectory_instants", "last_instant")
+# print timeit.timeit("selectTrajectoryIdInstant(connection, lastInstant)", setup="from __main__ import selectTrajectoryIdInstant, connection, lastInstant", number = 100)
+# storage.createIndex(connection, "trajectory_instants", "trajectory_id", True)
+# print timeit.timeit("selectTrajectoryIdInstant(connection, lastInstant)", setup="from __main__ import selectTrajectoryIdInstant, connection, lastInstant", number = 100)
+
+
+####
+# test query positions/velocities in feature grouping
+####
+def selectTrajectories(connection, nFeatures):
+    cursor = connection.cursor()
+    for i in xrange(nFeatures):
+	cursor.execute("select * from positions where trajectory_id = {} order by frame_number".format(i))
+        cursor.fetchall()
+
+def dropIndices(connection):
+    cursor = connection.cursor()
+    cursor.execute("drop index if exists positions_trajectory_id_index")
+    #sqlite3 $dbFilename "drop index if exists frame_number_index"
+    #sqlite3 $dbFilename "drop index if exists tid_frame_index"
+    #sqlite3 $dbFilename "drop index if exists of_trajectory_id_index"
+    connection.commit()
+
+def test1Positions():
+    print('''####
+    test query positions
+    ####''')
+    print("no index")
+    dropIndices(connection)
+    print timeit.timeit("selectTrajectories(connection, nFeatures)", setup="from __main__ import selectTrajectories, connection, nFeatures", number = 100)
+    ####
+    print("with index on trajectory_id")
+    storage.createIndex(connection, "positions", "trajectory_id")#sqlite3 $dbFilename "create index trajectory_id_index on positions(trajectory_id)"
+    print timeit.timeit("selectTrajectories(connection, nFeatures)", setup="from __main__ import selectTrajectories, connection, nFeatures", number = 100)
+
+#### Cleanup
+os.remove(dbFilename)