Mercurial Hosting > traffic-intelligence
changeset 714:d6c69d3d09e5
developing python performance tests for SQLite
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Fri, 24 Jul 2015 17:58:59 -0400 |
parents | b0cb644950aa |
children | a05f79c74d6d 9d6cd4e8dca3 |
files | python/storage.py samples/performances.txt scripts/performance.py |
diffstat | 3 files changed, 113 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/python/storage.py Thu Jul 23 09:31:47 2015 -0400 +++ b/python/storage.py Fri Jul 24 17:58:59 2015 -0400 @@ -34,19 +34,47 @@ except sqlite3.OperationalError as error: printDBError(error) -def createIndex(filename, tableName, columnName, createAnyway = False): +def createIndex(connection, tableName, columnName, unique = False): '''Creates an index for the column in the table I will make querying with a condition on this column faster''' try: - connection = sqlite3.connect(filename) + #connection = sqlite3.connect(filename) cursor = connection.cursor() - if createAnyway: - notExists = "" + s = "CREATE " + if unique: + s += "UNIQUE " + cursor.execute(s+"INDEX IF NOT EXISTS "+tableName+"_"+columnName+"_index ON "+tableName+"("+columnName+")") + connection.commit() + #connection.close() + except sqlite3.OperationalError as error: + printDBError(error) + +def getNumberRowsTable(connection, tableName, columnName = None): + '''Returns the number of rows for the table + If columnName is not None, means we want the number of distinct values for that column + (otherwise, we can just count(*))''' + try: + cursor = connection.cursor() + if columnName is None: + cursor.execute("SELECT COUNT(*) from "+tableName) else: - notExists = "IF NOT EXISTS " - cursor.execute("CREATE INDEX "+notExists+tableName+"_"+columnName+"_index ON "+tableName+"("+columnName+")") - connection.commit() - connection.close() + cursor.execute("SELECT COUNT(DISTINCT "+columnName+") from "+tableName) + return cursor.fetchone()[0] + except sqlite3.OperationalError as error: + printDBError(error) + +def getMinMax(connection, tableName, columnName, minmax): + '''Returns max/min or both for given column in table + minmax must be string max, min or minmax''' + try: + cursor = connection.cursor() + if minmax == 'min' or minmax == 'max': + cursor.execute("SELECT "+minmax+"("+columnName+") from "+tableName) + elif minmax == 'minmax': + cursor.execute("SELECT MIN("+columnName+"), MAX("+columnName+") from "+tableName) + else: + print("Argument minmax unknown: {}".format(minmax)) + return cursor.fetchone()[0] except sqlite3.OperationalError as error: printDBError(error)
--- a/samples/performances.txt Thu Jul 23 09:31:47 2015 -0400 +++ b/samples/performances.txt Fri Jul 24 17:58:59 2015 -0400 @@ -51,3 +51,8 @@ sys 0m1.265s Cleaning up! +############################## +indexed: positions et velocities on trajectory_id +time feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg --database-filename tmp-notindexed.sqlite --gf >> log.txt && time feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg --database-filename tmp.sqlite --gf >> log-notindexed.txt +feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg 19985.11s user 24.37s system 99% cpu 5:34:01.61 total +feature-based-tracking ~/Research/Data/nyc/125th-hancock/tracking-125.cfg 20246.41s user 25.64s system 99% cpu 5:38:38.87 total
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/performance.py Fri Jul 24 17:58:59 2015 -0400 @@ -0,0 +1,72 @@ +#! /usr/bin/env python + +import sys, shutil, os, sqlite3, timeit#, argparse + +import storage + +if len(sys.argv) >= 2: + dbFilename = sys.argv[1] +else: + dbFilename="./samples/laurier.sqlite" + +print("Performance report using {}".format(dbFilename)) +shutil.copy(dbFilename, dbFilename+'.test') +dbFilename=dbFilename+'.test' +connection = sqlite3.connect(dbFilename) + +nFeatures=storage.getNumberRowsTable(connection, "positions", "trajectory_id") +print dbFilename, nFeatures + +#### +# test query tmp trajectory instant table +#### +def selectTrajectoryIdInstant(connection, lastInstant): + cursor = connection.cursor() + for i in xrange(lastInstant): + cursor.execute("select trajectory_id from trajectory_instants where last_instant = {}".format(lastInstant)) + cursor.fetchall() + +def test2Setup(connection): + cursor = connection.cursor() + cursor.execute("CREATE TEMP TABLE IF NOT EXISTS trajectory_instants AS select trajectory_id, min(frame_number) as first_instant, max(frame_number) as last_instant, max(frame_number)-min(frame_number)+1 as length from positions group by trajectory_id") + return storage.getMinMax(connection, "trajectory_instants", "last_instant", "max") + +# lastInstant = test2Setup(connection) +# print timeit.timeit("selectTrajectoryIdInstant(connection, lastInstant)", setup="from __main__ import selectTrajectoryIdInstant, connection, lastInstant", number = 100) +# storage.createIndex(connection, "trajectory_instants", "last_instant") +# print timeit.timeit("selectTrajectoryIdInstant(connection, lastInstant)", setup="from __main__ import selectTrajectoryIdInstant, connection, lastInstant", number = 100) +# storage.createIndex(connection, "trajectory_instants", "trajectory_id", True) +# print timeit.timeit("selectTrajectoryIdInstant(connection, lastInstant)", setup="from __main__ import selectTrajectoryIdInstant, connection, lastInstant", number = 100) + + +#### +# test query positions/velocities in feature grouping +#### +def selectTrajectories(connection, nFeatures): + cursor = connection.cursor() + for i in xrange(nFeatures): + cursor.execute("select * from positions where trajectory_id = {} order by frame_number".format(i)) + cursor.fetchall() + +def dropIndices(connection): + cursor = connection.cursor() + cursor.execute("drop index if exists positions_trajectory_id_index") + #sqlite3 $dbFilename "drop index if exists frame_number_index" + #sqlite3 $dbFilename "drop index if exists tid_frame_index" + #sqlite3 $dbFilename "drop index if exists of_trajectory_id_index" + connection.commit() + +def test1Positions(): + print('''#### + test query positions + ####''') + print("no index") + dropIndices(connection) + print timeit.timeit("selectTrajectories(connection, nFeatures)", setup="from __main__ import selectTrajectories, connection, nFeatures", number = 100) + #### + print("with index on trajectory_id") + storage.createIndex(connection, "positions", "trajectory_id")#sqlite3 $dbFilename "create index trajectory_id_index on positions(trajectory_id)" + print timeit.timeit("selectTrajectories(connection, nFeatures)", setup="from __main__ import selectTrajectories, connection, nFeatures", number = 100) + +#### Cleanup +os.remove(dbFilename)