diff options
author | Rob Austein <sra@hactrn.net> | 2014-08-19 21:08:13 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2014-08-19 21:08:13 +0000 |
commit | ae65819856649f96d5ac91391606eff91552db39 (patch) | |
tree | 842006115f3685d24c71777bac28bd26d92c4d04 /potpourri/validation-status-sql.py | |
parent | 1085b9d34ba6ca110c427066022135542463d464 (diff) |
Add tarball filenames to session table so we don't have to do all the
work of extracting and parsing before discovering that we've hit a
duplicate. Not sure what equivalent would be for Maildir (maybe
Message-ID?) so deferring that for now.
svn path=/trunk/; revision=5925
Diffstat (limited to 'potpourri/validation-status-sql.py')
-rwxr-xr-x | potpourri/validation-status-sql.py | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/potpourri/validation-status-sql.py b/potpourri/validation-status-sql.py
index d37cb4ba..62e3089e 100755
--- a/potpourri/validation-status-sql.py
+++ b/potpourri/validation-status-sql.py
@@ -60,23 +60,20 @@ if creating:
   db.executescript('''
     CREATE TABLE sessions (
       id INTEGER PRIMARY KEY NOT NULL,
-      session DATETIME NOT NULL,
-      UNIQUE (session));
+      session DATETIME UNIQUE NOT NULL,
+      filename TEXT UNIQUE);
 
     CREATE TABLE uris (
       id INTEGER PRIMARY KEY NOT NULL,
-      uri TEXT NOT NULL,
-      UNIQUE (uri));
+      uri TEXT UNIQUE NOT NULL);
 
     CREATE TABLE codes (
       id INTEGER PRIMARY KEY NOT NULL,
-      code TEXT NOT NULL,
-      UNIQUE (code));
+      code TEXT UNIQUE NOT NULL);
 
     CREATE TABLE generations (
       id INTEGER PRIMARY KEY NOT NULL,
-      generation TEXT,
-      UNIQUE (generation));
+      generation TEXT UNIQUE);
 
     CREATE TABLE events (
       id INTEGER PRIMARY KEY NOT NULL,
@@ -108,9 +105,10 @@ def string_id(table, value):
   return db.execute("INSERT INTO %s (%s) VALUES (?)" % (table, field), (value,)).lastrowid
 
 
-def parse_xml(xml):
+def parse_xml(xml, fn = None):
   try:
-    session_id = db.execute("INSERT INTO sessions (session) VALUES (datetime(?))", (xml.get("date"),)).lastrowid
+    session_id = db.execute("INSERT INTO sessions (session, filename) VALUES (datetime(?), ?)",
+                            (xml.get("date"), fn)).lastrowid
   except sqlite3.IntegrityError:
     return
 
@@ -125,9 +123,11 @@ def parse_xml(xml):
 
 
 def parse_tarball(fn):
+  if db.execute("SELECT filename FROM sessions WHERE filename = ?", (fn,)).fetchone():
+    return
   print "Processing", fn
-  parse_xml(lxml.etree.ElementTree(
-    file = subprocess.Popen(("tar", "Oxf", fn, args.path_within_tarball), stdout = subprocess.PIPE).stdout).getroot())
+  pipe = subprocess.Popen(("tar", "Oxf", fn, args.path_within_tarball), stdout = subprocess.PIPE).stdout
+  parse_xml(lxml.etree.ElementTree(file = pipe).getroot(), fn)
 
 
 if args.mailbox: