diff options
author | Rob Austein <sra@hactrn.net> | 2014-08-20 03:25:26 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2014-08-20 03:25:26 +0000 |
commit | 19aaabec6feb5038e75f3c0868415bccb015b430 (patch) | |
tree | 7446fbf3512cb4916f6d990a93ca85db4f117507 /potpourri/validation-status-sql.py | |
parent | ae65819856649f96d5ac91391606eff91552db39 (diff) |
Restructure, use Message-ID instead of filename when reading Maildir.
svn path=/trunk/; revision=5927
Diffstat (limited to 'potpourri/validation-status-sql.py')
-rwxr-xr-x | potpourri/validation-status-sql.py | 270 |
1 files changed, 157 insertions, 113 deletions
diff --git a/potpourri/validation-status-sql.py b/potpourri/validation-status-sql.py index 62e3089e..858c8a10 100755 --- a/potpourri/validation-status-sql.py +++ b/potpourri/validation-status-sql.py @@ -32,120 +32,164 @@ import argparse import lxml.etree import subprocess -parser = argparse.ArgumentParser( - description = __doc__, - formatter_class = argparse.ArgumentDefaultsHelpFormatter) -group = parser.add_mutually_exclusive_group(required = True) -group.add_argument("--mailbox", "--mb", - help = "Maildir mailbox containing rcynic XML output") -group.add_argument("--tarballs", - help = "directory tree of tar files containing containing rcynic XML output") -parser.add_argument("--database", "--db", - default = "validation-status-sql.db", - help = "name for SQLite3 database") -parser.add_argument("--path-within-tarball", - default = "var/rcynic/data/rcynic.xml", - help = "name of file to extract from tarball(s)") -parser.add_argument("--tar-extensions", nargs = "+", - default = ".tar .tar.gz .tgz .tar.bz2 .tbz .tar.xz .txz".split(), - help = "extensions to recognize as indicating tar files") -args = parser.parse_args() - -creating = not os.path.exists(args.database) -db = sqlite3.connect(args.database) -db.text_factory = str -db.execute("PRAGMA foreign_keys = on") - -if creating: - db.executescript(''' - CREATE TABLE sessions ( - id INTEGER PRIMARY KEY NOT NULL, - session DATETIME UNIQUE NOT NULL, - filename TEXT UNIQUE); - - CREATE TABLE uris ( - id INTEGER PRIMARY KEY NOT NULL, - uri TEXT UNIQUE NOT NULL); - - CREATE TABLE codes ( - id INTEGER PRIMARY KEY NOT NULL, - code TEXT UNIQUE NOT NULL); - - CREATE TABLE generations ( - id INTEGER PRIMARY KEY NOT NULL, - generation TEXT UNIQUE); - - CREATE TABLE events ( - id INTEGER PRIMARY KEY NOT NULL, - timestamp DATETIME NOT NULL, - session_id INTEGER NOT NULL REFERENCES sessions(id) ON DELETE RESTRICT ON UPDATE RESTRICT, - generation_id INTEGER NOT NULL REFERENCES generations(id) ON DELETE RESTRICT ON UPDATE RESTRICT, - code_id INTEGER NOT NULL REFERENCES codes(id) ON DELETE RESTRICT ON UPDATE RESTRICT, - uri_id INTEGER NOT NULL REFERENCES uris(id) ON DELETE RESTRICT ON UPDATE RESTRICT, - UNIQUE (timestamp, generation_id, code_id, uri_id)); - - CREATE VIEW status AS - SELECT events.id, session, timestamp, generation, uri, code - FROM events - JOIN sessions ON sessions.id = events.session_id - JOIN uris ON uris.id = events.uri_id - JOIN codes ON codes.id = events.code_id - JOIN generations ON generations.id = events.generation_id; - ''') - - -def string_id(table, value): - field = table.rstrip("s") - try: - if value is None: - return db.execute("SELECT id FROM %s WHERE %s IS NULL" % (table, field)).fetchone()[0] +class Parser(object): + + @staticmethod + def main(): + parser = argparse.ArgumentParser( + description = __doc__, + formatter_class = argparse.ArgumentDefaultsHelpFormatter) + group = parser.add_mutually_exclusive_group(required = True) + group.add_argument("--mailbox", "--mb", + help = "Maildir mailbox containing rcynic XML output") + group.add_argument("--tarballs", + help = "directory tree of tar files containing containing rcynic XML output") + parser.add_argument("--database", "--db", + default = "validation-status-sql.db", + help = "SQLite3 database") + parser.add_argument("--path-within-tarball", + default = "var/rcynic/data/rcynic.xml", + help = "rcynic.xml path name within tarball(s)") + parser.add_argument("--tar-extensions", nargs = "+", + default = ".tar .tar.gz .tgz .tar.bz2 .tbz .tar.xz .txz".split(), + help = "extensions to recognize as indicating tar files") + args = parser.parse_args() + if args.mailbox: + ParserMailbox(args) + else: + ParserTarball(args) + + def __init__(self, args): + self.args = args + self.init_sql() + self.init_hook() + self.parsed = 1 + for self.current, self.iterval in enumerate(self.iterator, 1): + self.parse_xml() + if self.parsed > 1: + sys.stderr.write("\n") + db.close() + + + def init_sql(self): + creating = not os.path.exists(self.args.database) + self.db = sqlite3.connect(self.args.database) + self.db.text_factory = str + self.db.execute("PRAGMA foreign_keys = on") + + if creating: + self.db.executescript(''' + CREATE TABLE sessions ( + id INTEGER PRIMARY KEY NOT NULL, + session DATETIME UNIQUE NOT NULL, + handle TEXT UNIQUE NOT NULL); + + CREATE TABLE uris ( + id INTEGER PRIMARY KEY NOT NULL, + uri TEXT UNIQUE NOT NULL); + + CREATE TABLE codes ( + id INTEGER PRIMARY KEY NOT NULL, + code TEXT UNIQUE NOT NULL); + + CREATE TABLE generations ( + id INTEGER PRIMARY KEY NOT NULL, + generation TEXT UNIQUE); + + CREATE TABLE events ( + id INTEGER PRIMARY KEY NOT NULL, + timestamp DATETIME NOT NULL, + session_id INTEGER NOT NULL REFERENCES sessions(id) ON DELETE RESTRICT ON UPDATE RESTRICT, + generation_id INTEGER NOT NULL REFERENCES generations(id) ON DELETE RESTRICT ON UPDATE RESTRICT, + code_id INTEGER NOT NULL REFERENCES codes(id) ON DELETE RESTRICT ON UPDATE RESTRICT, + uri_id INTEGER NOT NULL REFERENCES uris(id) ON DELETE RESTRICT ON UPDATE RESTRICT, + UNIQUE (timestamp, generation_id, code_id, uri_id)); + + CREATE VIEW status AS + SELECT events.id, handle, session, timestamp, generation, uri, code + FROM events + JOIN sessions ON sessions.id = events.session_id + JOIN uris ON uris.id = events.uri_id + JOIN codes ON codes.id = events.code_id + JOIN generations ON generations.id = events.generation_id; + ''') + + def string_id(self, table, value): + field = table.rstrip("s") + try: + if value is None: + return self.db.execute("SELECT id FROM %s WHERE %s IS NULL" % (table, field)).fetchone()[0] + else: + return self.db.execute("SELECT id FROM %s WHERE %s = ?" % (table, field), (value,)).fetchone()[0] + except: + return self.db.execute("INSERT INTO %s (%s) VALUES (?)" % (table, field), (value,)).lastrowid + + + def parse_xml(self): + sys.stderr.write("\r%s %d/%d/%d...%s " % ("|\\-/"[self.current & 3], + self.current, self.parsed, self.total, self.handle)) + if self.db.execute("SELECT handle FROM sessions WHERE handle = ?", (self.handle,)).fetchone(): + return + xml = self.read_xml() + with self.db: + session_id = self.db.execute("INSERT INTO sessions (session, handle) VALUES (datetime(?), ?)", + (xml.get("date"), self.handle)).lastrowid + self.db.executemany("INSERT INTO events (session_id, timestamp, generation_id, code_id, uri_id) " + "VALUES (?, datetime(?), ?, ?, ?)", + ((session_id, + x.get("timestamp"), + self.string_id("generations", x.get("generation")), + self.string_id("codes", x.get("status")), + self.string_id("uris", x.text.strip())) + for x in xml.findall("validation_status"))) + self.parsed += 1 + + +class ParserTarball(Parser): + + def init_hook(self): + self.total = 0 + for fn in self.iter_tarball_names(): + self.total += 1 + self.iterator = self.iter_tarball_names() + + @property + def handle(self): + return self.iterval + + def read_xml(self): + return lxml.etree.ElementTree( + file = subprocess.Popen(("tar", "Oxf", self.iterval, self.args.path_within_tarball), + stdout = subprocess.PIPE).stdout).getroot() + + def iter_tarball_names(self): + if os.path.isdir(self.args.tarballs): + for root, dirs, files in os.walk(self.args.tarballs): + for fn in files: + if any(fn.endswith(ext) for ext in self.args.tar_extensions): + yield os.path.join(root, fn) else: - return db.execute("SELECT id FROM %s WHERE %s = ?" % (table, field), (value,)).fetchone()[0] - except: - return db.execute("INSERT INTO %s (%s) VALUES (?)" % (table, field), (value,)).lastrowid + yield self.args.tarballs + + +class ParserMailbox(Parser): + def init_hook(self): + self.mb = mailbox.Maildir(self.args.mailbox, factory = None, create = False) + self.total = len(self.mb) + self.iterator = self.mb.iterkeys() -def parse_xml(xml, fn = None): + @property + def handle(self): + return self.mb[self.iterval].get("Message-ID") + + def read_xml(self): + return lxml.etree.XML(self.mb[self.iterval].get_payload()) + + +if __name__ == "__main__": try: - session_id = db.execute("INSERT INTO sessions (session, filename) VALUES (datetime(?), ?)", - (xml.get("date"), fn)).lastrowid - except sqlite3.IntegrityError: - return - - with db: - db.executemany("INSERT INTO events (session_id, timestamp, generation_id, code_id, uri_id) VALUES (?, datetime(?), ?, ?, ?)", - ((session_id, - x.get("timestamp"), - string_id("generations", x.get("generation")), - string_id("codes", x.get("status")), - string_id("uris", x.text.strip())) - for x in xml.findall("validation_status"))) - - -def parse_tarball(fn): - if db.execute("SELECT filename FROM sessions WHERE filename = ?", (fn,)).fetchone(): - return - print "Processing", fn - pipe = subprocess.Popen(("tar", "Oxf", fn, args.path_within_tarball), stdout = subprocess.PIPE).stdout - parse_xml(lxml.etree.ElementTree(file = pipe).getroot(), fn) - - -if args.mailbox: - mb = mailbox.Maildir(args.mailbox, factory = None, create = False) - for i, key in enumerate(mb.iterkeys(), 1): - sys.stderr.write("\r%s %d/%d..." % ("|\\-/"[i & 3], i, len(mb))) - parse_xml(lxml.etree.XML(mb[key].get_payload())) - sys.stderr.write("\n") - -elif not os.path.isdir(args.tarballs): - parse_tarball(args.tarballs) - -else: - if os.path.isdir(args.tarballs): - for root, dirs, files in os.walk(args.tarballs): - for fn in files: - if any(fn.endswith(ext) for ext in args.tar_extensions): - parse_tarball(os.path.join(root, fn)) - - -db.close() + Parser.main() + except KeyboardInterrupt: + pass + |