diff options
Diffstat (limited to 'potpourri/validation-status-sql.py')
-rwxr-xr-x | potpourri/validation-status-sql.py | 333 |
1 files changed, 166 insertions, 167 deletions
diff --git a/potpourri/validation-status-sql.py b/potpourri/validation-status-sql.py index fc52e64b..646d7d9b 100755 --- a/potpourri/validation-status-sql.py +++ b/potpourri/validation-status-sql.py @@ -34,186 +34,185 @@ import subprocess class Parser(object): - @staticmethod - def main(): - parser = argparse.ArgumentParser( - description = __doc__, - formatter_class = argparse.ArgumentDefaultsHelpFormatter) - group = parser.add_mutually_exclusive_group(required = True) - group.add_argument("--mailbox", "--mb", - help = "Maildir mailbox containing rcynic XML output") - group.add_argument("--tarballs", - help = "directory tree of tar files containing containing rcynic XML output") - parser.add_argument("--database", "--db", - default = "validation-status-sql.db", - help = "SQLite3 database") - parser.add_argument("--path-within-tarball", - default = "var/rcynic/data/rcynic.xml", - help = "rcynic.xml path name within tarball(s)") - parser.add_argument("--tar-extensions", nargs = "+", - default = ".tar .tar.gz .tgz .tar.bz2 .tbz .tar.xz .txz".split(), - help = "extensions to recognize as indicating tar files") - args = parser.parse_args() - if args.mailbox: - ParserMailbox(args) - else: - ParserTarball(args) - - def __init__(self, args): - self.args = args - self.init_sql() - self.init_hook() - self.index1() - self.parsed = 1 - for self.current, self.iterval in enumerate(self.iterator, 1): - self.parse_xml() - if self.parsed > 1: - sys.stderr.write("\n") - self.index2() - self.db.close() - - - def init_sql(self): - creating = not os.path.exists(self.args.database) - self.db = sqlite3.connect(self.args.database) - self.db.text_factory = str - self.db.executescript(''' - PRAGMA foreign_keys = off; - PRAGMA synchronous = off; - PRAGMA count_changes = off; - ''') - - if creating: - self.db.executescript(''' - CREATE TABLE sessions ( - session_id INTEGER PRIMARY KEY NOT NULL, - session DATETIME NOT NULL, - handle TEXT NOT NULL - ); - - CREATE TABLE uris ( - uri_id INTEGER PRIMARY KEY NOT NULL, - uri TEXT NOT NULL - ); - - CREATE TABLE codes ( - code_id INTEGER PRIMARY KEY NOT NULL, - code TEXT NOT NULL - ); - - CREATE TABLE generations ( - generation_id INTEGER PRIMARY KEY NOT NULL, - generation TEXT NOT NULL - ); - - CREATE TABLE events ( - id INTEGER PRIMARY KEY NOT NULL, - timestamp DATETIME NOT NULL, - session_id INTEGER NOT NULL REFERENCES sessions (session_id) ON DELETE RESTRICT ON UPDATE RESTRICT, - generation_id INTEGER NOT NULL REFERENCES generations (generation_id) ON DELETE RESTRICT ON UPDATE RESTRICT, - code_id INTEGER NOT NULL REFERENCES codes (code_id) ON DELETE RESTRICT ON UPDATE RESTRICT, - uri_id INTEGER NOT NULL REFERENCES uris (uri_id) ON DELETE RESTRICT ON UPDATE RESTRICT - ); - - CREATE VIEW status AS - SELECT id, handle, session, timestamp, generation, code, uri - FROM events - NATURAL JOIN sessions - NATURAL JOIN uris - NATURAL JOIN codes - NATURAL JOIN generations; - ''') - - - def index1(self): - self.db.executescript(''' - CREATE UNIQUE INDEX IF NOT EXISTS sessions_index ON sessions (session); - CREATE UNIQUE INDEX IF NOT EXISTS handles_index ON sessions (handle); - CREATE UNIQUE INDEX IF NOT EXISTS uris_index ON uris (uri); - CREATE UNIQUE INDEX IF NOT EXISTS codes_index ON codes (code); - CREATE UNIQUE INDEX IF NOT EXISTS generations_index ON generations (generation); - ''') - - - def index2(self): - self.db.executescript(''' - CREATE UNIQUE INDEX IF NOT EXISTS events_index ON events (uri_id, timestamp, code_id, generation_id); - ''') - - - def string_id(self, table, value): - field = table.rstrip("s") - try: - return self.db.execute("SELECT %s_id FROM %s WHERE %s = ?" % (field, table, field), (value,)).fetchone()[0] - except: - return self.db.execute("INSERT INTO %s (%s) VALUES (?)" % (table, field), (value,)).lastrowid - - - def parse_xml(self): - sys.stderr.write("\r%s %d/%d/%d...%s " % ("|\\-/"[self.current & 3], - self.current, self.parsed, self.total, self.handle)) - if self.db.execute("SELECT handle FROM sessions WHERE handle = ?", (self.handle,)).fetchone(): - return - xml = self.read_xml() - with self.db: - session_id = self.db.execute("INSERT INTO sessions (session, handle) VALUES (strftime('%s', ?), ?)", - (xml.get("date"), self.handle)).lastrowid - self.db.executemany("INSERT INTO events (session_id, timestamp, generation_id, code_id, uri_id) " - "VALUES (?, strftime('%s', ?), ?, ?, ?)", - ((session_id, - x.get("timestamp"), - self.string_id("generations", x.get("generation", "none")), - self.string_id("codes", x.get("status")), - self.string_id("uris", x.text.strip())) - for x in xml.findall("validation_status"))) - self.parsed += 1 + @staticmethod + def main(): + parser = argparse.ArgumentParser( + description = __doc__, + formatter_class = argparse.ArgumentDefaultsHelpFormatter) + group = parser.add_mutually_exclusive_group(required = True) + group.add_argument("--mailbox", "--mb", + help = "Maildir mailbox containing rcynic XML output") + group.add_argument("--tarballs", + help = "directory tree of tar files containing containing rcynic XML output") + parser.add_argument("--database", "--db", + default = "validation-status-sql.db", + help = "SQLite3 database") + parser.add_argument("--path-within-tarball", + default = "var/rcynic/data/rcynic.xml", + help = "rcynic.xml path name within tarball(s)") + parser.add_argument("--tar-extensions", nargs = "+", + default = ".tar .tar.gz .tgz .tar.bz2 .tbz .tar.xz .txz".split(), + help = "extensions to recognize as indicating tar files") + args = parser.parse_args() + if args.mailbox: + ParserMailbox(args) + else: + ParserTarball(args) + + def __init__(self, args): + self.args = args + self.init_sql() + self.init_hook() + self.index1() + self.parsed = 1 + for self.current, self.iterval in enumerate(self.iterator, 1): + self.parse_xml() + if self.parsed > 1: + sys.stderr.write("\n") + self.index2() + self.db.close() + + + def init_sql(self): + creating = not os.path.exists(self.args.database) + self.db = sqlite3.connect(self.args.database) + self.db.text_factory = str + self.db.executescript(''' + PRAGMA foreign_keys = off; + PRAGMA synchronous = off; + PRAGMA count_changes = off; + ''') + + if creating: + self.db.executescript(''' + CREATE TABLE sessions ( + session_id INTEGER PRIMARY KEY NOT NULL, + session DATETIME NOT NULL, + handle TEXT NOT NULL + ); + + CREATE TABLE uris ( + uri_id INTEGER PRIMARY KEY NOT NULL, + uri TEXT NOT NULL + ); + + CREATE TABLE codes ( + code_id INTEGER PRIMARY KEY NOT NULL, + code TEXT NOT NULL + ); + + CREATE TABLE generations ( + generation_id INTEGER PRIMARY KEY NOT NULL, + generation TEXT NOT NULL + ); + + CREATE TABLE events ( + id INTEGER PRIMARY KEY NOT NULL, + timestamp DATETIME NOT NULL, + session_id INTEGER NOT NULL REFERENCES sessions (session_id) ON DELETE RESTRICT ON UPDATE RESTRICT, + generation_id INTEGER NOT NULL REFERENCES generations (generation_id) ON DELETE RESTRICT ON UPDATE RESTRICT, + code_id INTEGER NOT NULL REFERENCES codes (code_id) ON DELETE RESTRICT ON UPDATE RESTRICT, + uri_id INTEGER NOT NULL REFERENCES uris (uri_id) ON DELETE RESTRICT ON UPDATE RESTRICT + ); + + CREATE VIEW status AS + SELECT id, handle, session, timestamp, generation, code, uri + FROM events + NATURAL JOIN sessions + NATURAL JOIN uris + NATURAL JOIN codes + NATURAL JOIN generations; + ''') + + + def index1(self): + self.db.executescript(''' + CREATE UNIQUE INDEX IF NOT EXISTS sessions_index ON sessions (session); + CREATE UNIQUE INDEX IF NOT EXISTS handles_index ON sessions (handle); + CREATE UNIQUE INDEX IF NOT EXISTS uris_index ON uris (uri); + CREATE UNIQUE INDEX IF NOT EXISTS codes_index ON codes (code); + CREATE UNIQUE INDEX IF NOT EXISTS generations_index ON generations (generation); + ''') + + + def index2(self): + self.db.executescript(''' + CREATE UNIQUE INDEX IF NOT EXISTS events_index ON events (uri_id, timestamp, code_id, generation_id); + ''') + + + def string_id(self, table, value): + field = table.rstrip("s") + try: + return self.db.execute("SELECT %s_id FROM %s WHERE %s = ?" % (field, table, field), (value,)).fetchone()[0] + except: + return self.db.execute("INSERT INTO %s (%s) VALUES (?)" % (table, field), (value,)).lastrowid + + + def parse_xml(self): + sys.stderr.write("\r%s %d/%d/%d...%s " % ("|\\-/"[self.current & 3], + self.current, self.parsed, self.total, self.handle)) + if self.db.execute("SELECT handle FROM sessions WHERE handle = ?", (self.handle,)).fetchone(): + return + xml = self.read_xml() + with self.db: + session_id = self.db.execute("INSERT INTO sessions (session, handle) VALUES (strftime('%s', ?), ?)", + (xml.get("date"), self.handle)).lastrowid + self.db.executemany("INSERT INTO events (session_id, timestamp, generation_id, code_id, uri_id) " + "VALUES (?, strftime('%s', ?), ?, ?, ?)", + ((session_id, + x.get("timestamp"), + self.string_id("generations", x.get("generation", "none")), + self.string_id("codes", x.get("status")), + self.string_id("uris", x.text.strip())) + for x in xml.findall("validation_status"))) + self.parsed += 1 class ParserTarball(Parser): - def init_hook(self): - self.total = 0 - for fn in self.iter_tarball_names(): - self.total += 1 - self.iterator = self.iter_tarball_names() + def init_hook(self): + self.total = 0 + for fn in self.iter_tarball_names(): + self.total += 1 + self.iterator = self.iter_tarball_names() - @property - def handle(self): - return self.iterval + @property + def handle(self): + return self.iterval - def read_xml(self): - return lxml.etree.ElementTree( - file = subprocess.Popen(("tar", "Oxf", self.iterval, self.args.path_within_tarball), - stdout = subprocess.PIPE).stdout).getroot() + def read_xml(self): + return lxml.etree.ElementTree( + file = subprocess.Popen(("tar", "Oxf", self.iterval, self.args.path_within_tarball), + stdout = subprocess.PIPE).stdout).getroot() - def iter_tarball_names(self): - if os.path.isdir(self.args.tarballs): - for root, dirs, files in os.walk(self.args.tarballs): - for fn in files: - if any(fn.endswith(ext) for ext in self.args.tar_extensions): - yield os.path.join(root, fn) - else: - yield self.args.tarballs + def iter_tarball_names(self): + if os.path.isdir(self.args.tarballs): + for root, dirs, files in os.walk(self.args.tarballs): + for fn in files: + if any(fn.endswith(ext) for ext in self.args.tar_extensions): + yield os.path.join(root, fn) + else: + yield self.args.tarballs class ParserMailbox(Parser): - def init_hook(self): - self.mb = mailbox.Maildir(self.args.mailbox, factory = None, create = False) - self.total = len(self.mb) - self.iterator = self.mb.iterkeys() + def init_hook(self): + self.mb = mailbox.Maildir(self.args.mailbox, factory = None, create = False) + self.total = len(self.mb) + self.iterator = self.mb.iterkeys() - @property - def handle(self): - return self.mb[self.iterval].get("Message-ID") + @property + def handle(self): + return self.mb[self.iterval].get("Message-ID") - def read_xml(self): - return lxml.etree.XML(self.mb[self.iterval].get_payload()) + def read_xml(self): + return lxml.etree.XML(self.mb[self.iterval].get_payload()) if __name__ == "__main__": - try: - Parser.main() - except KeyboardInterrupt: - pass - + try: + Parser.main() + except KeyboardInterrupt: + pass |