aboutsummaryrefslogtreecommitdiff
path: root/potpourri/validation-status-sql.py
diff options
context:
space:
mode:
authorRob Austein <sra@hactrn.net>2014-08-20 03:27:42 +0000
committerRob Austein <sra@hactrn.net>2014-08-20 03:27:42 +0000
commit63b0f105d3f1efdf183221db9af88778d6253698 (patch)
tree3ca601779301e61f081f8d274165fb14979062df /potpourri/validation-status-sql.py
parente00b9b50c59999b3c30f72ae135f19e8acf2e954 (diff)
Pull from trunk.
svn path=/branches/tk705/; revision=5929
Diffstat (limited to 'potpourri/validation-status-sql.py')
-rwxr-xr-xpotpourri/validation-status-sql.py272
1 files changed, 158 insertions, 114 deletions
diff --git a/potpourri/validation-status-sql.py b/potpourri/validation-status-sql.py
index 62e3089e..90d66435 100755
--- a/potpourri/validation-status-sql.py
+++ b/potpourri/validation-status-sql.py
@@ -2,7 +2,7 @@
# $Id$
#
-# Copyright (C) 2013-2014 2014 Dragon Research Labs ("DRL")
+# Copyright (C) 2013-2014 Dragon Research Labs ("DRL")
# Portions copyright (C) 2011-2012 Internet Systems Consortium ("ISC")
#
# Permission to use, copy, modify, and distribute this software for any
@@ -32,120 +32,164 @@ import argparse
import lxml.etree
import subprocess
-parser = argparse.ArgumentParser(
- description = __doc__,
- formatter_class = argparse.ArgumentDefaultsHelpFormatter)
-group = parser.add_mutually_exclusive_group(required = True)
-group.add_argument("--mailbox", "--mb",
- help = "Maildir mailbox containing rcynic XML output")
-group.add_argument("--tarballs",
- help = "directory tree of tar files containing containing rcynic XML output")
-parser.add_argument("--database", "--db",
- default = "validation-status-sql.db",
- help = "name for SQLite3 database")
-parser.add_argument("--path-within-tarball",
- default = "var/rcynic/data/rcynic.xml",
- help = "name of file to extract from tarball(s)")
-parser.add_argument("--tar-extensions", nargs = "+",
- default = ".tar .tar.gz .tgz .tar.bz2 .tbz .tar.xz .txz".split(),
- help = "extensions to recognize as indicating tar files")
-args = parser.parse_args()
-
-creating = not os.path.exists(args.database)
-db = sqlite3.connect(args.database)
-db.text_factory = str
-db.execute("PRAGMA foreign_keys = on")
-
-if creating:
- db.executescript('''
- CREATE TABLE sessions (
- id INTEGER PRIMARY KEY NOT NULL,
- session DATETIME UNIQUE NOT NULL,
- filename TEXT UNIQUE);
-
- CREATE TABLE uris (
- id INTEGER PRIMARY KEY NOT NULL,
- uri TEXT UNIQUE NOT NULL);
-
- CREATE TABLE codes (
- id INTEGER PRIMARY KEY NOT NULL,
- code TEXT UNIQUE NOT NULL);
-
- CREATE TABLE generations (
- id INTEGER PRIMARY KEY NOT NULL,
- generation TEXT UNIQUE);
-
- CREATE TABLE events (
- id INTEGER PRIMARY KEY NOT NULL,
- timestamp DATETIME NOT NULL,
- session_id INTEGER NOT NULL REFERENCES sessions(id) ON DELETE RESTRICT ON UPDATE RESTRICT,
- generation_id INTEGER NOT NULL REFERENCES generations(id) ON DELETE RESTRICT ON UPDATE RESTRICT,
- code_id INTEGER NOT NULL REFERENCES codes(id) ON DELETE RESTRICT ON UPDATE RESTRICT,
- uri_id INTEGER NOT NULL REFERENCES uris(id) ON DELETE RESTRICT ON UPDATE RESTRICT,
- UNIQUE (timestamp, generation_id, code_id, uri_id));
-
- CREATE VIEW status AS
- SELECT events.id, session, timestamp, generation, uri, code
- FROM events
- JOIN sessions ON sessions.id = events.session_id
- JOIN uris ON uris.id = events.uri_id
- JOIN codes ON codes.id = events.code_id
- JOIN generations ON generations.id = events.generation_id;
- ''')
-
-
-def string_id(table, value):
- field = table.rstrip("s")
- try:
- if value is None:
- return db.execute("SELECT id FROM %s WHERE %s IS NULL" % (table, field)).fetchone()[0]
+class Parser(object):
+
+ @staticmethod
+ def main():
+ parser = argparse.ArgumentParser(
+ description = __doc__,
+ formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+ group = parser.add_mutually_exclusive_group(required = True)
+ group.add_argument("--mailbox", "--mb",
+ help = "Maildir mailbox containing rcynic XML output")
+ group.add_argument("--tarballs",
+ help = "directory tree of tar files containing containing rcynic XML output")
+ parser.add_argument("--database", "--db",
+ default = "validation-status-sql.db",
+ help = "SQLite3 database")
+ parser.add_argument("--path-within-tarball",
+ default = "var/rcynic/data/rcynic.xml",
+ help = "rcynic.xml path name within tarball(s)")
+ parser.add_argument("--tar-extensions", nargs = "+",
+ default = ".tar .tar.gz .tgz .tar.bz2 .tbz .tar.xz .txz".split(),
+ help = "extensions to recognize as indicating tar files")
+ args = parser.parse_args()
+ if args.mailbox:
+ ParserMailbox(args)
+ else:
+ ParserTarball(args)
+
+ def __init__(self, args):
+ self.args = args
+ self.init_sql()
+ self.init_hook()
+ self.parsed = 1
+ for self.current, self.iterval in enumerate(self.iterator, 1):
+ self.parse_xml()
+ if self.parsed > 1:
+ sys.stderr.write("\n")
+ db.close()
+
+
+ def init_sql(self):
+ creating = not os.path.exists(self.args.database)
+ self.db = sqlite3.connect(self.args.database)
+ self.db.text_factory = str
+ self.db.execute("PRAGMA foreign_keys = on")
+
+ if creating:
+ self.db.executescript('''
+ CREATE TABLE sessions (
+ id INTEGER PRIMARY KEY NOT NULL,
+ session DATETIME UNIQUE NOT NULL,
+ handle TEXT UNIQUE NOT NULL);
+
+ CREATE TABLE uris (
+ id INTEGER PRIMARY KEY NOT NULL,
+ uri TEXT UNIQUE NOT NULL);
+
+ CREATE TABLE codes (
+ id INTEGER PRIMARY KEY NOT NULL,
+ code TEXT UNIQUE NOT NULL);
+
+ CREATE TABLE generations (
+ id INTEGER PRIMARY KEY NOT NULL,
+ generation TEXT UNIQUE);
+
+ CREATE TABLE events (
+ id INTEGER PRIMARY KEY NOT NULL,
+ timestamp DATETIME NOT NULL,
+ session_id INTEGER NOT NULL REFERENCES sessions(id) ON DELETE RESTRICT ON UPDATE RESTRICT,
+ generation_id INTEGER NOT NULL REFERENCES generations(id) ON DELETE RESTRICT ON UPDATE RESTRICT,
+ code_id INTEGER NOT NULL REFERENCES codes(id) ON DELETE RESTRICT ON UPDATE RESTRICT,
+ uri_id INTEGER NOT NULL REFERENCES uris(id) ON DELETE RESTRICT ON UPDATE RESTRICT,
+ UNIQUE (timestamp, generation_id, code_id, uri_id));
+
+ CREATE VIEW status AS
+ SELECT events.id, handle, session, timestamp, generation, uri, code
+ FROM events
+ JOIN sessions ON sessions.id = events.session_id
+ JOIN uris ON uris.id = events.uri_id
+ JOIN codes ON codes.id = events.code_id
+ JOIN generations ON generations.id = events.generation_id;
+ ''')
+
+ def string_id(self, table, value):
+ field = table.rstrip("s")
+ try:
+ if value is None:
+ return self.db.execute("SELECT id FROM %s WHERE %s IS NULL" % (table, field)).fetchone()[0]
+ else:
+ return self.db.execute("SELECT id FROM %s WHERE %s = ?" % (table, field), (value,)).fetchone()[0]
+ except:
+ return self.db.execute("INSERT INTO %s (%s) VALUES (?)" % (table, field), (value,)).lastrowid
+
+
+ def parse_xml(self):
+ sys.stderr.write("\r%s %d/%d/%d...%s " % ("|\\-/"[self.current & 3],
+ self.current, self.parsed, self.total, self.handle))
+ if self.db.execute("SELECT handle FROM sessions WHERE handle = ?", (self.handle,)).fetchone():
+ return
+ xml = self.read_xml()
+ with self.db:
+ session_id = self.db.execute("INSERT INTO sessions (session, handle) VALUES (datetime(?), ?)",
+ (xml.get("date"), self.handle)).lastrowid
+ self.db.executemany("INSERT INTO events (session_id, timestamp, generation_id, code_id, uri_id) "
+ "VALUES (?, datetime(?), ?, ?, ?)",
+ ((session_id,
+ x.get("timestamp"),
+ self.string_id("generations", x.get("generation")),
+ self.string_id("codes", x.get("status")),
+ self.string_id("uris", x.text.strip()))
+ for x in xml.findall("validation_status")))
+ self.parsed += 1
+
+
+class ParserTarball(Parser):
+
+ def init_hook(self):
+ self.total = 0
+ for fn in self.iter_tarball_names():
+ self.total += 1
+ self.iterator = self.iter_tarball_names()
+
+ @property
+ def handle(self):
+ return self.iterval
+
+ def read_xml(self):
+ return lxml.etree.ElementTree(
+ file = subprocess.Popen(("tar", "Oxf", self.iterval, self.args.path_within_tarball),
+ stdout = subprocess.PIPE).stdout).getroot()
+
+ def iter_tarball_names(self):
+ if os.path.isdir(self.args.tarballs):
+ for root, dirs, files in os.walk(self.args.tarballs):
+ for fn in files:
+ if any(fn.endswith(ext) for ext in self.args.tar_extensions):
+ yield os.path.join(root, fn)
else:
- return db.execute("SELECT id FROM %s WHERE %s = ?" % (table, field), (value,)).fetchone()[0]
- except:
- return db.execute("INSERT INTO %s (%s) VALUES (?)" % (table, field), (value,)).lastrowid
+ yield self.args.tarballs
+
+
+class ParserMailbox(Parser):
+ def init_hook(self):
+ self.mb = mailbox.Maildir(self.args.mailbox, factory = None, create = False)
+ self.total = len(self.mb)
+ self.iterator = self.mb.iterkeys()
-def parse_xml(xml, fn = None):
+ @property
+ def handle(self):
+ return self.mb[self.iterval].get("Message-ID")
+
+ def read_xml(self):
+ return lxml.etree.XML(self.mb[self.iterval].get_payload())
+
+
+if __name__ == "__main__":
try:
- session_id = db.execute("INSERT INTO sessions (session, filename) VALUES (datetime(?), ?)",
- (xml.get("date"), fn)).lastrowid
- except sqlite3.IntegrityError:
- return
-
- with db:
- db.executemany("INSERT INTO events (session_id, timestamp, generation_id, code_id, uri_id) VALUES (?, datetime(?), ?, ?, ?)",
- ((session_id,
- x.get("timestamp"),
- string_id("generations", x.get("generation")),
- string_id("codes", x.get("status")),
- string_id("uris", x.text.strip()))
- for x in xml.findall("validation_status")))
-
-
-def parse_tarball(fn):
- if db.execute("SELECT filename FROM sessions WHERE filename = ?", (fn,)).fetchone():
- return
- print "Processing", fn
- pipe = subprocess.Popen(("tar", "Oxf", fn, args.path_within_tarball), stdout = subprocess.PIPE).stdout
- parse_xml(lxml.etree.ElementTree(file = pipe).getroot(), fn)
-
-
-if args.mailbox:
- mb = mailbox.Maildir(args.mailbox, factory = None, create = False)
- for i, key in enumerate(mb.iterkeys(), 1):
- sys.stderr.write("\r%s %d/%d..." % ("|\\-/"[i & 3], i, len(mb)))
- parse_xml(lxml.etree.XML(mb[key].get_payload()))
- sys.stderr.write("\n")
-
-elif not os.path.isdir(args.tarballs):
- parse_tarball(args.tarballs)
-
-else:
- if os.path.isdir(args.tarballs):
- for root, dirs, files in os.walk(args.tarballs):
- for fn in files:
- if any(fn.endswith(ext) for ext in args.tar_extensions):
- parse_tarball(os.path.join(root, fn))
-
-
-db.close()
+ Parser.main()
+ except KeyboardInterrupt:
+ pass
+